BitMagic-C++
Typedefs | Functions | Variables
inv_list.cpp File Reference

Utility to compress test sets of inverted lists. More...

#include <iostream>
#include <chrono>
#include <thread>
#include <time.h>
#include <stdio.h>
#include <cstdlib>
#include <vector>
#include <map>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmsparsevec.h"
#include "bmsparsevec_compr.h"
#include "bmsparsevec_algo.h"
#include "bmsparsevec_serial.h"
#include "bmalgo_similarity.h"
#include "bmdbg.h"
#include "bmtimer.h"
#include "bmundef.h"
Include dependency graph for inv_list.cpp:

Go to the source code of this file.

Typedefs

typedef bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
 
typedef bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32
 

Functions

static void show_help ()
  More...
 
static int parse_args (int argc, char *argv[])
  More...
 
template<class VT >
int io_read_u32_coll (std::ifstream &fin, VT &vec)
 Read a 32-bit vector in size-prefixed format (length: 0, 1, 2, 3, ...) More...
 
template<typename VT >
int validate_inp_vec (const VT &vec, typename VT::value_type &min_delta, typename VT::value_type &min_delta_cnt)
 Check if the input vector is monotonically sorted (a true inverted list); along the way it computes the minimal delta between values. More...
 
template<typename VT , typename BV >
int compare_vect (const VT &vec, const BV &bv)
 Verification check if integer vector is equivalent to a bit-vector. More...
 
template<typename BV >
bool is_super_sparse (const BV &bv)
 Debug utility to detect super-sparse bit-vectors, which probably get a bad compression rate. More...
 
template<typename VT >
bool write_as_bvector (std::ofstream &bv_file, const VT &vec, bm::serializer< bm::bvector<> > &bvs, bm::serializer< bm::bvector<> >::buffer &sbuf)
 convert vector into bit-vector and append to the file More...
 
template<typename VT >
void write_as_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< sparse_vector_u32 > &sv_lay)
 convert vector into delta coded bit-transposed vector and append to the file More...
 
template<typename VT >
void write_as_rsc_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > &sv_lay)
 convert vector into delta coded bit-transposed vector and append to the file More...
 
static void compress_inv_dump_file (const std::string &fname, const std::string &bv_out_fname, const std::string &sv_out_fname)
 read the input collection sequence, write using various compression schemes More...
 
static int read_bvector (std::ifstream &bv_file, bm::bvector<> &bv, bm::serializer< bm::bvector<> >::buffer &sbuf)
 read and deserialize bit-vector from the dump file More...
 
static void verify_inv_dump_file (const std::string &fname, const std::string &bv_in_fname)
 read the input collection sequence and dump file, verify correctness More...
 
static void decode_test_dump_file (const std::string &bv_in_fname)
 read and decode the compressed dump file More...
 
int main (int argc, char *argv[])
  More...
 

Variables

std::string bv_in_file
  More...
 
std::string bv_out_file
  More...
 
std::string sv_in_file
  More...
 
std::string sv_out_file
  More...
 
std::string u32_in_file
  More...
 
std::string u32_out_file
 
bool is_diag = false
  More...
 
bool is_timing = false
  More...
 
bool is_verify = false
  More...
 
bool is_silent = false
  More...
 
bool is_decode = false
  More...
 
unsigned c_level = bm::set_compression_default
  More...
 
bm::chrono_taker::duration_map_type timing_map
  More...
 

Detailed Description

Utility to compress test sets of inverted lists.

Definition in file inv_list.cpp.

Function Documentation

◆ compare_vect()

template<typename VT , typename BV >
int compare_vect ( const VT &  vec,
const BV &  bv 
)

Verification check if integer vector is equivalent to a bit-vector.

Examples
inv_list.cpp.

Definition at line 305 of file inv_list.cpp.

Referenced by verify_inv_dump_file().

◆ compress_inv_dump_file()

static void compress_inv_dump_file ( const std::string &  fname,
const std::string &  bv_out_fname,
const std::string &  sv_out_fname 
)
static

◆ decode_test_dump_file()

static void decode_test_dump_file ( const std::string &  bv_in_fname)
static

read and decode the compressed dump file

Examples
inv_list.cpp.

Definition at line 763 of file inv_list.cpp.

References is_silent, read_bvector(), and timing_map.

Referenced by main().

◆ io_read_u32_coll()

template<class VT >
int io_read_u32_coll ( std::ifstream &  fin,
VT &  vec 
)

Read a 32-bit vector in size-prefixed format (length: 0, 1, 2, 3, ...)

Examples
inv_list.cpp.

Definition at line 248 of file inv_list.cpp.

Referenced by compress_inv_dump_file(), and verify_inv_dump_file().

◆ is_super_sparse()

template<typename BV >
bool is_super_sparse ( const BV &  bv)

Debug utility to detect super-sparse bit-vectors, which probably get a bad compression rate.

Examples
inv_list.cpp.

Definition at line 325 of file inv_list.cpp.

◆ main()

int main ( int  argc,
char *  argv[] 
)

◆ parse_args()

static int parse_args ( int  argc,
char *  argv[] 
)
static

◆ read_bvector()

static int read_bvector ( std::ifstream &  bv_file,
bm::bvector<> &  bv,
bm::serializer< bm::bvector<> >::buffer &  sbuf 
)
static

read and deserialize bit-vector from the dump file

Examples
inv_list.cpp.

Definition at line 650 of file inv_list.cpp.

References bm::deserialize().

Referenced by decode_test_dump_file(), and verify_inv_dump_file().

◆ show_help()

static void show_help ( )
static
Examples
inv_list.cpp.

Definition at line 62 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ validate_inp_vec()

template<typename VT >
int validate_inp_vec ( const VT &  vec,
typename VT::value_type &  min_delta,
typename VT::value_type &  min_delta_cnt 
)

Check if the input vector is monotonically sorted (a true inverted list); along the way it computes the minimal delta between values.

Examples
inv_list.cpp.

Definition at line 273 of file inv_list.cpp.

Referenced by compress_inv_dump_file().

◆ verify_inv_dump_file()

static void verify_inv_dump_file ( const std::string &  fname,
const std::string &  bv_in_fname 
)
static

read the input collection sequence and dump file, verify correctness

Examples
inv_list.cpp.

Definition at line 677 of file inv_list.cpp.

References compare_vect(), io_read_u32_coll(), is_silent, read_bvector(), and timing_map.

Referenced by main().

◆ write_as_bvector()

template<typename VT >
bool write_as_bvector ( std::ofstream &  bv_file,
const VT &  vec,
bm::serializer< bm::bvector<> > &  bvs,
bm::serializer< bm::bvector<> >::buffer &  sbuf 
)

convert vector into bit-vector and append to the file

Returns
true if vector was detected as very low cardinality
Examples
inv_list.cpp.

Definition at line 343 of file inv_list.cpp.

References BM_DECLARE_TEMP_BLOCK, bm::BM_SORTED, bm::bvector< Alloc >::optimize(), and bm::bvector< Alloc >::set().

Referenced by compress_inv_dump_file().

◆ write_as_rsc_svector()

template<typename VT >
void write_as_rsc_svector ( std::ofstream &  sv_file,
const VT &  vec,
unsigned  min_delta,
bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > &  sv_lay 
)

◆ write_as_svector()

template<typename VT >
void write_as_svector ( std::ofstream &  sv_file,
const VT &  vec,
unsigned  min_delta,
bm::sparse_vector_serial_layout< sparse_vector_u32 > &  sv_lay 
)