BitMagic-C++
Typedefs | Functions | Variables
inv_list.cpp File Reference

Utility to compress test sets of inverted lists. More...

#include <iostream>
#include <chrono>
#include <thread>
#include <time.h>
#include <stdio.h>
#include <cstdlib>
#include <vector>
#include <map>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmsparsevec.h"
#include "bmsparsevec_compr.h"
#include "bmsparsevec_algo.h"
#include "bmsparsevec_serial.h"
#include "bmalgo_similarity.h"
#include "bmdbg.h"
#include "bmtimer.h"
#include "bmundef.h"
Include dependency graph for inv_list.cpp:

Go to the source code of this file.

Typedefs

typedef bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
 
typedef bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32
 

Functions

static void show_help ()
 
static int parse_args (int argc, char *argv[])
 
template<class VT >
int io_read_u32_coll (std::ifstream &fin, VT &vec)
 Read a 32-bit vector in size-prefixed format (length: 0, 1, 2, 3, ...) More...
 
template<typename VT >
int validate_inp_vec (const VT &vec, typename VT::value_type &min_delta, typename VT::value_type &min_delta_cnt)
 Check if input vector is monotonically sorted (true inverted list); along the way it computes a minimal delta between values. More...
 
template<typename VT , typename BV >
int compare_vect (const VT &vec, const BV &bv)
 Verification check if integer vector is equivalent to a bit-vector. More...
 
template<typename BV >
bool is_super_sparse (const BV &bv)
 Debug utility to detect super sparse bit-vectors which probably get bad compression rate. More...
 
template<typename VT >
bool write_as_bvector (std::ofstream &bv_file, const VT &vec, bm::serializer< bm::bvector<> > &bvs, bm::serializer< bm::bvector<> >::buffer &sbuf)
 convert vector into bit-vector and append to the file More...
 
template<typename VT >
void write_as_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< sparse_vector_u32 > &sv_lay)
 convert vector into delta coded bit-transposed vector and append to the file More...
 
template<typename VT >
void write_as_rsc_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > &sv_lay)
 convert vector into delta coded bit-transposed vector and append to the file More...
 
static void compress_inv_dump_file (const std::string &fname, const std::string &bv_out_fname, const std::string &sv_out_fname)
 read the input collection sequence, write using various compression schemes More...
 
static int read_bvector (std::ifstream &bv_file, bm::bvector<> &bv, bm::serializer< bm::bvector<> >::buffer &sbuf)
 read and deserialize bit-vector from the dump file More...
 
static void verify_inv_dump_file (const std::string &fname, const std::string &bv_in_fname)
 read the input collection sequence and dump file, verify correctness More...
 
static void decode_test_dump_file (const std::string &bv_in_fname)
 read and decode the compressed dump file More...
 
int main (int argc, char *argv[])
 

Variables

std::string bv_in_file
 
std::string bv_out_file
 
std::string sv_in_file
 
std::string sv_out_file
 
std::string u32_in_file
 
std::string u32_out_file
 
bool is_diag = false
 
bool is_timing = false
 
bool is_verify = false
 
bool is_silent = false
 
bool is_decode = false
 
unsigned c_level = bm::set_compression_default
 
bm::chrono_taker::duration_map_type timing_map
 

Detailed Description

Utility to compress test sets of inverted lists.

Definition in file inv_list.cpp.

Typedef Documentation

◆ rsc_sparse_vector_u32

Examples:
inv_list.cpp.

Definition at line 239 of file inv_list.cpp.

◆ sparse_vector_u32

Examples:
inv_list.cpp.

Definition at line 238 of file inv_list.cpp.

Function Documentation

◆ compare_vect()

template<typename VT , typename BV >
int compare_vect ( const VT &  vec,
const BV &  bv 
)

Verification check if integer vector is equivalent to a bit-vector.

Examples:
inv_list.cpp.

Definition at line 305 of file inv_list.cpp.

Referenced by verify_inv_dump_file().

◆ compress_inv_dump_file()

static void compress_inv_dump_file ( const std::string &  fname,
const std::string &  bv_out_fname,
const std::string &  sv_out_fname 
)
static

◆ decode_test_dump_file()

static void decode_test_dump_file ( const std::string &  bv_in_fname)
static

read and decode the compressed dump file

Examples:
inv_list.cpp.

Definition at line 763 of file inv_list.cpp.

References is_silent, read_bvector(), and timing_map.

Referenced by main().

◆ io_read_u32_coll()

template<class VT >
int io_read_u32_coll ( std::ifstream &  fin,
VT &  vec 
)

Read a 32-bit vector in size-prefixed format (length: 0, 1, 2, 3, ...)

Examples:
inv_list.cpp.

Definition at line 248 of file inv_list.cpp.

Referenced by compress_inv_dump_file(), and verify_inv_dump_file().

◆ is_super_sparse()

template<typename BV >
bool is_super_sparse ( const BV &  bv)

Debug utility to detect super sparse bit-vectors which probably get bad compression rate.

Examples:
inv_list.cpp.

Definition at line 325 of file inv_list.cpp.

◆ main()

int main ( int  argc,
char *  argv[] 
)

◆ parse_args()

static int parse_args ( int  argc,
char *  argv[] 
)
static
Examples:
inv_list.cpp.

Definition at line 103 of file inv_list.cpp.

References bv_in_file, bv_out_file, c_level, is_decode, is_diag, is_silent, is_timing, is_verify, show_help(), sv_in_file, sv_out_file, and u32_in_file.

Referenced by main().

◆ read_bvector()

static int read_bvector ( std::ifstream &  bv_file,
bm::bvector<> &  bv,
bm::serializer< bm::bvector<> >::buffer &  sbuf 
)
static

read and deserialize bit-vector from the dump file

Examples:
inv_list.cpp.

Definition at line 650 of file inv_list.cpp.

References bm::deserialize().

Referenced by decode_test_dump_file(), and verify_inv_dump_file().

◆ show_help()

static void show_help ( )
static
Examples:
inv_list.cpp.

Definition at line 62 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ validate_inp_vec()

template<typename VT >
int validate_inp_vec ( const VT &  vec,
typename VT::value_type &  min_delta,
typename VT::value_type &  min_delta_cnt 
)

Check if input vector is monotonically sorted (true inverted list); along the way it computes a minimal delta between values.

Examples:
inv_list.cpp.

Definition at line 273 of file inv_list.cpp.

Referenced by compress_inv_dump_file().

◆ verify_inv_dump_file()

static void verify_inv_dump_file ( const std::string &  fname,
const std::string &  bv_in_fname 
)
static

read the input collection sequence and dump file, verify correctness

Examples:
inv_list.cpp.

Definition at line 677 of file inv_list.cpp.

References compare_vect(), io_read_u32_coll(), is_silent, read_bvector(), and timing_map.

Referenced by main().

◆ write_as_bvector()

template<typename VT >
bool write_as_bvector ( std::ofstream &  bv_file,
const VT &  vec,
bm::serializer< bm::bvector<> > &  bvs,
bm::serializer< bm::bvector<> >::buffer &  sbuf 
)

convert vector into bit-vector and append to the file

Returns
true if vector was detected as very low cardinality
Examples:
inv_list.cpp.

Definition at line 343 of file inv_list.cpp.

References BM_DECLARE_TEMP_BLOCK, bm::BM_SORTED, bm::bvector< Alloc >::optimize(), and bm::bvector< Alloc >::set().

Referenced by compress_inv_dump_file().

◆ write_as_rsc_svector()

template<typename VT >
void write_as_rsc_svector ( std::ofstream &  sv_file,
const VT &  vec,
unsigned  min_delta,
bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > &  sv_lay 
)

◆ write_as_svector()

template<typename VT >
void write_as_svector ( std::ofstream &  sv_file,
const VT &  vec,
unsigned  min_delta,
bm::sparse_vector_serial_layout< sparse_vector_u32 > &  sv_lay 
)

Variable Documentation

◆ bv_in_file

std::string bv_in_file
Examples:
inv_list.cpp.

Definition at line 85 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ bv_out_file

std::string bv_out_file
Examples:
inv_list.cpp.

Definition at line 86 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ c_level

unsigned c_level = bm::set_compression_default
Examples:
inv_list.cpp.

Definition at line 99 of file inv_list.cpp.

Referenced by compress_inv_dump_file(), and parse_args().

◆ is_decode

bool is_decode = false
Examples:
inv_list.cpp.

Definition at line 97 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ is_diag

bool is_diag = false
Examples:
inv_list.cpp.

Definition at line 93 of file inv_list.cpp.

Referenced by parse_args().

◆ is_silent

bool is_silent = false

◆ is_timing

bool is_timing = false
Examples:
inv_list.cpp.

Definition at line 94 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ is_verify

bool is_verify = false
Examples:
inv_list.cpp.

Definition at line 95 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ sv_in_file

std::string sv_in_file
Examples:
inv_list.cpp.

Definition at line 88 of file inv_list.cpp.

Referenced by parse_args().

◆ sv_out_file

std::string sv_out_file
Examples:
inv_list.cpp.

Definition at line 89 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ timing_map

Examples:
inv_list.cpp.

Definition at line 242 of file inv_list.cpp.

Referenced by compress_inv_dump_file(), decode_test_dump_file(), main(), and verify_inv_dump_file().

◆ u32_in_file

std::string u32_in_file
Examples:
inv_list.cpp.

Definition at line 90 of file inv_list.cpp.

Referenced by main(), and parse_args().

◆ u32_out_file

std::string u32_out_file
Examples:
inv_list.cpp.

Definition at line 91 of file inv_list.cpp.