BitMagic-C++
|
Utility to compress test sets of inverted lists. More...
#include <iostream>
#include <chrono>
#include <thread>
#include <time.h>
#include <stdio.h>
#include <cstdlib>
#include <vector>
#include <map>
#include "bm.h"
#include "bmalgo.h"
#include "bmserial.h"
#include "bmsparsevec.h"
#include "bmsparsevec_compr.h"
#include "bmsparsevec_algo.h"
#include "bmsparsevec_serial.h"
#include "bmalgo_similarity.h"
#include "bmdbg.h"
#include "bmtimer.h"
#include "bmundef.h"
Go to the source code of this file.
Typedefs | |
typedef bm::sparse_vector< unsigned, bm::bvector<> > | sparse_vector_u32 |
typedef bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > | rsc_sparse_vector_u32 |
Functions | |
static void | show_help () |
More... | |
static int | parse_args (int argc, char *argv[]) |
More... | |
template<class VT > | |
int | io_read_u32_coll (std::ifstream &fin, VT &vec) |
Read 32-bit vector size-prefix format (length:0, 1, 2, 3, ....) More... | |
template<typename VT > | |
int | validate_inp_vec (const VT &vec, typename VT::value_type &min_delta, typename VT::value_type &min_delta_cnt) |
Check if input vector is monotonically sorted (true inverted list); along the way it computes a minimal delta between values. More... | |
template<typename VT , typename BV > | |
int | compare_vect (const VT &vec, const BV &bv) |
Verification check if integer vector is equivalent to a bit-vector. More... | |
template<typename BV > | |
bool | is_super_sparse (const BV &bv) |
Debug utility to detect super sparse bit-vectors which probably get bad compression rate. More... | |
template<typename VT > | |
bool | write_as_bvector (std::ofstream &bv_file, const VT &vec, bm::serializer< bm::bvector<> > &bvs, bm::serializer< bm::bvector<> >::buffer &sbuf) |
convert vector into bit-vector and append to the file More... | |
template<typename VT > | |
void | write_as_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< sparse_vector_u32 > &sv_lay) |
convert vector into delta coded bit-transposed vector and append to the file More... | |
template<typename VT > | |
void | write_as_rsc_svector (std::ofstream &sv_file, const VT &vec, unsigned min_delta, bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > &sv_lay) |
convert vector into delta coded bit-transposed vector and append to the file More... | |
static void | compress_inv_dump_file (const std::string &fname, const std::string &bv_out_fname, const std::string &sv_out_fname) |
read the input collection sequence, write using various compression schemes More... | |
static int | read_bvector (std::ifstream &bv_file, bm::bvector<> &bv, bm::serializer< bm::bvector<> >::buffer &sbuf) |
read and deserialize bit-vector from the dump file More... |
static void | verify_inv_dump_file (const std::string &fname, const std::string &bv_in_fname) |
read the input collection sequence and dump file, verify correctness More... | |
static void | decode_test_dump_file (const std::string &bv_in_fname) |
read and decode the compressed dump file More... | |
int | main (int argc, char *argv[]) |
More... | |
Variables | |
std::string | bv_in_file |
More... | |
std::string | bv_out_file |
More... | |
std::string | sv_in_file |
More... | |
std::string | sv_out_file |
More... | |
std::string | u32_in_file |
More... | |
std::string | u32_out_file |
bool | is_diag = false |
More... | |
bool | is_timing = false |
More... | |
bool | is_verify = false |
More... | |
bool | is_silent = false |
More... | |
bool | is_decode = false |
More... | |
unsigned | c_level = bm::set_compression_default |
More... | |
bm::chrono_taker ::duration_map_type | timing_map |
More... | |
Utility to compress test sets of inverted lists.
Definition in file inv_list.cpp.
int compare_vect | ( | const VT & | vec, |
const BV & | bv | ||
) |
Verification check if integer vector is equivalent to a bit-vector.
Definition at line 305 of file inv_list.cpp.
Referenced by verify_inv_dump_file().
|
static |
read the input collection sequence, write using various compression schemes
Definition at line 460 of file inv_list.cpp.
References bm::serializer< BV >::byte_order_serialization(), c_level, bm::serializer< BV >::gap_length_serialization(), io_read_u32_coll(), is_silent, bm::serializer< BV >::set_compression_level(), bm::sparse_vector_serial_layout< SV >::size(), timing_map, validate_inp_vec(), write_as_bvector(), and write_as_rsc_svector().
Referenced by main().
|
static |
read and decode the compressed dump file
Definition at line 763 of file inv_list.cpp.
References is_silent, read_bvector(), and timing_map.
Referenced by main().
int io_read_u32_coll | ( | std::ifstream & | fin, |
VT & | vec | ||
) |
Read 32-bit vector size-prefix format (length:0, 1, 2, 3, ....)
Definition at line 248 of file inv_list.cpp.
Referenced by compress_inv_dump_file(), and verify_inv_dump_file().
bool is_super_sparse | ( | const BV & | bv | ) |
Debug utility to detect super sparse bit-vectors which probably get bad compression rate.
Definition at line 325 of file inv_list.cpp.
int main | ( | int | argc, |
char * | argv[] | ||
) |
Definition at line 817 of file inv_list.cpp.
References bv_in_file, bv_out_file, compress_inv_dump_file(), decode_test_dump_file(), is_decode, is_timing, is_verify, parse_args(), bm::chrono_taker< TOut >::print_duration_map(), show_help(), sv_out_file, timing_map, u32_in_file, and verify_inv_dump_file().
|
static |
Definition at line 103 of file inv_list.cpp.
References bv_in_file, bv_out_file, c_level, is_decode, is_diag, is_silent, is_timing, is_verify, show_help(), sv_in_file, sv_out_file, and u32_in_file.
Referenced by main().
|
static |
read and deserialize bit-vector from the dump file
Definition at line 650 of file inv_list.cpp.
References bm::deserialize().
Referenced by decode_test_dump_file(), and verify_inv_dump_file().
|
static |
Definition at line 62 of file inv_list.cpp.
Referenced by main(), and parse_args().
int validate_inp_vec | ( | const VT & | vec, |
typename VT::value_type & | min_delta, | ||
typename VT::value_type & | min_delta_cnt | ||
) |
Check if input vector is monotonically sorted (true inverted list); along the way it computes a minimal delta between values.
Definition at line 273 of file inv_list.cpp.
Referenced by compress_inv_dump_file().
|
static |
read the input collection sequence and dump file, verify correctness
Definition at line 677 of file inv_list.cpp.
References compare_vect(), io_read_u32_coll(), is_silent, read_bvector(), and timing_map.
Referenced by main().
bool write_as_bvector | ( | std::ofstream & | bv_file, |
const VT & | vec, | ||
bm::serializer< bm::bvector<> > & | bvs, | ||
bm::serializer< bm::bvector<> >::buffer & | sbuf | ||
) |
convert vector into bit-vector and append to the file
Definition at line 343 of file inv_list.cpp.
References BM_DECLARE_TEMP_BLOCK, bm::BM_SORTED, bm::bvector< Alloc >::optimize(), and bm::bvector< Alloc >::set().
Referenced by compress_inv_dump_file().
void write_as_rsc_svector | ( | std::ofstream & | sv_file, |
const VT & | vec, | ||
unsigned | min_delta, | ||
bm::sparse_vector_serial_layout< rsc_sparse_vector_u32 > & | sv_lay | ||
) |
convert vector into delta coded bit-transposed vector and append to the file
Definition at line 409 of file inv_list.cpp.
References bm::sparse_vector< unsigned, bm::bvector<> >::back_insert_iterator, BM_DECLARE_TEMP_BLOCK, bm::sparse_vector_serial_layout< SV >::data(), bm::sparse_vector< Val, BV >::get_back_inserter(), bm::rsc_sparse_vector< Val, SV >::load_from(), bm::rsc_sparse_vector< Val, SV >::optimize(), bm::sparse_vector_serial_layout< SV >::size(), bm::sparse_vector_serialize(), and bm::use_null.
Referenced by compress_inv_dump_file().
void write_as_svector | ( | std::ofstream & | sv_file, |
const VT & | vec, | ||
unsigned | min_delta, | ||
bm::sparse_vector_serial_layout< sparse_vector_u32 > & | sv_lay | ||
) |
convert vector into delta coded bit-transposed vector and append to the file
Definition at line 366 of file inv_list.cpp.
References bm::sparse_vector< unsigned, bm::bvector<> >::back_insert_iterator, BM_DECLARE_TEMP_BLOCK, bm::sparse_vector_serial_layout< SV >::data(), bm::sparse_vector< Val, BV >::get_back_inserter(), bm::sparse_vector< Val, BV >::optimize(), bm::sparse_vector_serial_layout< SV >::size(), and bm::sparse_vector_serialize().