64 unsigned max_coll = 8000000,
65 unsigned repeat_factor=10)
71 for (
unsigned i = 10; i < max_coll; i+= (rand()&0xF))
75 case 0: str =
"AB";
break;
76 case 1: str =
"GTx";
break;
77 case 2: str =
"cnv";
break;
78 default: str =
"AbY11";
break;
80 str.append(to_string(i));
82 for (
unsigned k = 0; k < repeat_factor; ++k)
84 str_coll.emplace_back(str);
97 for (
int i = 1; i < argc; ++i)
99 std::string arg = argv[i];
100 if (arg ==
"-nodiag")
111int main(
int argc,
char *argv[])
118 std::vector<string> str_coll;
121 cout <<
"Generating the test data... " << flush;
125 cout <<
"OK" << endl;
129 cout <<
"Remapping the data to create compressed vector " << flush;
136 cout <<
"OK" << endl;
146 cout <<
"\nStatistics on generated SV:" << endl;
147 bm::print_svector_stat(cout, str_sv1);
149 cout <<
"\nStatistics on remapped/optimized SV:" << endl;
150 bm::print_svector_stat(cout, str_sv0);
151 cout << endl << endl;
156 unsigned test_runs = 10000;
157 std::vector<string> str_test_coll;
161 if (idx >= test_runs)
163 str_test_coll.push_back(str_coll[idx]);
165 assert(str_test_coll.size() == test_runs);
169 std::vector<unique_ptr<bvector_type> > res_vec1;
173 cout <<
"Running benchmark tests.." << endl;
175 for (
int pass = 0; pass < 2; pass++)
177 cout <<
"PASS = " << pass << ((pass==0) ?
" -- remap/optimized" :
" -- NOT remapped") << endl;
180 const str_sv_type* str_sv = (pass==0) ? &str_sv0 : &str_sv1;
190 const string& str = str_test_coll[i];
192 scanner.
find_eq_str(*str_sv, str.c_str(), *bv_res);
193 res_vec1.emplace_back(unique_ptr<bvector_type>(bv_res.release()));
219 pipe1.
options().batch_size = test_runs;
224 for (
size_t i = 0; i < test_runs; ++i)
226 const string& str = str_test_coll[i];
227 pipe1.
add(str.c_str());
249 pipe1_and.set_search_mask(&bv_mask);
250 pipe1_and.options().batch_size = test_runs;
253 bm::chrono_taker tt(cout,
"scanner::pipeline+MASK find_eq_str()", test_runs);
256 for (
size_t i = 0; i < test_runs; ++i)
258 const string& str = str_test_coll[i];
259 pipe1_and.add(str.c_str());
261 pipe1_and.complete();
279 pipe1.
options().batch_size = test_runs;
282 bm::chrono_taker tt(cout,
"scanner::pipeline find_eq_str()-count()", test_runs);
284 for (
size_t i = 0; i < test_runs; ++i)
286 const string& str = str_test_coll[i];
287 pipe2.add(str.c_str());
309 auto& res_vect_and = pipe1_and.get_bv_res_vector();
310 auto& cnt_vect = pipe2.get_bv_count_vector();
312 assert(res_vect.size() == cnt_vect.size());
315 size_t res_sz = res_vect.size();
316 for (
size_t i = 0; i < res_sz; ++i)
322 bool match = bv1->
equal(*bv);
325 auto c = cnt_vect[i];
326 auto cnt = bv->
count();
336 auto c1 = bv_and->
count();
337 assert(c1 == c_and); (void)c1; (void)c_and;
340 match = bv_and->
equal(bv_m);
363 pipe1.
options().batch_size = test_runs;
366 pipe3.set_or_target(&bv_or);
369 bm::chrono_taker tt(cout,
"scanner::pipeline find_eq_str()-OR()", test_runs);
371 for (
size_t i = 0; i < test_runs; ++i)
373 const string& str = str_test_coll[i];
374 pipe3.add(str.c_str());
380 bool match = bv_or.
equal(bv_or_total);
383 cerr <<
"OR vector mismatch!" << endl;
400 catch(std::exception& ex)
402 std::cerr << ex.what() << std::endl;
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Algorithms for bm::sparse_vector.
string sparse vector based on bit-transposed matrix
Timing utilities for benchmarking (internal)
pre-processor un-defines to avoid global space pollution (internal)
Bitvector Bit-vector container with runtime compression of bits.
bool equal(const bvector< Alloc > &bvect) const BMNOEXCEPT
Equal comparison with an argument bit-vector.
size_type count() const BMNOEXCEPT
population count (count of ON bits)
bm::bvector< Alloc > & bit_and(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand AND : this := bv1 AND bv2
bvector_size_type size_type
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left, right]. The interval must be inside the bvector's siz...
Utility class to collect performance measurements and statistics.
Pipeline to run multiple searches against a particular SV faster using cache optimizations.
void complete()
Prepare the pipeline for execution (resize and init internal structures). Once complete,...
bvect_vector_type & get_bv_res_vector() BMNOEXCEPT
Return results vector used for pipeline execution.
void add(const typename SV::value_type *str)
Add new search string.
run_options & options() BMNOEXCEPT
Set pipeline run options.
algorithms for sparse_vector scan/search
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
Succinct sparse vector for strings with compression using the bit-slicing (transposition) method
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
size_type size() const
return size of the vector
void remap()
Build remapping profile and re-load content to save memory.
back_insert_iterator get_back_inserter()
Provide a back insert iterator. The back insert iterator implements buffered insertion, which is faster,...
@ use_null
support "non-assigned" or "NULL" logic
BV::size_type count_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of AND operation of two bitsets.
int main(int argc, char *argv[])
static void parse_args(int argc, char *argv[])
Rudimentary cmd-args parser.
static void GenerateTestData(std::vector< string > &str_coll, str_sv_type &str_sv, unsigned max_coll=8000000, unsigned repeat_factor=10)
Test data generation.
bm::str_sparse_vector< char, bvector_type, 8 > str_sv_type
bool is_diag
Flag to print the SV diagnostics.
Aggregation options to control execution. Default settings are to support only result bit-vector filte...