91 for (
unsigned i = 0; i < size/3; )
94 unsigned nc = (unsigned)(rand() % 16);
104 bit.add_null(size/3);
106 for (
unsigned i = 0; i < size; )
109 unsigned nc = (unsigned)(rand() % 16);
127 for (
unsigned i = 0; i < size; )
129 unsigned sample = (unsigned)(rand() % 256);
130 sample_vec.push_back(sample);
133 std::random_device rd;
134 std::mt19937 g(rd());
137 std::shuffle(sample_vec.begin(), sample_vec.end(), g);
147 assert(sampling_size);
148 assert(sampling_size < csv.
size());
161 auto sz = csv.
size();
164 auto cnt = bv_null->count_range(from, to);
166 from += sampling_size;
167 to = from + sampling_size - 1;
184 assert(sampling_size);
185 assert(sampling_size < csv.
size());
197 auto sz = csv.
size();
200 auto cnt = bv_null->count_range(from, to);
201 hist_sv.
set(from, cnt);
203 from += sampling_size;
204 to = from + sampling_size - 1;
224 assert(sampling_size);
233 size_t sz = pair_vect.size();
234 for (
size_t k = 0; k < sz; ++k)
236 const auto& p = pair_vect[k];
251 auto en = bv_null->get_enumerator(0);
252 for (;en.valid(); ++en)
256 auto v1 = hist_rsc[idx];
257 auto v2 = hist_sv[sv_idx];
260 cerr <<
"Discrepancy at:" << idx << endl;
274 const std::vector<bvector_type::size_type>& sample_vec,
275 unsigned sampling_size)
277 unsigned long long sum = 0;
278 for (
size_t i = 0; i < sample_vec.size(); ++i)
280 auto idx = sample_vec[i];
281 idx = idx / sampling_size;
282 auto v = hist_sv[idx];
295 const std::vector<bvector_type::size_type>& sample_vec)
300 unsigned long long sum = 0;
301 for (
size_t i = 0; i < sample_vec.size(); ++i)
303 auto idx = sample_vec[i];
309 assert(found); (void)found;
311 auto v = hist_rsc[pos];
320 const std::vector<bvector_type::size_type>& sample_vec,
328 assert(found); (void)found;
334 for (
size_t i = 0; i < sample_vec.size(); ++i)
336 auto idx = sample_vec[i];
344 found = bv_null->
find(idx+1, pos_end);
365 unsigned hist1_avg = 0;
370 std::vector<bvector_type::size_type> svec;
377 data_set_size = bv_null->
count();
378 cout <<
"Data set size: " << rsc_test.
size() << endl;
379 cout <<
"Number of elements/events in the data set: " << data_set_size << endl;
388 sparse_vector_u32::statistics st;
391 cout <<
"Histogram 1 (SV)" << endl;
392 cout <<
" size: " << hist1.
size() << endl;
393 cout <<
" RAM size: " << st.memory_used << endl;
396 size_t serialized_size = 0;
397 int res = bm::file_save_svector(hist1,
"hist1.sv", &serialized_size,
true);
399 cerr <<
"Failed to save!" << endl;
401 cout <<
" file size: " << serialized_size << endl;
408 auto it = hist1.
begin();
409 auto it_end = hist1.
end();
410 for (; it != it_end; ++it)
412 assert (h1_sum == data_set_size);
413 hist1_avg = h1_sum / hist1.
size();
421 rsc_sparse_vector_u32::statistics st;
424 cout <<
"Histogram 2 (RSC)" << endl;
425 cout <<
" size: " << hist2.
size() << endl;
427 cout <<
" RAM size: " << st.memory_used << endl;
431 size_t serialized_size = 0;
432 int res = bm::file_save_svector(hist2,
"hist2.sv", &serialized_size,
true);
434 cerr <<
"Failed to save!" << endl;
436 cout <<
" file size: " << serialized_size << endl;
446 rsc_sparse_vector_u32::statistics st;
449 cout <<
"Histogram 3 adaptive (RSC)" << endl;
450 cout <<
" size: " << hist3.
size() << endl;
452 cout <<
" RAM size: " << st.memory_used << endl;
455 size_t serialized_size = 0;
456 int res = bm::file_save_svector(hist3,
"hist3.sv", &serialized_size,
true);
458 cerr <<
"Failed to save!" << endl;
460 cout <<
" file size: " << serialized_size << endl;
466 cout <<
"Access sample size = " << svec.size() << endl;
474 unsigned long long sum1(0), sum2(0);
487 cerr <<
"Control sum discrepancy!" << endl;
500 catch(std::exception& ex)
502 std::cerr << ex.what() << std::endl;
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
Compressed sparse container rsc_sparse_vector<> for integer types.
Timing utilities for benchmarking (internal)
pre-processor un-defines to avoid global space pollution (internal)
Bitvector Bit-vector container with runtime compression of bits.
bool find(size_type &pos) const BMNOEXCEPT
Finds index of first 1 bit.
size_type count() const BMNOEXCEPT
population count (count of ON bits)
bvector_size_type size_type
bool find_reverse(size_type &pos) const BMNOEXCEPT
Finds last index of 1 bit.
Utility class to collect performance measurements and statistics.
std::map< std::string, statistics > duration_map_type
test name to duration map
static void print_duration_map(TOut &tout, const duration_map_type &dmap, format fmt=ct_time)
void load_from(const sparse_vector_type &sv_src)
Load compressed vector from a sparse vector (with NULLs)
void push_back(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
back_insert_iterator get_back_inserter()
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, statistics *stat=0)
run memory optimization for all vector slices
void sync(bool force)
Re-calculate rank-select index for faster access to vector.
void calc_stat(struct rsc_sparse_vector< Val, SV >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values (or NULL)
size_type size() const BMNOEXCEPT
return size of the vector
SV::bvector_type bvector_type
succinct sparse vector with runtime compression using bit-slicing / transposition method
bvector_type::size_type size_type
size_type size() const BMNOEXCEPT
return size of the vector
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
void calc_stat(struct sparse_vector< Val, BV >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
@ use_null
support "non-assigned" or "NULL" logic
void rank_range_split(const BV &bv, typename BV::size_type rank, PairVect &target_v)
Algorithm to identify bit-vector ranges (splits) for the rank.
bm::bvector ::size_type bv_size_type
std::vector< std::pair< bv_size_type, bv_size_type > > bv_ranges_vector
bvector_type::size_type bv_size_type
static void verify_histograms(const rsc_sparse_vector_u32 &hist_rsc, const sparse_vector_u32 &hist_sv, sparse_vector_u32::size_type sampling_size)
Some test to confirm correctness.
static void compute_adaptive_rsc_histogram(rsc_sparse_vector_u32 &hist_rsc, const rsc_sparse_vector_u32 &csv, sparse_vector_u32::size_type sampling_size)
Adaptive histogram identifies number of not NULL elements (events) and varies the size of the histogr...
const unsigned sampling_interval
static void compute_rsc_historgam(rsc_sparse_vector_u32 &hist_rsc, const rsc_sparse_vector_u32 &csv, sparse_vector_u32::size_type sampling_size)
Compute histogram as a RSC vector using fixed sampling interval.
std::vector< std::pair< bv_size_type, bv_size_type > > bv_ranges_vector
bm::sparse_vector< unsigned, bvector_type > sparse_vector_u32
bm::chrono_taker ::duration_map_type timing_map
static void compute_historgam(sparse_vector_u32 &hist_sv, const rsc_sparse_vector_u32 &csv, sparse_vector_u32::size_type sampling_size)
Compute histogram as a SV vector using fixed sampling interval.
static void access_bench3(const rsc_sparse_vector_u32 &hist_rsc, const std::vector< bvector_type::size_type > &sample_vec, const rsc_sparse_vector_u32 &rsc_data)
static unsigned long long access_bench1(const sparse_vector_u32 &hist_sv, const std::vector< bvector_type::size_type > &sample_vec, unsigned sampling_size)
Access benchmark 1.
static void generate_test_data(rsc_sparse_vector_u32 &csv, unsigned size)
Generate a test RSC vector with a randomly distributed values imitating distribution density of genom...
static void generate_access_samples(std::vector< bvector_type::size_type > &sample_vec, unsigned size)
generate list of random indexes (locations) to read histogram values
static unsigned long long access_bench2(const rsc_sparse_vector_u32 &hist_rsc, const std::vector< bvector_type::size_type > &sample_vec)
Access benchmark 2.
bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32