BitMagic-C++
xsample01.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example xsample01.cpp
20  Demo and a benchmark on memory consumption control and logical operation
21 */
22 /*! \file xsample01.cpp
23  \brief Example: Example: memory consumption techniques
24 */
25 
26 
27 #include <iostream>
28 #include <memory>
29 #include <map>
30 #include <vector>
31 #include <chrono>
32 #include <algorithm>
33 #include <stdexcept>
34 
35 #include "bm.h"
36 #include "bmalgo.h"
37 #include "bmtimer.h"
38 #include "bmserial.h"
39 #include "bmsparsevec.h"
40 
41 #include "bmsparsevec_algo.h"
42 #include "bmsparsevec_serial.h"
43 #include "bmalgo_similarity.h"
44 
45 #include "bmdbg.h"
46 
47 
48 // ----------------------------------------------------
49 // Global parameters and types
50 // ----------------------------------------------------
51 
52 // Number of vectors generated for the test
53 const unsigned index_size = 1000000;
54 
55 // Dynamic range for constructed sets
56 const unsigned max_size = 2000000;
57 
58 // Number of bits per one vector
59 const unsigned bits_per_vect = 5;
60 
61 // benchmark operation count
62 const unsigned benchmark_ops = 1000;
63 
64 // subset of vectors used as a sample
65 const unsigned sample_cnt = 250;
66 
67 // index values to extract
68 const unsigned result_set_cnt = 200;
69 
70 
71 // bit-vector type for this example
73 
74 
75 // timing storage for benchmarking
77 
78 
79 
80 /* BitMagic provides two GAP length tables for situations when we have
81  standard or embarassingly sparse vectors.
82  bm::gap_len_table - default standard
83  bm::gap_len_table_min - option for smaller vectors
84 
85  Here we define an alternative table for very sparse vectors
86 */
87 template<bool T> struct gap_len_table_sparse
88 {
90 };
91 template<bool T>
93  { 8, 32, 128, 512 };
94 
95 
96 // simple bit-vector class factory for the project
97 //
98 static
99 TBVector* construct_bvector()
100 {
101  // in this example we plan to keep lots of vectors in memory, thus
102  // use parameters to minimize memory consumption
103  //
104  TBVector* bv =
105  new TBVector(bm::BM_GAP, // use GAP compressed mode
106  gap_len_table_sparse<true>::_len, // custom lens for super sparse vectors
107  max_size // limit the maximum size
108  );
109  return bv;
110 }
111 
112 // Generic utility to destroy map of pointers
113 template<typename TM>
114 void destroy_map(TM& id_map)
115 {
116  for (typename TM::iterator it = id_map.begin();
117  it != id_map.end();
118  ++it)
119  {
120  typename TM::mapped_type mp = it->second;
121  delete mp;
122  } // for
123  id_map.clear();
124 }
125 
126 // ------------------------------------------------------------------
127 // Sample data structures
128 // ------------------------------------------------------------------
129 
130 // Sample index structure to keep a map of in-memory bit-vectors
131 //
132 struct bv_index
133 {
134  typedef std::map<unsigned, TBVector*> map_type;
136  {
137  destroy_map(idx_);
138  }
139  map_type idx_;
140 };
141 
142 // Sample index structure to keep map of in-memory serialized/compressed bit-vectors
143 //
144 struct bvs_index
145 {
146  typedef std::vector<unsigned char> buffer_type;
147  typedef std::map<unsigned, buffer_type> map_type;
148 
149  map_type idx_;
150 };
151 
152 // Sample index structure to keep map of in-memory vector<unsigned int>
153 //
155 {
156  typedef std::vector<unsigned int> buffer_type;
157  typedef std::map<unsigned, buffer_type> map_type;
158 
159  map_type idx_;
160 };
161 
162 
163 // Sample index structure as in-memory sparse_vector
164 //
166 {
167  struct vect_addr
168  {
169  unsigned offset;
170  unsigned size;
171  };
172 
174  typedef std::map<unsigned, vect_addr> map_type;
175  typedef std::vector< std::pair<uint64_t, unsigned> > delta_sum_map_type;
176 
177 
178  void get_vector(unsigned id, std::vector<unsigned>& vect) const;
179 
180 
181  sparse_vector_type sv_storage_;
182  sparse_vector_type sv_storage1_;
183  map_type idx_;
184 };
185 
186 void sparse_vect_index::get_vector(unsigned id, std::vector<unsigned>& vect) const
187 {
188  map_type::const_iterator it = idx_.find(id);
189  if (it != idx_.end())
190  {
191  const sparse_vect_index::vect_addr& vaddr = it->second;
192  vect.resize(vaddr.size+1);
193  vect[0] = sv_storage1_.get(id);
194  for (unsigned j = 1; j < vect.size(); ++j)
195  {
196  unsigned a = sv_storage_.get(j + vaddr.offset - 1);
197  a += (vect[j-1] + 1);
198  vect[j] = a;
199  } // for j
200  }
201  else
202  {
203  vect.resize(0);
204  }
205 }
206 
207 // --------------------------------------------------------------------
208 
209 
210 // set bits in a vector using various methods picked at random
211 ///
212 // one method will generate a plato of non-random integers,
213 // another random integers of near neighbors
214 // the other adds ints randomly without following any system
215 //
216 static
217 void generate_random_vector(TBVector* bv)
218 {
219  unsigned method = rand() % 5; // pick a generation method
220  if (method == 0) // generate a incremental linear sequence at random location
221  {
222  unsigned seed_id = unsigned(rand()) % max_size;
223  for (unsigned i = seed_id; i < seed_id+bits_per_vect; ++i)
224  {
225  if (i >= max_size)
226  break;
227  bv->set_bit(i);
228  } // for i
229  }
230  else
231  if (method == 1) // generate near neighbors
232  {
233  unsigned seed_id = unsigned(rand()) % max_size;
234  unsigned id = seed_id;
235  for (unsigned i = 0; i < bits_per_vect; ++i)
236  {
237  if (id >= max_size)
238  break;
239  bv->set_bit(id);
240  id += (rand() % 10);
241  if (id >= max_size)
242  id = unsigned(rand()) % max_size;
243  } // for i
244  }
245  else // generate completely random bits
246  {
247  for (unsigned i = 0; i < bits_per_vect; ++i)
248  {
249  unsigned id = unsigned(rand()) % max_size;
250  if (i >= max_size) // paranoiya check
251  break;
252  bv->set_bit(id);
253  } // for i
254  }
255 }
256 
257 // generate map of bit-vectors, each filled with just a few bits
258 //
259 static
261 {
262  for (unsigned i = 0; i < index_size; ++i)
263  {
264  std::auto_ptr<TBVector> ap(construct_bvector());
265 
266  generate_random_vector(ap.get());
267 
268  if (!ap->any()) // integrity check
269  {
270  // this should never happen
271  std::cerr << "Warning. Empty vector generated!" << std::endl;
272  }
273 
274  bvi.idx_[i] = ap.release();
275  }
276 }
277 
278 // calculate memory footprint for in memory index
279 //
280 static
281 size_t calc_memory_footprint(const bv_index& bvi)
282 {
283  size_t mem_total = 0;
284  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
285  it != bvi.idx_.end();
286  ++it)
287  {
288  const TBVector* mp = it->second;
289 
291  mp->calc_stat(&st);
292  mem_total += st.memory_used;
293 
294  mem_total += sizeof(void*);
295  } // for
296 
297  return mem_total;
298 }
299 
300 // convert bit-vector index to bit-vector serialized index
301 //
302 static
303 size_t convert_bv2bvs(const bv_index& bvi, bvs_index& bvs)
304 {
305  size_t mem_total = 0;
306  std::vector<unsigned char> buf; // prepare a temporary buffer
307  buf.reserve(1024);
308 
309  // bit-vector serializer
310  // (keep it out of the serialization loop to minimize buffers re-allocations)
311  //
313  bvsr.byte_order_serialization(false);
314  bvsr.gap_length_serialization(false);
315  bvsr.set_compression_level(4);
316 
317  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
318  it != bvi.idx_.end();
319  ++it)
320  {
321  unsigned id = it->first;
322  const TBVector* bvp = it->second;
323 
325  bvp->calc_stat(&st); // calculate max. serialized size
326 
327  buf.resize(st.max_serialize_mem); // prepare the temp buffer
328 
329  // run serialization, actual serialization size is expacted to be smaller
330  //
331  unsigned bvs_size = bvsr.serialize(*bvp, buf.data(), st.max_serialize_mem);
332 
333  // move from temp serialization buffer to compressed in-memory index
334  //
335  bvs_index::buffer_type& vbuf = bvs.idx_[id];
336  vbuf.resize(bvs_size);
337  ::memcpy(vbuf.data(), buf.data(), bvs_size);
338 
339  mem_total += bvs_size;
340 
341  mem_total += sizeof(std::vector<unsigned char>::size_type);
342 
343 
344  // paranoia check compare source and desirialized vectors
345  //
346  #ifdef DEBUG
347  {
348  TBVector bv1;
349  bm::deserialize(bv1, vbuf.data());
350  if (bv1.compare(*bvp) !=0 )
351  {
352  throw std::runtime_error("deserialization check failed");
353  }
354  }
355  #endif
356 
357  } // for
358 
359  return mem_total;
360 }
361 
362 // convert bit-vector index to vector<usingned>
363 //
364 static
365 size_t convert_bv2vect(const bv_index& bvi, vect_index& vidx)
366 {
367  size_t mem_total = 0;
368 
369  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
370  it != bvi.idx_.end();
371  ++it)
372  {
373  unsigned id = it->first;
374  const TBVector* bvp = it->second;
375 
376  unsigned count = bvp->count(); // population count
377 
378  vect_index::buffer_type& vect = vidx.idx_[id];
379  vect.resize(count);
380 
381  for (TBVector::enumerator en = bvp->first(); en.valid(); ++en)
382  {
383  vect.push_back(*en);
384  }
385 
386  mem_total +=
387  sizeof(vect_index::buffer_type::value_type) * vect.size() +
388  sizeof(vect_index::buffer_type::size_type);
389 
390  } // for
391  return mem_total;
392 }
393 
394 static
395 void bv2delta(const TBVector& bv, std::vector<unsigned>& vect)
396 {
397  // convert into a plain vector first
398  //
399  vect.resize(0);
400  for (TBVector::enumerator en = bv.first(); en.valid(); ++en)
401  {
402  vect.push_back(*en);
403  }
404 
405  // convert into delta-vector
406  //
407  {
408  for (size_t k = vect.size()-1; k >= 1; --k)
409  {
410  vect[k] -= vect[k-1];
411  --vect[k];
412  } // for
413  }
414 }
415 
416 // convert bit-vector index to bm::sparse_vector
417 //
418 static
419 size_t convert_bv2sv(const bv_index& bvi, sparse_vect_index& sv_idx)
420 {
421  size_t mem_total = 0;
422 
423  std::vector<unsigned> vect;
424 
426 
427  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
428  it != bvi.idx_.end();
429  ++it)
430  {
431  unsigned id = it->first;
432  const TBVector* bvp = it->second;
433 
434  bv2delta(*bvp, vect);
435 
436  // compute sum of the delta-vector elements add to the sort map
437  {
438  uint64_t sum = 0;
439  for (unsigned k = 1; k < vect.size(); ++k)
440  {
441  sum += vect[k];
442  } // for
443  delta_map.push_back(std::make_pair(sum, id));
444  }
445 
446  } // for
447 
448  // sort by "enthropy" (sort of)
449  //
450  std::sort(delta_map.begin(), delta_map.end());
451  if (delta_map.size() != bvi.idx_.size()) // paranoia check
452  {
453  throw std::runtime_error("delta map size is incorrect");
454  }
455 
456  unsigned sv_pos = 0; // current position in sparse vector
457  for (unsigned j = 0; j < delta_map.size(); ++j)
458  {
459  unsigned id = delta_map[j].second;
460 
461  bv_index::map_type::const_iterator it = bvi.idx_.find(id);
462  if (it == bvi.idx_.end())
463  continue;
464  const TBVector& bv = *(it->second);
465 
466  // convert into a plain delta vector again
467  bv2delta(bv, vect);
468 
470  vaddr.offset = sv_pos;
471  vaddr.size = (unsigned)(vect.size() - 1);
472 
473  sv_idx.sv_storage1_.set(id, vect[0]);
474 
475  if (vaddr.size)
476  {
477  sv_idx.sv_storage_.import(&vect[1], vaddr.size, vaddr.offset);
478  sv_pos += vaddr.size;
479  }
480 
481  sv_idx.idx_[id] = vaddr;
482  } // for
483 
484 
485  // optimize sparse vector storage, compute memory consumption
486  {
487  sparse_vect_index::sparse_vector_type::statistics st;
488 
491  mem_total += st.memory_used;
493  mem_total += st.memory_used;
494  }
495 
496  // check
497  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
498  it != bvi.idx_.end();
499  ++it)
500  {
501  unsigned id = it->first;
502  const TBVector* bvp = it->second;
503 
504  // convert into a plain vector first
505  //
506  vect.resize(0);
507  for (TBVector::enumerator en = bvp->first(); en.valid(); ++en)
508  {
509  vect.push_back(*en);
510  }
511 
512 
513  std::vector<unsigned> svect;
514  sv_idx.get_vector(id, svect);
515  if (svect.size() != vect.size())
516  {
517  std::cerr << "Size check failed! id = " << id
518  << "size() = " << svect.size()
519  << std::endl;
520  throw std::runtime_error("sparse vector content check failed");
521  }
522 
523  for (unsigned k = 0; k < vect.size(); ++k)
524  {
525  if (vect[k] != svect[k])
526  {
527  std::cerr << "SV content check failed! id = " << id
528  << " i=" << k << std::endl;
529  for (unsigned h = 0; h < vect.size(); ++h)
530  {
531  std::cout << "[" << vect[h] << "=" << svect[h] << "], ";
532  } // for h
533  std::cout << std::endl;
534  throw std::runtime_error("sparse vector content check failed");
535  }
536  } // for k
537 
538  } // for
539 
540  #ifdef DEBUG
541  bm::print_svector_stat(sv_idx.sv_storage_, true);
542  bm::print_svector_stat(sv_idx.sv_storage1_, true);
543  #endif
544 
545  return mem_total;
546 }
547 
548 
549 // speed test for in-memory bit vectors
550 // benchmark performs a mix of logical operations
551 //
552 static
554 {
555  TBVector bv_join; // OR join vector
556 
557  bm::chrono_taker tt1("1. bm::bvector<> index", 1, &timing_map);
558 
559  // join all vectors using OR operation
560  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
561  it != bvi.idx_.end();
562  ++it)
563  {
564  const TBVector* bvp = it->second;
565  bv_join |= *bvp;
566  } // for
567  bv_join.optimize();
568 
569  // a group of random vectors from the index map, compute OR
570  // then compute AND with the join vector
571  //
572  TBVector bv_res(bm::BM_GAP);
573  std::vector<unsigned> result_set;
574  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
575 
576  for (unsigned i = 0; i < benchmark_ops; ++i)
577  {
578  bv_res.clear(true); // free all blocks
579  result_set.resize(0);
580 
581  for (unsigned j = 0; j < sample_cnt; ++j)
582  {
583  unsigned id = unsigned(rand()) % index_size;
584  bv_index::map_type::const_iterator it = bvi.idx_.find(id);
585  if (it == bvi.idx_.end())
586  continue;
587  const TBVector& bv = *(it->second);
588  bv_res |= bv;
589  }
590 
591  bv_res &= bv_join;
592 
593  // enumerate the final result set, extract first N elements
594  //
595  TBVector::enumerator en = bv_res.first();
596  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
597  {
598  result_set.push_back(*en);
599  }
600 
601  } // for i
602 
603  tt1.add_repeats(benchmark_ops + 1);
604 }
605 
606 // speed test for in-memory serialized bit vectors
607 // this function uses bm::operation_deserializer
608 // to perform logical operation between a BLOB and bvector<> in memory
609 // and avoids extra decompression overhead
610 //
611 static
613 {
614  TBVector bv_join; // OR join vector
615 
617 
619 
620  bm::chrono_taker tt1("2. serialized bvector", 1, &timing_map);
621 
622  // join all vectors using OR operation
623  for (bvs_index::map_type::const_iterator it = bvs.idx_.begin();
624  it != bvs.idx_.end();
625  ++it)
626  {
627  const bvs_index::buffer_type& svect = it->second;
628  if (svect.size() == 0)
629  {
630  throw std::runtime_error("empty buffer error");
631  }
632  const unsigned char* buf = it->second.data();
633 
634  des.deserialize(bv_join, buf, tb, bm::set_OR);
635  } // for
636  bv_join.optimize();
637 
638  // a group of random vectors from the index map, compute OR
639  // then compute AND with the join vector
640  //
641  TBVector bv_res(bm::BM_GAP);
642  std::vector<unsigned> result_set;
643  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
644 
645  for (unsigned i = 0; i < benchmark_ops; ++i)
646  {
647  bv_res.clear(true); // free all blocks
648  result_set.resize(0);
649 
650  for (unsigned j = 0; j < sample_cnt; ++j)
651  {
652  unsigned id = unsigned(rand()) % index_size;
653  bvs_index::map_type::const_iterator it = bvs.idx_.find(id);
654  if (it == bvs.idx_.end())
655  continue;
656 
657  const unsigned char* buf = it->second.data();
658  des.deserialize(bv_res, buf, tb, bm::set_OR);
659  } // for j
660 
661  bv_res &= bv_join;
662 
663  // enumerate the final result set, extract first N elements
664  //
665  TBVector::enumerator en = bv_res.first();
666  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
667  {
668  result_set.push_back(*en);
669  }
670  } // for i
671 
672  tt1.add_repeats(benchmark_ops + 1);
673 }
674 
675 static
677 {
678  TBVector bv_join; // OR join vector
679 
680  bm::chrono_taker tt1("3. std::vector<unsigned> ", 1, &timing_map);
681 
682  // join all vectors using OR operation
683  for (vect_index::map_type::const_iterator it = vecti.idx_.begin();
684  it != vecti.idx_.end();
685  ++it)
686  {
687  const vect_index::buffer_type& vect = it->second;
688  if (vect.size() == 0)
689  {
690  throw std::runtime_error("empty buffer error");
691  }
692 
693  bm::combine_or(bv_join, vect.begin(), vect.end());
694  } // for
695  bv_join.optimize();
696 
697 
698  // a group of random vectors from the index map, compute OR
699  // then compute AND with the join vector
700  //
701  TBVector bv_res(bm::BM_GAP);
702  std::vector<unsigned> result_set;
703  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
704 
705  for (unsigned i = 0; i < benchmark_ops; ++i)
706  {
707  bv_res.clear(true); // free all blocks
708  result_set.resize(0);
709 
710  for (unsigned j = 0; j < sample_cnt; ++j)
711  {
712  unsigned id = unsigned(rand()) % index_size;
713  vect_index::map_type::const_iterator it = vecti.idx_.find(id);
714  if (it == vecti.idx_.end())
715  continue;
716 
717  const vect_index::buffer_type& vect = it->second;
718 
719  bm::combine_or(bv_join, vect.begin(), vect.end());
720  } // for j
721 
722  bv_res &= bv_join;
723 
724  // enumerate the final result set, extract first N elements
725  //
726  TBVector::enumerator en = bv_res.first();
727  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
728  {
729  result_set.push_back(*en);
730  }
731 
732  } // for i
733 
734  tt1.add_repeats(benchmark_ops + 1);
735 }
736 
737 static
739 {
740  TBVector bv_join; // OR join vector
741 
742  bm::chrono_taker tt1("4. bm::sparse_vector<unsigned> ", 1, &timing_map);
743 
744  std::vector<unsigned> vect;
745 
746  // join all vectors using OR operation
747  for (sparse_vect_index::map_type::const_iterator it = svi.idx_.begin();
748  it != svi.idx_.end();
749  ++it)
750  {
751  unsigned id = it->first;
752  svi.get_vector(id, vect);
753 
754  bm::combine_or(bv_join, vect.begin(), vect.end());
755  } // for
756  bv_join.optimize();
757 
758 
759  // a group of random vectors from the index map, compute OR
760  // then compute AND with the join vector
761  //
762  TBVector bv_res(bm::BM_GAP);
763  std::vector<unsigned> result_set;
764  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
765 
766  for (unsigned i = 0; i < benchmark_ops; ++i)
767  {
768  bv_res.clear(true); // free all blocks
769  result_set.resize(0);
770 
771  for (unsigned j = 0; j < sample_cnt; ++j)
772  {
773  unsigned id = unsigned(rand()) % index_size;
774  svi.get_vector(id, vect);
775  if (vect.size() == 0)
776  continue;
777 
778  bm::combine_or(bv_join, vect.begin(), vect.end());
779  } // for j
780 
781  bv_res &= bv_join;
782 
783  // enumerate the final result set, extract first N elements
784  //
785  TBVector::enumerator en = bv_res.first();
786  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
787  {
788  result_set.push_back(*en);
789  }
790 
791  } // for i
792 
793  tt1.add_repeats(benchmark_ops + 1);
794 }
795 
796 
797 
798 int main(void)
799 {
800  try
801  {
802  bv_index bvi; // regular in-memory index id to bvector<>
803  bvs_index bvs; // compressed in-memory index id to bvector<> BLOB
804  vect_index vecti; // index based on plain uncompressed vector<unsigned>
805  sparse_vect_index svi; // all ids in a sparse vector
806 
807  // experiments generation, measuring memory footprints
808  //
809  generate_bv_index(bvi);
810 
811  size_t bv_mem_total = calc_memory_footprint(bvi);
812  size_t bv_mem_total_MB = bv_mem_total / (1024*1024);
813 
814  std::cout << "bm::bvector<> memory footprint = "
815  << bv_mem_total << " (" << bv_mem_total_MB << "MB)"
816  << std::endl;
817 
818  size_t bvs_mem_total = convert_bv2bvs(bvi, bvs);
819  size_t bvs_mem_total_MB = bvs_mem_total / (1024*1024);
820 
821  std::cout << "bm::bvector<> BLOB memory footprint = "
822  << bvs_mem_total << " (" << bvs_mem_total_MB << "MB)"
823  << std::endl;
824 
825  size_t vecti_mem_total = convert_bv2vect(bvi, vecti);
826  size_t vecti_mem_total_MB = vecti_mem_total / (1024*1024);
827 
828  std::cout << "std::vector<unsigned> memory footprint = "
829  << vecti_mem_total << " (" << vecti_mem_total_MB << "MB)"
830  << std::endl;
831 
832  size_t svi_mem_total = convert_bv2sv(bvi, svi);
833  size_t svi_mem_total_MB = svi_mem_total / (1024*1024);
834 
835  std::cout << "bm::sparse_vector<> memory footprint = "
836  << svi_mem_total << " (" << svi_mem_total_MB << "MB)"
837  << std::endl;
838 
839  // run performance tests
840  //
841 
842  speed_test_bv_index(bvi);
844  speed_test_vect_index(vecti);
845  speed_test_sv_index(svi);
846 
847 
848  std::cout << std::endl << "Performance (ops/sec):" << std::endl;
850 
851  //getchar(); // uncomment to check memory consumption at the OS level
852 
853  }
854  catch(std::exception& ex)
855  {
856  std::cerr << ex.what() << std::endl;
857  return 1;
858  }
859 
860  return 0;
861 }
862 
map_type idx_
Definition: xsample01.cpp:149
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
const unsigned result_set_cnt
Definition: xsample01.cpp:68
static TBVector * construct_bvector()
Definition: xsample01.cpp:99
static void speed_test_bv_index(const bv_index &bvi)
Definition: xsample01.cpp:553
static size_t convert_bv2sv(const bv_index &bvi, sparse_vect_index &sv_idx)
Definition: xsample01.cpp:419
std::vector< unsigned char > buffer_type
Definition: xsample01.cpp:146
static size_t convert_bv2bvs(const bv_index &bvi, bvs_index &bvs)
Definition: xsample01.cpp:303
static size_t convert_bv2vect(const bv_index &bvi, vect_index &vidx)
Definition: xsample01.cpp:365
void clear(const bm::id_t *ids, unsigned ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
Definition: bm.h:3332
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmsparsevec.h:1584
const unsigned index_size
Definition: xsample01.cpp:53
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
const unsigned bits_per_vect
Definition: xsample01.cpp:59
unsigned serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serilization into memory block.
Definition: bmserial.h:912
void optimize(bm::word_t *temp_block=0, optmode opt_mode=opt_compress, statistics *stat=0)
Optimize memory bitvector&#39;s memory allocation.
Definition: bm.h:2964
Timing utilities for benchmarking (internal)
size_t max_serialize_mem
Estimated maximum of memory required for serialization.
Definition: bmfunc.h:60
const unsigned sample_cnt
Definition: xsample01.cpp:65
Bit-vector serialization class.
Definition: bmserial.h:145
GAP compression is ON.
Definition: bmconst.h:121
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1134
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:157
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
Definition: bmsparsevec.h:1406
int main(void)
Definition: xsample01.cpp:798
std::map< unsigned, buffer_type > map_type
Definition: xsample01.cpp:157
int compare(const bvector< Alloc > &bvect) const
Lexicographical comparison with a bitvector.
Definition: bm.h:3051
void import(const value_type *arr, size_type arr_size, size_type offset=0)
Import list of elements from a C-style array.
Definition: bmsparsevec.h:908
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
Statistical information about bitset&#39;s memory allocation details.
Definition: bm.h:144
Algorithms for bvector<> (main include)
static void speed_test_vect_index(const vect_index &vecti)
Definition: xsample01.cpp:676
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:75
void get_vector(unsigned id, std::vector< unsigned > &vect) const
Definition: xsample01.cpp:186
bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_type
Definition: xsample01.cpp:173
map_type idx_
Definition: xsample01.cpp:139
std::vector< std::pair< uint64_t, unsigned > > delta_sum_map_type
Definition: xsample01.cpp:175
size_t memory_used
Memory used by bitvector including temp and service blocks.
Definition: bmfunc.h:62
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
Definition: bm.h:2098
static void generate_random_vector(TBVector *bv)
Definition: xsample01.cpp:217
void byte_order_serialization(bool value)
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:657
std::map< std::string, statistics > duration_map_type
test name to duration map
Definition: bmtimer.h:65
unsigned deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Bitvector deserialization from memory.
Definition: bmserial.h:1249
void destroy_map(TM &id_map)
Definition: xsample01.cpp:114
map_type idx_
Definition: xsample01.cpp:159
static void generate_bv_index(bv_index &bvi)
Definition: xsample01.cpp:260
const unsigned gap_levels
Definition: bmconst.h:76
static void speed_test_bvs_index(const bvs_index &bvs)
Definition: xsample01.cpp:612
bm::id_t count() const
population cout (count of ON bits)
Definition: bm.h:2468
static void print_duration_map(const duration_map_type &dmap, format fmt=ct_time)
Definition: bmtimer.h:127
unsigned short gap_word_t
Definition: bmconst.h:70
std::map< unsigned, TBVector * > map_type
Definition: xsample01.cpp:134
bool valid() const
Checks if iterator is still valid.
Definition: bm.h:300
std::map< unsigned, buffer_type > map_type
Definition: xsample01.cpp:147
Serialization for sparse_vector<>
enumerator end() const
Returns enumerator pointing on the next bit after the last.
Definition: bm.h:2104
Utility class to collect performance measurements and statistics.
Definition: bmtimer.h:39
void add_repeats(unsigned inc)
Definition: bmtimer.h:121
static void bv2delta(const TBVector &bv, std::vector< unsigned > &vect)
Definition: xsample01.cpp:395
std::map< unsigned, vect_addr > map_type
Definition: xsample01.cpp:174
void gap_length_serialization(bool value)
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:651
std::vector< unsigned int > buffer_type
Definition: xsample01.cpp:156
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const
Calculates bitvector statistics.
Definition: bm.h:3193
static const bm::gap_word_t _len[bm::gap_levels]
Definition: xsample01.cpp:89
sparse_vector_type sv_storage_
Definition: xsample01.cpp:181
Serialization / compression of bvector<>. Set operations on compressed BLOBs.
bm::chrono_taker::duration_map_type timing_map
Definition: xsample01.cpp:76
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:601
static unsigned deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:2874
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2498
const unsigned benchmark_ops
Definition: xsample01.cpp:62
sparse_vector_type sv_storage1_
Definition: xsample01.cpp:182
const unsigned max_size
Definition: xsample01.cpp:56
bm::bvector TBVector
Definition: xsample01.cpp:72
bool set_bit(bm::id_t n, bool val=true)
Sets bit n.
Definition: bm.h:3406
static size_t calc_memory_footprint(const bv_index &bvi)
Definition: xsample01.cpp:281
static void speed_test_sv_index(const sparse_vect_index &svi)
Definition: xsample01.cpp:738
void set_compression_level(unsigned clevel)
Set compression level.
Definition: bmserial.h:631
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:534
Algorithms for sparse_vector<>