BitMagic-C++
xsample01.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example xsample01.cpp
20  Demo and a benchmark on memory consumption control and logical operation
21 */
22 /*! \file xsample01.cpp
23  \brief Example: Example: memory consumption techniques
24 */
25 
26 
27 #include <iostream>
28 #include <memory>
29 #include <map>
30 #include <vector>
31 #include <chrono>
32 #include <algorithm>
33 #include <stdexcept>
34 
35 #include "bm.h"
36 #include "bmalgo.h"
37 #include "bmtimer.h"
38 #include "bmserial.h"
39 #include "bmsparsevec.h"
40 
41 #include "bmsparsevec_algo.h"
42 #include "bmsparsevec_serial.h"
43 #include "bmalgo_similarity.h"
44 
45 #include "bmdbg.h"
46 
47 
48 // ----------------------------------------------------
49 // Global parameters and types
50 // ----------------------------------------------------
51 
52 // Number of vectors generated for the test
53 const unsigned index_size = 1000000;
54 
55 // Dynamic range for constructed sets
56 const unsigned max_size = 2000000;
57 
58 // Number of bits per one vector
59 const unsigned bits_per_vect = 5;
60 
61 // benchmark operation count
62 const unsigned benchmark_ops = 1000;
63 
64 // subset of vectors used as a sample
65 const unsigned sample_cnt = 250;
66 
67 // index values to extract
68 const unsigned result_set_cnt = 200;
69 
70 
71 // bit-vector type for this example
73 
74 
75 // timing storage for benchmarking
77 
78 
79 
80 /* BitMagic provides two GAP length tables for situations when we have
81  standard or embarassingly sparse vectors.
82  bm::gap_len_table - default standard
83  bm::gap_len_table_min - option for smaller vectors
84 
85  Here we define an alternative table for very sparse vectors
86 */
87 template<bool T> struct gap_len_table_sparse
88 {
90 };
91 template<bool T>
93  { 8, 32, 128, 512 };
94 
95 
96 // simple bit-vector class factory for the project
97 //
98 static
100 {
101  // in this example we plan to keep lots of vectors in memory, thus
102  // use parameters to minimize memory consumption
103  //
104  TBVector* bv =
105  new TBVector(bm::BM_GAP, // use GAP compressed mode
106  gap_len_table_sparse<true>::_len, // custom lens for super sparse vectors
107  max_size // limit the maximum size
108  );
109  return bv;
110 }
111 
112 // Generic utility to destroy map of pointers
113 template<typename TM>
114 void destroy_map(TM& id_map)
115 {
116  for (typename TM::iterator it = id_map.begin();
117  it != id_map.end();
118  ++it)
119  {
120  typename TM::mapped_type mp = it->second;
121  delete mp;
122  } // for
123  id_map.clear();
124 }
125 
126 // ------------------------------------------------------------------
127 // Sample data structures
128 // ------------------------------------------------------------------
129 
130 // Sample index structure to keep a map of in-memory bit-vectors
131 //
132 struct bv_index
133 {
134  typedef std::map<unsigned, TBVector*> map_type;
136  {
137  destroy_map(idx_);
138  }
140 };
141 
142 // Sample index structure to keep map of in-memory serialized/compressed bit-vectors
143 //
144 struct bvs_index
145 {
146  typedef std::vector<unsigned char> buffer_type;
147  typedef std::map<unsigned, buffer_type> map_type;
148 
150 };
151 
152 // Sample index structure to keep map of in-memory vector<unsigned int>
153 //
155 {
156  typedef std::vector<unsigned int> buffer_type;
157  typedef std::map<unsigned, buffer_type> map_type;
158 
160 };
161 
162 
163 // Sample index structure as in-memory sparse_vector
164 //
166 {
167  struct vect_addr
168  {
169  unsigned offset;
170  unsigned size;
171  };
172 
174  typedef std::map<unsigned, vect_addr> map_type;
175  typedef std::vector< std::pair<uint64_t, unsigned> > delta_sum_map_type;
176 
177 
178  void get_vector(unsigned id, std::vector<unsigned>& vect) const;
179 
180 
184 };
185 
186 void sparse_vect_index::get_vector(unsigned id, std::vector<unsigned>& vect) const
187 {
188  map_type::const_iterator it = idx_.find(id);
189  if (it != idx_.end())
190  {
191  const sparse_vect_index::vect_addr& vaddr = it->second;
192  vect.resize(vaddr.size+1);
193  vect[0] = sv_storage1_.get(id);
194  for (unsigned j = 1; j < vect.size(); ++j)
195  {
196  unsigned a = sv_storage_.get(j + vaddr.offset - 1);
197  a += (vect[j-1] + 1);
198  vect[j] = a;
199  } // for j
200  }
201  else
202  {
203  vect.resize(0);
204  }
205 }
206 
207 // --------------------------------------------------------------------
208 
209 
210 // set bits in a vector using various methods picked at random
211 ///
212 // one method will generate a plato of non-random integers,
213 // another random integers of near neighbors
214 // the other adds ints randomly without following any system
215 //
216 static
218 {
219  unsigned method = rand() % 5; // pick a generation method
220  if (method == 0) // generate a incremental linear sequence at random location
221  {
222  unsigned seed_id = unsigned(rand()) % max_size;
223  for (unsigned i = seed_id; i < seed_id+bits_per_vect; ++i)
224  {
225  if (i >= max_size)
226  break;
227  bv->set_bit(i);
228  } // for i
229  }
230  else
231  if (method == 1) // generate near neighbors
232  {
233  unsigned seed_id = unsigned(rand()) % max_size;
234  unsigned id = seed_id;
235  for (unsigned i = 0; i < bits_per_vect; ++i)
236  {
237  if (id >= max_size)
238  break;
239  bv->set_bit(id);
240  id += (rand() % 10);
241  if (id >= max_size)
242  id = unsigned(rand()) % max_size;
243  } // for i
244  }
245  else // generate completely random bits
246  {
247  for (unsigned i = 0; i < bits_per_vect; ++i)
248  {
249  unsigned id = unsigned(rand()) % max_size;
250  if (i >= max_size) // paranoiya check
251  break;
252  bv->set_bit(id);
253  } // for i
254  }
255 }
256 
257 // generate map of bit-vectors, each filled with just a few bits
258 //
259 static
261 {
262  for (unsigned i = 0; i < index_size; ++i)
263  {
264  std::unique_ptr<TBVector> ap(construct_bvector());
265 
266  generate_random_vector(ap.get());
267 
268  if (!ap->any()) // integrity check
269  {
270  // this should never happen
271  std::cerr << "Warning. Empty vector generated!" << std::endl;
272  }
273 
274  bvi.idx_[i] = ap.release();
275  }
276 }
277 
278 // calculate memory footprint for in memory index
279 //
280 static
281 size_t calc_memory_footprint(const bv_index& bvi)
282 {
283  size_t mem_total = 0;
284  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
285  it != bvi.idx_.end();
286  ++it)
287  {
288  const TBVector* mp = it->second;
289 
291  mp->calc_stat(&st);
292  mem_total += st.memory_used;
293 
294  mem_total += sizeof(void*);
295  } // for
296 
297  return mem_total;
298 }
299 
300 // convert bit-vector index to bit-vector serialized index
301 //
302 static
303 size_t convert_bv2bvs(const bv_index& bvi, bvs_index& bvs)
304 {
305  size_t mem_total = 0;
306  std::vector<unsigned char> buf; // prepare a temporary buffer
307  buf.reserve(1024);
308 
309  // bit-vector serializer
310  // (keep it out of the serialization loop to minimize buffers re-allocations)
311  //
313  bvsr.byte_order_serialization(false);
314  bvsr.gap_length_serialization(false);
315  bvsr.set_compression_level(4);
316 
317  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
318  it != bvi.idx_.end();
319  ++it)
320  {
321  unsigned id = it->first;
322  const TBVector* bvp = it->second;
323 
325  bvp->calc_stat(&st); // calculate max. serialized size
326 
327  buf.resize(st.max_serialize_mem); // prepare the temp buffer
328 
329  // run serialization, actual serialization size is expacted to be smaller
330  //
331  unsigned bvs_size = bvsr.serialize(*bvp, buf.data(), st.max_serialize_mem);
332 
333  // move from temp serialization buffer to compressed in-memory index
334  //
335  bvs_index::buffer_type& vbuf = bvs.idx_[id];
336  vbuf.resize(bvs_size);
337  ::memcpy(vbuf.data(), buf.data(), bvs_size);
338 
339  mem_total += bvs_size;
340 
341  mem_total += sizeof(std::vector<unsigned char>::size_type);
342 
343 
344  // paranoia check compare source and desirialized vectors
345  //
346  #ifdef DEBUG
347  {
348  TBVector bv1;
349  bm::deserialize(bv1, vbuf.data());
350  if (bv1.compare(*bvp) !=0 )
351  {
352  throw std::runtime_error("deserialization check failed");
353  }
354  }
355  #endif
356 
357  } // for
358 
359  return mem_total;
360 }
361 
362 // convert bit-vector index to vector<usingned>
363 //
364 static
365 size_t convert_bv2vect(const bv_index& bvi, vect_index& vidx)
366 {
367  size_t mem_total = 0;
368 
369  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
370  it != bvi.idx_.end();
371  ++it)
372  {
373  unsigned id = it->first;
374  const TBVector* bvp = it->second;
375 
376  unsigned count = bvp->count(); // population count
377 
378  vect_index::buffer_type& vect = vidx.idx_[id];
379  vect.resize(count);
380 
381  for (TBVector::enumerator en = bvp->first(); en.valid(); ++en)
382  {
383  vect.push_back(*en);
384  }
385 
386  mem_total +=
387  sizeof(vect_index::buffer_type::value_type) * vect.size() +
388  sizeof(vect_index::buffer_type::size_type);
389 
390  } // for
391  return mem_total;
392 }
393 
394 static
395 void bv2delta(const TBVector& bv, std::vector<unsigned>& vect)
396 {
397  // convert into a plain vector first
398  //
399  vect.resize(0);
400  for (TBVector::enumerator en = bv.first(); en.valid(); ++en)
401  {
402  vect.push_back(*en);
403  }
404 
405  // convert into delta-vector
406  //
407  {
408  for (size_t k = vect.size()-1; k >= 1; --k)
409  {
410  vect[k] -= vect[k-1];
411  --vect[k];
412  } // for
413  }
414 }
415 
416 // convert bit-vector index to bm::sparse_vector
417 //
418 static
419 size_t convert_bv2sv(const bv_index& bvi, sparse_vect_index& sv_idx)
420 {
421  size_t mem_total = 0;
422 
423  std::vector<unsigned> vect;
424 
426 
427  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
428  it != bvi.idx_.end();
429  ++it)
430  {
431  unsigned id = it->first;
432  const TBVector* bvp = it->second;
433 
434  bv2delta(*bvp, vect);
435 
436  // compute sum of the delta-vector elements add to the sort map
437  {
438  uint64_t sum = 0;
439  for (unsigned k = 1; k < vect.size(); ++k)
440  {
441  sum += vect[k];
442  } // for
443  delta_map.push_back(std::make_pair(sum, id));
444  }
445 
446  } // for
447 
448  // sort by "enthropy" (sort of)
449  //
450  std::sort(delta_map.begin(), delta_map.end());
451  if (delta_map.size() != bvi.idx_.size()) // paranoia check
452  {
453  throw std::runtime_error("delta map size is incorrect");
454  }
455 
456  unsigned sv_pos = 0; // current position in sparse vector
457  for (unsigned j = 0; j < delta_map.size(); ++j)
458  {
459  unsigned id = delta_map[j].second;
460 
461  bv_index::map_type::const_iterator it = bvi.idx_.find(id);
462  if (it == bvi.idx_.end())
463  continue;
464  const TBVector& bv = *(it->second);
465 
466  // convert into a plain delta vector again
467  bv2delta(bv, vect);
468 
470  vaddr.offset = sv_pos;
471  vaddr.size = (unsigned)(vect.size() - 1);
472 
473  sv_idx.sv_storage1_.set(id, vect[0]);
474 
475  if (vaddr.size)
476  {
477  sv_idx.sv_storage_.import(&vect[1], vaddr.size, vaddr.offset);
478  sv_pos += vaddr.size;
479  }
480 
481  sv_idx.idx_[id] = vaddr;
482  } // for
483 
484 
485  // optimize sparse vector storage, compute memory consumption
486  {
487  sparse_vect_index::sparse_vector_type::statistics st;
488 
491  mem_total += st.memory_used;
493  mem_total += st.memory_used;
494  }
495 
496  // check
497  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
498  it != bvi.idx_.end();
499  ++it)
500  {
501  unsigned id = it->first;
502  const TBVector* bvp = it->second;
503 
504  // convert into a plain vector first
505  //
506  vect.resize(0);
507  for (TBVector::enumerator en = bvp->first(); en.valid(); ++en)
508  {
509  vect.push_back(*en);
510  }
511 
512 
513  std::vector<unsigned> svect;
514  sv_idx.get_vector(id, svect);
515  if (svect.size() != vect.size())
516  {
517  std::cerr << "Size check failed! id = " << id
518  << "size() = " << svect.size()
519  << std::endl;
520  throw std::runtime_error("sparse vector content check failed");
521  }
522 
523  for (unsigned k = 0; k < vect.size(); ++k)
524  {
525  if (vect[k] != svect[k])
526  {
527  std::cerr << "SV content check failed! id = " << id
528  << " i=" << k << std::endl;
529  for (unsigned h = 0; h < vect.size(); ++h)
530  {
531  std::cout << "[" << vect[h] << "=" << svect[h] << "], ";
532  } // for h
533  std::cout << std::endl;
534  throw std::runtime_error("sparse vector content check failed");
535  }
536  } // for k
537 
538  } // for
539 
540  #ifdef DEBUG
541  bm::print_svector_stat(sv_idx.sv_storage_, true);
542  bm::print_svector_stat(sv_idx.sv_storage1_, true);
543  #endif
544 
545  return mem_total;
546 }
547 
548 
549 // speed test for in-memory bit vectors
550 // benchmark performs a mix of logical operations
551 //
552 static
554 {
555  TBVector bv_join; // OR join vector
556 
557  bm::chrono_taker tt1("1. bm::bvector<> index", 1, &timing_map);
558 
559  // join all vectors using OR operation
560  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
561  it != bvi.idx_.end();
562  ++it)
563  {
564  const TBVector* bvp = it->second;
565  bv_join |= *bvp;
566  } // for
567  bv_join.optimize();
568 
569  // a group of random vectors from the index map, compute OR
570  // then compute AND with the join vector
571  //
572  TBVector bv_res(bm::BM_GAP);
573  std::vector<unsigned> result_set;
574  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
575 
576  for (unsigned i = 0; i < benchmark_ops; ++i)
577  {
578  bv_res.clear(true); // free all blocks
579  result_set.resize(0);
580 
581  for (unsigned j = 0; j < sample_cnt; ++j)
582  {
583  unsigned id = unsigned(rand()) % index_size;
584  bv_index::map_type::const_iterator it = bvi.idx_.find(id);
585  if (it == bvi.idx_.end())
586  continue;
587  const TBVector& bv = *(it->second);
588  bv_res |= bv;
589  }
590 
591  bv_res &= bv_join;
592 
593  // enumerate the final result set, extract first N elements
594  //
595  TBVector::enumerator en = bv_res.first();
596  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
597  {
598  result_set.push_back(*en);
599  }
600 
601  } // for i
602 
603  tt1.add_repeats(benchmark_ops + 1);
604 }
605 
606 // speed test for in-memory serialized bit vectors
607 // this function uses bm::operation_deserializer
608 // to perform logical operation between a BLOB and bvector<> in memory
609 // and avoids extra decompression overhead
610 //
611 static
613 {
614  TBVector bv_join; // OR join vector
615 
617 
619 
620  bm::chrono_taker tt1("2. serialized bvector", 1, &timing_map);
621 
622  // join all vectors using OR operation
623  for (bvs_index::map_type::const_iterator it = bvs.idx_.begin();
624  it != bvs.idx_.end();
625  ++it)
626  {
627  const bvs_index::buffer_type& svect = it->second;
628  if (svect.size() == 0)
629  {
630  throw std::runtime_error("empty buffer error");
631  }
632  const unsigned char* buf = it->second.data();
633 
634  des.deserialize(bv_join, buf, tb, bm::set_OR);
635  } // for
636  bv_join.optimize();
637 
638  // a group of random vectors from the index map, compute OR
639  // then compute AND with the join vector
640  //
641  TBVector bv_res(bm::BM_GAP);
642  std::vector<unsigned> result_set;
643  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
644 
645  for (unsigned i = 0; i < benchmark_ops; ++i)
646  {
647  bv_res.clear(true); // free all blocks
648  result_set.resize(0);
649 
650  for (unsigned j = 0; j < sample_cnt; ++j)
651  {
652  unsigned id = unsigned(rand()) % index_size;
653  bvs_index::map_type::const_iterator it = bvs.idx_.find(id);
654  if (it == bvs.idx_.end())
655  continue;
656 
657  const unsigned char* buf = it->second.data();
658  des.deserialize(bv_res, buf, tb, bm::set_OR);
659  } // for j
660 
661  bv_res &= bv_join;
662 
663  // enumerate the final result set, extract first N elements
664  //
665  TBVector::enumerator en = bv_res.first();
666  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
667  {
668  result_set.push_back(*en);
669  }
670  } // for i
671 
672  tt1.add_repeats(benchmark_ops + 1);
673 }
674 
675 static
677 {
678  TBVector bv_join; // OR join vector
679 
680  bm::chrono_taker tt1("3. std::vector<unsigned> ", 1, &timing_map);
681 
682  // join all vectors using OR operation
683  for (vect_index::map_type::const_iterator it = vecti.idx_.begin();
684  it != vecti.idx_.end();
685  ++it)
686  {
687  const vect_index::buffer_type& vect = it->second;
688  if (vect.size() == 0)
689  {
690  throw std::runtime_error("empty buffer error");
691  }
692 
693  bm::combine_or(bv_join, vect.begin(), vect.end());
694  } // for
695  bv_join.optimize();
696 
697 
698  // a group of random vectors from the index map, compute OR
699  // then compute AND with the join vector
700  //
701  TBVector bv_res(bm::BM_GAP);
702  std::vector<unsigned> result_set;
703  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
704 
705  for (unsigned i = 0; i < benchmark_ops; ++i)
706  {
707  bv_res.clear(true); // free all blocks
708  result_set.resize(0);
709 
710  for (unsigned j = 0; j < sample_cnt; ++j)
711  {
712  unsigned id = unsigned(rand()) % index_size;
713  vect_index::map_type::const_iterator it = vecti.idx_.find(id);
714  if (it == vecti.idx_.end())
715  continue;
716 
717  const vect_index::buffer_type& vect = it->second;
718 
719  bm::combine_or(bv_join, vect.begin(), vect.end());
720  } // for j
721 
722  bv_res &= bv_join;
723 
724  // enumerate the final result set, extract first N elements
725  //
726  TBVector::enumerator en = bv_res.first();
727  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
728  {
729  result_set.push_back(*en);
730  }
731 
732  } // for i
733 
734  tt1.add_repeats(benchmark_ops + 1);
735 }
736 
737 static
739 {
740  TBVector bv_join; // OR join vector
741 
742  bm::chrono_taker tt1("4. bm::sparse_vector<unsigned> ", 1, &timing_map);
743 
744  std::vector<unsigned> vect;
745 
746  // join all vectors using OR operation
747  for (sparse_vect_index::map_type::const_iterator it = svi.idx_.begin();
748  it != svi.idx_.end();
749  ++it)
750  {
751  unsigned id = it->first;
752  svi.get_vector(id, vect);
753 
754  bm::combine_or(bv_join, vect.begin(), vect.end());
755  } // for
756  bv_join.optimize();
757 
758 
759  // a group of random vectors from the index map, compute OR
760  // then compute AND with the join vector
761  //
762  TBVector bv_res(bm::BM_GAP);
763  std::vector<unsigned> result_set;
764  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
765 
766  for (unsigned i = 0; i < benchmark_ops; ++i)
767  {
768  bv_res.clear(true); // free all blocks
769  result_set.resize(0);
770 
771  for (unsigned j = 0; j < sample_cnt; ++j)
772  {
773  unsigned id = unsigned(rand()) % index_size;
774  svi.get_vector(id, vect);
775  if (vect.size() == 0)
776  continue;
777 
778  bm::combine_or(bv_join, vect.begin(), vect.end());
779  } // for j
780 
781  bv_res &= bv_join;
782 
783  // enumerate the final result set, extract first N elements
784  //
785  TBVector::enumerator en = bv_res.first();
786  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
787  {
788  result_set.push_back(*en);
789  }
790 
791  } // for i
792 
793  tt1.add_repeats(benchmark_ops + 1);
794 }
795 
796 
797 
798 int main(void)
799 {
800  try
801  {
802  bv_index bvi; // regular in-memory index id to bvector<>
803  bvs_index bvs; // compressed in-memory index id to bvector<> BLOB
804  vect_index vecti; // index based on plain uncompressed vector<unsigned>
805  sparse_vect_index svi; // all ids in a sparse vector
806 
807  // experiments generation, measuring memory footprints
808  //
809  generate_bv_index(bvi);
810 
811  size_t bv_mem_total = calc_memory_footprint(bvi);
812  size_t bv_mem_total_MB = bv_mem_total / (1024*1024);
813 
814  std::cout << "bm::bvector<> memory footprint = "
815  << bv_mem_total << " (" << bv_mem_total_MB << "MB)"
816  << std::endl;
817 
818  size_t bvs_mem_total = convert_bv2bvs(bvi, bvs);
819  size_t bvs_mem_total_MB = bvs_mem_total / (1024*1024);
820 
821  std::cout << "bm::bvector<> BLOB memory footprint = "
822  << bvs_mem_total << " (" << bvs_mem_total_MB << "MB)"
823  << std::endl;
824 
825  size_t vecti_mem_total = convert_bv2vect(bvi, vecti);
826  size_t vecti_mem_total_MB = vecti_mem_total / (1024*1024);
827 
828  std::cout << "std::vector<unsigned> memory footprint = "
829  << vecti_mem_total << " (" << vecti_mem_total_MB << "MB)"
830  << std::endl;
831 
832  size_t svi_mem_total = convert_bv2sv(bvi, svi);
833  size_t svi_mem_total_MB = svi_mem_total / (1024*1024);
834 
835  std::cout << "bm::sparse_vector<> memory footprint = "
836  << svi_mem_total << " (" << svi_mem_total_MB << "MB)"
837  << std::endl;
838 
839  // run performance tests
840  //
841 
842  speed_test_bv_index(bvi);
844  speed_test_vect_index(vecti);
845  speed_test_sv_index(svi);
846 
847 
848  std::cout << std::endl << "Performance (ops/sec):" << std::endl;
850 
851  //getchar(); // uncomment to check memory consumption at the OS level
852 
853  }
854  catch(std::exception& ex)
855  {
856  std::cerr << ex.what() << std::endl;
857  return 1;
858  }
859 
860  return 0;
861 }
862 
result_set_cnt
const unsigned result_set_cnt
Definition: xsample01.cpp:68
speed_test_bv_index
static void speed_test_bv_index(const bv_index &bvi)
Definition: xsample01.cpp:553
bm::deserialize
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition: bmserial.h:2902
bm::bvector::calc_stat
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const BMNOEXCEPT
Calculates bitvector statistics.
Definition: bm.h:3419
bmsparsevec_algo.h
Algorithms for bm::sparse_vector.
bm::bv_statistics::max_serialize_mem
size_t max_serialize_mem
estimated maximum memory for serialization
Definition: bmfunc.h:60
bm::sparse_vector::optimize
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmsparsevec.h:1750
sparse_vect_index
Definition: xsample01.cpp:165
bm::sparse_vector
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:81
bm::chrono_taker
Utility class to collect performance measurements and statistics.
Definition: bmtimer.h:39
BM_DECLARE_TEMP_BLOCK
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
bv_index
Definition: xsample01.cpp:132
generate_bv_index
static void generate_bv_index(bv_index &bvi)
Definition: xsample01.cpp:260
timing_map
bm::chrono_taker::duration_map_type timing_map
Definition: xsample01.cpp:76
sparse_vect_index::vect_addr::size
unsigned size
Definition: xsample01.cpp:170
bmsparsevec.h
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
bm::serializer::serialize
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition: bmserial.h:2440
vect_index
Definition: xsample01.cpp:154
bm::bvector::enumerator
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:599
bm::bvector::compare
int compare(const bvector< Alloc > &bvect) const BMNOEXCEPT
Lexicographical comparison with a bitvector.
Definition: bm.h:3185
calc_memory_footprint
static size_t calc_memory_footprint(const bv_index &bvi)
Definition: xsample01.cpp:281
sparse_vect_index::vect_addr::offset
unsigned offset
Definition: xsample01.cpp:169
gap_len_table_sparse
Definition: xsample01.cpp:87
bmsparsevec_serial.h
Serialization for sparse_vector<>
bmalgo.h
Algorithms for bvector<> (main include)
bv_index::~bv_index
~bv_index()
Definition: xsample01.cpp:135
bv_index::idx_
map_type idx_
Definition: xsample01.cpp:139
bm::serializer::set_compression_level
void set_compression_level(unsigned clevel) BMNOEXCEPT
Set compression level.
Definition: bmserial.h:1184
bm::bvector<>
benchmark_ops
const unsigned benchmark_ops
Definition: xsample01.cpp:62
bm::set_OR
@ set_OR
Definition: bmconst.h:155
bm::bvector::count
size_type count() const BMNOEXCEPT
population cout (count of ON bits)
Definition: bm.h:2220
bv_index::map_type
std::map< unsigned, TBVector * > map_type
Definition: xsample01.cpp:134
max_size
const unsigned max_size
Definition: xsample01.cpp:56
bm::bvector::iterator_base::valid
bool valid() const BMNOEXCEPT
Checks if iterator is still valid. Analog of != 0 comparison for pointers.
Definition: bm.h:280
sample_cnt
const unsigned sample_cnt
Definition: xsample01.cpp:65
vect_index::idx_
map_type idx_
Definition: xsample01.cpp:159
TBVector
bm::bvector TBVector
Definition: xsample01.cpp:72
sparse_vect_index::delta_sum_map_type
std::vector< std::pair< uint64_t, unsigned > > delta_sum_map_type
Definition: xsample01.cpp:175
bm::serializer::gap_length_serialization
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:1205
bvs_index::map_type
std::map< unsigned, buffer_type > map_type
Definition: xsample01.cpp:147
construct_bvector
static TBVector * construct_bvector()
Definition: xsample01.cpp:99
bm::bvector::opt_compress
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:134
generate_random_vector
static void generate_random_vector(TBVector *bv)
Definition: xsample01.cpp:217
bm::sparse_vector::import
void import(const value_type *arr, size_type arr_size, size_type offset=0, bool set_not_null=true)
Import list of elements from a C-style array.
Definition: bmsparsevec.h:1000
bm::gap_word_t
unsigned short gap_word_t
Definition: bmconst.h:77
bmtimer.h
Timing utilities for benchmarking (internal)
speed_test_bvs_index
static void speed_test_bvs_index(const bvs_index &bvs)
Definition: xsample01.cpp:612
convert_bv2bvs
static size_t convert_bv2bvs(const bv_index &bvi, bvs_index &bvs)
Definition: xsample01.cpp:303
convert_bv2vect
static size_t convert_bv2vect(const bv_index &bvi, vect_index &vidx)
Definition: xsample01.cpp:365
vect_index::buffer_type
std::vector< unsigned int > buffer_type
Definition: xsample01.cpp:156
sparse_vect_index::sv_storage1_
sparse_vector_type sv_storage1_
Definition: xsample01.cpp:182
bm::chrono_taker::ct_ops_per_sec
@ ct_ops_per_sec
Definition: bmtimer.h:59
bm::sparse_vector::get
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
Definition: bmsparsevec.h:1451
bvs_index
Definition: xsample01.cpp:144
bm::BM_GAP
@ BM_GAP
GAP compression is ON.
Definition: bmconst.h:144
bits_per_vect
const unsigned bits_per_vect
Definition: xsample01.cpp:59
bm::operation_deserializer
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:867
bm::bvector::clear
void clear(const size_type *ids, size_type ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
Definition: bm.h:3572
bv2delta
static void bv2delta(const TBVector &bv, std::vector< unsigned > &vect)
Definition: xsample01.cpp:395
bm::serializer
Bit-vector serialization class.
Definition: bmserial.h:75
bm::bvector::resize
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2282
sparse_vect_index::sparse_vector_type
bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_type
Definition: xsample01.cpp:173
sparse_vect_index::idx_
map_type idx_
Definition: xsample01.cpp:183
convert_bv2sv
static size_t convert_bv2sv(const bv_index &bvi, sparse_vect_index &sv_idx)
Definition: xsample01.cpp:419
bm::combine_or
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1016
bm::chrono_taker::duration_map_type
std::map< std::string, statistics > duration_map_type
test name to duration map
Definition: bmtimer.h:65
destroy_map
void destroy_map(TM &id_map)
Definition: xsample01.cpp:114
bm::operation_deserializer::deserialize
size_type deserialize(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:6208
bmserial.h
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
bm::bvector::end
enumerator end() const
Returns enumerator pointing on the next bit after the last.
Definition: bm.h:1802
bm::chrono_taker::print_duration_map
static void print_duration_map(const duration_map_type &dmap, format fmt=ct_time)
Definition: bmtimer.h:127
bmalgo_similarity.h
vect_index::map_type
std::map< unsigned, buffer_type > map_type
Definition: xsample01.cpp:157
sparse_vect_index::sv_storage_
sparse_vector_type sv_storage_
Definition: xsample01.cpp:181
index_size
const unsigned index_size
Definition: xsample01.cpp:53
bm::serializer::byte_order_serialization
void byte_order_serialization(bool value) BMNOEXCEPT
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:1211
bm::bv_statistics::memory_used
size_t memory_used
memory usage for all blocks and service tables
Definition: bmfunc.h:61
sparse_vect_index::map_type
std::map< unsigned, vect_addr > map_type
Definition: xsample01.cpp:174
bm::bvector::first
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
Definition: bm.h:1796
bm::chrono_taker::add_repeats
void add_repeats(unsigned inc)
Definition: bmtimer.h:121
sparse_vect_index::get_vector
void get_vector(unsigned id, std::vector< unsigned > &vect) const
Definition: xsample01.cpp:186
bvs_index::idx_
map_type idx_
Definition: xsample01.cpp:149
gap_len_table_sparse::_len
static const bm::gap_word_t _len[bm::gap_levels]
Definition: xsample01.cpp:89
sparse_vect_index::vect_addr
Definition: xsample01.cpp:167
bm.h
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
bm::sparse_vector::set
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
Definition: bmsparsevec.h:1475
main
int main(void)
Definition: xsample01.cpp:798
speed_test_sv_index
static void speed_test_sv_index(const sparse_vect_index &svi)
Definition: xsample01.cpp:738
bm::bvector::set_bit
bool set_bit(size_type n, bool val=true)
Sets bit n.
Definition: bm.h:3643
bm::bvector::optimize
void optimize(bm::word_t *temp_block=0, optmode opt_mode=opt_compress, statistics *stat=0)
Optimize memory bitvector's memory allocation.
Definition: bm.h:3097
bvs_index::buffer_type
std::vector< unsigned char > buffer_type
Definition: xsample01.cpp:146
speed_test_vect_index
static void speed_test_vect_index(const vect_index &vecti)
Definition: xsample01.cpp:676
bm::gap_levels
const unsigned gap_levels
Definition: bmconst.h:84
bm::bvector::statistics
Statistical information about bitset's memory allocation details.
Definition: bm.h:121