BitMagicC++Library
xsample01.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example xsample01.cpp
20  Demo and a benchmark on memory consumption control and logical operation
21 
22 */
23 
24 
25 #include <iostream>
26 #include <memory>
27 #include <map>
28 #include <vector>
29 #include <chrono>
30 #include <algorithm>
31 #include <stdexcept>
32 
33 #include "bm.h"
34 #include "bmalgo.h"
35 #include "bmtimer.h"
36 #include "bmserial.h"
37 #include "bmsparsevec.h"
38 
39 #include "bmsparsevec_algo.h"
40 #include "bmsparsevec_serial.h"
41 #include "bmalgo_similarity.h"
42 
43 #include "bmdbg.h"
44 
45 
46 // ----------------------------------------------------
47 // Global parameters and types
48 // ----------------------------------------------------
49 
50 // Number of vectors generated for the test
51 const unsigned index_size = 1000000;
52 
53 // Dynamic range for constructed sets
54 const unsigned max_size = 2000000;
55 
56 // Number of bits per one vector
57 const unsigned bits_per_vect = 5;
58 
59 // benchmark operation count
60 const unsigned benchmark_ops = 1000;
61 
62 // subset of vectors used as a sample
63 const unsigned sample_cnt = 250;
64 
65 // index values to extract
66 const unsigned result_set_cnt = 200;
67 
68 
69 // bit-vector type for this example
71 
72 
73 // timing storage for benchmarking
74 bm::chrono_taker::duration_map_type timing_map;
75 
76 
77 
78 /* BitMagic provides two GAP length tables for situations when we have
79  standard or embarassingly sparse vectors.
80  bm::gap_len_table - default standard
81  bm::gap_len_table_min - option for smaller vectors
82 
83  Here we define an alternative table for very sparse vectors
84 */
85 template<bool T> struct gap_len_table_sparse
86 {
88 };
89 template<bool T>
91  { 8, 32, 128, 512 };
92 
93 
94 // simple bit-vector class factory for the project
95 //
96 TBVector* construct_bvector()
97 {
98  // in this example we plan to keep lots of vectors in memory, thus
99  // use parameters to minimize memory consumption
100  //
101  TBVector* bv =
102  new TBVector(bm::BM_GAP, // use GAP compressed mode
103  gap_len_table_sparse<true>::_len, // custom lens for super sparse vectors
104  max_size // limit the maximum size
105  );
106  return bv;
107 }
108 
109 // Generic utility to destroy map of pointers
110 template<typename TM>
111 void destroy_map(TM& id_map)
112 {
113  for (typename TM::iterator it = id_map.begin();
114  it != id_map.end();
115  ++it)
116  {
117  typename TM::mapped_type mp = it->second;
118  delete mp;
119  } // for
120  id_map.clear();
121 }
122 
123 // ------------------------------------------------------------------
124 // Sample data structures
125 // ------------------------------------------------------------------
126 
127 // Sample index structure to keep a map of in-memory bit-vectors
128 //
129 struct bv_index
130 {
131  typedef std::map<unsigned, TBVector*> map_type;
133  {
134  destroy_map(idx_);
135  }
136  map_type idx_;
137 };
138 
139 // Sample index structure to keep map of in-memory serialized/compressed bit-vectors
140 //
141 struct bvs_index
142 {
143  typedef std::vector<unsigned char> buffer_type;
144  typedef std::map<unsigned, buffer_type> map_type;
145 
146  map_type idx_;
147 };
148 
149 // Sample index structure to keep map of in-memory vector<unsigned int>
150 //
152 {
153  typedef std::vector<unsigned int> buffer_type;
154  typedef std::map<unsigned, buffer_type> map_type;
155 
156  map_type idx_;
157 };
158 
159 
160 // Sample index structure as in-memory sparse_vector
161 //
163 {
164  struct vect_addr
165  {
166  unsigned offset;
167  unsigned size;
168  };
169 
171  typedef std::map<unsigned, vect_addr> map_type;
172  typedef std::vector< std::pair<uint64_t, unsigned> > delta_sum_map_type;
173 
174 
175  void get_vector(unsigned id, std::vector<unsigned>& vect) const;
176 
177 
178  sparse_vector_type sv_storage_;
179  sparse_vector_type sv_storage1_;
180  map_type idx_;
181 };
182 
183 void sparse_vect_index::get_vector(unsigned id, std::vector<unsigned>& vect) const
184 {
185  map_type::const_iterator it = idx_.find(id);
186  if (it != idx_.end())
187  {
188  const sparse_vect_index::vect_addr& vaddr = it->second;
189  vect.resize(vaddr.size+1);
190  vect[0] = sv_storage1_.get(id);
191  for (unsigned j = 1; j < vect.size(); ++j)
192  {
193  unsigned a = sv_storage_.get(j + vaddr.offset - 1);
194  a += (vect[j-1] + 1);
195  vect[j] = a;
196  } // for j
197  }
198  else
199  {
200  vect.resize(0);
201  }
202 }
203 
204 // --------------------------------------------------------------------
205 
206 
207 // set bits in a vector using various methods picked at random
208 ///
209 // one method will generate a plato of non-random integers,
210 // another random integers of near neighbors
211 // the other adds ints randomly without following any system
212 //
213 void generate_random_vector(TBVector* bv)
214 {
215  unsigned method = rand() % 5; // pick a generation method
216  if (method == 0) // generate a incremental linear sequence at random location
217  {
218  unsigned seed_id = rand() % max_size;
219  for (unsigned i = seed_id; i < seed_id+bits_per_vect; ++i)
220  {
221  if (i >= max_size)
222  break;
223  bv->set_bit(i);
224  } // for i
225  }
226  else
227  if (method == 1) // generate near neighbors
228  {
229  unsigned seed_id = rand() % max_size;
230  unsigned id = seed_id;
231  for (unsigned i = 0; i < bits_per_vect; ++i)
232  {
233  if (id >= max_size)
234  break;
235  bv->set_bit(id);
236  id += (rand() % 10);
237  if (id >= max_size)
238  id = rand() % max_size;
239  } // for i
240  }
241  else // generate completely random bits
242  {
243  for (unsigned i = 0; i < bits_per_vect; ++i)
244  {
245  unsigned id = rand() % max_size;
246  if (i >= max_size) // paranoiya check
247  break;
248  bv->set_bit(id);
249  } // for i
250  }
251 }
252 
253 // generate map of bit-vectors, each filled with just a few bits
254 //
256 {
257  for (unsigned i = 0; i < index_size; ++i)
258  {
259  std::auto_ptr<TBVector> ap(construct_bvector());
260 
261  generate_random_vector(ap.get());
262 
263  if (!ap->any()) // integrity check
264  {
265  // this should never happen
266  std::cerr << "Warning. Empty vector generated!" << std::endl;
267  }
268 
269  bvi.idx_[i] = ap.release();
270  }
271 }
272 
273 // calculate memory footprint for in memory index
274 //
275 size_t calc_memory_footprint(const bv_index& bvi)
276 {
277  size_t mem_total = 0;
278  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
279  it != bvi.idx_.end();
280  ++it)
281  {
282  const TBVector* mp = it->second;
283 
285  mp->calc_stat(&st);
286  mem_total += st.memory_used;
287 
288  mem_total += sizeof(void*);
289  } // for
290 
291  return mem_total;
292 }
293 
294 // convert bit-vector index to bit-vector serialized index
295 //
296 size_t convert_bv2bvs(const bv_index& bvi, bvs_index& bvs)
297 {
298  size_t mem_total = 0;
299  std::vector<unsigned char> buf; // prepare a temporary buffer
300  buf.reserve(1024);
301 
302  // bit-vector serializer
303  // (keep it out of the serialization loop to minimize buffers re-allocations)
304  //
306  bvsr.byte_order_serialization(false);
307  bvsr.gap_length_serialization(false);
308  bvsr.set_compression_level(4);
309 
310  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
311  it != bvi.idx_.end();
312  ++it)
313  {
314  unsigned id = it->first;
315  const TBVector* bvp = it->second;
316 
318  bvp->calc_stat(&st); // calculate max. serialized size
319 
320  buf.resize(st.max_serialize_mem); // prepare the temp buffer
321 
322  // run serialization, actual serialization size is expacted to be smaller
323  //
324  unsigned bvs_size = bvsr.serialize(*bvp, buf.data(), st.max_serialize_mem);
325 
326  // move from temp serialization buffer to compressed in-memory index
327  //
328  bvs_index::buffer_type& vbuf = bvs.idx_[id];
329  vbuf.resize(bvs_size);
330  ::memcpy(vbuf.data(), buf.data(), bvs_size);
331 
332  mem_total += bvs_size;
333 
334  mem_total += sizeof(std::vector<unsigned char>::size_type);
335 
336 
337  // paranoia check compare source and desirialized vectors
338  //
339  #ifdef DEBUG
340  {
341  TBVector bv1;
342  bm::deserialize(bv1, vbuf.data());
343  if (bv1.compare(*bvp) !=0 )
344  {
345  throw std::runtime_error("deserialization check failed");
346  }
347  }
348  #endif
349 
350  } // for
351 
352  return mem_total;
353 }
354 
355 // convert bit-vector index to vector<usingned>
356 //
357 size_t convert_bv2vect(const bv_index& bvi, vect_index& vidx)
358 {
359  size_t mem_total = 0;
360 
361  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
362  it != bvi.idx_.end();
363  ++it)
364  {
365  unsigned id = it->first;
366  const TBVector* bvp = it->second;
367 
368  unsigned count = bvp->count(); // population count
369 
370  vect_index::buffer_type& vect = vidx.idx_[id];
371  vect.resize(count);
372 
373  for (TBVector::enumerator en = bvp->first(); en.valid(); ++en)
374  {
375  vect.push_back(*en);
376  }
377 
378  mem_total +=
379  sizeof(vect_index::buffer_type::value_type) * vect.size() +
380  sizeof(vect_index::buffer_type::size_type);
381 
382  } // for
383  return mem_total;
384 }
385 
386 static
387 void bv2delta(const TBVector& bv, std::vector<unsigned>& vect)
388 {
389  // convert into a plain vector first
390  //
391  vect.resize(0);
392  for (TBVector::enumerator en = bv.first(); en.valid(); ++en)
393  {
394  vect.push_back(*en);
395  }
396 
397  // convert into delta-vector
398  //
399  {
400  for (size_t k = vect.size()-1; k >= 1; --k)
401  {
402  vect[k] -= vect[k-1];
403  --vect[k];
404  } // for
405  }
406 }
407 
408 // convert bit-vector index to bm::sparse_vector
409 //
410 size_t convert_bv2sv(const bv_index& bvi, sparse_vect_index& sv_idx)
411 {
412  size_t mem_total = 0;
413 
414  std::vector<unsigned> vect;
415 
417 
418  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
419  it != bvi.idx_.end();
420  ++it)
421  {
422  unsigned id = it->first;
423  const TBVector* bvp = it->second;
424 
425  bv2delta(*bvp, vect);
426 
427  // compute sum of the delta-vector elements add to the sort map
428  {
429  uint64_t sum = 0;
430  for (unsigned k = 1; k < vect.size(); ++k)
431  {
432  sum += vect[k];
433  } // for
434  delta_map.push_back(std::make_pair(sum, id));
435  }
436 
437  } // for
438 
439  // sort by "enthropy" (sort of)
440  //
441  std::sort(delta_map.begin(), delta_map.end());
442  if (delta_map.size() != bvi.idx_.size()) // paranoia check
443  {
444  throw std::runtime_error("delta map size is incorrect");
445  }
446 
447  unsigned sv_pos = 0; // current position in sparse vector
448  for (unsigned j = 0; j < delta_map.size(); ++j)
449  {
450  unsigned id = delta_map[j].second;
451 
452  bv_index::map_type::const_iterator it = bvi.idx_.find(id);
453  if (it == bvi.idx_.end())
454  continue;
455  const TBVector& bv = *(it->second);
456 
457  // convert into a plain delta vector again
458  bv2delta(bv, vect);
459 
461  vaddr.offset = sv_pos;
462  vaddr.size = (unsigned)(vect.size() - 1);
463 
464  sv_idx.sv_storage1_.set(id, vect[0]);
465 
466  if (vaddr.size)
467  {
468  sv_idx.sv_storage_.import(&vect[1], vaddr.size, vaddr.offset);
469  sv_pos += vaddr.size;
470  }
471 
472  sv_idx.idx_[id] = vaddr;
473  } // for
474 
475 
476  // optimize sparse vector storage, compute memory consumption
477  {
478  sparse_vect_index::sparse_vector_type::statistics st;
479 
482  mem_total += st.memory_used;
484  mem_total += st.memory_used;
485  }
486 
487  // check
488  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
489  it != bvi.idx_.end();
490  ++it)
491  {
492  unsigned id = it->first;
493  const TBVector* bvp = it->second;
494 
495  // convert into a plain vector first
496  //
497  vect.resize(0);
498  for (TBVector::enumerator en = bvp->first(); en.valid(); ++en)
499  {
500  vect.push_back(*en);
501  }
502 
503 
504  std::vector<unsigned> svect;
505  sv_idx.get_vector(id, svect);
506  if (svect.size() != vect.size())
507  {
508  std::cerr << "Size check failed! id = " << id
509  << "size() = " << svect.size()
510  << std::endl;
511  throw std::runtime_error("sparse vector content check failed");
512  }
513 
514  for (unsigned k = 0; k < vect.size(); ++k)
515  {
516  if (vect[k] != svect[k])
517  {
518  std::cerr << "SV content check failed! id = " << id
519  << " i=" << k << std::endl;
520  for (unsigned h = 0; h < vect.size(); ++h)
521  {
522  std::cout << "[" << vect[h] << "=" << svect[h] << "], ";
523  } // for h
524  std::cout << std::endl;
525  throw std::runtime_error("sparse vector content check failed");
526  }
527  } // for k
528 
529  } // for
530 
531  #ifdef DEBUG
532  bm::print_svector_stat(sv_idx.sv_storage_, true);
533  bm::print_svector_stat(sv_idx.sv_storage1_, true);
534  #endif
535 
536  return mem_total;
537 }
538 
539 
540 // speed test for in-memory bit vectors
541 // benchmark performs a mix of logical operations
542 //
544 {
545  TBVector bv_join; // OR join vector
546 
547  bm::chrono_taker tt1("1. bm::bvector<> index", 1, &timing_map);
548 
549  // join all vectors using OR operation
550  for (bv_index::map_type::const_iterator it = bvi.idx_.begin();
551  it != bvi.idx_.end();
552  ++it)
553  {
554  const TBVector* bvp = it->second;
555  bv_join |= *bvp;
556  } // for
557  bv_join.optimize();
558 
559  // a group of random vectors from the index map, compute OR
560  // then compute AND with the join vector
561  //
562  TBVector bv_res(bm::BM_GAP);
563  std::vector<unsigned> result_set;
564  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
565 
566  for (unsigned i = 0; i < benchmark_ops; ++i)
567  {
568  bv_res.clear(true); // free all blocks
569  result_set.resize(0);
570 
571  for (unsigned j = 0; j < sample_cnt; ++j)
572  {
573  unsigned id = rand() % index_size;
574  bv_index::map_type::const_iterator it = bvi.idx_.find(id);
575  if (it == bvi.idx_.end())
576  continue;
577  const TBVector& bv = *(it->second);
578  bv_res |= bv;
579  }
580 
581  bv_res &= bv_join;
582 
583  // enumerate the final result set, extract first N elements
584  //
585  TBVector::enumerator en = bv_res.first();
586  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
587  {
588  result_set.push_back(*en);
589  }
590 
591  } // for i
592 
593  tt1.add_repeats(benchmark_ops + 1);
594 }
595 
596 // speed test for in-memory serialized bit vectors
597 // this function uses bm::operation_deserializer
598 // to perform logical operation between a BLOB and bvector<> in memory
599 // and avoids extra decompression overhead
600 //
602 {
603  TBVector bv_join; // OR join vector
604 
606 
608 
609  bm::chrono_taker tt1("2. serialized bvector", 1, &timing_map);
610 
611  // join all vectors using OR operation
612  for (bvs_index::map_type::const_iterator it = bvs.idx_.begin();
613  it != bvs.idx_.end();
614  ++it)
615  {
616  const bvs_index::buffer_type& svect = it->second;
617  if (svect.size() == 0)
618  {
619  throw std::runtime_error("empty buffer error");
620  }
621  const unsigned char* buf = it->second.data();
622 
623  des.deserialize(bv_join, buf, tb, bm::set_OR);
624  } // for
625  bv_join.optimize();
626 
627  // a group of random vectors from the index map, compute OR
628  // then compute AND with the join vector
629  //
630  TBVector bv_res(bm::BM_GAP);
631  std::vector<unsigned> result_set;
632  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
633 
634  for (unsigned i = 0; i < benchmark_ops; ++i)
635  {
636  bv_res.clear(true); // free all blocks
637  result_set.resize(0);
638 
639  for (unsigned j = 0; j < sample_cnt; ++j)
640  {
641  unsigned id = rand() % index_size;
642  bvs_index::map_type::const_iterator it = bvs.idx_.find(id);
643  if (it == bvs.idx_.end())
644  continue;
645 
646  const unsigned char* buf = it->second.data();
647  des.deserialize(bv_res, buf, tb, bm::set_OR);
648  } // for j
649 
650  bv_res &= bv_join;
651 
652  // enumerate the final result set, extract first N elements
653  //
654  TBVector::enumerator en = bv_res.first();
655  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
656  {
657  result_set.push_back(*en);
658  }
659  } // for i
660 
661  tt1.add_repeats(benchmark_ops + 1);
662 }
663 
665 {
666  TBVector bv_join; // OR join vector
667 
668  bm::chrono_taker tt1("3. std::vector<unsigned> ", 1, &timing_map);
669 
670  // join all vectors using OR operation
671  for (vect_index::map_type::const_iterator it = vecti.idx_.begin();
672  it != vecti.idx_.end();
673  ++it)
674  {
675  const vect_index::buffer_type& vect = it->second;
676  if (vect.size() == 0)
677  {
678  throw std::runtime_error("empty buffer error");
679  }
680 
681  bm::combine_or(bv_join, vect.begin(), vect.end());
682  } // for
683  bv_join.optimize();
684 
685 
686  // a group of random vectors from the index map, compute OR
687  // then compute AND with the join vector
688  //
689  TBVector bv_res(bm::BM_GAP);
690  std::vector<unsigned> result_set;
691  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
692 
693  for (unsigned i = 0; i < benchmark_ops; ++i)
694  {
695  bv_res.clear(true); // free all blocks
696  result_set.resize(0);
697 
698  for (unsigned j = 0; j < sample_cnt; ++j)
699  {
700  unsigned id = rand() % index_size;
701  vect_index::map_type::const_iterator it = vecti.idx_.find(id);
702  if (it == vecti.idx_.end())
703  continue;
704 
705  const vect_index::buffer_type& vect = it->second;
706 
707  bm::combine_or(bv_join, vect.begin(), vect.end());
708  } // for j
709 
710  bv_res &= bv_join;
711 
712  // enumerate the final result set, extract first N elements
713  //
714  TBVector::enumerator en = bv_res.first();
715  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
716  {
717  result_set.push_back(*en);
718  }
719 
720  } // for i
721 
722  tt1.add_repeats(benchmark_ops + 1);
723 }
724 
726 {
727  TBVector bv_join; // OR join vector
728 
729  bm::chrono_taker tt1("4. bm::sparse_vector<unsigned> ", 1, &timing_map);
730 
731  std::vector<unsigned> vect;
732 
733  // join all vectors using OR operation
734  for (sparse_vect_index::map_type::const_iterator it = svi.idx_.begin();
735  it != svi.idx_.end();
736  ++it)
737  {
738  unsigned id = it->first;
739  svi.get_vector(id, vect);
740 
741  bm::combine_or(bv_join, vect.begin(), vect.end());
742  } // for
743  bv_join.optimize();
744 
745 
746  // a group of random vectors from the index map, compute OR
747  // then compute AND with the join vector
748  //
749  TBVector bv_res(bm::BM_GAP);
750  std::vector<unsigned> result_set;
751  result_set.reserve(result_set_cnt); // memory reservation to avoid reallocs
752 
753  for (unsigned i = 0; i < benchmark_ops; ++i)
754  {
755  bv_res.clear(true); // free all blocks
756  result_set.resize(0);
757 
758  for (unsigned j = 0; j < sample_cnt; ++j)
759  {
760  unsigned id = rand() % index_size;
761  svi.get_vector(id, vect);
762  if (vect.size() == 0)
763  continue;
764 
765  bm::combine_or(bv_join, vect.begin(), vect.end());
766  } // for j
767 
768  bv_res &= bv_join;
769 
770  // enumerate the final result set, extract first N elements
771  //
772  TBVector::enumerator en = bv_res.first();
773  for (unsigned k = 0; en.valid() && k < result_set_cnt; ++k, ++en)
774  {
775  result_set.push_back(*en);
776  }
777 
778  } // for i
779 
780  tt1.add_repeats(benchmark_ops + 1);
781 }
782 
783 
784 
785 int main(void)
786 {
787  try
788  {
789  bv_index bvi; // regular in-memory index id to bvector<>
790  bvs_index bvs; // compressed in-memory index id to bvector<> BLOB
791  vect_index vecti; // index based on plain uncompressed vector<unsigned>
792  sparse_vect_index svi; // all ids in a sparse vector
793 
794  // experiments generation, measuring memory footprints
795  //
796  generate_bv_index(bvi);
797 
798  size_t bv_mem_total = calc_memory_footprint(bvi);
799  size_t bv_mem_total_MB = bv_mem_total / (1024*1024);
800 
801  std::cout << "bm::bvector<> memory footprint = "
802  << bv_mem_total << " (" << bv_mem_total_MB << "MB)"
803  << std::endl;
804 
805  size_t bvs_mem_total = convert_bv2bvs(bvi, bvs);
806  size_t bvs_mem_total_MB = bvs_mem_total / (1024*1024);
807 
808  std::cout << "bm::bvector<> BLOB memory footprint = "
809  << bvs_mem_total << " (" << bvs_mem_total_MB << "MB)"
810  << std::endl;
811 
812  size_t vecti_mem_total = convert_bv2vect(bvi, vecti);
813  size_t vecti_mem_total_MB = vecti_mem_total / (1024*1024);
814 
815  std::cout << "std::vector<unsigned> memory footprint = "
816  << vecti_mem_total << " (" << vecti_mem_total_MB << "MB)"
817  << std::endl;
818 
819  size_t svi_mem_total = convert_bv2sv(bvi, svi);
820  size_t svi_mem_total_MB = svi_mem_total / (1024*1024);
821 
822  std::cout << "bm::sparse_vector<> memory footprint = "
823  << svi_mem_total << " (" << svi_mem_total_MB << "MB)"
824  << std::endl;
825 
826  // run performance tests
827  //
828 
829  speed_test_bv_index(bvi);
831  speed_test_vect_index(vecti);
832  speed_test_sv_index(svi);
833 
834 
835  std::cout << std::endl << "Performance (ops/sec):" << std::endl;
836  bm::chrono_taker::print_duration_map(timing_map, bm::chrono_taker::ct_ops_per_sec);
837 
838  //getchar(); // uncomment to check memory consumption at the OS level
839 
840  }
841  catch(std::exception& ex)
842  {
843  std::cerr << ex.what() << std::endl;
844  return 1;
845  }
846 
847  return 0;
848 }
849 
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bmconst.h:95
map_type idx_
Definition: xsample01.cpp:146
const unsigned result_set_cnt
Definition: xsample01.cpp:66
std::vector< unsigned char > buffer_type
Definition: xsample01.cpp:143
void generate_bv_index(bv_index &bvi)
Definition: xsample01.cpp:255
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmsparsevec.h:1154
const unsigned index_size
Definition: xsample01.cpp:51
const unsigned bits_per_vect
Definition: xsample01.cpp:57
void generate_random_vector(TBVector *bv)
Definition: xsample01.cpp:213
unsigned serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serilization into memory block.
Definition: bmserial.h:910
void optimize(bm::word_t *temp_block=0, optmode opt_mode=opt_compress, statistics *stat=0)
Optimize memory bitvector&#39;s memory allocation.
Definition: bm.h:2311
void speed_test_sv_index(const sparse_vect_index &svi)
Definition: xsample01.cpp:725
size_t max_serialize_mem
Estimated maximum of memory required for serialization.
Definition: bmfunc.h:57
const unsigned sample_cnt
Definition: xsample01.cpp:63
Bit-vector serialization class.
Definition: bmserial.h:143
GAP compression is ON.
Definition: bmconst.h:105
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
void speed_test_bvs_index(const bvs_index &bvs)
Definition: xsample01.cpp:601
compress blocks when possible
Definition: bm.h:1667
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
Definition: bmsparsevec.h:1008
int main(void)
Definition: xsample01.cpp:785
std::map< unsigned, buffer_type > map_type
Definition: xsample01.cpp:154
int compare(const bvector< Alloc > &bvect) const
Lexicographical comparison with a bitvector.
Definition: bm.h:2400
Statistical information about bitset&#39;s memory allocation details.
Definition: bm.h:118
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:60
void get_vector(unsigned id, std::vector< unsigned > &vect) const
Definition: xsample01.cpp:183
bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_type
Definition: xsample01.cpp:170
map_type idx_
Definition: xsample01.cpp:136
std::vector< std::pair< uint64_t, unsigned > > delta_sum_map_type
Definition: xsample01.cpp:172
size_t memory_used
Memory used by bitvector including temp and service blocks.
Definition: bmfunc.h:59
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
Definition: bm.h:1749
void byte_order_serialization(bool value)
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:655
TBVector * construct_bvector()
Definition: xsample01.cpp:96
unsigned deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Bitvector deserialization from memory.
Definition: bmserial.h:1250
void destroy_map(TM &id_map)
Definition: xsample01.cpp:111
map_type idx_
Definition: xsample01.cpp:156
const unsigned gap_levels
Definition: bmconst.h:66
bm::id_t count() const
Returns count of bits which are 1.
Definition: bm.h:2004
unsigned short gap_word_t
Definition: bmconst.h:60
std::map< unsigned, TBVector * > map_type
Definition: xsample01.cpp:131
bool valid() const
Checks if iterator is still valid.
Definition: bm.h:260
std::map< unsigned, buffer_type > map_type
Definition: xsample01.cpp:144
size_t calc_memory_footprint(const bv_index &bvi)
Definition: xsample01.cpp:275
enumerator end() const
Returns enumerator pointing on the next bit after the last.
Definition: bm.h:1758
void speed_test_bv_index(const bv_index &bvi)
Definition: xsample01.cpp:543
static void bv2delta(const TBVector &bv, std::vector< unsigned > &vect)
Definition: xsample01.cpp:387
std::map< unsigned, vect_addr > map_type
Definition: xsample01.cpp:171
void gap_length_serialization(bool value)
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:649
void import(const value_type *arr, size_type size, size_type offset=0)
Import list of elements from a C-style array.
Definition: bmsparsevec.h:538
std::vector< unsigned int > buffer_type
Definition: xsample01.cpp:153
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const
Calculates bitvector statistics.
Definition: bm.h:2533
static const bm::gap_word_t _len[bm::gap_levels]
Definition: xsample01.cpp:87
sparse_vector_type sv_storage_
Definition: xsample01.cpp:178
size_t convert_bv2sv(const bv_index &bvi, sparse_vect_index &sv_idx)
Definition: xsample01.cpp:410
bm::chrono_taker::duration_map_type timing_map
Definition: xsample01.cpp:74
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:423
static unsigned deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:2937
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2029
const unsigned benchmark_ops
Definition: xsample01.cpp:60
void speed_test_vect_index(const vect_index &vecti)
Definition: xsample01.cpp:664
sparse_vector_type sv_storage1_
Definition: xsample01.cpp:179
const unsigned max_size
Definition: xsample01.cpp:54
bm::bvector TBVector
Definition: xsample01.cpp:70
bool set_bit(bm::id_t n, bool val=true)
Sets bit n.
Definition: bm.h:1208
void clear(bool free_mem=false)
Clears every bit in the bitvector.
Definition: bm.h:1317
void set_compression_level(unsigned clevel)
Set compression level.
Definition: bmserial.h:629
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:532
size_t convert_bv2vect(const bv_index &bvi, vect_index &vidx)
Definition: xsample01.cpp:357
size_t convert_bv2bvs(const bv_index &bvi, bvs_index &bvs)
Definition: xsample01.cpp:296