BitMagicC++Library
bmalgo_impl.h
Go to the documentation of this file.
1 #ifndef BMALGO_IMPL__H__INCLUDED__
2 #define BMALGO_IMPL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 #ifdef _MSC_VER
22 #pragma warning( push )
23 #pragma warning( disable : 4311 4312 4127)
24 #endif
25 
26 #include "bmdef.h"
27 #include "bmutil.h"
28 
29 namespace bm
30 {
31 
32 /*!
33  @defgroup setalgo bvector<> algorithms
34 
35  Set algebra algorithms using bit-vectors, arrays. Binary metrics and distances. Random sub-sets.
36  @ingroup bvector
37  */
38 
39 /*!
40  @defgroup distance Binary-distance metrics
41 
42  Distance metrics and algorithms to compute binary distances
43  @ingroup setalgo
44  */
45 
46 
47 /*!
48  \brief Distance metrics codes defined for vectors A and B
49  \ingroup distance
50 */
52 {
53  COUNT_AND = set_COUNT_AND, //!< (A & B).count()
54  COUNT_XOR = set_COUNT_XOR, //!< (A ^ B).count()
55  COUNT_OR = set_COUNT_OR, //!< (A | B).count()
56  COUNT_SUB_AB = set_COUNT_SUB_AB, //!< (A - B).count()
57  COUNT_SUB_BA = set_COUNT_SUB_BA, //!< (B - A).count()
58  COUNT_A = set_COUNT_A, //!< A.count()
59  COUNT_B = set_COUNT_B //!< B.count()
60 };
61 
62 /**
63  Convert set operation into compatible distance metric
64  \ingroup distance
65 */
66 inline
68 {
70  if (op == set_COUNT) op = set_COUNT_B;
71  // distance metric is created as a set operation sub-class
72  // simple cast will work to convert
73  return (distance_metric) op;
74 }
75 
76 /*!
77  \brief Distance metric descriptor, holds metric code and result.
78  \sa distance_operation
79 */
80 
82 {
85 
87  : metric(m),
88  result(0)
89  {}
91  : metric(bm::COUNT_XOR),
92  result(0)
93  {}
94 
95  /*!
96  \brief Sets metric result to 0
97  */
98  void reset()
99  {
100  result = 0;
101  }
102 };
103 
104 
105 
106 /*!
107  \brief Internal function computes different distance metrics.
108  \internal
109  \ingroup distance
110 
111 */
112 inline
114  const bm::word_t* arg_blk,
116  distance_metric_descriptor* dmit_end)
117 
118 {
119  gap_word_t* g1 = BMGAP_PTR(blk);
120  gap_word_t* g2 = BMGAP_PTR(arg_blk);
121 
122  unsigned gap = BM_IS_GAP(blk);
123  unsigned arg_gap = BM_IS_GAP(arg_blk);
124 
125  if (gap) // first block GAP-type
126  {
127  if (arg_gap) // both blocks GAP-type
128  {
129  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
130  {
131  distance_metric_descriptor& dmd = *it;
132 
133  switch (dmd.metric)
134  {
135  case bm::COUNT_AND:
136  dmd.result += gap_count_and(g1, g2);
137  break;
138  case bm::COUNT_OR:
139  dmd.result += gap_count_or(g1, g2);
140  break;
141  case bm::COUNT_SUB_AB:
142  dmd.result += gap_count_sub(g1, g2);
143  break;
144  case bm::COUNT_SUB_BA:
145  dmd.result += gap_count_sub(g2, g1);
146  break;
147  case bm::COUNT_XOR:
148  dmd.result += gap_count_xor(g1, g2);
149  break;
150  case bm::COUNT_A:
151  dmd.result += gap_bit_count_unr(g1);
152  break;
153  case bm::COUNT_B:
154  dmd.result += gap_bit_count_unr(g2);
155  break;
156  } // switch
157 
158  } // for it
159 
160  return;
161 
162  }
163  else // first block - GAP, argument - BITSET
164  {
165  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
166  {
167  distance_metric_descriptor& dmd = *it;
168 
169  switch (dmd.metric)
170  {
171  case bm::COUNT_AND:
172  if (arg_blk)
173  dmd.result += gap_bitset_and_count(arg_blk, g1);
174  break;
175  case bm::COUNT_OR:
176  if (!arg_blk)
177  dmd.result += gap_bit_count_unr(g1);
178  else
179  dmd.result += gap_bitset_or_count(arg_blk, g1);
180  break;
181  case bm::COUNT_SUB_AB:
182  {
184 
185  gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1);
186  dmd.result +=
187  bit_operation_sub_count((bm::word_t*)temp_bit_blk,
188  ((bm::word_t*)temp_bit_blk) + bm::set_block_size,
189  arg_blk);
190  }
191  break;
192  case bm::COUNT_SUB_BA:
193  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
195  blk,
196  it, it+1);
197  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
198  break;
199  case bm::COUNT_XOR:
200  if (!arg_blk)
201  dmd.result += gap_bit_count_unr(g1);
202  else
203  dmd.result += gap_bitset_xor_count(arg_blk, g1);
204  break;
205  case bm::COUNT_A:
206  if (g1)
207  dmd.result += gap_bit_count_unr(g1);
208  break;
209  case bm::COUNT_B:
210  if (arg_blk)
211  {
212  dmd.result +=
213  bit_block_calc_count(arg_blk,
214  arg_blk + bm::set_block_size);
215  }
216  break;
217  } // switch
218 
219  } // for it
220 
221  return;
222 
223  }
224  }
225  else // first block is BITSET-type
226  {
227  if (arg_gap) // second argument block is GAP-type
228  {
229  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
230  {
231  distance_metric_descriptor& dmd = *it;
232 
233  switch (dmd.metric)
234  {
235  case bm::COUNT_AND:
236  if (blk)
237  dmd.result += gap_bitset_and_count(blk, g2);
238  break;
239  case bm::COUNT_OR:
240  if (!blk)
241  dmd.result += gap_bit_count_unr(g2);
242  else
243  dmd.result += gap_bitset_or_count(blk, g2);
244  break;
245  case bm::COUNT_SUB_AB:
246  if (blk)
247  dmd.result += gap_bitset_sub_count(blk, g2);
248  break;
249  case bm::COUNT_SUB_BA:
250  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
252  //arg_gap,
253  blk,
254  //gap,
255  it, it+1);
256  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
257  break;
258  case bm::COUNT_XOR:
259  if (!blk)
260  dmd.result += gap_bit_count_unr(g2);
261  else
262  dmd.result += gap_bitset_xor_count(blk, g2);
263  break;
264  case bm::COUNT_A:
265  if (blk)
266  {
267  dmd.result +=
269  blk + bm::set_block_size);
270  }
271  break;
272  case bm::COUNT_B:
273  if (g2)
274  dmd.result += gap_bit_count_unr(g2);
275  break;
276  } // switch
277 
278  } // for it
279 
280  return;
281  }
282  }
283 
284  // --------------------------------------------
285  //
286  // Here we combine two plain bitblocks
287 
288  //const bm::word_t* blk_end;
289  //const bm::word_t* arg_end;
290 
291  //blk_end = blk + (bm::set_block_size);
292  //arg_end = arg_blk + (bm::set_block_size);
293 
294 
295  for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
296  {
297  distance_metric_descriptor& dmd = *it;
300  if (gfunc)
301  {
302  dmd.result += (*gfunc)(blk, blk + bm::set_block_size, arg_blk);
303  }
304  else
305  {
306  switch (dmd.metric)
307  {
308  case bm::COUNT_A:
309  if (blk)
311  break;
312  case bm::COUNT_B:
313  if (arg_blk)
314  dmd.result +=
315  bit_block_calc_count(arg_blk,
316  arg_blk + bm::set_block_size);
317  break;
318  default:
319  BM_ASSERT(0);
320  } // switch
321  }
322 
323  } // for it
324 }
325 
326 /*!
327 \brief Internal function computes AND distance.
328 \internal
329 \ingroup distance
330 */
331 inline
333  const bm::word_t* arg_blk)
334 {
335  unsigned gap = BM_IS_GAP(blk);
336  unsigned arg_gap = BM_IS_GAP(arg_blk);
337  if (gap) // first block GAP-type
338  {
339  if (arg_gap) // both blocks GAP-type
340  {
341  return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk));
342  }
343  else // first block - GAP, argument - BITSET
344  {
345  return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk));
346  }
347  }
348  else // first block is BITSET-type
349  {
350  if (arg_gap) // second argument block is GAP-type
351  {
352  return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk));
353  }
354  }
355 
356  // --------------------------------------------
357  // Here we combine two plain bitblocks
358 
359  return bit_operation_and_count(blk, blk + (bm::set_block_size), arg_blk);
360 }
361 
362 
363 /*!
364  \brief Internal function computes different existense of distance metric.
365  \internal
366  \ingroup distance
367 */
368 inline
370  unsigned gap,
371  const bm::word_t* arg_blk,
372  int arg_gap,
374  distance_metric_descriptor* dmit_end)
375 
376 {
377  gap_word_t* res=0;
378 
379  gap_word_t* g1 = BMGAP_PTR(blk);
380  gap_word_t* g2 = BMGAP_PTR(arg_blk);
381 
382  if (gap) // first block GAP-type
383  {
384  if (arg_gap) // both blocks GAP-type
385  {
386  gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result
387 
388  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
389  {
390  distance_metric_descriptor& dmd = *it;
391  if (dmd.result)
392  {
393  continue;
394  }
395  res = 0;
396  unsigned dsize = 0;
397  switch (dmd.metric)
398  {
399  case bm::COUNT_AND:
400  dmd.result += gap_operation_any_and(g1, g2);
401  break;
402  case bm::COUNT_OR:
403  res = gap_operation_or(g1, g2, tmp_buf, dsize);
404  break;
405  case bm::COUNT_SUB_AB:
406  dmd.result += gap_operation_any_sub(g1, g2);
407  break;
408  case bm::COUNT_SUB_BA:
409  dmd.result += gap_operation_any_sub(g2, g1);
410  break;
411  case bm::COUNT_XOR:
412  dmd.result += gap_operation_any_xor(g1, g2);
413  break;
414  case bm::COUNT_A:
415  res = g1;
416  break;
417  case bm::COUNT_B:
418  res = g2;
419  break;
420  } // switch
421  if (res)
423 
424  } // for it
425 
426  return;
427 
428  }
429  else // first block - GAP, argument - BITSET
430  {
431  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
432  {
433  distance_metric_descriptor& dmd = *it;
434  if (dmd.result)
435  {
436  continue;
437  }
438 
439  switch (dmd.metric)
440  {
441  case bm::COUNT_AND:
442  if (arg_blk)
443  dmd.result += gap_bitset_and_any(arg_blk, g1);
444  break;
445  case bm::COUNT_OR:
446  if (!arg_blk)
448  else
449  dmd.result += gap_bitset_or_any(arg_blk, g1);
450  break;
451  case bm::COUNT_SUB_AB:
452  {
454  gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
455  dmd.result +=
456  bit_operation_sub_any((bm::word_t*)temp_blk,
457  ((bm::word_t*)temp_blk) + bm::set_block_size,
458  arg_blk);
459  }
460  break;
461  case bm::COUNT_SUB_BA:
462  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
464  arg_gap,
465  blk,
466  gap,
467  it, it+1);
468  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
469  break;
470  case bm::COUNT_XOR:
471  if (!arg_blk)
473  else
474  dmd.result += gap_bitset_xor_any(arg_blk, g1);
475  break;
476  case bm::COUNT_A:
477  if (g1)
479  break;
480  case bm::COUNT_B:
481  if (arg_blk)
482  {
483  dmd.result +=
484  !bit_is_all_zero((bm::wordop_t*)arg_blk,
485  (bm::wordop_t*)(arg_blk + bm::set_block_size));
486  }
487  break;
488  } // switch
489 
490  } // for it
491 
492  return;
493 
494  }
495  }
496  else // first block is BITSET-type
497  {
498  if (arg_gap) // second argument block is GAP-type
499  {
500  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
501  {
502  distance_metric_descriptor& dmd = *it;
503  if (dmd.result)
504  {
505  continue;
506  }
507 
508  switch (dmd.metric)
509  {
510  case bm::COUNT_AND:
511  if (blk)
512  dmd.result += gap_bitset_and_any(blk, g2);
513  break;
514  case bm::COUNT_OR:
515  if (!blk)
517  else
518  dmd.result += gap_bitset_or_any(blk, g2);
519  break;
520  case bm::COUNT_SUB_AB:
521  if (blk)
522  dmd.result += gap_bitset_sub_any(blk, g2);
523  break;
524  case bm::COUNT_SUB_BA:
525  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
527  arg_gap,
528  blk,
529  gap,
530  it, it+1);
531  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
532  break;
533  case bm::COUNT_XOR:
534  if (!blk)
536  else
537  dmd.result += gap_bitset_xor_any(blk, g2);
538  break;
539  case bm::COUNT_A:
540  if (blk)
541  {
542  dmd.result+=
545  }
546  break;
547  case bm::COUNT_B:
548  if (g2)
550  break;
551  } // switch
552 
553  } // for it
554 
555  return;
556  }
557  }
558 
559  // --------------------------------------------
560  //
561  // Here we combine two plain bitblocks
562 
563  const bm::word_t* blk_end;
564  const bm::word_t* arg_end;
565 
566  blk_end = blk + (bm::set_block_size);
567  arg_end = arg_blk + (bm::set_block_size);
568 
569  for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
570  {
571  distance_metric_descriptor& dmd = *it;
572  if (dmd.result)
573  {
574  continue;
575  }
576 
577  switch (dmd.metric)
578  {
579  case bm::COUNT_AND:
580  dmd.result +=
581  bit_operation_and_any(blk, blk_end, arg_blk);
582  break;
583  case bm::COUNT_OR:
584  dmd.result +=
585  bit_operation_or_any(blk, blk_end, arg_blk);
586  break;
587  case bm::COUNT_SUB_AB:
588  dmd.result +=
589  bit_operation_sub_any(blk, blk_end, arg_blk);
590  break;
591  case bm::COUNT_SUB_BA:
592  dmd.result +=
593  bit_operation_sub_any(arg_blk, arg_end, blk);
594  break;
595  case bm::COUNT_XOR:
596  dmd.result +=
597  bit_operation_xor_any(blk, blk_end, arg_blk);
598  break;
599  case bm::COUNT_A:
600  if (blk)
601  dmd.result += !bit_is_all_zero((bm::wordop_t*)blk,
602  (bm::wordop_t*)blk_end);
603  break;
604  case bm::COUNT_B:
605  if (arg_blk)
606  dmd.result += !bit_is_all_zero((bm::wordop_t*)arg_blk,
607  (bm::wordop_t*)arg_end);
608  break;
609  } // switch
610 
611  } // for it
612 }
613 
614 
615 
616 /*!
617  Convenience internal function to compute combine count for one metric
618  \internal
619  \ingroup distance
620 */
621 inline
623 // unsigned gap,
624  const bm::word_t* arg_blk,
625 // int arg_gap,
626  //bm::word_t* temp_blk,
628 {
629  distance_metric_descriptor dmd(metric);
631  arg_blk, //arg_gap,
632  //temp_blk,
633  &dmd, &dmd+1);
634  return dmd.result;
635 }
636 
637 
638 /*!
639  Convenience internal function to compute combine any for one metric
640  \internal
641  \ingroup distance
642 */
643 inline
645  unsigned gap,
646  const bm::word_t* arg_blk,
647  int arg_gap,
649 {
650  distance_metric_descriptor dmd(metric);
652  arg_blk, arg_gap,
653  &dmd, &dmd+1);
654  return dmd.result;
655 }
656 
657 /*!
658  \brief Staging function for distance operation
659 
660  \return temp block allocated (or NULL)
661 
662  \internal
663 */
664 inline
666  const distance_metric_descriptor* dmit_end,
667  bool* is_all_and)
668 {
669  for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
670  {
671  if (it->metric != bm::COUNT_AND)
672  {
673  *is_all_and = false;
674  }
675  } // for
676 }
677 
678 /*!
679  \brief Distance computing template function.
680 
681  Function receives two bitvectors and an array of distance metrics
682  (metrics pipeline). Function computes all metrics saves result into
683  corresponding pipeline results (distance_metric_descriptor::result)
684  An important detail is that function reuses metric descriptors,
685  incrementing received values. It allows you to accumulate results
686  from different calls in the pipeline.
687 
688  \param bv1 - argument bitvector 1 (A)
689  \param bv2 - argument bitvector 2 (B)
690  \param dmit - pointer to first element of metric descriptors array
691  Input-Output parameter, receives metric code as input,
692  computation is added to "result" field
693  \param dmit_end - pointer to (last+1) element of metric descriptors array
694  \ingroup distance
695 
696 */
697 
698 template<class BV>
699 void distance_operation(const BV& bv1,
700  const BV& bv2,
702  distance_metric_descriptor* dmit_end)
703 {
704  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
705  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
706 
707  bool is_all_and = true; // flag is distance operation is just COUNT_AND
708  distance_stage(dmit, dmit_end, &is_all_and);
709 
710  bm::word_t*** blk_root = bman1.top_blocks_root();
711  unsigned block_idx = 0;
712  unsigned i, j;
713 
714  const bm::word_t* blk;
715  const bm::word_t* arg_blk;
716 
718 
719  unsigned effective_top_block_size = bman1.effective_top_block_size();
720  unsigned ebs2 = bman2.effective_top_block_size();
721  if (ebs2 > effective_top_block_size)
722  effective_top_block_size = ebs2;
723 
724  for (i = 0; i < effective_top_block_size; ++i)
725  {
726  bm::word_t** blk_blk = blk_root ? blk_root[i] : 0;
727 
728  if (blk_blk == 0) // not allocated
729  {
730  // AND operation requested - we can skip this portion here
731  if (is_all_and)
732  {
733  block_idx += bm::set_array_size;
734  continue;
735  }
736  const bm::word_t* const* bvbb = bman2.get_topblock(i);
737  if (bvbb == 0)
738  {
739  block_idx += bm::set_array_size;
740  continue;
741  }
742 
743  blk = 0;
744  for (j = 0; j < bm::set_array_size; ++j,++block_idx)
745  {
746  arg_blk = bman2.get_block(i, j);
747  if (!arg_blk)
748  continue;
750  arg_blk,
751  dmit, dmit_end);
752  } // for j
753  continue;
754  }
755 
756  for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
757  {
758  blk = BLOCK_ADDR_SAN(blk_blk[j]);
759  if (blk == 0 && is_all_and)
760  continue;
761 
762  arg_blk = bman2.get_block(i, j);
763 
764  if (blk == 0 && arg_blk == 0)
765  continue;
766 
768  arg_blk,
769  dmit, dmit_end);
770 
771 
772  } // for j
773 
774  } // for i
775 }
776 
777 
778 /*!
779 \brief Distance AND computing template function.
780 
781 
782 \param bv1 - argument bitvector 1 (A)
783 \param bv2 - argument bitvector 2 (B)
784 \ingroup distance
785 
786 */
787 
788 template<class BV>
789 unsigned distance_and_operation(const BV& bv1,
790  const BV& bv2)
791 {
792  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
793  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
794 
795  if (!bman1.is_init() || !bman2.is_init())
796  return 0;
797 
798  bm::word_t*** blk_root = bman1.top_blocks_root();
799  bm::word_t*** blk_root_arg = bman2.top_blocks_root();
800  unsigned count = 0;
801 
803 
804  unsigned effective_top_block_size =
805  bm::min_value(bman1.effective_top_block_size(),
806  bman2.effective_top_block_size());
807 
808  for (unsigned i = 0; i < effective_top_block_size; ++i)
809  {
810  bm::word_t** blk_blk;
811  bm::word_t** blk_blk_arg;
812  if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0)
813  {
814  continue;
815  }
816  for (unsigned j = 0; j < bm::set_array_size; j+=4)
817  {
818  (blk_blk[j] && blk_blk_arg[j]) ?
819  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j]), BLOCK_ADDR_SAN(blk_blk_arg[j]))
820  :0;
821  (blk_blk[j+1] && blk_blk_arg[j+1]) ?
822  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+1]), BLOCK_ADDR_SAN(blk_blk_arg[j+1]))
823  :0;
824  (blk_blk[j+2] && blk_blk_arg[j+2]) ?
825  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+2]), BLOCK_ADDR_SAN(blk_blk_arg[j+2]))
826  :0;
827  (blk_blk[j+3] && blk_blk_arg[j+3]) ?
828  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+3]), BLOCK_ADDR_SAN(blk_blk_arg[j+3]))
829  :0;
830  } // for j
831 
832  } // for i
833  return count;
834 }
835 
836 
837 
838 
839 /*!
840  \brief Distance screening template function.
841 
842  Function receives two bitvectors and an array of distance metrics
843  (metrics pipeline). Function computes possybility of a metric(any bit)
844  saves result into corresponding pipeline results
845  (distance_metric_descriptor::result)
846  An important detail is that function reuses metric descriptors,
847  incrementing received values. It allows you to accumulate results
848  from different calls in the pipeline.
849 
850  \param bv1 - argument bitvector 1 (A)
851  \param bv2 - argument bitvector 2 (B)
852  \param dmit - pointer to first element of metric descriptors array
853  Input-Output parameter, receives metric code as input,
854  computation is added to "result" field
855  \param dmit_end - pointer to (last+1) element of metric descriptors array
856  \ingroup distance
857 
858 */
859 
860 template<class BV>
861 void distance_operation_any(const BV& bv1,
862  const BV& bv2,
864  distance_metric_descriptor* dmit_end)
865 {
866  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
867  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
868 
869  bool is_all_and = true; // flag is distance operation is just COUNT_AND
870  distance_stage(dmit, dmit_end, &is_all_and);
871 
872  bm::word_t*** blk_root = bman1.top_blocks_root();
873  unsigned block_idx = 0;
874  unsigned i, j;
875 
876  const bm::word_t* blk;
877  const bm::word_t* arg_blk;
878  bool blk_gap;
879  bool arg_gap;
880 
882 
883  unsigned effective_top_block_size = bman1.effective_top_block_size();
884  unsigned ebs2 = bman2.effective_top_block_size();
885  if (ebs2 > effective_top_block_size)
886  effective_top_block_size = ebs2;
887 
888  for (i = 0; i < effective_top_block_size; ++i)
889  {
890  bm::word_t** blk_blk = blk_root ? blk_root[i] : 0;
891 
892  if (blk_blk == 0) // not allocated
893  {
894  // AND operation requested - we can skip this portion here
895  if (is_all_and)
896  {
897  block_idx += bm::set_array_size;
898  continue;
899  }
900 
901  const bm::word_t* const* bvbb = bman2.get_topblock(i);
902  if (bvbb == 0)
903  {
904  block_idx += bm::set_array_size;
905  continue;
906  }
907 
908  blk = 0;
909  blk_gap = false;
910 
911  for (j = 0; j < bm::set_array_size; ++j,++block_idx)
912  {
913  arg_blk = bman2.get_block(i, j);
914  if (!arg_blk)
915  continue;
916  arg_gap = BM_IS_GAP(arg_blk);
917 
919  arg_blk, arg_gap,
920  dmit, dmit_end);
921 
922  // check if all distance requests alredy resolved
923  bool all_resolved = false;
925  do
926  {
927  if (!it->result)
928  {
929  all_resolved = false;
930  break;
931  }
932  ++it;
933  } while (it < dmit_end);
934  if (all_resolved)
935  return;
936  } // for j
937 
938  continue;
939  }
940 
941  for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
942  {
943  blk = BLOCK_ADDR_SAN(blk_blk[j]);
944  if (blk == 0 && is_all_and)
945  continue;
946 
947  arg_blk = bman2.get_block(i, j);
948 
949  if (blk == 0 && arg_blk == 0)
950  continue;
951 
952  arg_gap = BM_IS_GAP(arg_blk);
953  blk_gap = BM_IS_GAP(blk);
954 
956  arg_blk, arg_gap,
957  dmit, dmit_end);
958 
959  // check if all distance requests alredy resolved
960  bool all_resolved = false;
962  do
963  {
964  if (!it->result)
965  {
966  all_resolved = false;
967  break;
968  }
969  ++it;
970  } while (it < dmit_end);
971  if (all_resolved)
972  return;
973 
974  } // for j
975 
976  } // for i
977 }
978 
979 
980 
981 /*!
982  \brief Computes bitcount of AND operation of two bitsets
983  \param bv1 - Argument bit-vector.
984  \param bv2 - Argument bit-vector.
985  \return bitcount of the result
986  \ingroup distance
987 */
988 template<class BV>
989 bm::id_t count_and(const BV& bv1, const BV& bv2)
990 {
991  return distance_and_operation(bv1, bv2);
992 }
993 
994 /*!
995  \brief Computes if there is any bit in AND operation of two bitsets
996  \param bv1 - Argument bit-vector.
997  \param bv2 - Argument bit-vector.
998  \return non zero value if there is any bit
999  \ingroup distance
1000 */
1001 template<class BV>
1002 bm::id_t any_and(const BV& bv1, const BV& bv2)
1003 {
1005 
1006  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1007  return dmd.result;
1008 }
1009 
1010 
1011 
1012 /*!
1013  \brief Computes bitcount of XOR operation of two bitsets
1014  \param bv1 - Argument bit-vector.
1015  \param bv2 - Argument bit-vector.
1016  \return bitcount of the result
1017  \ingroup distance
1018 */
1019 template<class BV>
1020 bm::id_t count_xor(const BV& bv1, const BV& bv2)
1021 {
1023 
1024  distance_operation(bv1, bv2, &dmd, &dmd+1);
1025  return dmd.result;
1026 }
1027 
1028 /*!
1029  \brief Computes if there is any bit in XOR operation of two bitsets
1030  \param bv1 - Argument bit-vector.
1031  \param bv2 - Argument bit-vector.
1032  \return non zero value if there is any bit
1033  \ingroup distance
1034 */
1035 template<class BV>
1036 bm::id_t any_xor(const BV& bv1, const BV& bv2)
1037 {
1039 
1040  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1041  return dmd.result;
1042 }
1043 
1044 
1045 
1046 /*!
1047  \brief Computes bitcount of SUB operation of two bitsets
1048  \param bv1 - Argument bit-vector.
1049  \param bv2 - Argument bit-vector.
1050  \return bitcount of the result
1051  \ingroup distance
1052 */
1053 template<class BV>
1054 bm::id_t count_sub(const BV& bv1, const BV& bv2)
1055 {
1057 
1058  distance_operation(bv1, bv2, &dmd, &dmd+1);
1059  return dmd.result;
1060 }
1061 
1062 
1063 /*!
1064  \brief Computes if there is any bit in SUB operation of two bitsets
1065  \param bv1 - Argument bit-vector.
1066  \param bv2 - Argument bit-vector.
1067  \return non zero value if there is any bit
1068  \ingroup distance
1069 */
1070 template<class BV>
1071 bm::id_t any_sub(const BV& bv1, const BV& bv2)
1072 {
1074 
1075  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1076  return dmd.result;
1077 }
1078 
1079 
1080 /*!
1081  \brief Computes bitcount of OR operation of two bitsets
1082  \param bv1 - Argument bit-vector.
1083  \param bv2 - Argument bit-vector.
1084  \return bitcount of the result
1085  \ingroup distance
1086 */
1087 template<class BV>
1088 bm::id_t count_or(const BV& bv1, const BV& bv2)
1089 {
1091 
1092  distance_operation(bv1, bv2, &dmd, &dmd+1);
1093  return dmd.result;
1094 }
1095 
1096 /*!
1097  \brief Computes if there is any bit in OR operation of two bitsets
1098  \param bv1 - Argument bit-vector.
1099  \param bv2 - Argument bit-vector.
1100  \return non zero value if there is any bit
1101  \ingroup distance
1102 */
1103 template<class BV>
1104 bm::id_t any_or(const BV& bv1, const BV& bv2)
1105 {
1107 
1108  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1109  return dmd.result;
1110 }
1111 
1112 
1113 /**
1114  \brief Internal algorithms scans the input for the block range limit
1115  \internal
1116 */
1117 template<class It>
1118 It block_range_scan(It first, It last, unsigned nblock, unsigned* max_id)
1119 {
1120  It right;
1121  for (right = first; right != last; ++right)
1122  {
1123  unsigned v = unsigned(*right);
1124  BM_ASSERT(v < bm::id_max);
1125  if (v >= *max_id)
1126  *max_id = v;
1127  unsigned nb = v >> bm::set_block_shift;
1128  if (nb != nblock)
1129  break;
1130  }
1131  return right;
1132 }
1133 
1134 /**
1135  \brief OR Combine bitvector and the iterable sequence
1136 
1137  Algorithm combines bvector with sequence of integers
1138  (represents another set). When the agrument set is sorted
1139  this method can give serious increase in performance.
1140 
1141  \param bv - destination bitvector
1142  \param first - first element of the iterated sequence
1143  \param last - last element of the iterated sequence
1144 
1145  \ingroup setalgo
1146 */
1147 template<class BV, class It>
1148 void combine_or(BV& bv, It first, It last)
1149 {
1150  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1151  if (!bman.is_init())
1152  bman.init_tree();
1153 
1154  unsigned max_id = 0;
1155 
1156  while (first < last)
1157  {
1158  unsigned nblock = unsigned((*first) >> bm::set_block_shift);
1159  It right = block_range_scan(first, last, nblock, &max_id);
1160 
1161  if (max_id >= bv.size())
1162  {
1163  BM_ASSERT(max_id < bm::id_max);
1164  bv.resize(max_id + 1);
1165  }
1166 
1167  // now we have one in-block array of bits to set
1168 
1169  label1:
1170 
1171  int block_type;
1172  bm::word_t* blk =
1173  bman.check_allocate_block(nblock,
1174  true,
1175  bv.get_new_blocks_strat(),
1176  &block_type);
1177  if (!blk)
1178  continue;
1179 
1180  if (block_type == 1) // gap
1181  {
1182  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1183  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1184 
1185  for (; first < right; ++first)
1186  {
1187  unsigned is_set;
1188  unsigned nbit = (*first) & bm::set_block_mask;
1189 
1190  unsigned new_block_len =
1191  gap_set_value(true, gap_blk, nbit, &is_set);
1192  if (new_block_len > threshold)
1193  {
1194  bman.extend_gap_block(nblock, gap_blk);
1195  ++first;
1196  goto label1; // block pointer changed - goto reset
1197  }
1198  }
1199  }
1200  else // bit block
1201  {
1202  for (; first < right; ++first)
1203  {
1204  unsigned nbit = unsigned(*first & bm::set_block_mask);
1205  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1206  nbit &= bm::set_word_mask;
1207  blk[nword] |= (1u << nbit);
1208  } // for
1209  }
1210  } // while
1211 
1212  bv.forget_count();
1213 }
1214 
1215 
1216 /**
1217  \brief XOR Combine bitvector and the iterable sequence
1218 
1219  Algorithm combines bvector with sequence of integers
1220  (represents another set). When the agrument set is sorted
1221  this method can give serious increase in performance.
1222 
1223  \param bv - destination bitvector
1224  \param first - first element of the iterated sequence
1225  \param last - last element of the iterated sequence
1226 
1227  \ingroup setalgo
1228 */
1229 template<class BV, class It>
1230 void combine_xor(BV& bv, It first, It last)
1231 {
1232  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1233  if (!bman.is_init())
1234  bman.init_tree();
1235 
1236  unsigned max_id = 0;
1237 
1238  while (first < last)
1239  {
1240  unsigned nblock = unsigned((*first) >> bm::set_block_shift);
1241  It right = block_range_scan(first, last, nblock, &max_id);
1242 
1243  if (max_id >= bv.size())
1244  {
1245  BM_ASSERT(max_id < bm::id_max);
1246  bv.resize(max_id + 1);
1247  }
1248 
1249  // now we have one in-block array of bits to set
1250 
1251  label1:
1252 
1253  int block_type;
1254  bm::word_t* blk =
1255  bman.check_allocate_block(nblock,
1256  true,
1257  bv.get_new_blocks_strat(),
1258  &block_type,
1259  false /* no null return */);
1260  BM_ASSERT(blk);
1261 
1262  if (block_type == 1) // gap
1263  {
1264  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1265  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1266 
1267  for (; first < right; ++first)
1268  {
1269  unsigned is_set;
1270  unsigned nbit = (*first) & bm::set_block_mask;
1271 
1272  is_set = bm::gap_test_unr(gap_blk, nbit);
1273  BM_ASSERT(is_set <= 1);
1274  is_set ^= 1;
1275 
1276  unsigned new_block_len =
1277  gap_set_value(is_set, gap_blk, nbit, &is_set);
1278  if (new_block_len > threshold)
1279  {
1280  bman.extend_gap_block(nblock, gap_blk);
1281  ++first;
1282  goto label1; // block pointer changed - goto reset
1283  }
1284  }
1285  }
1286  else // bit block
1287  {
1288  for (; first < right; ++first)
1289  {
1290  unsigned nbit = unsigned(*first & bm::set_block_mask);
1291  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1292  nbit &= bm::set_word_mask;
1293  blk[nword] ^= (bm::word_t)1 << nbit;
1294  } // for
1295  }
1296  } // while
1297 
1298  bv.forget_count();
1299 }
1300 
1301 
1302 
1303 /**
1304  \brief SUB Combine bitvector and the iterable sequence
1305 
1306  Algorithm combines bvector with sequence of integers
1307  (represents another set). When the agrument set is sorted
1308  this method can give serious increase in performance.
1309 
1310  \param bv - destination bitvector
1311  \param first - first element of the iterated sequence
1312  \param last - last element of the iterated sequence
1313 
1314  \ingroup setalgo
1315 */
1316 template<class BV, class It>
1317 void combine_sub(BV& bv, It first, It last)
1318 {
1319  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1320  if (!bman.is_init())
1321  bman.init_tree();
1322 
1323  unsigned max_id = 0;
1324 
1325  while (first < last)
1326  {
1327  unsigned nblock = unsigned((*first) >> bm::set_block_shift);
1328  It right = block_range_scan(first, last, nblock, &max_id);
1329 
1330  if (max_id >= bv.size())
1331  {
1332  BM_ASSERT(max_id < bm::id_max);
1333  bv.resize(max_id + 1);
1334  }
1335 
1336  // now we have one in-block array of bits to set
1337 
1338  label1:
1339 
1340  int block_type;
1341  bm::word_t* blk =
1342  bman.check_allocate_block(nblock,
1343  false,
1344  bv.get_new_blocks_strat(),
1345  &block_type);
1346 
1347  if (!blk)
1348  continue;
1349 
1350  if (block_type == 1) // gap
1351  {
1352  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1353  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1354 
1355  for (; first < right; ++first)
1356  {
1357  unsigned is_set;
1358  unsigned nbit = (*first) & bm::set_block_mask;
1359 
1360  is_set = bm::gap_test_unr(gap_blk, nbit);
1361  if (!is_set)
1362  continue;
1363 
1364  unsigned new_block_len =
1365  gap_set_value(false, gap_blk, nbit, &is_set);
1366  if (new_block_len > threshold)
1367  {
1368  bman.extend_gap_block(nblock, gap_blk);
1369  ++first;
1370  goto label1; // block pointer changed - goto reset
1371  }
1372  }
1373  }
1374  else // bit block
1375  {
1376  for (; first < right; ++first)
1377  {
1378  unsigned nbit = unsigned(*first & bm::set_block_mask);
1379  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1380  nbit &= bm::set_word_mask;
1381  blk[nword] &= ~((bm::word_t)1 << nbit);
1382  } // for
1383  }
1384  } // while
1385 
1386  bv.forget_count();
1387 }
1388 
1389 /**
1390  \brief AND Combine bitvector and the iterable sequence
1391 
1392  Algorithm combines bvector with sorted sequence of integers
1393  (represents another set).
1394 
1395  \param bv - destination bitvector
1396  \param first - first element of the iterated sequence
1397  \param last - last element of the iterated sequence
1398 
1399  \ingroup setalgo
1400 */
1401 template<class BV, class It>
1402 void combine_and_sorted(BV& bv, It first, It last)
1403 {
1404  bm::id_t prev = 0;
1405  for ( ;first < last; ++first)
1406  {
1407  bm::id_t id = *first;
1408  BM_ASSERT(id >= prev); // make sure it's sorted
1409  bv.set_bit_and(id, true);
1410  if (++prev < id)
1411  {
1412  bv.set_range(prev, id-1, false);
1413  }
1414  prev = id;
1415  }
1416 }
1417 
1418 
1419 /**
1420  \brief AND Combine bitvector and the iterable sequence
1421 
1422  Algorithm combines bvector with sequence of integers
1423  (represents another set). When the agrument set is sorted
1424  this method can give serious increase in performance.
1425 
1426  \param bv - destination bitvector
1427  \param first - first element of the iterated sequence
1428  \param last - last element of the iterated sequence
1429 
1430  \ingroup setalgo
1431  \sa combine_and_sorted
1432 */
1433 template<class BV, class It>
1434 void combine_and(BV& bv, It first, It last)
1435 {
1436  BV bv_tmp;
1437  combine_or(bv_tmp, first, last);
1438  bv &= bv_tmp;
1439 }
1440 
1441 /*!
1442  \brief Compute number of bit intervals (GAPs) in the bitvector
1443 
1444  Algorithm traverses bit vector and count number of uninterrupted
1445  intervals of 1s and 0s.
1446  <pre>
1447  For example:
1448  empty vector - 1 interval
1449  00001111100000 - gives us 3 intervals
1450  10001111100000 - 4 intervals
1451  00000000000000 - 1 interval
1452  11111111111111 - 1 interval
1453  </pre>
1454  \return Number of intervals
1455  \ingroup setalgo
1456 */
1457 template<class BV>
1459 {
1460  const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1461 
1462  if (!bman.is_init())
1463  return 1;
1464 
1465  bm::word_t*** blk_root = bman.top_blocks_root();
1466  typename BV::blocks_manager_type::block_count_change_func func(bman);
1467  for_each_block(blk_root, bman.top_block_size(), func);
1468 
1469  return func.count();
1470 }
1471 
1472 /*!
1473  \brief Export bitset from an array of binary data representing
1474  the bit vector.
1475 
1476  Input array can be array of ints, chars or other native C types.
1477  Method works with iterators, which makes it compatible with
1478  STL containers and C arrays
1479 
1480  \param bv - destination bitvector
1481  \param first - first element of the iterated sequence
1482  \param last - last element of the iterated sequence
1483 
1484  \ingroup setalgo
1485 */
1486 template<class BV, class It>
1487 void export_array(BV& bv, It first, It last)
1488 {
1489  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1490  if (!bman.is_init())
1491  bman.init_tree();
1492 
1493  unsigned inp_word_size = sizeof(*first);
1494  size_t array_size = last - first;
1495  size_t bit_cnt = array_size * inp_word_size * 8;
1496  int block_type;
1497  bm::word_t tmp;
1498  unsigned b1, b2, b3, b4;
1499 
1500  if (bit_cnt >= bv.size())
1501  bv.resize((bm::id_t)bit_cnt + 1);
1502  else
1503  bv.set_range((bm::id_t)bit_cnt, bv.size() - 1, false);
1504 
1505  switch (inp_word_size)
1506  {
1507  case 1:
1508  {
1509  size_t word_cnt = array_size / 4;
1510  for (unsigned i = 0; i < bm::set_total_blocks; ++i)
1511  {
1512  bm::word_t* blk =
1513  bman.check_allocate_block(i,
1514  false,
1515  BM_BIT,
1516  &block_type,
1517  false /*no NULL ret*/);
1518  if (block_type == 1) // gap
1519  {
1520  blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
1521  }
1522 
1523  bm::word_t* wrd_ptr = blk;
1524  if (word_cnt >= bm::set_block_size) {
1525  bm::word_t* wrd_end = blk + bm::set_block_size;
1526  do {
1527  b1 = *first++; b2 = *first++;
1528  b3 = *first++; b4 = *first++;
1529  tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
1530  *wrd_ptr++ = tmp;
1531  } while (wrd_ptr < wrd_end);
1532  word_cnt -= bm::set_block_size;
1533  }
1534  else
1535  {
1536  size_t to_convert = last - first;
1537  for (size_t j = 0; j < to_convert / 4; ++j)
1538  {
1539  b1 = *first++; b2 = *first++;
1540  b3 = *first++; b4 = *first++;
1541  tmp = b1 | (b2 << 8) | (b3 << 16) | (b4 << 24);
1542  *wrd_ptr++ = tmp;
1543  }
1544  while (1)
1545  {
1546  if (first == last) break;
1547  *wrd_ptr = *first++;
1548  if (first == last) break;
1549  *wrd_ptr |= (*first++) << 8;
1550  if (first == last) break;
1551  *wrd_ptr |= (*first++) << 16;
1552  if (first == last) break;
1553  *wrd_ptr |= (*first++) << 24;
1554  ++wrd_ptr;
1555  }
1556  }
1557  if (first == last) break;
1558  } // for
1559  }
1560  break;
1561  case 2:
1562  {
1563  size_t word_cnt = array_size / 2;
1564  for (unsigned i = 0; i < bm::set_total_blocks; ++i)
1565  {
1566  bm::word_t* blk =
1567  bman.check_allocate_block(i,
1568  false,
1569  BM_BIT,
1570  &block_type,
1571  false /*no NULL ret*/);
1572  if (block_type == 1) // gap
1573  {
1574  blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
1575  }
1576 
1577  bm::word_t* wrd_ptr = blk;
1578  if (word_cnt >= bm::set_block_size) {
1579  bm::word_t* wrd_end = blk + bm::set_block_size;
1580  do {
1581  b1 = *first++; b2 = *first++;
1582  tmp = b1 | (b2 << 16);
1583  *wrd_ptr++ = tmp;
1584  } while (wrd_ptr < wrd_end);
1585  word_cnt -= bm::set_block_size;
1586  }
1587  else
1588  {
1589  size_t to_convert = last - first;
1590  for (unsigned j = 0; j < to_convert / 2; ++j)
1591  {
1592  b1 = *first++; b2 = *first++;
1593  tmp = b1 | (b2 << 16);
1594  *wrd_ptr++ = tmp;
1595  }
1596  while (1)
1597  {
1598  if (first == last) break;
1599  *wrd_ptr = *first++;
1600  if (first == last) break;
1601  *wrd_ptr |= (*first++) << 16;
1602  ++wrd_ptr;
1603  }
1604  }
1605  if (first == last) break;
1606  } // for
1607  }
1608  break;
1609  case 4:
1610  {
1611  size_t word_cnt = array_size;
1612  for (unsigned i = 0; i < bm::set_total_blocks; ++i)
1613  {
1614  bm::word_t* blk =
1615  bman.check_allocate_block(i,
1616  false,
1617  BM_BIT,
1618  &block_type,
1619  false /*no NULL ret*/);
1620  if (block_type == 1) // gap
1621  {
1622  blk = bman.convert_gap2bitset(i, BMGAP_PTR(blk));
1623  }
1624 
1625  bm::word_t* wrd_ptr = blk;
1626  if (word_cnt >= bm::set_block_size) {
1627  bm::word_t* wrd_end = blk + bm::set_block_size;
1628  do {
1629  *wrd_ptr++ = *first++;
1630  } while (wrd_ptr < wrd_end);
1631  word_cnt -= bm::set_block_size;
1632  }
1633  else
1634  {
1635  while (1)
1636  {
1637  if (first == last) break;
1638  *wrd_ptr = *first++;
1639  ++wrd_ptr;
1640  }
1641  }
1642  if (first == last) break;
1643  } // for
1644  }
1645  break;
1646  default:
1647  BM_ASSERT(0);
1648  } // switch
1649 
1650 }
1651 
1652 
1653 /*!
1654  \brief for-each visitor, calls a special visitor functor for each 1 bit group
1655 
1656  \param block - bit block buffer pointer
1657  \param offset - global block offset (number of bits)
1658  \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
1659 
1660  @ingroup bitfunc
1661  @internal
1662 */
1663 template<class Func>
1664 void for_each_bit_blk(const bm::word_t* block, bm::id_t offset,
1665  Func& bit_functor)
1666 {
1667  unsigned char bits[bm::set_bitscan_wave_size*32];
1668 
1669  unsigned offs = offset;
1670  unsigned cnt;
1671 
1672  const word_t* block_end = block + bm::set_block_size;
1673  for ( ;block < block_end; )
1674  {
1675  cnt = bm::bitscan_wave(block, bits);
1676 
1677  bit_functor.add_bits(offs, bits, cnt);
1678 
1679  offs += bm::set_bitscan_wave_size * 32;
1680  block += bm::set_bitscan_wave_size;
1681  } // for
1682 }
1683 
1684 
1685 /*!
1686  \brief for-each visitor, calls a special visitor functor for each 1 bit range
1687 
1688  \param block - bit block buffer pointer
1689  \param offset - global block offset (number of bits)
1690  \param bit_functor - functor must support .add_range(offset, bits_ptr, size)
1691 
1692  @ingroup gapfunc
1693  @internal
1694 */
1695 template<typename T, typename Func>
1696 void for_each_gap_blk(const T* buf, bm::id_t offset,
1697  Func& bit_functor)
1698 {
1699  const T* pcurr = buf + 1;
1700  const T* pend = buf + (*buf >> 3);
1701 
1702  if (*buf & 1)
1703  {
1704  bit_functor.add_range(offset, *pcurr + 1);
1705  ++pcurr;
1706  }
1707  for (++pcurr; pcurr <= pend; pcurr += 2)
1708  {
1709  T prev = *(pcurr-1);
1710  bit_functor.add_range(offset + prev + 1, *pcurr - prev);
1711  }
1712 }
1713 
1714 
1715 
1716 
1717 
1718 
1719 } // namespace bm
1720 
1721 #ifdef _MSC_VER
1722 #pragma warning( pop )
1723 #endif
1724 
1725 #endif
BMFORCEINLINE unsigned gap_count_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount AND operation test.
Definition: bmfunc.h:3545
bm::id_t bit_operation_xor_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation test.
Definition: bmfunc.h:4675
#define BM_VECT_ALIGN
Definition: bmdef.h:250
(A & B).count()
Definition: bmalgo_impl.h:53
const unsigned set_block_size
Definition: bmconst.h:44
set_operation
Nomenclature of set operations.
Definition: bmfunc.h:231
const unsigned set_word_shift
Definition: bmconst.h:54
bm::id_t bit_operation_or_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation test.
Definition: bmfunc.h:4346
unsigned distance_and_operation(const BV &bv1, const BV &bv2)
Distance AND computing template function.
Definition: bmalgo_impl.h:789
bm::id_t any_and(const BV &bv1, const BV &bv2)
Computes if there is any bit in AND operation of two bitsets.
Definition: bmalgo_impl.h:1002
bm::id_t any_xor(const BV &bv1, const BV &bv2)
Computes if there is any bit in XOR operation of two bitsets.
Definition: bmalgo_impl.h:1036
BMFORCEINLINE unsigned gap_operation_any_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP AND operation test.
Definition: bmfunc.h:3528
unsigned gap_set_value(unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
Sets or clears bit in the GAP buffer.
Definition: bmfunc.h:1359
void combine_any_operation_with_block(const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, int arg_gap, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different existense of distance metric.
Definition: bmalgo_impl.h:369
bm::id_t gap_bitset_and_count(const unsigned *block, const T *buf)
Compute bitcount of bit block AND masked by GAP block.
Definition: bmfunc.h:1998
bm::id_t gap_bitset_and_any(const unsigned *block, const T *buf)
Bitcount test of bit block AND masked by GAP block.
Definition: bmfunc.h:2030
unsigned combine_count_and_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk)
Internal function computes AND distance.
Definition: bmalgo_impl.h:332
const unsigned short set_bitscan_wave_size
Definition: bmfunc.h:5528
void distance_stage(const distance_metric_descriptor *dmit, const distance_metric_descriptor *dmit_end, bool *is_all_and)
Staging function for distance operation.
Definition: bmalgo_impl.h:665
(A - B).count()
Definition: bmalgo_impl.h:56
Definition: bm.h:59
bm::id_t gap_bitset_sub_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block SUB masked by GAP block.
Definition: bmfunc.h:2104
BMFORCEINLINE unsigned gap_count_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount OR operation test.
Definition: bmfunc.h:3656
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
Definition: bmfunc.h:2343
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1317
(A ^ B).count()
Definition: bmalgo_impl.h:54
bm::id_t count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo_impl.h:1020
bool gap_is_all_zero(const T *buf, unsigned set_max)
Temporary inverts all bits in the GAP buffer.
Definition: bmfunc.h:2537
const unsigned id_max
Definition: bmconst.h:40
(B - A).count()
Definition: bmalgo_impl.h:57
void distance_operation_any(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Distance screening template function.
Definition: bmalgo_impl.h:861
Distance metric descriptor, holds metric code and result.
Definition: bmalgo_impl.h:81
B.count()
Definition: bmalgo_impl.h:59
T min_value(T v1, T v2)
Get minimum of 2 values.
Definition: bmutil.h:47
bm::id_t gap_bitset_xor_count(const unsigned *block, const T *buf)
Compute bitcount of bit block XOR masked by GAP block.
Definition: bmfunc.h:2143
distance_metric_descriptor(distance_metric m)
Definition: bmalgo_impl.h:86
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:4185
void for_each_gap_blk(const T *buf, bm::id_t offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
Definition: bmalgo_impl.h:1696
bool bit_is_all_zero(const bm::wordop_t *start, const bm::wordop_t *end)
Returns "true" if all bits in the block are 0.
Definition: bmfunc.h:3438
BMFORCEINLINE unsigned gap_count_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount XOR operation test.
Definition: bmfunc.h:3611
unsigned int word_t
Definition: bmconst.h:35
#define BM_VECT_ALIGN_ATTR
Definition: bmdef.h:251
bm::id_t bit_operation_and_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation test.
Definition: bmfunc.h:4208
void distance_operation(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Distance computing template function.
Definition: bmalgo_impl.h:699
void combine_xor(BV &bv, It first, It last)
XOR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1230
(A | B).count()
Definition: bmalgo_impl.h:55
A.count()
Definition: bmalgo_impl.h:58
const unsigned gap_max_buff_len
Definition: bmconst.h:62
No GAP compression strategy. All new blocks are bit blocks.
Definition: bmconst.h:104
bm::id_t count_intervals(const BV &bv)
Compute number of bit intervals (GAPs) in the bitvector.
Definition: bmalgo_impl.h:1458
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition: bmfunc.h:525
unsigned short gap_word_t
Definition: bmconst.h:60
bm::id_t count_sub(const BV &bv1, const BV &bv2)
Computes bitcount of SUB operation of two bitsets.
Definition: bmalgo_impl.h:1054
BMFORCEINLINE unsigned gap_operation_any_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP SUB operation test.
Definition: bmfunc.h:3706
gap_word_t * gap_operation_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP OR operation.
Definition: bmfunc.h:3636
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:4233
BMFORCEINLINE unsigned gap_count_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount SUB (AND NOT) operation test.
Definition: bmfunc.h:3723
bm::id_t count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo_impl.h:1088
static bit_operation_count_func_type bit_operation_count(unsigned i)
Definition: bmfunc.h:5482
bm::id_t gap_bitset_xor_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block XOR masked by GAP block.
Definition: bmfunc.h:2182
bm::id_t any_sub(const BV &bv1, const BV &bv2)
Computes if there is any bit in SUB operation of two bitsets.
Definition: bmalgo_impl.h:1071
void combine_and_sorted(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1402
bm::id_t(* bit_operation_count_func_type)(const bm::word_t *BMRESTRICT, const bm::word_t *BMRESTRICT, const bm::word_t *BMRESTRICT)
Definition: bmfunc.h:5454
bm::id_t gap_bitset_or_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block OR masked by GAP block.
Definition: bmfunc.h:2274
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1434
const unsigned gap_max_bits
Definition: bmconst.h:63
#define BM_IS_GAP(ptr)
Definition: bmdef.h:161
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:67
BMFORCEINLINE unsigned gap_operation_any_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP XOR operation test.
Definition: bmfunc.h:3595
#define BM_SET_MMX_GUARD
Definition: bmdef.h:201
const unsigned set_block_mask
Definition: bmconst.h:46
void export_array(BV &bv, It first, It last)
Export bitset from an array of binary data representing the bit vector.
Definition: bmalgo_impl.h:1487
#define BMGAP_PTR(ptr)
Definition: bmdef.h:159
bm::id_t bit_block_calc_count(const bm::word_t *block, const bm::word_t *block_end)
Bitcount for bit string.
Definition: bmfunc.h:2912
const unsigned set_array_size
Definition: bmconst.h:72
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:804
void for_each_block(T ***root, unsigned size1, F &f)
Definition: bmfunc.h:708
const unsigned set_word_mask
Definition: bmconst.h:55
bm::id_t gap_bitset_or_count(const unsigned *block, const T *buf)
Compute bitcount of bit block OR masked by GAP block.
Definition: bmfunc.h:2226
unsigned short bitscan_wave(const bm::word_t *w_ptr, unsigned char *bits)
Unpacks word wave (2x 32-bit words)
Definition: bmfunc.h:5539
unsigned int id_t
Definition: bmconst.h:34
unsigned gap_limit(const T *buf, const T *glevel_len)
Returs GAP block capacity limit.
Definition: bmfunc.h:2593
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:113
#define BM_ASSERT
Definition: bmdef.h:112
const unsigned set_total_blocks
Definition: bmconst.h:75
id64_t wordop_t
Definition: bmconst.h:83
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:51
void for_each_bit_blk(const bm::word_t *block, bm::id_t offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit group
Definition: bmalgo_impl.h:1664
bm::id_t gap_bitset_sub_count(const unsigned *block, const T *buf)
Compute bitcount of bit block SUB masked by GAP block.
Definition: bmfunc.h:2070
const unsigned set_block_shift
Definition: bmconst.h:45
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:252
bm::id_t count_and(const BV &bv1, const BV &bv2)
Computes bitcount of AND operation of two bitsets.
Definition: bmalgo_impl.h:989
void reset()
Sets metric result to 0.
Definition: bmalgo_impl.h:98
It block_range_scan(It first, It last, unsigned nblock, unsigned *max_id)
Internal algorithms scans the input for the block range limit.
Definition: bmalgo_impl.h:1118
bm::id_t bit_operation_sub_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock test of SUB operation.
Definition: bmfunc.h:4284
#define BLOCK_ADDR_SAN(addr)
Definition: bmdef.h:133
bm::id_t any_or(const BV &bv1, const BV &bv2)
Computes if there is any bit in OR operation of two bitsets.
Definition: bmalgo_impl.h:1104