BitMagic-C++
bmalgo_impl.h
Go to the documentation of this file.
1 #ifndef BMALGO_IMPL__H__INCLUDED__
2 #define BMALGO_IMPL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmalgo_impl.h
22  \brief Algorithms for bvector<>
23 */
24 
25 #ifdef _MSC_VER
26 #pragma warning( push )
27 #pragma warning( disable : 4311 4312 4127)
28 #endif
29 
30 #include "bmdef.h"
31 #include "bmutil.h"
32 
33 namespace bm
34 {
35 
36 /*!
37  @defgroup setalgo bvector<> algorithms
38 
39  Set algebra algorithms using bit-vectors, arrays.
40  Binary metrics and distances. Random sub-sets.
41 
42  @ingroup bvector
43  */
44 
45 /*!
46  @defgroup distance Binary-distance metrics
47 
48  Distance metrics and algorithms to compute binary distances
49  @ingroup setalgo
50  */
51 
52 
53 /*!
54  \brief Distance metrics codes defined for vectors A and B
55  \ingroup distance
56 */
58 {
59  COUNT_AND = set_COUNT_AND, //!< (A & B).count()
60  COUNT_XOR = set_COUNT_XOR, //!< (A ^ B).count()
61  COUNT_OR = set_COUNT_OR, //!< (A | B).count()
62  COUNT_SUB_AB = set_COUNT_SUB_AB, //!< (A - B).count()
63  COUNT_SUB_BA = set_COUNT_SUB_BA, //!< (B - A).count()
64  COUNT_A = set_COUNT_A, //!< A.count()
65  COUNT_B = set_COUNT_B //!< B.count()
66 };
67 
68 /**
69  Convert set operation into compatible distance metric
70  \ingroup distance
71 */
72 inline
74 {
76  if (op == set_COUNT) op = set_COUNT_B;
77  // distance metric is created as a set operation sub-class
78  // simple cast will work to convert
79  return (distance_metric) op;
80 }
81 
82 /*!
83  \brief Distance metric descriptor, holds metric code and result.
84  \sa distance_operation
85 */
86 
88 {
91 
93  : metric(m),
94  result(0)
95  {}
97  : metric(bm::COUNT_XOR),
98  result(0)
99  {}
100 
101  /*!
102  \brief Sets metric result to 0
103  */
104  void reset()
105  {
106  result = 0;
107  }
108 };
109 
110 
111 
112 /*!
113  \brief Internal function computes different distance metrics.
114  \internal
115  \ingroup distance
116 
117 */
118 inline
120  const bm::word_t* arg_blk,
122  distance_metric_descriptor* dmit_end)
123 
124 {
125  gap_word_t* g1 = BMGAP_PTR(blk);
126  gap_word_t* g2 = BMGAP_PTR(arg_blk);
127 
128  unsigned gap = BM_IS_GAP(blk);
129  unsigned arg_gap = BM_IS_GAP(arg_blk);
130 
131  if (gap) // first block GAP-type
132  {
133  if (arg_gap) // both blocks GAP-type
134  {
135  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
136  {
137  distance_metric_descriptor& dmd = *it;
138 
139  switch (dmd.metric)
140  {
141  case bm::COUNT_AND:
142  dmd.result += gap_count_and(g1, g2);
143  break;
144  case bm::COUNT_OR:
145  dmd.result += gap_count_or(g1, g2);
146  break;
147  case bm::COUNT_SUB_AB:
148  dmd.result += gap_count_sub(g1, g2);
149  break;
150  case bm::COUNT_SUB_BA:
151  dmd.result += gap_count_sub(g2, g1);
152  break;
153  case bm::COUNT_XOR:
154  dmd.result += gap_count_xor(g1, g2);
155  break;
156  case bm::COUNT_A:
157  dmd.result += gap_bit_count_unr(g1);
158  break;
159  case bm::COUNT_B:
160  dmd.result += gap_bit_count_unr(g2);
161  break;
162  default:
163  BM_ASSERT(0);
164  } // switch
165 
166  } // for it
167 
168  return;
169 
170  }
171  else // first block - GAP, argument - BITSET
172  {
173  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
174  {
175  distance_metric_descriptor& dmd = *it;
176 
177  switch (dmd.metric)
178  {
179  case bm::COUNT_AND:
180  if (arg_blk)
181  dmd.result += gap_bitset_and_count(arg_blk, g1);
182  break;
183  case bm::COUNT_OR:
184  if (!arg_blk)
185  dmd.result += gap_bit_count_unr(g1);
186  else
187  dmd.result += gap_bitset_or_count(arg_blk, g1);
188  break;
189  case bm::COUNT_SUB_AB:
190  {
192 
193  gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1);
194  dmd.result +=
195  bit_operation_sub_count((bm::word_t*)temp_bit_blk, arg_blk);
196  }
197  break;
198  case bm::COUNT_SUB_BA:
199  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
201  blk,
202  it, it+1);
203  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
204  break;
205  case bm::COUNT_XOR:
206  if (!arg_blk)
207  dmd.result += gap_bit_count_unr(g1);
208  else
209  dmd.result += gap_bitset_xor_count(arg_blk, g1);
210  break;
211  case bm::COUNT_A:
212  if (g1)
213  dmd.result += gap_bit_count_unr(g1);
214  break;
215  case bm::COUNT_B:
216  if (arg_blk)
217  {
218  dmd.result +=
219  bit_block_count(arg_blk);
220  }
221  break;
222  default:
223  BM_ASSERT(0);
224  } // switch
225 
226  } // for it
227 
228  return;
229 
230  }
231  }
232  else // first block is BITSET-type
233  {
234  if (arg_gap) // second argument block is GAP-type
235  {
236  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
237  {
238  distance_metric_descriptor& dmd = *it;
239 
240  switch (dmd.metric)
241  {
242  case bm::COUNT_AND:
243  if (blk)
244  dmd.result += gap_bitset_and_count(blk, g2);
245  break;
246  case bm::COUNT_OR:
247  if (!blk)
248  dmd.result += gap_bit_count_unr(g2);
249  else
250  dmd.result += gap_bitset_or_count(blk, g2);
251  break;
252  case bm::COUNT_SUB_AB:
253  if (blk)
254  dmd.result += gap_bitset_sub_count(blk, g2);
255  break;
256  case bm::COUNT_SUB_BA:
257  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
259  //arg_gap,
260  blk,
261  //gap,
262  it, it+1);
263  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
264  break;
265  case bm::COUNT_XOR:
266  if (!blk)
267  dmd.result += gap_bit_count_unr(g2);
268  else
269  dmd.result += gap_bitset_xor_count(blk, g2);
270  break;
271  case bm::COUNT_A:
272  if (blk)
273  {
274  dmd.result +=
275  bit_block_count(blk);
276  }
277  break;
278  case bm::COUNT_B:
279  if (g2)
280  dmd.result += gap_bit_count_unr(g2);
281  break;
282  default:
283  BM_ASSERT(0);
284  } // switch
285 
286  } // for it
287 
288  return;
289  }
290  }
291 
292  // --------------------------------------------
293  //
294  // Here we combine two plain bitblocks
295 
296  for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
297  {
298  distance_metric_descriptor& dmd = *it;
301  if (gfunc)
302  {
303  dmd.result += (*gfunc)(blk, arg_blk);
304  }
305  else
306  {
307  switch (dmd.metric)
308  {
309  case bm::COUNT_A:
310  if (blk)
311  dmd.result += bm::bit_block_count(blk);
312  break;
313  case bm::COUNT_B:
314  if (arg_blk)
315  dmd.result += bm::bit_block_count(arg_blk);
316  break;
317  case bm::COUNT_AND:
318  case bm::COUNT_XOR:
319  case bm::COUNT_OR:
320  case bm::COUNT_SUB_AB:
321  case bm::COUNT_SUB_BA:
322  default:
323  BM_ASSERT(0);
324  } // switch
325  }
326 
327  } // for it
328 }
329 
330 /*!
331 \brief Internal function computes AND distance.
332 \internal
333 \ingroup distance
334 */
335 inline
337  const bm::word_t* arg_blk)
338 {
339  unsigned gap = BM_IS_GAP(blk);
340  unsigned arg_gap = BM_IS_GAP(arg_blk);
341  if (gap) // first block GAP-type
342  {
343  if (arg_gap) // both blocks GAP-type
344  {
345  return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk));
346  }
347  else // first block - GAP, argument - BITSET
348  {
349  return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk));
350  }
351  }
352  else // first block is BITSET-type
353  {
354  if (arg_gap) // second argument block is GAP-type
355  {
356  return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk));
357  }
358  }
359 
360  // --------------------------------------------
361  // Here we combine two plain bitblocks
362 
363  return bit_operation_and_count(blk, arg_blk);
364 }
365 
366 
367 /*!
368  \brief Internal function computes different existense of distance metric.
369  \internal
370  \ingroup distance
371 */
372 inline
374  unsigned gap,
375  const bm::word_t* arg_blk,
376  unsigned arg_gap,
378  distance_metric_descriptor* dmit_end)
379 
380 {
381  gap_word_t* res=0;
382 
383  gap_word_t* g1 = BMGAP_PTR(blk);
384  gap_word_t* g2 = BMGAP_PTR(arg_blk);
385 
386  if (gap) // first block GAP-type
387  {
388  if (arg_gap) // both blocks GAP-type
389  {
390  gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result
391 
392  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
393  {
394  distance_metric_descriptor& dmd = *it;
395  if (dmd.result)
396  {
397  continue;
398  }
399  res = 0;
400  unsigned dsize = 0;
401  switch (dmd.metric)
402  {
403  case bm::COUNT_AND:
404  dmd.result += gap_operation_any_and(g1, g2);
405  break;
406  case bm::COUNT_OR:
407  res = gap_operation_or(g1, g2, tmp_buf, dsize);
408  break;
409  case bm::COUNT_SUB_AB:
410  dmd.result += gap_operation_any_sub(g1, g2);
411  break;
412  case bm::COUNT_SUB_BA:
413  dmd.result += gap_operation_any_sub(g2, g1);
414  break;
415  case bm::COUNT_XOR:
416  dmd.result += gap_operation_any_xor(g1, g2);
417  break;
418  case bm::COUNT_A:
419  res = g1;
420  break;
421  case bm::COUNT_B:
422  res = g2;
423  break;
424  default:
425  BM_ASSERT(0);
426  } // switch
427  if (res)
428  dmd.result += !gap_is_all_zero(res);
429 
430  } // for it
431 
432  return;
433 
434  }
435  else // first block - GAP, argument - BITSET
436  {
437  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
438  {
439  distance_metric_descriptor& dmd = *it;
440  if (dmd.result)
441  {
442  continue;
443  }
444 
445  switch (dmd.metric)
446  {
447  case bm::COUNT_AND:
448  if (arg_blk)
449  dmd.result += gap_bitset_and_any(arg_blk, g1);
450  break;
451  case bm::COUNT_OR:
452  if (!arg_blk)
453  dmd.result += !gap_is_all_zero(g1);
454  else
455  dmd.result += gap_bitset_or_any(arg_blk, g1);
456  break;
457  case bm::COUNT_SUB_AB:
458  {
460  gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
461  dmd.result +=
462  bit_operation_sub_any((bm::word_t*)temp_blk, arg_blk);
463  }
464  break;
465  case bm::COUNT_SUB_BA:
466  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
468  arg_gap,
469  blk,
470  gap,
471  it, it+1);
472  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
473  break;
474  case bm::COUNT_XOR:
475  if (!arg_blk)
476  dmd.result += !gap_is_all_zero(g1);
477  else
478  dmd.result += gap_bitset_xor_any(arg_blk, g1);
479  break;
480  case bm::COUNT_A:
481  if (g1)
482  dmd.result += !gap_is_all_zero(g1);
483  break;
484  case bm::COUNT_B:
485  if (arg_blk)
486  {
487  dmd.result +=
488  !bit_is_all_zero(arg_blk);
489  }
490  break;
491  default:
492  BM_ASSERT(0);
493  } // switch
494 
495  } // for it
496 
497  return;
498 
499  }
500  }
501  else // first block is BITSET-type
502  {
503  if (arg_gap) // second argument block is GAP-type
504  {
505  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
506  {
507  distance_metric_descriptor& dmd = *it;
508  if (dmd.result)
509  {
510  continue;
511  }
512 
513  switch (dmd.metric)
514  {
515  case bm::COUNT_AND:
516  if (blk)
517  dmd.result += gap_bitset_and_any(blk, g2);
518  break;
519  case bm::COUNT_OR:
520  if (!blk)
521  dmd.result += !gap_is_all_zero(g2);
522  else
523  dmd.result += gap_bitset_or_any(blk, g2);
524  break;
525  case bm::COUNT_SUB_AB:
526  if (blk)
527  dmd.result += gap_bitset_sub_any(blk, g2);
528  break;
529  case bm::COUNT_SUB_BA:
530  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
532  arg_gap,
533  blk,
534  gap,
535  it, it+1);
536  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
537  break;
538  case bm::COUNT_XOR:
539  if (!blk)
540  dmd.result += !gap_is_all_zero(g2);
541  else
542  dmd.result += gap_bitset_xor_any(blk, g2);
543  break;
544  case bm::COUNT_A:
545  if (blk)
546  {
547  dmd.result+=
548  !bm::bit_is_all_zero(blk);
549  }
550  break;
551  case bm::COUNT_B:
552  if (g2)
553  dmd.result += !gap_is_all_zero(g2);
554  break;
555  default:
556  BM_ASSERT(0);
557  } // switch
558 
559  } // for it
560 
561  return;
562  }
563  }
564 
565  // --------------------------------------------
566  //
567  // Here we combine two plain bitblocks
568 
569  for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
570  {
571  distance_metric_descriptor& dmd = *it;
572  if (dmd.result)
573  {
574  continue;
575  }
576 
577  switch (dmd.metric)
578  {
579  case bm::COUNT_AND:
580  dmd.result +=
581  bit_operation_and_any(blk, arg_blk);
582  break;
583  case bm::COUNT_OR:
584  dmd.result +=
585  bit_operation_or_any(blk, arg_blk);
586  break;
587  case bm::COUNT_SUB_AB:
588  dmd.result +=
589  bit_operation_sub_any(blk, arg_blk);
590  break;
591  case bm::COUNT_SUB_BA:
592  dmd.result +=
593  bit_operation_sub_any(arg_blk, blk);
594  break;
595  case bm::COUNT_XOR:
596  dmd.result +=
597  bit_operation_xor_any(blk, arg_blk);
598  break;
599  case bm::COUNT_A:
600  if (blk)
601  dmd.result += !bit_is_all_zero(blk);
602  break;
603  case bm::COUNT_B:
604  if (arg_blk)
605  dmd.result += !bit_is_all_zero(arg_blk);
606  break;
607  default:
608  BM_ASSERT(0);
609  } // switch
610 
611  } // for it
612 }
613 
614 
615 
616 /*!
617  Convenience internal function to compute combine count for one metric
618  \internal
619  \ingroup distance
620 */
621 inline
623  const bm::word_t* arg_blk,
625 {
626  distance_metric_descriptor dmd(metric);
628  arg_blk, //arg_gap,
629  &dmd, &dmd+1);
630  return dmd.result;
631 }
632 
633 
634 /*!
635  Convenience internal function to compute combine any for one metric
636  \internal
637  \ingroup distance
638 */
639 inline
641  unsigned gap,
642  const bm::word_t* arg_blk,
643  unsigned arg_gap,
645 {
646  distance_metric_descriptor dmd(metric);
648  arg_blk, arg_gap,
649  &dmd, &dmd+1);
650  return dmd.result;
651 }
652 
653 /*!
654  \brief Staging function for distance operation
655 
656  \return temp block allocated (or NULL)
657 
658  \internal
659 */
660 inline
662  const distance_metric_descriptor* dmit_end,
663  bool* is_all_and)
664 {
665  for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
666  {
667  if (it->metric != bm::COUNT_AND)
668  {
669  *is_all_and = false;
670  }
671  } // for
672 }
673 
674 /*!
675  \brief Distance computing template function.
676 
677  Function receives two bitvectors and an array of distance metrics
678  (metrics pipeline). Function computes all metrics saves result into
679  corresponding pipeline results (distance_metric_descriptor::result)
680  An important detail is that function reuses metric descriptors,
681  incrementing received values. It allows you to accumulate results
682  from different calls in the pipeline.
683 
684  \param bv1 - argument bitvector 1 (A)
685  \param bv2 - argument bitvector 2 (B)
686  \param dmit - pointer to first element of metric descriptors array
687  Input-Output parameter, receives metric code as input,
688  computation is added to "result" field
689  \param dmit_end - pointer to (last+1) element of metric descriptors array
690  \ingroup distance
691 
692 */
693 template<class BV>
694 void distance_operation(const BV& bv1,
695  const BV& bv2,
697  distance_metric_descriptor* dmit_end)
698 {
699  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
700  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
701 
702  bool is_all_and = true; // flag is distance operation is just COUNT_AND
703  distance_stage(dmit, dmit_end, &is_all_and);
704 
705  bm::word_t*** blk_root = bman1.top_blocks_root();
706  unsigned block_idx = 0;
707  unsigned i, j;
708 
709  const bm::word_t* blk;
710  const bm::word_t* arg_blk;
711 
713 
714  unsigned top_block_size = bman1.top_block_size();
715  unsigned ebs2 = bman2.top_block_size();
716  if (ebs2 > top_block_size)
717  top_block_size = ebs2;
718 
719  for (i = 0; i < top_block_size; ++i)
720  {
721  bm::word_t** blk_blk = blk_root ? blk_root[i] : 0;
722 
723  if (blk_blk == 0) // not allocated
724  {
725  // AND operation requested - we can skip this portion here
726  if (is_all_and)
727  {
728  block_idx += bm::set_array_size;
729  continue;
730  }
731  const bm::word_t* const* bvbb = bman2.get_topblock(i);
732  if (bvbb == 0)
733  {
734  block_idx += bm::set_array_size;
735  continue;
736  }
737 
738  blk = 0;
739  for (j = 0; j < bm::set_array_size; ++j,++block_idx)
740  {
741  arg_blk = bman2.get_block(i, j);
742  if (!arg_blk)
743  continue;
745  arg_blk,
746  dmit, dmit_end);
747  } // for j
748  continue;
749  }
750 
751  for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
752  {
753  blk = bman1.get_block(i, j);
754  if (blk == 0 && is_all_and)
755  continue;
756 
757  arg_blk = bman2.get_block(i, j);
758 
759  if (!blk & !arg_blk)
760  continue;
761 
763  arg_blk,
764  dmit, dmit_end);
765  } // for j
766 
767  } // for i
768 }
769 
770 
771 /*!
772 \brief Distance AND computing template function.
773 
774 
775 \param bv1 - argument bitvector 1 (A)
776 \param bv2 - argument bitvector 2 (B)
777 \ingroup distance
778 
779 */
780 template<class BV>
781 unsigned distance_and_operation(const BV& bv1,
782  const BV& bv2)
783 {
784  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
785  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
786 
787  if (!bman1.is_init() || !bman2.is_init())
788  return 0;
789 
790  bm::word_t*** blk_root = bman1.top_blocks_root();
791  bm::word_t*** blk_root_arg = bman2.top_blocks_root();
792  unsigned count = 0;
793 
794  unsigned top_block_size =
795  bm::min_value(bman1.top_block_size(),bman2.top_block_size());
796 
797  for (unsigned i = 0; i < top_block_size; ++i)
798  {
799  bm::word_t** blk_blk;
800  bm::word_t** blk_blk_arg;
801  if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0)
802  {
803  continue;
804  }
805  for (unsigned j = 0; j < bm::set_array_size; j+=4)
806  {
807  (blk_blk[j] && blk_blk_arg[j]) ?
808  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j]), BLOCK_ADDR_SAN(blk_blk_arg[j]))
809  :0;
810  (blk_blk[j+1] && blk_blk_arg[j+1]) ?
811  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+1]), BLOCK_ADDR_SAN(blk_blk_arg[j+1]))
812  :0;
813  (blk_blk[j+2] && blk_blk_arg[j+2]) ?
814  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+2]), BLOCK_ADDR_SAN(blk_blk_arg[j+2]))
815  :0;
816  (blk_blk[j+3] && blk_blk_arg[j+3]) ?
817  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+3]), BLOCK_ADDR_SAN(blk_blk_arg[j+3]))
818  :0;
819  } // for j
820 
821  } // for i
822  return count;
823 }
824 
825 
826 
827 
828 /*!
829  \brief Distance screening template function.
830 
831  Function receives two bitvectors and an array of distance metrics
832  (metrics pipeline). Function computes possybility of a metric(any bit)
833  saves result into corresponding pipeline results
834  (distance_metric_descriptor::result)
835  An important detail is that function reuses metric descriptors,
836  incrementing received values. It allows you to accumulate results
837  from different calls in the pipeline.
838 
839  \param bv1 - argument bitvector 1 (A)
840  \param bv2 - argument bitvector 2 (B)
841  \param dmit - pointer to first element of metric descriptors array
842  Input-Output parameter, receives metric code as input,
843  computation is added to "result" field
844  \param dmit_end - pointer to (last+1) element of metric descriptors array
845  \ingroup distance
846 */
847 template<class BV>
848 void distance_operation_any(const BV& bv1,
849  const BV& bv2,
851  distance_metric_descriptor* dmit_end)
852 {
853  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
854  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
855 
856  bool is_all_and = true; // flag is distance operation is just COUNT_AND
857  distance_stage(dmit, dmit_end, &is_all_and);
858 
859  bm::word_t*** blk_root = bman1.top_blocks_root();
860  unsigned block_idx = 0;
861  unsigned i, j;
862 
863  const bm::word_t* blk;
864  const bm::word_t* arg_blk;
865  bool blk_gap;
866  bool arg_gap;
867 
868  unsigned top_block_size = bman1.top_block_size();
869  unsigned ebs2 = bman2.top_block_size();
870  if (ebs2 > top_block_size)
871  top_block_size = ebs2;
872 
873  for (i = 0; i < top_block_size; ++i)
874  {
875  bm::word_t** blk_blk = blk_root ? blk_root[i] : 0;
876 
877  if (blk_blk == 0) // not allocated
878  {
879  // AND operation requested - we can skip this portion here
880  if (is_all_and)
881  {
882  block_idx += bm::set_array_size;
883  continue;
884  }
885 
886  const bm::word_t* const* bvbb = bman2.get_topblock(i);
887  if (bvbb == 0)
888  {
889  block_idx += bm::set_array_size;
890  continue;
891  }
892 
893  blk = 0;
894  blk_gap = false;
895 
896  for (j = 0; j < bm::set_array_size; ++j,++block_idx)
897  {
898  arg_blk = bman2.get_block(i, j);
899  if (!arg_blk)
900  continue;
901  arg_gap = BM_IS_GAP(arg_blk);
902 
904  arg_blk, arg_gap,
905  dmit, dmit_end);
906 
907  // check if all distance requests alredy resolved
908  bool all_resolved = false;
910  do
911  {
912  if (!it->result)
913  {
914  all_resolved = false;
915  break;
916  }
917  ++it;
918  } while (it < dmit_end);
919  if (all_resolved)
920  return;
921  } // for j
922 
923  continue;
924  }
925 
926  for (j = 0; j < bm::set_array_size; ++j, ++block_idx)
927  {
928  blk = bman1.get_block(i, j);
929  //BLOCK_ADDR_SAN(blk_blk[j]);
930  if (blk == 0 && is_all_and)
931  continue;
932 
933  arg_blk = bman2.get_block(i, j);
934 
935  if (blk == 0 && arg_blk == 0)
936  continue;
937 
938  arg_gap = BM_IS_GAP(arg_blk);
939  blk_gap = BM_IS_GAP(blk);
940 
942  arg_blk, arg_gap,
943  dmit, dmit_end);
944 
945  // check if all distance requests alredy resolved
946  bool all_resolved = false;
948  do
949  {
950  if (!it->result)
951  {
952  all_resolved = false;
953  break;
954  }
955  ++it;
956  } while (it < dmit_end);
957  if (all_resolved)
958  return;
959 
960  } // for j
961 
962  } // for i
963 }
964 
965 
966 
967 /*!
968  \brief Computes bitcount of AND operation of two bitsets
969  \param bv1 - Argument bit-vector.
970  \param bv2 - Argument bit-vector.
971  \return bitcount of the result
972  \ingroup distance
973 */
974 template<class BV>
975 bm::id_t count_and(const BV& bv1, const BV& bv2)
976 {
977  return distance_and_operation(bv1, bv2);
978 }
979 
980 /*!
981  \brief Computes if there is any bit in AND operation of two bitsets
982  \param bv1 - Argument bit-vector.
983  \param bv2 - Argument bit-vector.
984  \return non zero value if there is any bit
985  \ingroup distance
986 */
987 template<class BV>
988 bm::id_t any_and(const BV& bv1, const BV& bv2)
989 {
991 
992  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
993  return dmd.result;
994 }
995 
996 
997 
998 /*!
999  \brief Computes bitcount of XOR operation of two bitsets
1000  \param bv1 - Argument bit-vector.
1001  \param bv2 - Argument bit-vector.
1002  \return bitcount of the result
1003  \ingroup distance
1004 */
1005 template<class BV>
1006 bm::id_t count_xor(const BV& bv1, const BV& bv2)
1007 {
1009 
1010  distance_operation(bv1, bv2, &dmd, &dmd+1);
1011  return dmd.result;
1012 }
1013 
1014 /*!
1015  \brief Computes if there is any bit in XOR operation of two bitsets
1016  \param bv1 - Argument bit-vector.
1017  \param bv2 - Argument bit-vector.
1018  \return non zero value if there is any bit
1019  \ingroup distance
1020 */
1021 template<class BV>
1022 bm::id_t any_xor(const BV& bv1, const BV& bv2)
1023 {
1025 
1026  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1027  return dmd.result;
1028 }
1029 
1030 
1031 
1032 /*!
1033  \brief Computes bitcount of SUB operation of two bitsets
1034  \param bv1 - Argument bit-vector.
1035  \param bv2 - Argument bit-vector.
1036  \return bitcount of the result
1037  \ingroup distance
1038 */
1039 template<class BV>
1040 bm::id_t count_sub(const BV& bv1, const BV& bv2)
1041 {
1043 
1044  distance_operation(bv1, bv2, &dmd, &dmd+1);
1045  return dmd.result;
1046 }
1047 
1048 
1049 /*!
1050  \brief Computes if there is any bit in SUB operation of two bitsets
1051  \param bv1 - Argument bit-vector.
1052  \param bv2 - Argument bit-vector.
1053  \return non zero value if there is any bit
1054  \ingroup distance
1055 */
1056 template<class BV>
1057 bm::id_t any_sub(const BV& bv1, const BV& bv2)
1058 {
1060 
1061  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1062  return dmd.result;
1063 }
1064 
1065 
1066 /*!
1067  \brief Computes bitcount of OR operation of two bitsets
1068  \param bv1 - Argument bit-vector.
1069  \param bv2 - Argument bit-vector.
1070  \return bitcount of the result
1071  \ingroup distance
1072 */
1073 template<class BV>
1074 bm::id_t count_or(const BV& bv1, const BV& bv2)
1075 {
1077 
1078  distance_operation(bv1, bv2, &dmd, &dmd+1);
1079  return dmd.result;
1080 }
1081 
1082 /*!
1083  \brief Computes if there is any bit in OR operation of two bitsets
1084  \param bv1 - Argument bit-vector.
1085  \param bv2 - Argument bit-vector.
1086  \return non zero value if there is any bit
1087  \ingroup distance
1088 */
1089 template<class BV>
1090 bm::id_t any_or(const BV& bv1, const BV& bv2)
1091 {
1093 
1094  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1095  return dmd.result;
1096 }
1097 
1098 
1099 /**
1100  \brief Internal algorithms scans the input for the block range limit
1101  \internal
1102 */
1103 template<class It>
1104 It block_range_scan(It first, It last, unsigned nblock, unsigned* max_id)
1105 {
1106  It right;
1107  for (right = first; right != last; ++right)
1108  {
1109  unsigned v = unsigned(*right);
1110  BM_ASSERT(v < bm::id_max);
1111  if (v >= *max_id)
1112  *max_id = v;
1113  unsigned nb = v >> bm::set_block_shift;
1114  if (nb != nblock)
1115  break;
1116  }
1117  return right;
1118 }
1119 
1120 /**
1121  \brief OR Combine bitvector and the iterable sequence
1122 
1123  Algorithm combines bvector with sequence of integers
1124  (represents another set). When the agrument set is sorted
1125  this method can give serious increase in performance.
1126 
1127  \param bv - destination bitvector
1128  \param first - first element of the iterated sequence
1129  \param last - last element of the iterated sequence
1130 
1131  \ingroup setalgo
1132 */
1133 template<class BV, class It>
1134 void combine_or(BV& bv, It first, It last)
1135 {
1136  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1137  if (!bman.is_init())
1138  bman.init_tree();
1139 
1140  unsigned max_id = 0;
1141 
1142  while (first < last)
1143  {
1144  unsigned nblock = unsigned((*first) >> bm::set_block_shift);
1145  It right = bm::block_range_scan(first, last, nblock, &max_id);
1146 
1147  if (max_id >= bv.size())
1148  {
1149  BM_ASSERT(max_id < bm::id_max);
1150  bv.resize(max_id + 1);
1151  }
1152 
1153  // now we have one in-block array of bits to set
1154 
1155  label1:
1156 
1157  int block_type;
1158  bm::word_t* blk =
1159  bman.check_allocate_block(nblock,
1160  true,
1161  bv.get_new_blocks_strat(),
1162  &block_type);
1163  if (!blk)
1164  continue;
1165 
1166  if (block_type == 1) // gap
1167  {
1168  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1169  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1170 
1171  for (; first < right; ++first)
1172  {
1173  unsigned is_set;
1174  unsigned nbit = (*first) & bm::set_block_mask;
1175 
1176  unsigned new_block_len =
1177  bm::gap_set_value(true, gap_blk, nbit, &is_set);
1178  if (new_block_len > threshold)
1179  {
1180  bman.extend_gap_block(nblock, gap_blk);
1181  ++first;
1182  goto label1; // block pointer changed - goto reset
1183  }
1184  }
1185  }
1186  else // bit block
1187  {
1188  for (; first < right; ++first)
1189  {
1190  unsigned nbit = unsigned(*first & bm::set_block_mask);
1191  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1192  nbit &= bm::set_word_mask;
1193  blk[nword] |= (1u << nbit);
1194  } // for
1195  }
1196  } // while
1197 
1198  bv.forget_count();
1199 }
1200 
1201 
1202 /**
1203  \brief XOR Combine bitvector and the iterable sequence
1204 
1205  Algorithm combines bvector with sequence of integers
1206  (represents another set). When the agrument set is sorted
1207  this method can give serious increase in performance.
1208 
1209  \param bv - destination bitvector
1210  \param first - first element of the iterated sequence
1211  \param last - last element of the iterated sequence
1212 
1213  \ingroup setalgo
1214 */
1215 template<class BV, class It>
1216 void combine_xor(BV& bv, It first, It last)
1217 {
1218  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1219  if (!bman.is_init())
1220  bman.init_tree();
1221 
1222  unsigned max_id = 0;
1223 
1224  while (first < last)
1225  {
1226  unsigned nblock = unsigned((*first) >> bm::set_block_shift);
1227  It right = block_range_scan(first, last, nblock, &max_id);
1228 
1229  if (max_id >= bv.size())
1230  {
1231  BM_ASSERT(max_id < bm::id_max);
1232  bv.resize(max_id + 1);
1233  }
1234 
1235  // now we have one in-block array of bits to set
1236 
1237  label1:
1238 
1239  int block_type;
1240  bm::word_t* blk =
1241  bman.check_allocate_block(nblock,
1242  true,
1243  bv.get_new_blocks_strat(),
1244  &block_type,
1245  false /* no null return */);
1246  BM_ASSERT(blk);
1247 
1248  if (block_type == 1) // gap
1249  {
1250  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1251  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1252 
1253  for (; first < right; ++first)
1254  {
1255  unsigned is_set;
1256  unsigned nbit = (*first) & bm::set_block_mask;
1257 
1258  is_set = bm::gap_test_unr(gap_blk, nbit);
1259  BM_ASSERT(is_set <= 1);
1260  is_set ^= 1;
1261 
1262  unsigned new_block_len =
1263  gap_set_value(is_set, gap_blk, nbit, &is_set);
1264  if (new_block_len > threshold)
1265  {
1266  bman.extend_gap_block(nblock, gap_blk);
1267  ++first;
1268  goto label1; // block pointer changed - goto reset
1269  }
1270  }
1271  }
1272  else // bit block
1273  {
1274  for (; first < right; ++first)
1275  {
1276  unsigned nbit = unsigned(*first & bm::set_block_mask);
1277  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1278  nbit &= bm::set_word_mask;
1279  blk[nword] ^= (1u << nbit);
1280  } // for
1281  }
1282  } // while
1283 
1284  bv.forget_count();
1285 }
1286 
1287 
1288 
1289 /**
1290  \brief SUB Combine bitvector and the iterable sequence
1291 
1292  Algorithm combines bvector with sequence of integers
1293  (represents another set). When the agrument set is sorted
1294  this method can give serious increase in performance.
1295 
1296  \param bv - destination bitvector
1297  \param first - first element of the iterated sequence
1298  \param last - last element of the iterated sequence
1299 
1300  \ingroup setalgo
1301 */
1302 template<class BV, class It>
1303 void combine_sub(BV& bv, It first, It last)
1304 {
1305  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1306  if (!bman.is_init())
1307  bman.init_tree();
1308 
1309  unsigned max_id = 0;
1310 
1311  while (first < last)
1312  {
1313  unsigned nblock = unsigned((*first) >> bm::set_block_shift);
1314  It right = block_range_scan(first, last, nblock, &max_id);
1315 
1316  if (max_id >= bv.size())
1317  {
1318  BM_ASSERT(max_id < bm::id_max);
1319  bv.resize(max_id + 1);
1320  }
1321 
1322  // now we have one in-block array of bits to set
1323 
1324  label1:
1325 
1326  int block_type;
1327  bm::word_t* blk =
1328  bman.check_allocate_block(nblock,
1329  false,
1330  bv.get_new_blocks_strat(),
1331  &block_type);
1332 
1333  if (!blk)
1334  continue;
1335 
1336  if (block_type == 1) // gap
1337  {
1338  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1339  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1340 
1341  for (; first < right; ++first)
1342  {
1343  unsigned is_set;
1344  unsigned nbit = (*first) & bm::set_block_mask;
1345 
1346  is_set = bm::gap_test_unr(gap_blk, nbit);
1347  if (!is_set)
1348  continue;
1349 
1350  unsigned new_block_len =
1351  gap_set_value(false, gap_blk, nbit, &is_set);
1352  if (new_block_len > threshold)
1353  {
1354  bman.extend_gap_block(nblock, gap_blk);
1355  ++first;
1356  goto label1; // block pointer changed - goto reset
1357  }
1358  }
1359  }
1360  else // bit block
1361  {
1362  for (; first < right; ++first)
1363  {
1364  unsigned nbit = unsigned(*first & bm::set_block_mask);
1365  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1366  nbit &= bm::set_word_mask;
1367  blk[nword] &= ~((bm::word_t)1 << nbit);
1368  } // for
1369  }
1370  } // while
1371 
1372  bv.forget_count();
1373 }
1374 
1375 /**
1376  \brief AND Combine bitvector and the iterable sequence
1377 
1378  Algorithm combines bvector with sorted sequence of integers
1379  (represents another set).
1380 
1381  \param bv - destination bitvector
1382  \param first - first element of the iterated sequence
1383  \param last - last element of the iterated sequence
1384 
1385  \ingroup setalgo
1386 */
1387 template<class BV, class It>
1388 void combine_and_sorted(BV& bv, It first, It last)
1389 {
1390  bm::id_t prev = 0;
1391  for ( ;first < last; ++first)
1392  {
1393  bm::id_t id = *first;
1394  BM_ASSERT(id >= prev); // make sure it's sorted
1395  bv.set_bit_and(id, true);
1396  if (++prev < id)
1397  {
1398  bv.set_range(prev, id-1, false);
1399  }
1400  prev = id;
1401  }
1402 }
1403 
1404 
1405 /**
1406  \brief AND Combine bitvector and the iterable sequence
1407 
1408  Algorithm combines bvector with sequence of integers
1409  (represents another set). When the agrument set is sorted
1410  this method can give serious increase in performance.
1411 
1412  \param bv - destination bitvector
1413  \param first - first element of the iterated sequence
1414  \param last - last element of the iterated sequence
1415 
1416  \ingroup setalgo
1417  \sa combine_and_sorted
1418 */
1419 template<class BV, class It>
1420 void combine_and(BV& bv, It first, It last)
1421 {
1422  BV bv_tmp;
1423  combine_or(bv_tmp, first, last);
1424  bv &= bv_tmp;
1425 }
1426 
1427 /*!
1428  \brief Compute number of bit intervals (GAPs) in the bitvector
1429 
1430  Algorithm traverses bit vector and count number of uninterrupted
1431  intervals of 1s and 0s.
1432  <pre>
1433  For example:
1434  empty vector - 1 interval
1435  00001111100000 - gives us 3 intervals
1436  10001111100000 - 4 intervals
1437  00000000000000 - 1 interval
1438  11111111111111 - 1 interval
1439  </pre>
1440  \return Number of intervals
1441  \ingroup setalgo
1442 */
1443 template<class BV>
1445 {
1446  const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1447 
1448  if (!bman.is_init())
1449  return 1;
1450 
1451  bm::word_t*** blk_root = bman.top_blocks_root();
1452  typename BV::blocks_manager_type::block_count_change_func func(bman);
1453  for_each_block(blk_root, bman.top_block_size(), func);
1454 
1455  return func.count();
1456 }
1457 
1458 /*!
1459  \brief Export bitset from an array of binary data representing
1460  the bit vector.
1461 
1462  Input array can be array of ints, chars or other native C types.
1463  Method works with iterators, which makes it compatible with
1464  STL containers and C arrays
1465 
1466  \param bv - destination bitvector
1467  \param first - first element of the iterated sequence
1468  \param last - last element of the iterated sequence
1469 
1470  \ingroup setalgo
1471 */
1472 template<typename BV, class It>
1473 void export_array(BV& bv, It first, It last)
1474 {
1475  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1476  if (!bman.is_init())
1477  bman.init_tree();
1478 
1479  unsigned inp_word_size = sizeof(*first);
1480  size_t array_size = size_t(last - first);
1481  size_t bit_cnt = array_size * inp_word_size * 8;
1482  int block_type;
1483  bm::word_t tmp;
1484  bm::word_t b1, b2, b3, b4;
1485 
1486  if (bit_cnt >= bv.size())
1487  bv.resize((bm::id_t)bit_cnt + 1);
1488  else
1489  bv.set_range((bm::id_t)bit_cnt, bv.size() - 1, false);
1490 
1491  switch (inp_word_size)
1492  {
1493  case 1:
1494  {
1495  size_t word_cnt = array_size / 4;
1496  for (unsigned i = 0; i < bm::set_total_blocks; ++i)
1497  {
1498  bm::word_t* blk =
1499  bman.check_allocate_block(i,
1500  false,
1501  BM_BIT,
1502  &block_type,
1503  false /*no NULL ret*/);
1504  if (block_type == 1) // gap
1505  {
1506  blk = bman.deoptimize_block(i);
1507  }
1508 
1509  bm::word_t* wrd_ptr = blk;
1510  if (word_cnt >= bm::set_block_size) {
1511  bm::word_t* wrd_end = blk + bm::set_block_size;
1512  do {
1513  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1514  b3 = bm::word_t(*first++); b4 = bm::word_t(*first++);
1515  tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1516  *wrd_ptr++ = tmp;
1517  } while (wrd_ptr < wrd_end);
1518  word_cnt -= bm::set_block_size;
1519  }
1520  else
1521  {
1522  size_t to_convert = size_t(last - first);
1523  for (size_t j = 0; j < to_convert / 4; ++j)
1524  {
1525  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1526  b3 = bm::word_t(*first++); b4 = bm::word_t(*first++);
1527  tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1528  *wrd_ptr++ = tmp;
1529  }
1530  while (1)
1531  {
1532  if (first == last) break;
1533  *wrd_ptr = bm::word_t(*first++);
1534  if (first == last) break;
1535  *wrd_ptr |= bm::word_t(*first++) << 8u;
1536  if (first == last) break;
1537  *wrd_ptr |= bm::word_t(*first++) << 16u;
1538  if (first == last) break;
1539  *wrd_ptr |= bm::word_t(*first++) << 24u;
1540  ++wrd_ptr;
1541  }
1542  }
1543  if (first == last) break;
1544  } // for
1545  }
1546  break;
1547  case 2:
1548  {
1549  size_t word_cnt = array_size / 2;
1550  for (unsigned i = 0; i < bm::set_total_blocks; ++i)
1551  {
1552  bm::word_t* blk =
1553  bman.check_allocate_block(i,
1554  false,
1555  BM_BIT,
1556  &block_type,
1557  false /*no NULL ret*/);
1558  if (block_type) // gap
1559  blk = bman.deoptimize_block(i);
1560 
1561  bm::word_t* wrd_ptr = blk;
1562  if (word_cnt >= bm::set_block_size) {
1563  bm::word_t* wrd_end = blk + bm::set_block_size;
1564  do {
1565  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1566  tmp = b1 | (b2 << 16);
1567  *wrd_ptr++ = tmp;
1568  } while (wrd_ptr < wrd_end);
1569  word_cnt -= bm::set_block_size;
1570  }
1571  else
1572  {
1573  size_t to_convert = size_t(last - first);
1574  for (unsigned j = 0; j < to_convert / 2; ++j)
1575  {
1576  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1577  tmp = b1 | (b2 << 16u);
1578  *wrd_ptr++ = tmp;
1579  }
1580  while (1)
1581  {
1582  if (first == last) break;
1583  *wrd_ptr = bm::word_t(*first++);
1584  if (first == last) break;
1585  *wrd_ptr |= bm::word_t((*first++) << 16u);
1586  ++wrd_ptr;
1587  }
1588  }
1589  if (first == last) break;
1590  } // for
1591  }
1592  break;
1593  case 4:
1594  {
1595  size_t word_cnt = array_size;
1596  for (unsigned i = 0; i < bm::set_total_blocks; ++i)
1597  {
1598  bm::word_t* blk =
1599  bman.check_allocate_block(i,
1600  false,
1601  BM_BIT,
1602  &block_type,
1603  false /*no NULL ret*/);
1604  if (block_type == 1) // gap
1605  blk = bman.deoptimize_block(i);
1606 
1607  bm::word_t* wrd_ptr = blk;
1608  if (word_cnt >= bm::set_block_size) {
1609  bm::word_t* wrd_end = blk + bm::set_block_size;
1610  do
1611  {
1612  *wrd_ptr++ = bm::word_t(*first++);
1613  } while (wrd_ptr < wrd_end);
1614  word_cnt -= bm::set_block_size;
1615  }
1616  else
1617  {
1618  while (1)
1619  {
1620  if (first == last) break;
1621  *wrd_ptr = bm::word_t(*first++);
1622  ++wrd_ptr;
1623  }
1624  }
1625  if (first == last) break;
1626  } // for
1627  }
1628  break;
1629  default:
1630  BM_ASSERT(0);
1631  } // switch
1632 
1633 }
1634 
1635 
1636 /*!
1637  \brief for-each visitor, calls a special visitor functor for each 1 bit group
1638 
1639  \param block - bit block buffer pointer
1640  \param offset - global block offset (number of bits)
1641  \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
1642 
1643  @ingroup bitfunc
1644  @internal
1645 */
1646 template<class Func>
1647 void for_each_bit_blk(const bm::word_t* block, bm::id_t offset,
1648  Func& bit_functor)
1649 {
1650  unsigned char bits[bm::set_bitscan_wave_size*32];
1651 
1652  unsigned offs = offset;
1653  unsigned cnt;
1654  const word_t* block_end = block + bm::set_block_size;
1655  do
1656  {
1657  cnt = bm::bitscan_wave(block, bits);
1658  if (cnt)
1659  bit_functor.add_bits(offs, bits, cnt);
1660  offs += bm::set_bitscan_wave_size * 32;
1661  block += bm::set_bitscan_wave_size;
1662  } while (block < block_end);
1663 }
1664 
1665 
1666 /*!
1667  \brief for-each visitor, calls a special visitor functor for each 1 bit range
1668 
1669  \param buf - bit block buffer pointer
1670  \param offset - global block offset (number of bits)
1671  \param bit_functor - functor must support .add_range(offset, bits_ptr, size)
1672 
1673  @ingroup gapfunc
1674  @internal
1675 */
1676 template<typename T, typename Func>
1677 void for_each_gap_blk(const T* buf, bm::id_t offset,
1678  Func& bit_functor)
1679 {
1680  const T* pcurr = buf + 1;
1681  const T* pend = buf + (*buf >> 3);
1682 
1683  if (*buf & 1)
1684  {
1685  bit_functor.add_range(offset, *pcurr + 1);
1686  ++pcurr;
1687  }
1688  for (++pcurr; pcurr <= pend; pcurr += 2)
1689  {
1690  T prev = *(pcurr-1);
1691  bit_functor.add_range(offset + prev + 1, *pcurr - prev);
1692  }
1693 }
1694 
1695 
1696 } // namespace bm
1697 
1698 #ifdef _MSC_VER
1699 #pragma warning( pop )
1700 #endif
1701 
1702 #endif
bm::id_t bit_block_count(const bm::word_t *block)
Bitcount for bit block.
Definition: bmfunc.h:3716
BMFORCEINLINE unsigned gap_count_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount AND operation test.
Definition: bmfunc.h:4494
#define BM_VECT_ALIGN
Definition: bmdef.h:293
(A & B).count()
Definition: bmalgo_impl.h:59
const unsigned set_block_size
Definition: bmconst.h:48
const unsigned set_word_shift
Definition: bmconst.h:64
unsigned distance_and_operation(const BV &bv1, const BV &bv2)
Distance AND computing template function.
Definition: bmalgo_impl.h:781
bm::id_t any_and(const BV &bv1, const BV &bv2)
Computes if there is any bit in AND operation of two bitsets.
Definition: bmalgo_impl.h:988
bm::id_t gap_bitset_and_any(const unsigned *block, const T *pcurr)
Bitcount test of bit block AND masked by GAP block.
Definition: bmfunc.h:2985
bm::id_t any_xor(const BV &bv1, const BV &bv2)
Computes if there is any bit in XOR operation of two bitsets.
Definition: bmalgo_impl.h:1022
BMFORCEINLINE unsigned gap_operation_any_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP AND operation test.
Definition: bmfunc.h:4477
unsigned gap_set_value(unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
Sets or clears bit in the GAP buffer.
Definition: bmfunc.h:2193
unsigned combine_count_and_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk)
Internal function computes AND distance.
Definition: bmalgo_impl.h:336
const unsigned short set_bitscan_wave_size
Definition: bmfunc.h:7046
void distance_stage(const distance_metric_descriptor *dmit, const distance_metric_descriptor *dmit_end, bool *is_all_and)
Staging function for distance operation.
Definition: bmalgo_impl.h:661
(A - B).count()
Definition: bmalgo_impl.h:62
Definition: bm.h:69
bm::id_t gap_bitset_sub_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block SUB masked by GAP block.
Definition: bmfunc.h:3048
void combine_any_operation_with_block(const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, unsigned arg_gap, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different existense of distance metric.
Definition: bmalgo_impl.h:373
BMFORCEINLINE unsigned gap_count_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount OR operation test.
Definition: bmfunc.h:4605
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
Definition: bmfunc.h:3227
bm::id_t bit_operation_xor_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation test.
Definition: bmfunc.h:6121
bm::id_t bit_operation_sub_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock test of SUB operation.
Definition: bmfunc.h:5416
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1134
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1303
(A ^ B).count()
Definition: bmalgo_impl.h:60
bm::id_t count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo_impl.h:1006
const unsigned id_max
Definition: bmconst.h:44
(B - A).count()
Definition: bmalgo_impl.h:63
void distance_operation_any(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Distance screening template function.
Definition: bmalgo_impl.h:848
Distance metric descriptor, holds metric code and result.
Definition: bmalgo_impl.h:87
B.count()
Definition: bmalgo_impl.h:65
T min_value(T v1, T v2)
Get minimum of 2 values.
Definition: bmutil.h:102
bm::id_t gap_bitset_xor_count(const unsigned *block, const T *buf)
Compute bitcount of bit block XOR masked by GAP block.
Definition: bmfunc.h:3085
distance_metric_descriptor(distance_metric m)
Definition: bmalgo_impl.h:92
BMFORCEINLINE bool gap_is_all_zero(const bm::gap_word_t *buf)
Checks if GAP block is all-zero.
Definition: bmfunc.h:950
void for_each_gap_blk(const T *buf, bm::id_t offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
Definition: bmalgo_impl.h:1677
BMFORCEINLINE unsigned gap_count_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount XOR operation test.
Definition: bmfunc.h:4560
unsigned int word_t
Definition: bmconst.h:36
#define BM_VECT_ALIGN_ATTR
Definition: bmdef.h:294
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start)
Returns "true" if all bits in the block are 0.
Definition: bmfunc.h:923
bm::id_t bit_operation_or_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation test.
Definition: bmfunc.h:5470
void distance_operation(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Distance computing template function.
Definition: bmalgo_impl.h:694
void combine_xor(BV &bv, It first, It last)
XOR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1216
(A | B).count()
Definition: bmalgo_impl.h:61
A.count()
Definition: bmalgo_impl.h:64
const unsigned gap_max_buff_len
Definition: bmconst.h:72
No GAP compression strategy. All new blocks are bit blocks.
Definition: bmconst.h:120
bm::id_t count_intervals(const BV &bv)
Compute number of bit intervals (GAPs) in the bitvector.
Definition: bmalgo_impl.h:1444
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition: bmfunc.h:1167
unsigned short gap_word_t
Definition: bmconst.h:70
bm::id_t count_sub(const BV &bv1, const BV &bv2)
Computes bitcount of SUB operation of two bitsets.
Definition: bmalgo_impl.h:1040
BMFORCEINLINE unsigned gap_operation_any_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP SUB operation test.
Definition: bmfunc.h:4655
gap_word_t * gap_operation_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP OR operation.
Definition: bmfunc.h:4585
BMFORCEINLINE unsigned gap_count_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount SUB (AND NOT) operation test.
Definition: bmfunc.h:4672
bm::id_t gap_bitset_and_count(const unsigned *block, const T *pcurr)
Compute bitcount of bit block AND masked by GAP block.
Definition: bmfunc.h:2958
Definitions(internal)
bm::id_t count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo_impl.h:1074
static bit_operation_count_func_type bit_operation_count(unsigned i)
Definition: bmfunc.h:7000
set_operation
Codes of set operations.
Definition: bmconst.h:129
bm::id_t gap_bitset_xor_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block XOR masked by GAP block.
Definition: bmfunc.h:3122
bm::id_t any_sub(const BV &bv1, const BV &bv2)
Computes if there is any bit in SUB operation of two bitsets.
Definition: bmalgo_impl.h:1057
void combine_and_sorted(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1388
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:5332
bm::id_t gap_bitset_or_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block OR masked by GAP block.
Definition: bmfunc.h:3191
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1420
#define BM_IS_GAP(ptr)
Definition: bmdef.h:167
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:73
BMFORCEINLINE unsigned gap_operation_any_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP XOR operation test.
Definition: bmfunc.h:4544
#define BM_SET_MMX_GUARD
Definition: bmdef.h:232
const unsigned set_block_mask
Definition: bmconst.h:50
void export_array(BV &bv, It first, It last)
Export bitset from an array of binary data representing the bit vector.
Definition: bmalgo_impl.h:1473
#define BMGAP_PTR(ptr)
Definition: bmdef.h:165
const unsigned set_array_size
Definition: bmconst.h:82
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:1583
void for_each_block(T ***root, unsigned size1, F &f)
Definition: bmfunc.h:1488
bm::id_t(* bit_operation_count_func_type)(const bm::word_t *BMRESTRICT, const bm::word_t *BMRESTRICT)
Definition: bmfunc.h:6973
const unsigned set_word_mask
Definition: bmconst.h:65
bm::id_t gap_bitset_or_count(const unsigned *block, const T *buf)
Compute bitcount of bit block OR masked by GAP block.
Definition: bmfunc.h:3159
bm::id_t bit_operation_and_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation test.
Definition: bmfunc.h:5351
unsigned short bitscan_wave(const bm::word_t *w_ptr, unsigned char *bits)
Unpacks word wave (Nx 32-bit words)
Definition: bmfunc.h:7057
unsigned int id_t
Definition: bmconst.h:35
unsigned gap_limit(const T *buf, const T *glevel_len)
Returs GAP block capacity limit.
Definition: bmfunc.h:1008
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:119
#define BM_ASSERT
Definition: bmdef.h:116
const unsigned set_total_blocks
Definition: bmconst.h:85
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:57
void for_each_bit_blk(const bm::word_t *block, bm::id_t offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit group
Definition: bmalgo_impl.h:1647
bm::id_t gap_bitset_sub_count(const unsigned *block, const T *buf)
Compute bitcount of bit block SUB masked by GAP block.
Definition: bmfunc.h:3014
const unsigned set_block_shift
Definition: bmconst.h:49
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:768
bm::id_t count_and(const BV &bv1, const BV &bv2)
Computes bitcount of AND operation of two bitsets.
Definition: bmalgo_impl.h:975
void reset()
Sets metric result to 0.
Definition: bmalgo_impl.h:104
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:5372
It block_range_scan(It first, It last, unsigned nblock, unsigned *max_id)
Internal algorithms scans the input for the block range limit.
Definition: bmalgo_impl.h:1104
Bit manipulation primitives (internal)
#define BLOCK_ADDR_SAN(addr)
Definition: bmdef.h:137
bm::id_t any_or(const BV &bv1, const BV &bv2)
Computes if there is any bit in OR operation of two bitsets.
Definition: bmalgo_impl.h:1090