BitMagic-C++
bmalgo_impl.h
Go to the documentation of this file.
1 #ifndef BMALGO_IMPL__H__INCLUDED__
2 #define BMALGO_IMPL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmalgo_impl.h
22  \brief Algorithms for bvector<>
23 */
24 
25 #ifdef _MSC_VER
26 #pragma warning( push )
27 #pragma warning( disable : 4311 4312 4127)
28 #endif
29 
30 #include "bmdef.h"
31 #include "bmutil.h"
32 
33 namespace bm
34 {
35 
36 /*!
37  @defgroup setalgo bvector<> algorithms
38 
39  Set algebra algorithms using bit-vectors, arrays.
40  Binary metrics and distances. Random sub-sets.
41 
42  @ingroup bvector
43  */
44 
45 /*!
46  @defgroup distance Binary-distance metrics
47 
48  Distance metrics and algorithms to compute binary distances
49  @ingroup setalgo
50  */
51 
52 
53 /*!
54  \brief Distance metrics codes defined for vectors A and B
55  \ingroup distance
56 */
58 {
59  COUNT_AND = set_COUNT_AND, //!< (A & B).count()
60  COUNT_XOR = set_COUNT_XOR, //!< (A ^ B).count()
61  COUNT_OR = set_COUNT_OR, //!< (A | B).count()
62  COUNT_SUB_AB = set_COUNT_SUB_AB, //!< (A - B).count()
63  COUNT_SUB_BA = set_COUNT_SUB_BA, //!< (B - A).count()
64  COUNT_A = set_COUNT_A, //!< A.count()
65  COUNT_B = set_COUNT_B //!< B.count()
66 };
67 
68 /**
69  Convert set operation into compatible distance metric
70  \ingroup distance
71 */
72 inline
74 {
76  if (op == set_COUNT) op = set_COUNT_B;
77  // distance metric is created as a set operation sub-class
78  // simple cast will work to convert
79  return (distance_metric) op;
80 }
81 
82 /*!
83  \brief Distance metric descriptor, holds metric code and result.
84  \sa distance_operation
85 */
86 
88 {
89 #ifdef BM64ADDR
90  typedef bm::id64_t size_type;
91 #else
93 #endif
94 
96  size_type result;
97 
99  : metric(m),
100  result(0)
101  {}
103  : metric(bm::COUNT_XOR),
104  result(0)
105  {}
106 
107  /*!
108  \brief Sets metric result to 0
109  */
110  void reset()
111  {
112  result = 0;
113  }
114 };
115 
116 
117 
118 /*!
119  \brief Internal function computes different distance metrics.
120  \internal
121  \ingroup distance
122 
123 */
124 inline
126  const bm::word_t* arg_blk,
128  distance_metric_descriptor* dmit_end)
129 
130 {
131  gap_word_t* g1 = BMGAP_PTR(blk);
132  gap_word_t* g2 = BMGAP_PTR(arg_blk);
133 
134  unsigned gap = BM_IS_GAP(blk);
135  unsigned arg_gap = BM_IS_GAP(arg_blk);
136 
137  if (gap) // first block GAP-type
138  {
139  if (arg_gap) // both blocks GAP-type
140  {
141  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
142  {
143  distance_metric_descriptor& dmd = *it;
144 
145  switch (dmd.metric)
146  {
147  case bm::COUNT_AND:
148  dmd.result += gap_count_and(g1, g2);
149  break;
150  case bm::COUNT_OR:
151  dmd.result += gap_count_or(g1, g2);
152  break;
153  case bm::COUNT_SUB_AB:
154  dmd.result += gap_count_sub(g1, g2);
155  break;
156  case bm::COUNT_SUB_BA:
157  dmd.result += gap_count_sub(g2, g1);
158  break;
159  case bm::COUNT_XOR:
160  dmd.result += gap_count_xor(g1, g2);
161  break;
162  case bm::COUNT_A:
163  dmd.result += gap_bit_count_unr(g1);
164  break;
165  case bm::COUNT_B:
166  dmd.result += gap_bit_count_unr(g2);
167  break;
168  default:
169  BM_ASSERT(0);
170  } // switch
171 
172  } // for it
173 
174  return;
175 
176  }
177  else // first block - GAP, argument - BITSET
178  {
179  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
180  {
181  distance_metric_descriptor& dmd = *it;
182 
183  switch (dmd.metric)
184  {
185  case bm::COUNT_AND:
186  if (arg_blk)
187  dmd.result += gap_bitset_and_count(arg_blk, g1);
188  break;
189  case bm::COUNT_OR:
190  if (!arg_blk)
191  dmd.result += gap_bit_count_unr(g1);
192  else
193  dmd.result += gap_bitset_or_count(arg_blk, g1);
194  break;
195  case bm::COUNT_SUB_AB:
196  {
198 
199  gap_convert_to_bitset((bm::word_t*) temp_bit_blk, g1);
200  dmd.result +=
201  bit_operation_sub_count((bm::word_t*)temp_bit_blk, arg_blk);
202  }
203  break;
204  case bm::COUNT_SUB_BA:
205  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
207  blk,
208  it, it+1);
209  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
210  break;
211  case bm::COUNT_XOR:
212  if (!arg_blk)
213  dmd.result += gap_bit_count_unr(g1);
214  else
215  dmd.result += gap_bitset_xor_count(arg_blk, g1);
216  break;
217  case bm::COUNT_A:
218  if (g1)
219  dmd.result += gap_bit_count_unr(g1);
220  break;
221  case bm::COUNT_B:
222  if (arg_blk)
223  {
224  dmd.result +=
225  bit_block_count(arg_blk);
226  }
227  break;
228  default:
229  BM_ASSERT(0);
230  } // switch
231 
232  } // for it
233 
234  return;
235 
236  }
237  }
238  else // first block is BITSET-type
239  {
240  if (arg_gap) // second argument block is GAP-type
241  {
242  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
243  {
244  distance_metric_descriptor& dmd = *it;
245 
246  switch (dmd.metric)
247  {
248  case bm::COUNT_AND:
249  if (blk)
250  dmd.result += gap_bitset_and_count(blk, g2);
251  break;
252  case bm::COUNT_OR:
253  if (!blk)
254  dmd.result += gap_bit_count_unr(g2);
255  else
256  dmd.result += gap_bitset_or_count(blk, g2);
257  break;
258  case bm::COUNT_SUB_AB:
259  if (blk)
260  dmd.result += gap_bitset_sub_count(blk, g2);
261  break;
262  case bm::COUNT_SUB_BA:
263  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
265  //arg_gap,
266  blk,
267  //gap,
268  it, it+1);
269  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
270  break;
271  case bm::COUNT_XOR:
272  if (!blk)
273  dmd.result += gap_bit_count_unr(g2);
274  else
275  dmd.result += gap_bitset_xor_count(blk, g2);
276  break;
277  case bm::COUNT_A:
278  if (blk)
279  {
280  dmd.result +=
281  bit_block_count(blk);
282  }
283  break;
284  case bm::COUNT_B:
285  if (g2)
286  dmd.result += gap_bit_count_unr(g2);
287  break;
288  default:
289  BM_ASSERT(0);
290  } // switch
291 
292  } // for it
293 
294  return;
295  }
296  }
297 
298  // --------------------------------------------
299  //
300  // Here we combine two plain bitblocks
301 
302  for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
303  {
304  distance_metric_descriptor& dmd = *it;
307  if (gfunc)
308  {
309  dmd.result += (*gfunc)(blk, arg_blk);
310  }
311  else
312  {
313  switch (dmd.metric)
314  {
315  case bm::COUNT_A:
316  if (blk)
317  dmd.result += bm::bit_block_count(blk);
318  break;
319  case bm::COUNT_B:
320  if (arg_blk)
321  dmd.result += bm::bit_block_count(arg_blk);
322  break;
323  case bm::COUNT_AND:
324  case bm::COUNT_XOR:
325  case bm::COUNT_OR:
326  case bm::COUNT_SUB_AB:
327  case bm::COUNT_SUB_BA:
328  default:
329  BM_ASSERT(0);
330  } // switch
331  }
332 
333  } // for it
334 }
335 
336 /*!
337 \brief Internal function computes AND distance.
338 \internal
339 \ingroup distance
340 */
341 inline
343  const bm::word_t* arg_blk)
344 {
345  unsigned gap = BM_IS_GAP(blk);
346  unsigned arg_gap = BM_IS_GAP(arg_blk);
347  if (gap) // first block GAP-type
348  {
349  if (arg_gap) // both blocks GAP-type
350  {
351  return gap_count_and(BMGAP_PTR(blk), BMGAP_PTR(arg_blk));
352  }
353  else // first block - GAP, argument - BITSET
354  {
355  return gap_bitset_and_count(arg_blk, BMGAP_PTR(blk));
356  }
357  }
358  else // first block is BITSET-type
359  {
360  if (arg_gap) // second argument block is GAP-type
361  {
362  return gap_bitset_and_count(blk, BMGAP_PTR(arg_blk));
363  }
364  }
365 
366  // --------------------------------------------
367  // Here we combine two plain bitblocks
368 
369  return bit_operation_and_count(blk, arg_blk);
370 }
371 
372 
373 /*!
374  \brief Internal function computes different existense of distance metric.
375  \internal
376  \ingroup distance
377 */
378 inline
380  unsigned gap,
381  const bm::word_t* arg_blk,
382  unsigned arg_gap,
384  distance_metric_descriptor* dmit_end)
385 
386 {
387  gap_word_t* res=0;
388 
389  gap_word_t* g1 = BMGAP_PTR(blk);
390  gap_word_t* g2 = BMGAP_PTR(arg_blk);
391 
392  if (gap) // first block GAP-type
393  {
394  if (arg_gap) // both blocks GAP-type
395  {
396  gap_word_t tmp_buf[bm::gap_max_buff_len * 3]; // temporary result
397 
398  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
399  {
400  distance_metric_descriptor& dmd = *it;
401  if (dmd.result)
402  {
403  continue;
404  }
405  res = 0;
406  unsigned dsize = 0;
407  switch (dmd.metric)
408  {
409  case bm::COUNT_AND:
410  dmd.result += gap_operation_any_and(g1, g2);
411  break;
412  case bm::COUNT_OR:
413  res = gap_operation_or(g1, g2, tmp_buf, dsize);
414  break;
415  case bm::COUNT_SUB_AB:
416  dmd.result += gap_operation_any_sub(g1, g2);
417  break;
418  case bm::COUNT_SUB_BA:
419  dmd.result += gap_operation_any_sub(g2, g1);
420  break;
421  case bm::COUNT_XOR:
422  dmd.result += gap_operation_any_xor(g1, g2);
423  break;
424  case bm::COUNT_A:
425  res = g1;
426  break;
427  case bm::COUNT_B:
428  res = g2;
429  break;
430  default:
431  BM_ASSERT(0);
432  } // switch
433  if (res)
434  dmd.result += !gap_is_all_zero(res);
435 
436  } // for it
437 
438  return;
439 
440  }
441  else // first block - GAP, argument - BITSET
442  {
443  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
444  {
445  distance_metric_descriptor& dmd = *it;
446  if (dmd.result)
447  {
448  continue;
449  }
450 
451  switch (dmd.metric)
452  {
453  case bm::COUNT_AND:
454  if (arg_blk)
455  dmd.result += gap_bitset_and_any(arg_blk, g1);
456  break;
457  case bm::COUNT_OR:
458  if (!arg_blk)
459  dmd.result += !gap_is_all_zero(g1);
460  else
461  dmd.result += gap_bitset_or_any(arg_blk, g1);
462  break;
463  case bm::COUNT_SUB_AB:
464  {
466  gap_convert_to_bitset((bm::word_t*) temp_blk, g1);
467  dmd.result +=
468  bit_operation_sub_any((bm::word_t*)temp_blk, arg_blk);
469  }
470  break;
471  case bm::COUNT_SUB_BA:
472  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
474  arg_gap,
475  blk,
476  gap,
477  it, it+1);
478  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
479  break;
480  case bm::COUNT_XOR:
481  if (!arg_blk)
482  dmd.result += !gap_is_all_zero(g1);
483  else
484  dmd.result += gap_bitset_xor_any(arg_blk, g1);
485  break;
486  case bm::COUNT_A:
487  if (g1)
488  dmd.result += !gap_is_all_zero(g1);
489  break;
490  case bm::COUNT_B:
491  if (arg_blk)
492  {
493  dmd.result +=
494  !bit_is_all_zero(arg_blk);
495  }
496  break;
497  default:
498  BM_ASSERT(0);
499  } // switch
500 
501  } // for it
502 
503  return;
504 
505  }
506  }
507  else // first block is BITSET-type
508  {
509  if (arg_gap) // second argument block is GAP-type
510  {
511  for (distance_metric_descriptor* it = dmit;it < dmit_end; ++it)
512  {
513  distance_metric_descriptor& dmd = *it;
514  if (dmd.result)
515  {
516  continue;
517  }
518 
519  switch (dmd.metric)
520  {
521  case bm::COUNT_AND:
522  if (blk)
523  dmd.result += gap_bitset_and_any(blk, g2);
524  break;
525  case bm::COUNT_OR:
526  if (!blk)
527  dmd.result += !gap_is_all_zero(g2);
528  else
529  dmd.result += gap_bitset_or_any(blk, g2);
530  break;
531  case bm::COUNT_SUB_AB:
532  if (blk)
533  dmd.result += gap_bitset_sub_any(blk, g2);
534  break;
535  case bm::COUNT_SUB_BA:
536  dmd.metric = bm::COUNT_SUB_AB; // recursive call to SUB_AB
538  arg_gap,
539  blk,
540  gap,
541  it, it+1);
542  dmd.metric = bm::COUNT_SUB_BA; // restore status quo
543  break;
544  case bm::COUNT_XOR:
545  if (!blk)
546  dmd.result += !gap_is_all_zero(g2);
547  else
548  dmd.result += gap_bitset_xor_any(blk, g2);
549  break;
550  case bm::COUNT_A:
551  if (blk)
552  {
553  dmd.result+=
554  !bm::bit_is_all_zero(blk);
555  }
556  break;
557  case bm::COUNT_B:
558  if (g2)
559  dmd.result += !gap_is_all_zero(g2);
560  break;
561  default:
562  BM_ASSERT(0);
563  } // switch
564 
565  } // for it
566 
567  return;
568  }
569  }
570 
571  // --------------------------------------------
572  //
573  // Here we combine two plain bitblocks
574 
575  for (distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
576  {
577  distance_metric_descriptor& dmd = *it;
578  if (dmd.result)
579  {
580  continue;
581  }
582 
583  switch (dmd.metric)
584  {
585  case bm::COUNT_AND:
586  dmd.result +=
587  bit_operation_and_any(blk, arg_blk);
588  break;
589  case bm::COUNT_OR:
590  dmd.result +=
591  bit_operation_or_any(blk, arg_blk);
592  break;
593  case bm::COUNT_SUB_AB:
594  dmd.result +=
595  bit_operation_sub_any(blk, arg_blk);
596  break;
597  case bm::COUNT_SUB_BA:
598  dmd.result +=
599  bit_operation_sub_any(arg_blk, blk);
600  break;
601  case bm::COUNT_XOR:
602  dmd.result +=
603  bit_operation_xor_any(blk, arg_blk);
604  break;
605  case bm::COUNT_A:
606  if (blk)
607  dmd.result += !bit_is_all_zero(blk);
608  break;
609  case bm::COUNT_B:
610  if (arg_blk)
611  dmd.result += !bit_is_all_zero(arg_blk);
612  break;
613  default:
614  BM_ASSERT(0);
615  } // switch
616 
617  } // for it
618 }
619 
620 
621 
622 /*!
623  Convenience internal function to compute combine count for one metric
624  \internal
625  \ingroup distance
626 */
627 inline
628 unsigned
630  const bm::word_t* arg_blk,
632 {
633  distance_metric_descriptor dmd(metric);
635  arg_blk, //arg_gap,
636  &dmd, &dmd+1);
637  return unsigned(dmd.result);
638 }
639 
640 
641 /*!
642  Convenience internal function to compute combine any for one metric
643  \internal
644  \ingroup distance
645 */
646 inline
649  unsigned gap,
650  const bm::word_t* arg_blk,
651  unsigned arg_gap,
653 {
654  distance_metric_descriptor dmd(metric);
656  arg_blk, arg_gap,
657  &dmd, &dmd+1);
658  return dmd.result;
659 }
660 
661 /*!
662  \brief Staging function for distance operation
663 
664  \return temp block allocated (or NULL)
665 
666  \internal
667 */
668 inline
670  const distance_metric_descriptor* dmit_end,
671  bool* is_all_and)
672 {
673  for (const distance_metric_descriptor* it = dmit; it < dmit_end; ++it)
674  {
675  if (it->metric != bm::COUNT_AND)
676  {
677  *is_all_and = false;
678  }
679  } // for
680 }
681 
682 /*!
683  \brief Distance computing template function.
684 
685  Function receives two bitvectors and an array of distance metrics
686  (metrics pipeline). Function computes all metrics saves result into
687  corresponding pipeline results (distance_metric_descriptor::result)
688  An important detail is that function reuses metric descriptors,
689  incrementing received values. It allows you to accumulate results
690  from different calls in the pipeline.
691 
692  \param bv1 - argument bitvector 1 (A)
693  \param bv2 - argument bitvector 2 (B)
694  \param dmit - pointer to first element of metric descriptors array
695  Input-Output parameter, receives metric code as input,
696  computation is added to "result" field
697  \param dmit_end - pointer to (last+1) element of metric descriptors array
698  \ingroup distance
699 
700 */
701 template<class BV>
702 void distance_operation(const BV& bv1,
703  const BV& bv2,
705  distance_metric_descriptor* dmit_end)
706 {
707  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
708  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
709 
710  bool is_all_and = true; // flag is distance operation is just COUNT_AND
711  distance_stage(dmit, dmit_end, &is_all_and);
712 
713  bm::word_t*** blk_root = bman1.top_blocks_root();
714  typename BV::size_type block_idx = 0;
715  unsigned i, j;
716 
717  const bm::word_t* blk;
718  const bm::word_t* arg_blk;
719 
720  unsigned top_block_size = bman1.top_block_size();
721  unsigned ebs2 = bman2.top_block_size();
722  unsigned top_size;
723  if (ebs2 > top_block_size)
724  top_size = ebs2;
725  else
726  top_size = top_block_size;
727 
728  for (i = 0; i < top_size; ++i)
729  {
730  bm::word_t** blk_blk = (blk_root && (i < top_block_size)) ? blk_root[i] : 0;
731  if (!blk_blk)
732  {
733  // AND operation requested - we can skip this portion here
734  if (is_all_and)
735  {
736  block_idx += bm::set_sub_array_size;
737  continue;
738  }
739  const bm::word_t* const* bvbb = bman2.get_topblock(i);
740  if (bvbb == 0)
741  {
742  block_idx += bm::set_sub_array_size;
743  continue;
744  }
745 
746  blk = 0;
747  for (j = 0; j < bm::set_sub_array_size; ++j,++block_idx)
748  {
749  arg_blk = bman2.get_block(i, j);
750  if (!arg_blk)
751  continue;
753  arg_blk,
754  dmit, dmit_end);
755  } // for j
756  continue;
757  }
758 
759  for (j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
760  {
761  blk = bman1.get_block(i, j);
762  if (blk == 0 && is_all_and)
763  continue;
764 
765  arg_blk = bman2.get_block(i, j);
766 
767  if (!blk & !arg_blk)
768  continue;
769 
771  arg_blk,
772  dmit, dmit_end);
773  } // for j
774 
775  } // for i
776 }
777 
778 
779 /*!
780 \brief Distance AND computing template function.
781 
782 
783 \param bv1 - argument bitvector 1 (A)
784 \param bv2 - argument bitvector 2 (B)
785 \ingroup distance
786 
787 */
788 template<class BV>
789 typename BV::size_type distance_and_operation(const BV& bv1,
790  const BV& bv2)
791 {
792  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
793  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
794 
795  if (!bman1.is_init() || !bman2.is_init())
796  return 0;
797 
798  bm::word_t*** blk_root = bman1.top_blocks_root();
799  bm::word_t*** blk_root_arg = bman2.top_blocks_root();
800  typename BV::size_type count = 0;
801 
802  unsigned top_block_size =
803  bm::min_value(bman1.top_block_size(),bman2.top_block_size());
804 
805  for (unsigned i = 0; i < top_block_size; ++i)
806  {
807  bm::word_t** blk_blk;
808  bm::word_t** blk_blk_arg;
809  if ((blk_blk = blk_root[i]) == 0 || (blk_blk_arg= blk_root_arg[i]) == 0)
810  continue;
811 
812  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
813  blk_blk = FULL_SUB_BLOCK_REAL_ADDR;
814  if ((bm::word_t*)blk_blk_arg == FULL_BLOCK_FAKE_ADDR)
815  blk_blk_arg = FULL_SUB_BLOCK_REAL_ADDR;
816 
817  for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
818  {
819  (blk_blk[j] && blk_blk_arg[j]) ?
820  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j]), BLOCK_ADDR_SAN(blk_blk_arg[j]))
821  :0;
822  (blk_blk[j+1] && blk_blk_arg[j+1]) ?
823  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+1]), BLOCK_ADDR_SAN(blk_blk_arg[j+1]))
824  :0;
825  (blk_blk[j+2] && blk_blk_arg[j+2]) ?
826  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+2]), BLOCK_ADDR_SAN(blk_blk_arg[j+2]))
827  :0;
828  (blk_blk[j+3] && blk_blk_arg[j+3]) ?
829  count += combine_count_and_operation_with_block(BLOCK_ADDR_SAN(blk_blk[j+3]), BLOCK_ADDR_SAN(blk_blk_arg[j+3]))
830  :0;
831  } // for j
832 
833  } // for i
834  return count;
835 }
836 
837 
838 /*!
839  \brief Distance screening template function.
840 
841  Function receives two bitvectors and an array of distance metrics
842  (metrics pipeline). Function computes possybility of a metric(any bit)
843  saves result into corresponding pipeline results
844  (distance_metric_descriptor::result)
845  An important detail is that function reuses metric descriptors,
846  incrementing received values. It allows you to accumulate results
847  from different calls in the pipeline.
848 
849  \param bv1 - argument bitvector 1 (A)
850  \param bv2 - argument bitvector 2 (B)
851  \param dmit - pointer to first element of metric descriptors array
852  Input-Output parameter, receives metric code as input,
853  computation is added to "result" field
854  \param dmit_end - pointer to (last+1) element of metric descriptors array
855  \ingroup distance
856 */
857 template<class BV>
858 void distance_operation_any(const BV& bv1,
859  const BV& bv2,
861  distance_metric_descriptor* dmit_end)
862 {
863  const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
864  const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
865 
866  bool is_all_and = true; // flag is distance operation is just COUNT_AND
867  distance_stage(dmit, dmit_end, &is_all_and);
868 
869  bm::word_t*** blk_root = bman1.top_blocks_root();
870  unsigned block_idx = 0;
871  unsigned i, j;
872 
873  const bm::word_t* blk;
874  const bm::word_t* arg_blk;
875  bool blk_gap;
876  bool arg_gap;
877 
878  unsigned top_block_size = bman1.top_block_size();
879  unsigned ebs2 = bman2.top_block_size();
880  unsigned top_size;
881  if (ebs2 > top_block_size)
882  top_size = ebs2;
883  else
884  top_size = top_block_size;
885 
886  for (i = 0; i < top_size; ++i)
887  {
888  bm::word_t** blk_blk = (blk_root && (i < top_block_size)) ? blk_root[i] : 0;
889  if (blk_blk == 0) // not allocated
890  {
891  // AND operation requested - we can skip this portion here
892  if (is_all_and)
893  {
894  block_idx += bm::set_sub_array_size;
895  continue;
896  }
897 
898  const bm::word_t* const* bvbb = bman2.get_topblock(i);
899  if (bvbb == 0)
900  {
901  block_idx += bm::set_sub_array_size;
902  continue;
903  }
904 
905  blk = 0;
906  blk_gap = false;
907 
908  for (j = 0; j < bm::set_sub_array_size; ++j,++block_idx)
909  {
910  arg_blk = bman2.get_block(i, j);
911  if (!arg_blk)
912  continue;
913  arg_gap = BM_IS_GAP(arg_blk);
914 
916  arg_blk, arg_gap,
917  dmit, dmit_end);
918 
919  // check if all distance requests alredy resolved
920  bool all_resolved = false;
922  do
923  {
924  if (!it->result)
925  {
926  all_resolved = false;
927  break;
928  }
929  ++it;
930  } while (it < dmit_end);
931  if (all_resolved)
932  return;
933  } // for j
934 
935  continue;
936  }
937 
938  for (j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
939  {
940  blk = bman1.get_block(i, j);
941  if (blk == 0 && is_all_and)
942  continue;
943 
944  arg_blk = bman2.get_block(i, j);
945 
946  if (blk == 0 && arg_blk == 0)
947  continue;
948 
949  arg_gap = BM_IS_GAP(arg_blk);
950  blk_gap = BM_IS_GAP(blk);
951 
953  arg_blk, arg_gap,
954  dmit, dmit_end);
955 
956  // check if all distance requests alredy resolved
957  bool all_resolved = true;
959  do
960  {
961  if (!it->result)
962  {
963  all_resolved = false;
964  break;
965  }
966  ++it;
967  } while (it < dmit_end);
968  if (all_resolved)
969  return;
970 
971  } // for j
972 
973  } // for i
974 }
975 
976 
977 
978 /*!
979  \brief Computes bitcount of AND operation of two bitsets
980  \param bv1 - Argument bit-vector.
981  \param bv2 - Argument bit-vector.
982  \return bitcount of the result
983  \ingroup distance
984 */
985 template<class BV>
986 typename BV::size_type count_and(const BV& bv1, const BV& bv2)
987 {
988  return distance_and_operation(bv1, bv2);
989 }
990 
991 /*!
992  \brief Computes if there is any bit in AND operation of two bitsets
993  \param bv1 - Argument bit-vector.
994  \param bv2 - Argument bit-vector.
995  \return non zero value if there is any bit
996  \ingroup distance
997 */
998 template<class BV>
999 typename BV::size_type any_and(const BV& bv1, const BV& bv2)
1000 {
1002 
1003  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1004  return dmd.result;
1005 }
1006 
1007 
1008 
1009 /*!
1010  \brief Computes bitcount of XOR operation of two bitsets
1011  \param bv1 - Argument bit-vector.
1012  \param bv2 - Argument bit-vector.
1013  \return bitcount of the result
1014  \ingroup distance
1015 */
1016 template<class BV>
1018 count_xor(const BV& bv1, const BV& bv2)
1019 {
1021 
1022  distance_operation(bv1, bv2, &dmd, &dmd+1);
1023  return dmd.result;
1024 }
1025 
1026 /*!
1027  \brief Computes if there is any bit in XOR operation of two bitsets
1028  \param bv1 - Argument bit-vector.
1029  \param bv2 - Argument bit-vector.
1030  \return non zero value if there is any bit
1031  \ingroup distance
1032 */
1033 template<class BV>
1034 typename BV::size_type any_xor(const BV& bv1, const BV& bv2)
1035 {
1037 
1038  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1039  return dmd.result;
1040 }
1041 
1042 
1043 
1044 /*!
1045  \brief Computes bitcount of SUB operation of two bitsets
1046  \param bv1 - Argument bit-vector.
1047  \param bv2 - Argument bit-vector.
1048  \return bitcount of the result
1049  \ingroup distance
1050 */
1051 template<class BV>
1052 typename BV::size_type count_sub(const BV& bv1, const BV& bv2)
1053 {
1055 
1056  distance_operation(bv1, bv2, &dmd, &dmd+1);
1057  return dmd.result;
1058 }
1059 
1060 
1061 /*!
1062  \brief Computes if there is any bit in SUB operation of two bitsets
1063  \param bv1 - Argument bit-vector.
1064  \param bv2 - Argument bit-vector.
1065  \return non zero value if there is any bit
1066  \ingroup distance
1067 */
1068 template<class BV>
1069 typename BV::size_type any_sub(const BV& bv1, const BV& bv2)
1070 {
1072 
1073  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1074  return dmd.result;
1075 }
1076 
1077 
1078 /*!
1079  \brief Computes bitcount of OR operation of two bitsets
1080  \param bv1 - Argument bit-vector.
1081  \param bv2 - Argument bit-vector.
1082  \return bitcount of the result
1083  \ingroup distance
1084 */
1085 template<class BV>
1086 typename BV::size_type count_or(const BV& bv1, const BV& bv2)
1087 {
1089 
1090  distance_operation(bv1, bv2, &dmd, &dmd+1);
1091  return dmd.result;
1092 }
1093 
1094 /*!
1095  \brief Computes if there is any bit in OR operation of two bitsets
1096  \param bv1 - Argument bit-vector.
1097  \param bv2 - Argument bit-vector.
1098  \return non zero value if there is any bit
1099  \ingroup distance
1100 */
1101 template<class BV>
1102 typename BV::size_type any_or(const BV& bv1, const BV& bv2)
1103 {
1105 
1106  distance_operation_any(bv1, bv2, &dmd, &dmd+1);
1107  return dmd.result;
1108 }
1109 
1110 
1111 /**
1112  \brief Internal algorithms scans the input for the block range limit
1113  \internal
1114 */
1115 template<typename It, typename SIZE_TYPE>
1116 It block_range_scan(It first, It last, SIZE_TYPE nblock, SIZE_TYPE* max_id)
1117 {
1118  SIZE_TYPE m = *max_id;
1119  It right;
1120  for (right = first; right != last; ++right)
1121  {
1122  SIZE_TYPE v = SIZE_TYPE(*right);
1123  BM_ASSERT(v < bm::id_max);
1124  if (v >= m)
1125  m = v;
1126  SIZE_TYPE nb = v >> bm::set_block_shift;
1127  if (nb != nblock)
1128  break;
1129  }
1130  *max_id = m;
1131  return right;
1132 }
1133 
1134 /**
1135  \brief OR Combine bitvector and the iterable sequence
1136 
1137  Algorithm combines bvector with sequence of integers
1138  (represents another set). When the agrument set is sorted
1139  this method can give serious increase in performance.
1140 
1141  \param bv - destination bitvector
1142  \param first - first element of the iterated sequence
1143  \param last - last element of the iterated sequence
1144 
1145  \ingroup setalgo
1146 */
1147 template<class BV, class It>
1148 void combine_or(BV& bv, It first, It last)
1149 {
1150  typedef typename BV::size_type size_type;
1151  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1152  if (!bman.is_init())
1153  bman.init_tree();
1154 
1155  size_type max_id = 0;
1156 
1157  while (first < last)
1158  {
1159  typename BV::block_idx_type nblock = (*first) >> bm::set_block_shift;
1160  It right = bm::block_range_scan(first, last, nblock, &max_id);
1161  if (max_id >= bv.size())
1162  {
1163  BM_ASSERT(max_id < bm::id_max);
1164  bv.resize(max_id + 1);
1165  }
1166 
1167  // now we have one in-block array of bits to set
1168 
1169  label1:
1170 
1171  int block_type;
1172  bm::word_t* blk =
1173  bman.check_allocate_block(nblock,
1174  true,
1175  bv.get_new_blocks_strat(),
1176  &block_type);
1177  if (!blk)
1178  continue;
1179 
1180  if (block_type == 1) // gap
1181  {
1182  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1183  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1184 
1185  for (; first < right; ++first)
1186  {
1187  unsigned is_set;
1188  unsigned nbit = (*first) & bm::set_block_mask;
1189 
1190  unsigned new_block_len =
1191  bm::gap_set_value(true, gap_blk, nbit, &is_set);
1192  if (new_block_len > threshold)
1193  {
1194  bman.extend_gap_block(nblock, gap_blk);
1195  ++first;
1196  goto label1; // block pointer changed - goto reset
1197  }
1198  }
1199  }
1200  else // bit block
1201  {
1202  for (; first < right; ++first)
1203  {
1204  size_type pos = *first;
1205  unsigned nbit = unsigned(pos & bm::set_block_mask);
1206  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1207  nbit &= bm::set_word_mask;
1208  blk[nword] |= (1u << nbit);
1209  } // for
1210  }
1211  } // while
1212 }
1213 
1214 
1215 /**
1216  \brief XOR Combine bitvector and the iterable sequence
1217 
1218  Algorithm combines bvector with sequence of integers
1219  (represents another set). When the agrument set is sorted
1220  this method can give serious increase in performance.
1221 
1222  \param bv - destination bitvector
1223  \param first - first element of the iterated sequence
1224  \param last - last element of the iterated sequence
1225 
1226  \ingroup setalgo
1227 */
1228 template<class BV, class It>
1229 void combine_xor(BV& bv, It first, It last)
1230 {
1231  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1232  if (!bman.is_init())
1233  bman.init_tree();
1234 
1235  typename BV::size_type max_id = 0;
1236 
1237  while (first < last)
1238  {
1239  typename BV::block_idx_type nblock = ((*first) >> bm::set_block_shift);
1240  It right = block_range_scan(first, last, nblock, &max_id);
1241 
1242  if (max_id >= bv.size())
1243  {
1244  BM_ASSERT(max_id < bm::id_max);
1245  bv.resize(max_id + 1);
1246  }
1247 
1248  // now we have one in-block array of bits to set
1249 
1250  label1:
1251 
1252  int block_type;
1253  bm::word_t* blk =
1254  bman.check_allocate_block(nblock,
1255  true,
1256  bv.get_new_blocks_strat(),
1257  &block_type,
1258  false /* no null return */);
1259  BM_ASSERT(blk);
1260 
1261  if (block_type == 1) // gap
1262  {
1263  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1264  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1265 
1266  for (; first < right; ++first)
1267  {
1268  unsigned is_set;
1269  unsigned nbit = (*first) & bm::set_block_mask;
1270 
1271  is_set = bm::gap_test_unr(gap_blk, nbit);
1272  BM_ASSERT(is_set <= 1);
1273  is_set ^= 1;
1274 
1275  unsigned new_block_len =
1276  gap_set_value(is_set, gap_blk, nbit, &is_set);
1277  if (new_block_len > threshold)
1278  {
1279  bman.extend_gap_block(nblock, gap_blk);
1280  ++first;
1281  goto label1; // block pointer changed - goto reset
1282  }
1283  }
1284  }
1285  else // bit block
1286  {
1287  for (; first < right; ++first)
1288  {
1289  unsigned nbit = unsigned(*first & bm::set_block_mask);
1290  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1291  nbit &= bm::set_word_mask;
1292  blk[nword] ^= (1u << nbit);
1293  } // for
1294  }
1295  } // while
1296 
1297  bv.forget_count();
1298 }
1299 
1300 
1301 
1302 /**
1303  \brief SUB Combine bitvector and the iterable sequence
1304 
1305  Algorithm combines bvector with sequence of integers
1306  (represents another set). When the agrument set is sorted
1307  this method can give serious increase in performance.
1308 
1309  \param bv - destination bitvector
1310  \param first - first element of the iterated sequence
1311  \param last - last element of the iterated sequence
1312 
1313  \ingroup setalgo
1314 */
1315 template<class BV, class It>
1316 void combine_sub(BV& bv, It first, It last)
1317 {
1318  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1319  if (!bman.is_init())
1320  bman.init_tree();
1321 
1322  typename BV::size_type max_id = 0;
1323 
1324  while (first < last)
1325  {
1326  typename BV::block_idx_type nblock = (*first) >> bm::set_block_shift;
1327  It right = bm::block_range_scan(first, last, nblock, &max_id);
1328 
1329  if (max_id >= bv.size())
1330  {
1331  BM_ASSERT(max_id < bm::id_max);
1332  bv.resize(max_id + 1);
1333  }
1334 
1335  // now we have one in-block array of bits to set
1336 
1337  label1:
1338 
1339  int block_type;
1340  bm::word_t* blk =
1341  bman.check_allocate_block(nblock,
1342  false,
1343  bv.get_new_blocks_strat(),
1344  &block_type);
1345 
1346  if (!blk)
1347  continue;
1348 
1349  if (block_type == 1) // gap
1350  {
1351  bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1352  unsigned threshold = bm::gap_limit(gap_blk, bman.glen());
1353 
1354  for (; first < right; ++first)
1355  {
1356  unsigned is_set;
1357  unsigned nbit = (*first) & bm::set_block_mask;
1358 
1359  is_set = bm::gap_test_unr(gap_blk, nbit);
1360  if (!is_set)
1361  continue;
1362 
1363  unsigned new_block_len =
1364  gap_set_value(false, gap_blk, nbit, &is_set);
1365  if (new_block_len > threshold)
1366  {
1367  bman.extend_gap_block(nblock, gap_blk);
1368  ++first;
1369  goto label1; // block pointer changed - goto reset
1370  }
1371  }
1372  }
1373  else // bit block
1374  {
1375  for (; first < right; ++first)
1376  {
1377  unsigned nbit = unsigned(*first & bm::set_block_mask);
1378  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1379  nbit &= bm::set_word_mask;
1380  blk[nword] &= ~((bm::word_t)1 << nbit);
1381  } // for
1382  }
1383  } // while
1384 
1385  bv.forget_count();
1386 }
1387 
1388 /**
1389  \brief AND Combine bitvector and the iterable sequence
1390 
1391  Algorithm combines bvector with sorted sequence of integers
1392  (represents another set).
1393 
1394  \param bv - destination bitvector
1395  \param first - first element of the iterated sequence
1396  \param last - last element of the iterated sequence
1397 
1398  \ingroup setalgo
1399 */
1400 template<class BV, class It>
1401 void combine_and_sorted(BV& bv, It first, It last)
1402 {
1403  typename BV::size_type prev = 0;
1404  for ( ;first < last; ++first)
1405  {
1406  typename BV::size_type id = *first;
1407  BM_ASSERT(id >= prev); // make sure it's sorted
1408  bv.set_bit_and(id, true);
1409  if (++prev < id)
1410  {
1411  bv.set_range(prev, id-1, false);
1412  }
1413  prev = id;
1414  }
1415 }
1416 
1417 
1418 /**
1419  \brief AND Combine bitvector and the iterable sequence
1420 
1421  Algorithm combines bvector with sequence of integers
1422  (represents another set). When the agrument set is sorted
1423  this method can give serious increase in performance.
1424 
1425  \param bv - destination bitvector
1426  \param first - first element of the iterated sequence
1427  \param last - last element of the iterated sequence
1428 
1429  \ingroup setalgo
1430  \sa combine_and_sorted
1431 */
1432 template<class BV, class It>
1433 void combine_and(BV& bv, It first, It last)
1434 {
1435  BV bv_tmp;
1436  combine_or(bv_tmp, first, last);
1437  bv &= bv_tmp;
1438 }
1439 
1440 /*!
1441  \brief Compute number of bit intervals (GAPs) in the bitvector
1442 
1443  Algorithm traverses bit vector and count number of uninterrupted
1444  intervals of 1s and 0s.
1445  <pre>
1446  For example:
1447  empty vector - 1 interval
1448  00001111100000 - gives us 3 intervals
1449  10001111100000 - 4 intervals
1450  00000000000000 - 1 interval
1451  11111111111111 - 1 interval
1452  </pre>
1453  \return Number of intervals
1454  \ingroup setalgo
1455 */
1456 template<class BV>
1457 typename BV::size_type count_intervals(const BV& bv)
1458 {
1459  const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1460 
1461  if (!bman.is_init())
1462  return 1;
1463 
1464  bm::word_t*** blk_root = bman.top_blocks_root();
1465  typename BV::blocks_manager_type::block_count_change_func func(bman);
1466  typename BV::blocks_manager_type::block_idx_type st = 0;
1467  bm::for_each_block(blk_root, bman.top_block_size(), func, st);
1468 
1469  return func.count();
1470 }
1471 
1472 /*!
1473  \brief Export bitset from an array of binary data representing
1474  the bit vector.
1475 
1476  Input array can be array of ints, chars or other native C types.
1477  Method works with iterators, which makes it compatible with
1478  STL containers and C arrays
1479 
1480  \param bv - destination bitvector
1481  \param first - first element of the iterated sequence
1482  \param last - last element of the iterated sequence
1483 
1484  \ingroup setalgo
1485 */
1486 template<typename BV, class It>
1487 void export_array(BV& bv, It first, It last)
1488 {
1489  typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1490  if (!bman.is_init())
1491  bman.init_tree();
1492 
1493  unsigned inp_word_size = sizeof(*first);
1494  size_t array_size = size_t(last - first);
1495  size_t bit_cnt = array_size * inp_word_size * 8;
1496  int block_type;
1497  bm::word_t tmp;
1498  bm::word_t b1, b2, b3, b4;
1499 
1500  if (bit_cnt >= bv.size())
1501  bv.resize((bm::id_t)bit_cnt + 1);
1502  else
1503  bv.set_range((typename BV::size_type)bit_cnt, bv.size() - 1, false);
1504  switch (inp_word_size)
1505  {
1506  case 1:
1507  {
1508  size_t word_cnt = array_size / 4;
1509  for (typename BV::block_idx_type i = 0; i < bm::set_total_blocks; ++i)
1510  {
1511  bm::word_t* blk =
1512  bman.check_allocate_block(i,
1513  false,
1514  BM_BIT,
1515  &block_type,
1516  false /*no NULL ret*/);
1517  if (block_type == 1) // gap
1518  {
1519  blk = bman.deoptimize_block(i);
1520  }
1521 
1522  bm::word_t* wrd_ptr = blk;
1523  if (word_cnt >= bm::set_block_size) {
1524  bm::word_t* wrd_end = blk + bm::set_block_size;
1525  do {
1526  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1527  b3 = bm::word_t(*first++); b4 = bm::word_t(*first++);
1528  tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1529  *wrd_ptr++ = tmp;
1530  } while (wrd_ptr < wrd_end);
1531  word_cnt -= bm::set_block_size;
1532  }
1533  else
1534  {
1535  size_t to_convert = size_t(last - first);
1536  for (size_t j = 0; j < to_convert / 4; ++j)
1537  {
1538  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1539  b3 = bm::word_t(*first++); b4 = bm::word_t(*first++);
1540  tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1541  *wrd_ptr++ = tmp;
1542  }
1543  while (1)
1544  {
1545  if (first == last) break;
1546  *wrd_ptr = bm::word_t(*first++);
1547  if (first == last) break;
1548  *wrd_ptr |= bm::word_t(*first++) << 8u;
1549  if (first == last) break;
1550  *wrd_ptr |= bm::word_t(*first++) << 16u;
1551  if (first == last) break;
1552  *wrd_ptr |= bm::word_t(*first++) << 24u;
1553  ++wrd_ptr;
1554  }
1555  }
1556  if (first == last) break;
1557  } // for
1558  }
1559  break;
1560  case 2:
1561  {
1562  size_t word_cnt = array_size / 2;
1563  for (typename BV::block_idx_type i = 0; i < bm::set_total_blocks; ++i)
1564  {
1565  bm::word_t* blk =
1566  bman.check_allocate_block(i,
1567  false,
1568  BM_BIT,
1569  &block_type,
1570  false /*no NULL ret*/);
1571  if (block_type) // gap
1572  blk = bman.deoptimize_block(i);
1573 
1574  bm::word_t* wrd_ptr = blk;
1575  if (word_cnt >= bm::set_block_size) {
1576  bm::word_t* wrd_end = blk + bm::set_block_size;
1577  do {
1578  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1579  tmp = b1 | (b2 << 16);
1580  *wrd_ptr++ = tmp;
1581  } while (wrd_ptr < wrd_end);
1582  word_cnt -= bm::set_block_size;
1583  }
1584  else
1585  {
1586  size_t to_convert = size_t(last - first);
1587  for (unsigned j = 0; j < to_convert / 2; ++j)
1588  {
1589  b1 = bm::word_t(*first++); b2 = bm::word_t(*first++);
1590  tmp = b1 | (b2 << 16u);
1591  *wrd_ptr++ = tmp;
1592  }
1593  while (1)
1594  {
1595  if (first == last) break;
1596  *wrd_ptr = bm::word_t(*first++);
1597  if (first == last) break;
1598  *wrd_ptr |= bm::word_t((*first++) << 16u);
1599  ++wrd_ptr;
1600  }
1601  }
1602  if (first == last) break;
1603  } // for
1604  }
1605  break;
1606  case 4:
1607  {
1608  size_t word_cnt = array_size;
1609  for (typename BV::block_idx_type i = 0; i < bm::set_total_blocks; ++i)
1610  {
1611  bm::word_t* blk =
1612  bman.check_allocate_block(i,
1613  false,
1614  BM_BIT,
1615  &block_type,
1616  false /*no NULL ret*/);
1617  if (block_type == 1) // gap
1618  blk = bman.deoptimize_block(i);
1619 
1620  bm::word_t* wrd_ptr = blk;
1621  if (word_cnt >= bm::set_block_size) {
1622  bm::word_t* wrd_end = blk + bm::set_block_size;
1623  do
1624  {
1625  *wrd_ptr++ = bm::word_t(*first++);
1626  } while (wrd_ptr < wrd_end);
1627  word_cnt -= bm::set_block_size;
1628  }
1629  else
1630  {
1631  while (1)
1632  {
1633  if (first == last) break;
1634  *wrd_ptr = bm::word_t(*first++);
1635  ++wrd_ptr;
1636  }
1637  }
1638  if (first == last) break;
1639  } // for
1640  }
1641  break;
1642  default:
1643  BM_ASSERT(0);
1644  } // switch
1645 
1646 }
1647 
1648 
1649 /*!
1650  \brief for-each visitor, calls a special visitor functor for each 1 bit group
1651 
1652  \param block - bit block buffer pointer
1653  \param offset - global block offset (number of bits)
1654  \param bit_functor - functor must support .add_bits(offset, bits_ptr, size)
1655 
1656  @ingroup bitfunc
1657  @internal
1658 */
1659 template<typename Func, typename SIZE_TYPE>
1660 void for_each_bit_blk(const bm::word_t* block, SIZE_TYPE offset,
1661  Func& bit_functor)
1662 {
1663  if (IS_FULL_BLOCK(block))
1664  {
1665  bit_functor.add_range(offset, bm::gap_max_bits);
1666  return;
1667  }
1668  unsigned char bits[bm::set_bitscan_wave_size*32];
1669 
1670  SIZE_TYPE offs = offset;
1671  unsigned cnt;
1672  const word_t* block_end = block + bm::set_block_size;
1673  do
1674  {
1675  cnt = bm::bitscan_wave(block, bits);
1676  if (cnt)
1677  bit_functor.add_bits(offs, bits, cnt);
1678  offs += bm::set_bitscan_wave_size * 32;
1679  block += bm::set_bitscan_wave_size;
1680  } while (block < block_end);
1681 }
1682 
1683 
1684 /*!
1685  \brief for-each visitor, calls a special visitor functor for each 1 bit range
1686 
1687  \param buf - bit block buffer pointer
1688  \param offset - global block offset (number of bits)
1689  \param bit_functor - functor must support .add_range(offset, bits_ptr, size)
1690 
1691  @ingroup gapfunc
1692  @internal
1693 */
1694 template<typename T, typename Func, typename SIZE_TYPE>
1695 void for_each_gap_blk(const T* buf, SIZE_TYPE offset,
1696  Func& bit_functor)
1697 {
1698  const T* pcurr = buf + 1;
1699  const T* pend = buf + (*buf >> 3);
1700 
1701  if (*buf & 1)
1702  {
1703  bit_functor.add_range(offset, *pcurr + 1);
1704  ++pcurr;
1705  }
1706  for (++pcurr; pcurr <= pend; pcurr += 2)
1707  {
1708  T prev = *(pcurr-1);
1709  bit_functor.add_range(offset + prev + 1, *pcurr - prev);
1710  }
1711 }
1712 
1713 
1714 } // namespace bm
1715 
1716 #ifdef _MSC_VER
1717 #pragma warning( pop )
1718 #endif
1719 
1720 #endif
bm::id_t bit_block_count(const bm::word_t *block)
Bitcount for bit block.
Definition: bmfunc.h:3972
BMFORCEINLINE unsigned gap_count_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount AND operation test.
Definition: bmfunc.h:4868
#define BM_VECT_ALIGN
Definition: bmdef.h:346
(A & B).count()
Definition: bmalgo_impl.h:59
const unsigned set_block_size
Definition: bmconst.h:54
const unsigned set_word_shift
Definition: bmconst.h:70
void for_each_block(T ***root, unsigned size1, F &f, BLOCK_IDX start)
Definition: bmfunc.h:1685
const unsigned set_sub_array_size
Definition: bmconst.h:91
BV::size_type any_sub(const BV &bv1, const BV &bv2)
Computes if there is any bit in SUB operation of two bitsets.
Definition: bmalgo_impl.h:1069
bm::id_t gap_bitset_and_any(const unsigned *block, const T *pcurr)
Bitcount test of bit block AND masked by GAP block.
Definition: bmfunc.h:3268
#define FULL_SUB_BLOCK_REAL_ADDR
Definition: bmdef.h:137
BMFORCEINLINE unsigned gap_operation_any_and(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP AND operation test.
Definition: bmfunc.h:4851
unsigned gap_set_value(unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
Sets or clears bit in the GAP buffer.
Definition: bmfunc.h:2391
unsigned long long int id64_t
Definition: bmconst.h:34
unsigned combine_count_and_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk)
Internal function computes AND distance.
Definition: bmalgo_impl.h:342
const unsigned short set_bitscan_wave_size
Definition: bmfunc.h:7480
void distance_stage(const distance_metric_descriptor *dmit, const distance_metric_descriptor *dmit_end, bool *is_all_and)
Staging function for distance operation.
Definition: bmalgo_impl.h:669
(A - B).count()
Definition: bmalgo_impl.h:62
Definition: bm.h:76
bm::id_t gap_bitset_sub_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block SUB masked by GAP block.
Definition: bmfunc.h:3331
void combine_any_operation_with_block(const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, unsigned arg_gap, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different existense of distance metric.
Definition: bmalgo_impl.h:379
BMFORCEINLINE unsigned gap_count_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount OR operation test.
Definition: bmfunc.h:4979
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
Definition: bmfunc.h:3510
bm::id_t bit_operation_xor_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation test.
Definition: bmfunc.h:6545
bm::id_t bit_operation_sub_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock test of SUB operation.
Definition: bmfunc.h:5810
BV::size_type any_or(const BV &bv1, const BV &bv2)
Computes if there is any bit in OR operation of two bitsets.
Definition: bmalgo_impl.h:1102
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
void for_each_gap_blk(const T *buf, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
Definition: bmalgo_impl.h:1695
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1316
(A ^ B).count()
Definition: bmalgo_impl.h:60
BV::size_type any_xor(const BV &bv1, const BV &bv2)
Computes if there is any bit in XOR operation of two bitsets.
Definition: bmalgo_impl.h:1034
const unsigned id_max
Definition: bmconst.h:105
(B - A).count()
Definition: bmalgo_impl.h:63
void distance_operation_any(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Distance screening template function.
Definition: bmalgo_impl.h:858
Distance metric descriptor, holds metric code and result.
Definition: bmalgo_impl.h:87
B.count()
Definition: bmalgo_impl.h:65
T min_value(T v1, T v2)
Get minimum of 2 values.
Definition: bmutil.h:102
bm::id_t gap_bitset_xor_count(const unsigned *block, const T *buf)
Compute bitcount of bit block XOR masked by GAP block.
Definition: bmfunc.h:3368
distance_metric_descriptor(distance_metric m)
Definition: bmalgo_impl.h:98
BMFORCEINLINE bool gap_is_all_zero(const bm::gap_word_t *buf)
Checks if GAP block is all-zero.
Definition: bmfunc.h:1039
BMFORCEINLINE unsigned gap_count_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount XOR operation test.
Definition: bmfunc.h:4934
unsigned int word_t
Definition: bmconst.h:38
#define BM_VECT_ALIGN_ATTR
Definition: bmdef.h:347
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:140
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start)
Returns "true" if all bits in the block are 0.
Definition: bmfunc.h:1012
bm::id_t bit_operation_or_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation test.
Definition: bmfunc.h:5882
void distance_operation(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Distance computing template function.
Definition: bmalgo_impl.h:702
void combine_xor(BV &bv, It first, It last)
XOR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1229
(A | B).count()
Definition: bmalgo_impl.h:61
A.count()
Definition: bmalgo_impl.h:64
const unsigned gap_max_buff_len
Definition: bmconst.h:78
No GAP compression strategy. All new blocks are bit blocks.
Definition: bmconst.h:144
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition: bmfunc.h:1256
unsigned short gap_word_t
Definition: bmconst.h:76
BMFORCEINLINE unsigned gap_operation_any_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP SUB operation test.
Definition: bmfunc.h:5029
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo_impl.h:1018
BV::size_type count_intervals(const BV &bv)
Compute number of bit intervals (GAPs) in the bitvector.
Definition: bmalgo_impl.h:1457
gap_word_t * gap_operation_or(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize)
GAP OR operation.
Definition: bmfunc.h:4959
BMFORCEINLINE unsigned gap_count_sub(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP bitcount SUB (AND NOT) operation test.
Definition: bmfunc.h:5046
void for_each_bit_blk(const bm::word_t *block, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit group
Definition: bmalgo_impl.h:1660
bm::id_t gap_bitset_and_count(const unsigned *block, const T *pcurr)
Compute bitcount of bit block AND masked by GAP block.
Definition: bmfunc.h:3241
Definitions(internal)
static bit_operation_count_func_type bit_operation_count(unsigned i)
Definition: bmfunc.h:7434
BV::size_type distance_and_operation(const BV &bv1, const BV &bv2)
Distance AND computing template function.
Definition: bmalgo_impl.h:789
set_operation
Codes of set operations.
Definition: bmconst.h:153
bm::id_t gap_bitset_xor_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block XOR masked by GAP block.
Definition: bmfunc.h:3405
void combine_and_sorted(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1401
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:5706
BV::size_type any_and(const BV &bv1, const BV &bv2)
Computes if there is any bit in AND operation of two bitsets.
Definition: bmalgo_impl.h:999
bm::id_t gap_bitset_or_any(const unsigned *block, const T *buf)
Compute bitcount test of bit block OR masked by GAP block.
Definition: bmfunc.h:3474
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1433
const unsigned gap_max_bits
Definition: bmconst.h:79
#define BM_IS_GAP(ptr)
Definition: bmdef.h:168
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:73
BMFORCEINLINE unsigned gap_operation_any_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2)
GAP XOR operation test.
Definition: bmfunc.h:4918
const unsigned set_block_mask
Definition: bmconst.h:56
BV::size_type count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo_impl.h:1086
void export_array(BV &bv, It first, It last)
Export bitset from an array of binary data representing the bit vector.
Definition: bmalgo_impl.h:1487
#define BMGAP_PTR(ptr)
Definition: bmdef.h:166
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:1786
bm::id_t(* bit_operation_count_func_type)(const bm::word_t *BMRESTRICT, const bm::word_t *BMRESTRICT)
Definition: bmfunc.h:7407
It block_range_scan(It first, It last, SIZE_TYPE nblock, SIZE_TYPE *max_id)
Internal algorithms scans the input for the block range limit.
Definition: bmalgo_impl.h:1116
const unsigned set_word_mask
Definition: bmconst.h:71
bm::id_t gap_bitset_or_count(const unsigned *block, const T *buf)
Compute bitcount of bit block OR masked by GAP block.
Definition: bmfunc.h:3442
bm::id_t bit_operation_and_any(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation test.
Definition: bmfunc.h:5730
unsigned short bitscan_wave(const bm::word_t *w_ptr, unsigned char *bits)
Unpacks word wave (Nx 32-bit words)
Definition: bmfunc.h:7491
unsigned int id_t
Definition: bmconst.h:37
BV::size_type count_sub(const BV &bv1, const BV &bv2)
Computes bitcount of SUB operation of two bitsets.
Definition: bmalgo_impl.h:1052
unsigned gap_limit(const T *buf, const T *glevel_len)
Returs GAP block capacity limit.
Definition: bmfunc.h:1097
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:125
#define BM_ASSERT
Definition: bmdef.h:117
const unsigned set_total_blocks
Definition: bmconst.h:107
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:57
bm::id_t gap_bitset_sub_count(const unsigned *block, const T *buf)
Compute bitcount of bit block SUB masked by GAP block.
Definition: bmfunc.h:3297
const unsigned set_block_shift
Definition: bmconst.h:55
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:784
void reset()
Sets metric result to 0.
Definition: bmalgo_impl.h:110
BV::size_type count_and(const BV &bv1, const BV &bv2)
Computes bitcount of AND operation of two bitsets.
Definition: bmalgo_impl.h:986
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:5755
Bit manipulation primitives (internal)
#define FULL_BLOCK_FAKE_ADDR
Definition: bmdef.h:136
#define BLOCK_ADDR_SAN(addr)
Definition: bmdef.h:138