BitMagic-C++
bmfunc.h
Go to the documentation of this file.
1 #ifndef BMFUNC__H__INCLUDED__
2 #define BMFUNC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmfunc.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include <memory.h>
26 
27 #include "bmdef.h"
28 #include "bmutil.h"
29 
30 
31 #ifdef _MSC_VER
32 # pragma warning( disable: 4146 )
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 inline
41  bm::word_t left,
42  bm::word_t right);
43 
44 inline
46  bm::word_t left,
47  bm::word_t right);
48 
49 /*!
50  @brief Structure with statistical information about memory
51  allocation footprint, serialization projection, number of vectors
52  @ingroup bvector
53 */
55 {
56  size_t bit_blocks; ///< Number of bit blocks
57  size_t gap_blocks; ///< Number of GAP blocks
58  size_t ptr_sub_blocks; ///< Number of sub-blocks
59  size_t bv_count; ///< Number of bit-vectors
60  size_t max_serialize_mem; ///< estimated maximum memory for serialization
61  size_t memory_used; ///< memory usage for all blocks and service tables
62  gap_word_t gap_levels[bm::gap_levels]; ///< GAP block lengths in the bvect
63  unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level
64 
65  /// cound bit block
67  {
68  ++bit_blocks;
69  size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
70  memory_used += mem_used;
71  max_serialize_mem += mem_used;
72  }
73 
74  /// count gap block
75  void add_gap_block(unsigned capacity, unsigned length)
76  {
77  ++gap_blocks;
78  size_t mem_used = (capacity * sizeof(gap_word_t));
79  memory_used += mem_used;
80  max_serialize_mem += (unsigned)(length * sizeof(gap_word_t));
81  for (unsigned i = 0; i < bm::gap_levels; ++i)
82  {
83  if (capacity == gap_levels[i])
84  {
85  gaps_by_level[i]++;
86  return;
87  }
88  }
89  BM_ASSERT(0);
90  }
91 
92  /// Reset statisctics
93  void reset()
94  {
95  bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
96  max_serialize_mem = memory_used = 0;
97  for (unsigned i = 0; i < bm::gap_levels; ++i)
98  gaps_by_level[i] = 0ull;
99  }
100 
101  /// Sum data from another sttructure
102  void add(const bv_statistics& st)
103  {
104  bit_blocks += st.bit_blocks;
105  gap_blocks += st.gap_blocks;
106  bv_count += st.bv_count;
107  max_serialize_mem += st.max_serialize_mem + 8;
108  memory_used += st.memory_used;
109  }
110 };
111 
112 /**
113  @brief Pair type
114 */
115 template<typename First, typename Second>
116 struct pair
117 {
118  First first;
119  Second second;
120 };
121 
122 /**
123  \brief bit-decode cache structure
124 */
126 {
127  unsigned short bits[65]; //< decoded bits
128  unsigned bcnt; //< length of bits array
129  bm::id64_t cvalue; //< cache decoded value
130 
131  bit_decode_cache() : bcnt(0), cvalue(0) {}
132 };
133 
134 
135 /**
136  \brief ad-hoc conditional expressions
137  \internal
138 */
139 template <bool b> struct conditional
140 {
141  static bool test() { return true; }
142 };
143 template <> struct conditional<false>
144 {
145  static bool test() { return false; }
146 };
147 
148 /*!
149  @defgroup gapfunc GAP functions
150  GAP functions implement different opereations on GAP compressed blocks (internals)
151  and serve as a minimal building blocks.
152  \internal
153  @ingroup bvector
154  */
155 
156 /*!
157  @defgroup bitfunc BIT functions
158  Bit functions implement different opereations on bit blocks (internals)
159  and serve as a minimal building blocks.
160  \internal
161  @ingroup bvector
162  */
163 
164 
165 
166 
167 /*!
168  Returns BSR value
169  @ingroup bitfunc
170 */
171 template <class T>
172 unsigned bit_scan_reverse(T value)
173 {
174  BM_ASSERT(value);
175 
176  if (bm::conditional<sizeof(T)==8>::test())
177  {
178  bm::id64_t v8 = value;
179  v8 >>= 32;
180  unsigned v = (unsigned)v8;
181  if (v)
182  {
183  return bit_scan_reverse32(v) + 32;
184  }
185  }
186  return bit_scan_reverse32((unsigned)value);
187 }
188 
189 
190 /*!
191  Returns bit count
192  @ingroup bitfunc
193 */
196 {
197 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
198  return bm::id_t(_mm_popcnt_u32(w));
199 #else
200  return
201  bm::bit_count_table<true>::_count[(unsigned char)(w)] +
202  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
203  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
204  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
205 #endif
206 }
207 
208 inline
209 int parallel_popcnt_32(unsigned int n)
210 {
211  unsigned int tmp;
212 
213  tmp = n - ((n >> 1) & 033333333333)
214  - ((n >> 2) & 011111111111);
215  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
216 }
217 
218 
219 /*!
220  Function calculates number of 1 bits in 64-bit word.
221  @ingroup bitfunc
222 */
225 {
226 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
227 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
228  return unsigned(_mm_popcnt_u64(x));
229 #else
230  return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
231 #endif
232 #else
233  x = x - ((x >> 1) & 0x5555555555555555);
234  x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
235  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
236  x = x + (x >> 8);
237  x = x + (x >> 16);
238  x = x + (x >> 32);
239  return x & 0xFF;
240 #endif
241 }
242 
243 inline
245  bm::id64_t u, bm::id64_t v)
246 {
247  const bm::id64_t m1 = 0x5555555555555555U;
248  const bm::id64_t m2 = 0x3333333333333333U;
249  const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0FU;
250  const bm::id64_t m4 = 0x000000FF000000FFU;
251 
252  x = x - ((x >> 1) & m1);
253  y = y - ((y >> 1) & m1);
254  u = u - ((u >> 1) & m1);
255  v = v - ((v >> 1) & m1);
256  x = (x & m2) + ((x >> 2) & m2);
257  y = (y & m2) + ((y >> 2) & m2);
258  u = (u & m2) + ((u >> 2) & m2);
259  v = (v & m2) + ((v >> 2) & m2);
260  x = x + y;
261  u = u + v;
262  x = (x & m3) + ((x >> 4) & m3);
263  u = (u & m3) + ((u >> 4) & m3);
264  x = x + u;
265  x = x + (x >> 8);
266  x = x + (x >> 16);
267  x = x & m4;
268  x = x + (x >> 32);
269  return x & 0x000001FFU;
270 }
271 
272 
273 // --------------------------------------------------------------
274 // Functions for bit-scanning
275 // --------------------------------------------------------------
276 
277 /*!
278  \brief Templated algorithm to unpacks octet based word into list of ON bit indexes
279  \param w - value
280  \param func - bit functor
281 
282  @ingroup bitfunc
283 */
284 template<typename T, typename F>
285 void bit_for_each_4(T w, F& func)
286 {
287  for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
288  {
289  switch (w & 15) // 1111
290  {
291  case 0: // 0000
292  break;
293  case 1: // 0001
294  func(sub_octet);
295  break;
296  case 2: // 0010
297  func(sub_octet + 1);
298  break;
299  case 3: // 0011
300  func(sub_octet, sub_octet + 1);
301  break;
302  case 4: // 0100
303  func(sub_octet + 2);
304  break;
305  case 5: // 0101
306  func(sub_octet, sub_octet + 2);
307  break;
308  case 6: // 0110
309  func(sub_octet + 1, sub_octet + 2);
310  break;
311  case 7: // 0111
312  func(sub_octet, sub_octet + 1, sub_octet + 2);
313  break;
314  case 8: // 1000
315  func(sub_octet + 3);
316  break;
317  case 9: // 1001
318  func(sub_octet, sub_octet + 3);
319  break;
320  case 10: // 1010
321  func(sub_octet + 1, sub_octet + 3);
322  break;
323  case 11: // 1011
324  func(sub_octet, sub_octet + 1, sub_octet + 3);
325  break;
326  case 12: // 1100
327  func(sub_octet + 2, sub_octet + 3);
328  break;
329  case 13: // 1101
330  func(sub_octet, sub_octet + 2, sub_octet + 3);
331  break;
332  case 14: // 1110
333  func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
334  break;
335  case 15: // 1111
336  func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
337  break;
338  default:
339  BM_ASSERT(0);
340  break;
341  }
342 
343  } // for
344 }
345 
346 
347 /*!
348  \brief Templated algorithm to unpacks word into list of ON bit indexes
349  \param w - value
350  \param func - bit functor
351 
352  @ingroup bitfunc
353 */
354 template<typename T, typename F>
355 void bit_for_each(T w, F& func)
356 {
357  // Note: 4-bit table method works slower than plain check approach
358  for (unsigned octet = 0; w != 0; w >>= 8, octet += 8)
359  {
360  if (w & 1) func(octet + 0);
361  if (w & 2) func(octet + 1);
362  if (w & 4) func(octet + 2);
363  if (w & 8) func(octet + 3);
364  if (w & 16) func(octet + 4);
365  if (w & 32) func(octet + 5);
366  if (w & 64) func(octet + 6);
367  if (w & 128) func(octet + 7);
368 
369  } // for
370 }
371 
372 /*! @brief Adaptor to copy 1 bits to array
373  @internal
374 */
375 template<typename B> class copy_to_array_functor
376 {
377 public:
378  copy_to_array_functor(B* bits): bp_(bits)
379  {}
380 
381  B* ptr() { return bp_; }
382 
383  void operator()(unsigned bit_idx) { *bp_++ = (B)bit_idx; }
384 
385  void operator()(unsigned bit_idx0,
386  unsigned bit_idx1)
387  {
388  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
389  bp_+=2;
390  }
391 
392  void operator()(unsigned bit_idx0,
393  unsigned bit_idx1,
394  unsigned bit_idx2)
395  {
396  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
397  bp_+=3;
398  }
399 
400  void operator()(unsigned bit_idx0,
401  unsigned bit_idx1,
402  unsigned bit_idx2,
403  unsigned bit_idx3)
404  {
405  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
406  bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
407  bp_+=4;
408  }
409 
410 private:
412  copy_to_array_functor& operator=(const copy_to_array_functor&);
413 private:
414  B* bp_;
415 };
416 
417 
418 /*!
419  \brief Unpacks word into list of ON bit indexes
420  \param w - value
421  \param bits - pointer on the result array
422  \return number of bits in the list
423 
424  @ingroup bitfunc
425 */
426 template<typename T,typename B> unsigned bit_list(T w, B* bits)
427 {
428  copy_to_array_functor<B> func(bits);
429  bit_for_each(w, func);
430  return (unsigned)(func.ptr() - bits);
431 }
432 
433 
434 
435 /*!
436  \brief Unpacks word into list of ON bit indexes (quad-bit based)
437  \param w - value
438  \param bits - pointer on the result array
439  \return number of bits in the list
440 
441  @ingroup bitfunc
442 */
443 template<typename T,typename B> unsigned bit_list_4(T w, B* bits)
444 {
445  copy_to_array_functor<B> func(bits);
446  bit_for_each_4(w, func);
447  return (unsigned)(func.ptr() - bits);
448 }
449 
450 /*!
451  \brief Unpacks word into list of ON bit indexes using popcnt method
452  \param w - value
453  \param bits - pointer on the result array
454  \param offs - value to add to bit position (programmed shift)
455  \return number of bits in the list
456 
457  @ingroup bitfunc
458  @internal
459 */
460 template<typename B>
461 unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs)
462 {
463  unsigned pos = 0;
464  while (w)
465  {
466  bm::id_t t = w & -w;
467  bits[pos++] = (B)(bm::word_bitcount(t - 1) + offs);
468  w &= w - 1;
469  }
470  return (unsigned short)pos;
471 }
472 
473 /*!
474  \brief Unpacks word into list of ON bit indexes using popcnt method
475  \param w - value
476  \param bits - pointer on the result array
477  \return number of bits in the list
478 
479  @ingroup bitfunc
480  @internal
481 */
482 template<typename B>
483 unsigned short bitscan_popcnt(bm::id_t w, B* bits)
484 {
485  unsigned pos = 0;
486  while (w)
487  {
488  bm::id_t t = w & -w;
489  bits[pos++] = (B)(bm::word_bitcount(t - 1));
490  w &= w - 1;
491  }
492  return (unsigned short)pos;
493 }
494 
495 
496 /*!
497  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
498  \param w - value
499  \param bits - pointer on the result array
500  \param offs - bit address offset to add (0 - default)
501  \return number of bits in the list
502 
503  @ingroup bitfunc
504 */
505 template<typename B>
506 unsigned short bitscan_popcnt64(bm::id64_t w, B* bits)
507 {
508  unsigned short pos = 0;
509  while (w)
510  {
511  bm::id64_t t = w & -w;
512  bits[pos++] = (B) bm::word_bitcount64(t - 1);
513  w &= w - 1;
514  }
515  return pos;
516 }
517 
518 template<typename V, typename B>
519 unsigned short bitscan(V w, B* bits)
520 {
521  if (bm::conditional<sizeof(V) == 8>::test())
522  {
523  return bm::bitscan_popcnt64(w, bits);
524  }
525  else
526  {
527  return bm::bitscan_popcnt((bm::word_t)w, bits);
528  }
529 }
530 
531 // --------------------------------------------------------------
532 // Functions for select
533 // --------------------------------------------------------------
534 
535 /**
536  \brief word find index of the rank-th bit set by bit-testing
537  \param w - 64-bit work to search
538  \param rank - rank to select (should be > 0)
539 
540  \return selected value (inxed of bit set)
541 */
542 inline
543 unsigned word_select64_linear(bm::id64_t w, unsigned rank)
544 {
545  BM_ASSERT(w);
546  BM_ASSERT(rank);
547 
548  for (unsigned count = 0; w; w >>=1ull, ++count)
549  {
550  rank -= unsigned(w & 1ull);
551  if (!rank)
552  return count;
553  }
554  BM_ASSERT(0); // shoud not be here if rank is achievable
555  return ~0u;
556 }
557 
558 /**
559  \brief word find index of the rank-th bit set by bit-testing
560  \param w - 64-bit work to search
561  \param rank - rank to select (should be > 0)
562 
563  \return selected value (inxed of bit set)
564 */
565 inline
566 unsigned word_select64_bitscan(bm::id64_t w, unsigned rank)
567 {
568  BM_ASSERT(w);
569  BM_ASSERT(rank);
570  BM_ASSERT(rank <= bm::word_bitcount64(w));
571 
572  do
573  {
574  --rank;
575  if (!rank)
576  break;
577  w &= w - 1;
578  } while (1);
579 
580  bm::id64_t t = w & -w;
581  unsigned count = bm::word_bitcount64(t - 1);
582  return count;
583 }
584 
585 /**
586  \brief word find index of the rank-th bit set by bit-testing
587  \param w - 64-bit work to search
588  \param rank - rank to select (should be > 0)
589 
590  \return selected value (inxed of bit set)
591 */
592 inline
593 unsigned word_select64(bm::id64_t w, unsigned rank)
594 {
595 #if defined(BMI2_SELECT64)
596  return BMI2_SELECT64(w, rank);
597 #else
598  #if defined(BMI1_SELECT64)
599  return BMI2_SELECT64(w, rank);
600  #else
601  return bm::word_select64_bitscan(w, rank);
602  #endif
603 #endif
604 }
605 
606 // --------------------------------------------------------------
607 // Functions for bit-block digest calculation
608 // --------------------------------------------------------------
609 
610 /*!
611  \brief Compute digest mask for word address in block
612 
613  @return digest bit-mask
614 
615  @ingroup bitfunc
616  @internal
617 */
620 {
621  bm::id64_t mask(1ull);
622  return mask << (w_idx / bm::set_block_digest_wave_size);
623 }
624 
625 /**
626  \brief Compute digest mask for [from..to] positions
627  \param from - range from (in bit-block coordinates)
628  \param to - range to (in bit-block coordinates)
629 
630  @ingroup bitfunc
631  @internal
632 */
634 bm::id64_t digest_mask(unsigned from, unsigned to)
635 {
636  BM_ASSERT(from <= to);
637 
638  bm::id64_t digest_from = from >> bm::set_block_digest_pos_shift;
639  bm::id64_t digest_to = to >> bm::set_block_digest_pos_shift;;
640  const bm::id64_t maskFF(~0ull);
641  return ((maskFF) >> (63 - (digest_to - digest_from))) << digest_from;
642 }
643 
644 /*!
645  \brief check if all digest bits for the range [from..to] are 0
646 
647  \param digest - current digest
648  \param bitpos_from - range from (in bit-block coordinates)
649  \param bitpos_to - range to (in bit-block coordinates)
650 
651  @return true if all range is zero
652 
653  @ingroup bitfunc
654  @internal
655 */
656 inline
657 bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_to)
658 {
659  bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
660  return !(digest & mask);
661 }
662 
663 /*!
664  \brief Init block with 000111000 pattren based on digest
665  \param block - Bit block [out]
666  \param digest - digest (used for block initialization)
667 
668  @ingroup bitfunc
669  @internal
670 */
671 inline
672 void block_init_digest0(bm::word_t* const block, bm::id64_t digest)
673 {
674  unsigned off;
675  for (unsigned i = 0; i < 64; ++i)
676  {
678  bm::word_t mask = (digest & 1) ? ~0u : 0u;
679 #if defined(VECT_BLOCK_SET_DIGEST)
680  VECT_BLOCK_SET_DIGEST(&block[off], mask);
681 #else
682  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
683  {
684  block[off+j+0] = block[off+j+1] =
685  block[off+j+2] = block[off+j+3] = mask;
686  } // for j
687 #endif
688  digest >>= 1ull;
689  } // for
690 }
691 
692 /*!
693  \brief Compute digest for 64 non-zero areas
694  \param block - Bit block
695 
696  @return digest bit-mask (0 means all block is empty)
697 
698  @ingroup bitfunc
699  @internal
700 */
701 inline
703 {
704  bm::id64_t digest0 = 0;
705  unsigned off;
706 
707  for (unsigned i = 0; i < 64; ++i)
708  {
710  #if defined(VECT_IS_DIGEST_ZERO)
711  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
712  digest0 |= bm::id64_t(!all_zero) << i;
713  #else
714  const bm::id64_t mask(1ull);
715  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
716  {
717  bm::word_t w =
718  block[off+j+0] | block[off+j+1] | block[off+j+2] | block[off+j+3];
719  if (w)
720  {
721  digest0 |= (mask << i);
722  break;
723  }
724  } // for j
725  #endif
726 
727  } // for i
728  return digest0;
729 }
730 
731 /*!
732  \brief Compute digest for 64 non-zero areas based on existing digest
733  (function revalidates zero areas)
734  \param block - bit block
735  \param digest - start digest
736 
737  @return digest bit-mask (0 means all block is empty)
738 
739  @ingroup bitfunc
740  @internal
741 */
742 inline
744 {
745  const bm::id64_t mask(1ull);
746  bm::id64_t d = digest;
747 
748  while (d)
749  {
750  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
751 
752  unsigned wave = bm::word_bitcount64(t - 1);
753  unsigned off = wave * bm::set_block_digest_wave_size;
754 
755  #if defined(VECT_IS_DIGEST_ZERO)
756  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
757  digest &= all_zero ? ~(mask << wave) : digest;
758  #else
760  (const bm::bit_block_t::bunion_t*)(&block[off]);
761  bm::id64_t w64 = 0;
762  unsigned j = 0;
763  do
764  {
765  w64 |=
766  src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
767  j += 4;
768  } while ((j < bm::set_block_digest_wave_size/2) & !w64);
769  digest &= w64 ? digest : ~(mask << wave);
770  #endif
771 
772  d = bm::bmi_bslr_u64(d); // d &= d - 1;
773  } // while
774 
775  BM_ASSERT(bm::calc_block_digest0(block) == digest);
776  return digest;
777 }
778 
779 // --------------------------------------------------------------
780 
781 
782 /// Returns true if set operation is constant (bitcount)
783 inline
785 {
786  return (int(op) >= int(set_COUNT));
787 }
788 
789 /**
790  Convert set operation to operation
791 */
792 inline
794 {
795  BM_ASSERT(op == set_AND ||
796  op == set_OR ||
797  op == set_SUB ||
798  op == set_XOR);
799  return (bm::operation) op;
800 }
801 
802 //---------------------------------------------------------------------
803 
804 /**
805  Structure carries pointer on bit block with all bits 1
806  @ingroup bitfunc
807  @internal
808 */
809 template<bool T> struct all_set
810 {
812  {
816 
818  {
819  ::memset(_p, 0xFF, sizeof(_p)); // set FULL BLOCK content (all 1s)
820  if (bm::conditional<sizeof(void*) == 8>::test())
821  {
822  const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
823  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
824  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
825  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
826  }
827  else
828  {
829  const unsigned magic_mask = 0xFFFFfffe;
830  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
831  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
832  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
833  }
834  }
835  };
836 
837  // version with minimal branching, super-scalar friendly
838  //
839  inline
840  static bm::id64_t block_type(const bm::word_t* bp)
841  {
842  bm::id64_t type;
843  if (bm::conditional<sizeof(void*) == 8>::test())
844  {
845  bm::id64_t w = reinterpret_cast<unsigned long long>(bp);
846  type = (w & 3) | // FULL BLOCK or GAP
847  ((bp == _block._p) << 1);
848  type = type ? type : w;
849  }
850  else
851  {
852  unsigned w = reinterpret_cast<unsigned long>(bp);
853  type = (w & 3) | // FULL BLOCK or GAP
854  ((bp == _block._p) << 1);
855  type = type ? type : w;
856  }
857  return type;
858  }
859 
861  static bool is_full_block(const bm::word_t* bp)
862  { return (bp == _block._p || bp == _block._p_fullp); }
863 
865  static bool is_valid_block_addr(const bm::word_t* bp)
866  { return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
867 
869 };
870 
871 
872 template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
873 
874 /// XOR swap two scalar variables
875 template<typename W>
876 void xor_swap(W& x, W& y)
877 {
878  BM_ASSERT(&x != &y);
879  x ^= y;
880  y ^= x;
881  x ^= y;
882 }
883 
884 /*!
885  Fini not NULL position
886  @return index of not NULL pointer
887  @internal
888 */
889 template<typename N>
890 bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos)
891 {
892  BM_ASSERT(pos);
893 // BM_ASSERT(start < size);
894 //#if defined(BM64_AVX2) || defined(BM64_AVX512)
895 // TODO: optimization for SIMD based next ptr scan
896 #if 0
897  const unsigned unroll_factor = 4;
898  const unsigned len = (size - start);
899  const unsigned len_unr = len - (len % unroll_factor);
900  unsigned k;
901 
902  arr += start;
903  for (k = 0; k < len_unr; k+=unroll_factor)
904  {
905  if (!avx2_test_all_zero_wave(arr+k))
906  {
907  if (arr[k])
908  {
909  *pos = k + start;
910  return true;
911  }
912  if (arr[k+1])
913  {
914  *pos = k + start + 1;
915  return true;
916  }
917  if (arr[k+2])
918  {
919  *pos = k + start + 2;
920  return true;
921  }
922  if (arr[k+3])
923  {
924  *pos = k + start + 3;
925  return true;
926  }
927  }
928  } // for k
929 
930  for (; k < len; ++k)
931  {
932  if (arr[k])
933  {
934  *pos = k + start;
935  return true;
936  }
937  } // for k
938 #else
939  for (; start < size; ++start)
940  {
941  if (arr[start])
942  {
943  *pos = start;
944  return true;
945  }
946  } // for i
947 #endif
948  return false;
949 }
950 
951 
952 
953 
954 //---------------------------------------------------------------------
955 
956 /*!
957  \brief Lexicographical comparison of two words as bit strings (reference)
958  Auxiliary implementation for testing and reference purposes.
959  \param w1 - First word.
960  \param w2 - Second word.
961  \return <0 - less, =0 - equal, >0 - greater.
962 
963  @ingroup bitfunc
964 */
965 template<typename T> int wordcmp0(T w1, T w2)
966 {
967  while (w1 != w2)
968  {
969  int res = (w1 & 1) - (w2 & 1);
970  if (res != 0) return res;
971  w1 >>= 1;
972  w2 >>= 1;
973  }
974  return 0;
975 }
976 
977 
978 /*
979 template<typename T> int wordcmp(T w1, T w2)
980 {
981  T diff = w1 ^ w2;
982  return diff ? ((w1 & diff & (diff ^ (diff - 1)))? 1 : -1) : 0;
983 }
984 */
985 /*!
986  \brief Lexicographical comparison of two words as bit strings.
987  Auxiliary implementation for testing and reference purposes.
988  \param a - First word.
989  \param b - Second word.
990  \return <0 - less, =0 - equal, >0 - greater.
991 
992  @ingroup bitfunc
993 */
994 template<typename T> int wordcmp(T a, T b)
995 {
996  T diff = a ^ b;
997  return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
998 }
999 
1000 
1001 // Low bit extraction
1002 // x & (x ^ (x-1))
1003 
1004 
1005 // ----------------------------------------------------------------------
1006 
1007 
1008 /*! @brief Returns "true" if all bits in the block are 0
1009  @ingroup bitfunc
1010 */
1011 inline
1013 {
1014 #if defined(VECT_IS_ZERO_BLOCK)
1015  return VECT_IS_ZERO_BLOCK(start);
1016 #else
1017  const bm::wordop_t* BMRESTRICT blk = (bm::wordop_t*) (start);
1018  const bm::wordop_t* BMRESTRICT blk_end = (bm::wordop_t*)(start + bm::set_block_size);
1019  do
1020  {
1021  if (blk[0] | blk[1] | blk[2] | blk[3])
1022  return false;
1023  blk += 4;
1024  } while (blk < blk_end);
1025  return true;
1026 #endif
1027 }
1028 
1029 // ----------------------------------------------------------------------
1030 
1031 /*!
1032  \brief Checks if GAP block is all-zero.
1033  \param buf - GAP buffer pointer.
1034  \returns true if all-zero.
1035 
1036  @ingroup gapfunc
1037 */
1040 {
1041  // (almost) branchless variant:
1042  return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
1043 }
1044 
1045 /*!
1046  \brief Checks if GAP block is all-one.
1047  \param buf - GAP buffer pointer.
1048  \param set_max - max possible bitset length
1049  \returns true if all-one.
1050 
1051  @ingroup gapfunc
1052 */
1055 {
1056  return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
1057 }
1058 
1059 /*!
1060  \brief Returs GAP block length.
1061  \param buf - GAP buffer pointer.
1062  \returns GAP block length.
1063 
1064  @ingroup gapfunc
1065 */
1068 {
1069  return (bm::gap_word_t)((*buf >> 3) + 1);
1070 }
1071 
1072 
1073 /*!
1074  \brief Returs GAP block capacity
1075  \param buf - GAP buffer pointer
1076  \param glevel_len - pointer on GAP header word
1077  \returns GAP block capacity.
1078 
1079  @ingroup gapfunc
1080 */
1081 template<typename T>
1082 unsigned gap_capacity(const T* buf, const T* glevel_len)
1083 {
1084  return glevel_len[(*buf >> 1) & 3];
1085 }
1086 
1087 
1088 /*!
1089  \brief Returs GAP block capacity limit.
1090  \param buf - GAP buffer pointer.
1091  \param glevel_len - GAP lengths table (gap_len_table)
1092  \returns GAP block limit.
1093 
1094  @ingroup gapfunc
1095 */
1096 template<typename T>
1097 unsigned gap_limit(const T* buf, const T* glevel_len)
1098 {
1099  return glevel_len[(*buf >> 1) & 3]-4;
1100 }
1101 
1102 
1103 /*!
1104  \brief Returs GAP blocks capacity level.
1105  \param buf - GAP buffer pointer.
1106  \returns GAP block capacity level.
1107 
1108  @ingroup gapfunc
1109 */
1110 template<typename T>
1111 T gap_level(const T* buf)
1112 {
1113  return T((*buf >> 1) & 3u);
1114 }
1115 
1116 
1117 /*!
1118  \brief GAP block find the last set bit
1119 
1120  \param buf - GAP buffer pointer.
1121  \param last - index of the last 1 bit
1122 
1123  \return 0 if 1 bit was NOT found
1124 
1125  @ingroup gapfunc
1126 */
1127 template<typename T>
1128 unsigned gap_find_last(const T* buf, unsigned* last)
1129 {
1130  BM_ASSERT(last);
1131 
1132  T is_set = (*buf) & 1u;
1133  T end = T((*buf) >> 3u);
1134 
1135  BM_ASSERT(buf[end] == bm::gap_max_bits - 1);
1136 
1137  is_set ^= T((end-1) & 1u);
1138  if (is_set)
1139  {
1140  *last = buf[end];
1141  return is_set;
1142  }
1143  *last = buf[--end];
1144  return end;
1145 }
1146 
1147 /*!
1148  \brief GAP block find the first set bit
1149 
1150  \param buf - GAP buffer pointer.
1151  \param first - index of the first 1 bit
1152 
1153  \return 0 if 1 bit was NOT found
1154 
1155  @ingroup gapfunc
1156 */
1157 template<typename T>
1158 unsigned gap_find_first(const T* buf, unsigned* first)
1159 {
1160  BM_ASSERT(first);
1161 
1162  T is_set = (*buf) & 1u;
1163  if (is_set)
1164  {
1165  *first = 0;
1166  return is_set;
1167  }
1168  if (buf[1] == bm::gap_max_bits - 1)
1169  return 0;
1170  *first = buf[1] + 1;
1171  return 1;
1172 }
1173 
1174 
1175 
1176 /*
1177  \brief Binary search for the block where bit = pos located.
1178  \param buf - GAP buffer pointer.
1179  \param pos - index of the element.
1180  \param is_set - output. GAP value (0 or 1).
1181  \return GAP index.
1182  @ingroup gapfunc
1183 */
1184 template<typename T>
1185 unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set)
1186 {
1187  BM_ASSERT(pos < bm::gap_max_bits);
1188  *is_set = (*buf) & 1;
1189 
1190  unsigned start = 1;
1191  unsigned end = 1 + ((*buf) >> 3);
1192 
1193  while ( start != end )
1194  {
1195  unsigned curr = (start + end) >> 1;
1196  if ( buf[curr] < pos )
1197  start = curr + 1;
1198  else
1199  end = curr;
1200  }
1201  *is_set ^= ((start-1) & 1);
1202  return start;
1203 }
1204 
1205 
1206 /*!
1207  \brief Tests if bit = pos is true.
1208  \param buf - GAP buffer pointer.
1209  \param pos - index of the element.
1210  \return true if position is in "1" gap
1211  @ingroup gapfunc
1212 */
1213 template<typename T> unsigned gap_test(const T* buf, unsigned pos)
1214 {
1215  BM_ASSERT(pos < bm::gap_max_bits);
1216 
1217  unsigned start = 1;
1218  unsigned end = 1 + ((*buf) >> 3);
1219  if (end - start < 10)
1220  {
1221  unsigned sv = *buf & 1;
1222  unsigned sv1= sv ^ 1;
1223  if (buf[1] >= pos) return sv;
1224  if (buf[2] >= pos) return sv1;
1225  if (buf[3] >= pos) return sv;
1226  if (buf[4] >= pos) return sv1;
1227  if (buf[5] >= pos) return sv;
1228  if (buf[6] >= pos) return sv1;
1229  if (buf[7] >= pos) return sv;
1230  if (buf[8] >= pos) return sv1;
1231  if (buf[9] >= pos) return sv;
1232  BM_ASSERT(0);
1233  }
1234  else
1235  {
1236  while (start != end)
1237  {
1238  unsigned curr = (start + end) >> 1;
1239  if (buf[curr] < pos)
1240  start = curr + 1;
1241  else
1242  end = curr;
1243  }
1244  }
1245  return ((*buf) & 1) ^ ((--start) & 1);
1246 }
1247 
1248 /*!
1249  \brief Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
1250  \param buf - GAP buffer pointer.
1251  \param pos - index of the element.
1252  \return true if position is in "1" gap
1253  @ingroup gapfunc
1254 */
1255 template<typename T>
1256 unsigned gap_test_unr(const T* buf, const unsigned pos)
1257 {
1258  BM_ASSERT(pos < bm::gap_max_bits);
1259 
1260  if (pos == 0) // quick answer possible
1261  {
1262  return (*buf) & 1;
1263  }
1264 #if defined(BMSSE2OPT)
1265  unsigned start = 1;
1266  unsigned end = 1 + ((*buf) >> 3);
1267  unsigned dsize = end - start;
1268 
1269  if (dsize < 17)
1270  {
1271  start = bm::sse2_gap_find(buf + 1, (bm::gap_word_t)pos, dsize);
1272  unsigned res = ((*buf) & 1) ^ ((start) & 1);
1273  BM_ASSERT(buf[start + 1] >= pos);
1274  BM_ASSERT(buf[start] < pos || (start == 0));
1275  BM_ASSERT(res == bm::gap_test(buf, pos));
1276  return res;
1277  }
1278  unsigned arr_end = end;
1279  while (start != end)
1280  {
1281  unsigned curr = (start + end) >> 1;
1282  if (buf[curr] < pos)
1283  start = curr + 1;
1284  else
1285  end = curr;
1286 
1287  unsigned size = end - start;
1288  if (size < 16)
1289  {
1290  size += (end != arr_end);
1291  unsigned idx = bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size);
1292  start += idx;
1293 
1294  BM_ASSERT(buf[start] >= pos);
1295  BM_ASSERT(buf[start - 1] < pos || (start == 1));
1296  break;
1297  }
1298  }
1299 
1300  unsigned res = ((*buf) & 1) ^ ((--start) & 1);
1301 
1302  BM_ASSERT(res == bm::gap_test(buf, pos));
1303  return res;
1304 //#endif
1305 #elif defined(BMSSE42OPT)
1306  unsigned start = 1;
1307  unsigned end = 1 + ((*buf) >> 3);
1308  unsigned dsize = end - start;
1309 
1310  if (dsize < 17)
1311  {
1312  start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize);
1313  unsigned res = ((*buf) & 1) ^ ((start) & 1);
1314  BM_ASSERT(buf[start+1] >= pos);
1315  BM_ASSERT(buf[start] < pos || (start==0));
1316  BM_ASSERT(res == bm::gap_test(buf, pos));
1317  return res;
1318  }
1319  unsigned arr_end = end;
1320  while (start != end)
1321  {
1322  unsigned curr = (start + end) >> 1;
1323  if (buf[curr] < pos)
1324  start = curr + 1;
1325  else
1326  end = curr;
1327 
1328  unsigned size = end - start;
1329  if (size < 16)
1330  {
1331  size += (end != arr_end);
1332  unsigned idx = bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size);
1333  start += idx;
1334 
1335  BM_ASSERT(buf[start] >= pos);
1336  BM_ASSERT(buf[start - 1] < pos || (start == 1));
1337  break;
1338  }
1339  }
1340 
1341  unsigned res = ((*buf) & 1) ^ ((--start) & 1);
1342 
1343  BM_ASSERT(res == bm::gap_test(buf, pos));
1344 #elif defined(BMAVX2OPT)
1345  unsigned res = bm::avx2_gap_test(buf, pos);
1346  BM_ASSERT(res == bm::gap_test(buf, pos));
1347 #else
1348  unsigned res = bm::gap_test(buf, pos);
1349 #endif
1350  return res;
1351 }
1352 
1353 /*! For each non-zero block in [from, to] executes supplied functor
1354  \internal
1355 */
1356 template<typename T, typename N, typename F>
1357 void for_each_nzblock_range(T*** root, N top_size, N nb_from, N nb_to, F& f)
1358 {
1359  BM_ASSERT(top_size);
1360  if (nb_from > nb_to)
1361  return;
1362  unsigned i_from = nb_from >> bm::set_array_shift;
1363  unsigned j_from = nb_from & bm::set_array_mask;
1364  unsigned i_to = nb_to >> bm::set_array_shift;
1365  unsigned j_to = nb_to & bm::set_array_mask;
1366 
1367  if (i_from >= top_size)
1368  return;
1369  if (i_to >= top_size)
1370  {
1371  i_to = top_size-1;
1372  j_to = bm::set_sub_array_size-1;
1373  }
1374 
1375  for (unsigned i = i_from; i <= i_to; ++i)
1376  {
1377  T** blk_blk = root[i];
1378  if (!blk_blk)
1379  {
1380  continue;
1381  }
1382  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1383  {
1384  unsigned j = (i == i_from) ? j_from : 0;
1385  if (!j && (i != i_to)) // full sub-block
1386  {
1388  }
1389  else
1390  {
1391  do
1392  {
1393  f.add_full(bm::gap_max_bits);
1394  if ((i == i_to) && (j == j_to))
1395  return;
1396  } while (++j < bm::set_sub_array_size);
1397  }
1398  }
1399  else
1400  {
1401  unsigned j = (i == i_from) ? j_from : 0;
1402  do
1403  {
1404  if (blk_blk[j])
1405  {
1406  f(blk_blk[j]);
1407  }
1408  if ((i == i_to) && (j == j_to))
1409  return;
1410  ++j;
1411  } while (j < bm::set_sub_array_size);
1412  }
1413  } // for i
1414 }
1415 
1416 /*! For each non-zero block executes supplied function.
1417  \internal
1418 */
1419 template<class T, class F>
1420 void for_each_nzblock(T*** root, unsigned size1, F& f)
1421 {
1422  typedef typename F::size_type size_type;
1423  for (unsigned i = 0; i < size1; ++i)
1424  {
1425  T** blk_blk = root[i];
1426  if (!blk_blk)
1427  {
1428  f.on_empty_top(i);
1429  continue;
1430  }
1431  f.on_non_empty_top(i);
1432  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1433  {
1434  size_type r = i * bm::set_sub_array_size;
1435  unsigned j = 0;
1436  do
1437  {
1438  f(FULL_BLOCK_FAKE_ADDR, r + j);
1439  } while (++j < bm::set_sub_array_size);
1440  continue;
1441  }
1442 
1443  unsigned non_empty_top = 0;
1444  size_type r = i * bm::set_sub_array_size;
1445  unsigned j = 0;
1446  do
1447  {
1448 #if defined(BM64_AVX2) || defined(BM64_AVX512)
1449  if (!avx2_test_all_zero_wave(blk_blk + j))
1450  {
1451  non_empty_top = 1;
1452  T* blk0 = blk_blk[j + 0];
1453  T* blk1 = blk_blk[j + 1];
1454  T* blk2 = blk_blk[j + 2];
1455  T* blk3 = blk_blk[j + 3];
1456 
1457  size_type block_idx = r + j + 0;
1458  if (blk0)
1459  f(blk0, block_idx);
1460  else
1461  f.on_empty_block(block_idx);
1462 
1463  if (blk1)
1464  f(blk1, block_idx + 1);
1465  else
1466  f.on_empty_block(block_idx + 1);
1467 
1468  if (blk2)
1469  f(blk2, block_idx + 2);
1470  else
1471  f.on_empty_block(block_idx + 2);
1472 
1473  if (blk3)
1474  f(blk3, block_idx + 3);
1475  else
1476  f.on_empty_block(block_idx + 3);
1477  }
1478  else
1479  {
1480  f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1481  f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1482  }
1483  j += 4;
1484 #elif defined(BM64_SSE4)
1485  if (!sse42_test_all_zero_wave((blk_blk + j)))
1486  {
1487  non_empty_top = 1;
1488  T* blk0 = blk_blk[j + 0];
1489  T* blk1 = blk_blk[j + 1];
1490 
1491  size_type block_idx = r + j + 0;
1492  if (blk0)
1493  f(blk0, block_idx);
1494  else
1495  f.on_empty_block(block_idx);
1496 
1497  ++block_idx;
1498  if (blk1)
1499  f(blk1, block_idx);
1500  else
1501  f.on_empty_block(block_idx);
1502  }
1503  else
1504  {
1505  f.on_empty_block(r + j + 0);
1506  f.on_empty_block(r + j + 1);
1507  }
1508  j += 2;
1509 #else
1510  if (blk_blk[j])
1511  {
1512  f(blk_blk[j], r + j);
1513  non_empty_top = 1;
1514  }
1515  else
1516  f.on_empty_block(r + j);
1517  ++j;
1518 #endif
1519  } while (j < bm::set_sub_array_size);
1520 
1521  if (non_empty_top == 0)
1522  f.on_empty_top(i);
1523  } // for i
1524 }
1525 
1526 /*! For each non-zero block executes supplied function.
1527 */
1528 template<class T, class F>
1529 void for_each_nzblock2(T*** root, unsigned size1, F& f)
1530 {
1531 #ifdef BM64_SSE4
1532  for (unsigned i = 0; i < size1; ++i)
1533  {
1534  T** blk_blk;
1535  if ((blk_blk = root[i])!=0)
1536  {
1537  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1538  {
1539  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1540  {
1541  f(FULL_BLOCK_FAKE_ADDR);
1542  }
1543  continue;
1544  }
1545  {
1546  __m128i w0;
1547  for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
1548  {
1549  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1550  if (!_mm_testz_si128(w0, w0))
1551  {
1552  T* blk0 = blk_blk[j + 0];
1553  T* blk1 = blk_blk[j + 1];
1554 
1555  if (blk0)
1556  f(blk0);
1557  if (blk1)
1558  f(blk1);
1559  }
1560  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1561  if (!_mm_testz_si128(w0, w0))
1562  {
1563  T* blk0 = blk_blk[j + 2];
1564  T* blk1 = blk_blk[j + 3];
1565  if (blk0)
1566  f(blk0);
1567  if (blk1)
1568  f(blk1);
1569  }
1570  } // for j
1571  }
1572  }
1573  } // for i
1574 #elif defined(BM64_AVX2) || defined(BM64_AVX512)
1575  for (unsigned i = 0; i < size1; ++i)
1576  {
1577  T** blk_blk;
1578  if ((blk_blk = root[i]) != 0)
1579  {
1580  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1581  {
1582  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1583  {
1584  f(FULL_BLOCK_FAKE_ADDR);
1585  }
1586  continue;
1587  }
1588  {
1589  for (unsigned j = 0; j < bm::set_sub_array_size; j += 4)
1590  {
1591  __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1592  if (!_mm256_testz_si256(w0, w0))
1593  {
1594  // as a variant could use: blk0 = (T*)_mm256_extract_epi64(w0, 0);
1595  // but it measures marginally slower
1596  T* blk0 = blk_blk[j + 0];
1597  T* blk1 = blk_blk[j + 1];
1598  T* blk2 = blk_blk[j + 2];
1599  T* blk3 = blk_blk[j + 3];
1600  if (blk0)
1601  f(blk0);
1602  if (blk1)
1603  f(blk1);
1604  if (blk2)
1605  f(blk2);
1606  if (blk3)
1607  f(blk3);
1608  }
1609  } // for j
1610  }
1611  }
1612  } // for i
1613 
1614 #else // non-SIMD mode
1615  for (unsigned i = 0; i < size1; ++i)
1616  {
1617  T** blk_blk;
1618  if ((blk_blk = root[i])!=0)
1619  {
1620  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1621  {
1622  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1623  {
1624  f(FULL_BLOCK_FAKE_ADDR);
1625  }
1626  continue;
1627  }
1628  unsigned j = 0;
1629  do
1630  {
1631  if (blk_blk[j])
1632  f(blk_blk[j]);
1633  if (blk_blk[j+1])
1634  f(blk_blk[j+1]);
1635  if (blk_blk[j+2])
1636  f(blk_blk[j+2]);
1637  if (blk_blk[j+3])
1638  f(blk_blk[j+3]);
1639  j += 4;
1640  } while (j < bm::set_sub_array_size);
1641  }
1642  } // for i
1643 #endif
1644 }
1645 
1646 
1647 /*! For each non-zero block executes supplied function-predicate.
1648  Function returns if function-predicate returns true
1649 */
1650 template<typename T, typename BI, typename F>
1651 bool for_each_nzblock_if(T*** root, BI size1, F& f)
1652 {
1653  BI block_idx = 0;
1654  for (BI i = 0; i < size1; ++i)
1655  {
1656  T** blk_blk = root[i];
1657  if (!blk_blk)
1658  {
1659  block_idx += bm::set_sub_array_size;
1660  continue;
1661  }
1662  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1663  {
1664  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
1665  {
1666  if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
1667  return true;
1668  } // for j
1669  continue;
1670  }
1671 
1672  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1673  {
1674  if (blk_blk[j])
1675  if (f(blk_blk[j], block_idx))
1676  return true;
1677  } // for j
1678  } // for i
1679  return false;
1680 }
1681 
1682 /*! For each block executes supplied function.
1683 */
1684 template<class T, class F, typename BLOCK_IDX>
1685 void for_each_block(T*** root, unsigned size1, F& f, BLOCK_IDX start)
1686 {
1687  BLOCK_IDX block_idx = start;
1688  for (unsigned i = 0; i < size1; ++i)
1689  {
1690  T** blk_blk = root[i];
1691  if (blk_blk)
1692  {
1693  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1694  {
1695  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
1696  {
1697  f(FULL_BLOCK_FAKE_ADDR, block_idx);
1698  }
1699  continue;
1700  }
1701  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1702  {
1703  f(blk_blk[j], block_idx);
1704  }
1705  }
1706  else
1707  {
1708  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1709  {
1710  f(0, block_idx);
1711  }
1712  }
1713  }
1714 }
1715 
1716 
1717 
1718 /*! Special BM optimized analog of STL for_each
1719 */
1720 template<class T, class F> F bmfor_each(T first, T last, F f)
1721 {
1722  do
1723  {
1724  f(*first);
1725  ++first;
1726  } while (first < last);
1727  return f;
1728 }
1729 
1730 /*! Computes SUM of all elements of the sequence
1731 */
1732 template<typename T>
1733 bm::id64_t sum_arr(T* first, T* last)
1734 {
1735  bm::id64_t sum = 0;
1736  while (first < last)
1737  {
1738  sum += *first;
1739  ++first;
1740  }
1741  return sum;
1742 }
1743 
1744 
1745 
1746 /*!
1747  \brief Calculates number of bits ON in GAP buffer.
1748  \param buf - GAP buffer pointer.
1749  \param dsize - buffer size
1750  \return Number of non-zero bits.
1751  @ingroup gapfunc
1752 */
1753 template<typename T> unsigned gap_bit_count(const T* buf, unsigned dsize=0)
1754 {
1755  const T* pcurr = buf;
1756  if (dsize == 0)
1757  dsize = (*pcurr >> 3);
1758 
1759  const T* pend = pcurr + dsize;
1760 
1761  unsigned bits_counter = 0;
1762  ++pcurr;
1763 
1764  if (*buf & 1)
1765  {
1766  bits_counter += *pcurr + 1;
1767  ++pcurr;
1768  }
1769  ++pcurr; // set GAP to 1
1770 
1771  while (pcurr <= pend)
1772  {
1773  bits_counter += *pcurr - *(pcurr-1);
1774  pcurr += 2; // jump to the next positive GAP
1775  }
1776 
1777  return bits_counter;
1778 }
1779 
1780 /*!
1781  \brief Calculates number of bits ON in GAP buffer. Loop unrolled version.
1782  \param buf - GAP buffer pointer.
1783  \return Number of non-zero bits.
1784  @ingroup gapfunc
1785 */
1786 template<typename T> unsigned gap_bit_count_unr(const T* buf)
1787 {
1788  const T* pcurr = buf;
1789  unsigned dsize = (*pcurr >> 3);
1790 
1791  unsigned cnt = 0;
1792  pcurr = buf + 1; // set up start position
1793  T first_one = *buf & 1;
1794  if (first_one)
1795  {
1796  cnt += *pcurr + 1;
1797  ++pcurr;
1798  }
1799  ++pcurr; // set GAP to 1
1800 
1801  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
1802  if (dsize > 34)
1803  {
1804  const unsigned unr_factor = 32;
1805  unsigned waves = (dsize-2) / unr_factor;
1806  pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
1807  }
1808  #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
1809  if (dsize > 18)
1810  {
1811  const unsigned unr_factor = 16;
1812  unsigned waves = (dsize - 2) / unr_factor;
1813  pcurr = sse2_gap_sum_arr(pcurr, waves, &cnt);
1814  }
1815  #else
1816  if (dsize > 10)
1817  {
1818  const unsigned unr_factor = 8;
1819  unsigned waves = (dsize - 2) / unr_factor;
1820  for (unsigned i = 0; i < waves; i += unr_factor)
1821  {
1822  cnt += pcurr[0] - pcurr[0 - 1];
1823  cnt += pcurr[2] - pcurr[2 - 1];
1824  cnt += pcurr[4] - pcurr[4 - 1];
1825  cnt += pcurr[6] - pcurr[6 - 1];
1826 
1827  pcurr += unr_factor;
1828  } // for
1829  }
1830  #endif
1831 
1832  const T* pend = buf + dsize;
1833  for ( ; pcurr <= pend ; pcurr+=2)
1834  {
1835  cnt += *pcurr - *(pcurr - 1);
1836  }
1837  BM_ASSERT(cnt == gap_bit_count(buf));
1838  return cnt;
1839 }
1840 
1841 
1842 
1843 /*!
1844  \brief Counts 1 bits in GAP buffer in the closed [left, right] range.
1845  \param buf - GAP buffer pointer.
1846  \param left - leftmost bit index to start from
1847  \param right- rightmost bit index
1848  \return Number of non-zero bits.
1849  @ingroup gapfunc
1850 */
1851 template<typename T>
1852 unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right)
1853 {
1854  BM_ASSERT(left <= right);
1855 
1856  const T* pcurr = buf;
1857  const T* pend = pcurr + (*pcurr >> 3);
1858 
1859  unsigned bits_counter = 0;
1860  unsigned is_set;
1861  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1862  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
1863 
1864  pcurr = buf + start_pos;
1865  if (right <= *pcurr) // we are in the target gap right now
1866  {
1867  bits_counter = unsigned(right - left + 1u) & is_set; // & is_set == if(is_set)
1868  return bits_counter;
1869  }
1870  bits_counter += unsigned(*pcurr - left + 1u) & is_set;
1871 
1872  unsigned prev_gap = *pcurr++;
1873  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1874  {
1875  bits_counter += (*pcurr - prev_gap) & is_set;
1876  if (pcurr == pend)
1877  return bits_counter;
1878  prev_gap = *pcurr++;
1879  }
1880  bits_counter += unsigned(right - prev_gap) & is_set;
1881  return bits_counter;
1882 }
1883 
1884 /*!
1885  \brief GAP block find position for the rank
1886 
1887  \param block - bit block buffer pointer
1888  \param rank - rank to find (must be > 0)
1889  \param nbit_from - start bit position in block
1890  \param nbit_pos - found position
1891 
1892  \return 0 if position with rank was found, or
1893  the remaining rank (rank - population count)
1894 
1895  @ingroup gapfunc
1896 */
1897 template<typename T, typename SIZE_TYPE>
1898 SIZE_TYPE gap_find_rank(const T* const block,
1899  SIZE_TYPE rank,
1900  unsigned nbit_from,
1901  unsigned& nbit_pos)
1902 {
1903  BM_ASSERT(block);
1904  BM_ASSERT(rank);
1905 
1906  const T* pcurr = block;
1907  const T* pend = pcurr + (*pcurr >> 3);
1908 
1909  unsigned bits_counter = 0;
1910  unsigned is_set;
1911  unsigned start_pos = bm::gap_bfind(block, nbit_from, &is_set);
1912  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
1913 
1914  pcurr = block + start_pos;
1915  bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
1916  if (bits_counter >= rank) // found!
1917  {
1918  nbit_pos = nbit_from + unsigned(rank) - 1u;
1919  return 0;
1920  }
1921  rank -= bits_counter;
1922  unsigned prev_gap = *pcurr++;
1923  for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
1924  {
1925  bits_counter = (*pcurr - prev_gap) & is_set;
1926  if (bits_counter >= rank) // found!
1927  {
1928  nbit_pos = prev_gap + unsigned(rank);
1929  return 0;
1930  }
1931  rank -= bits_counter;
1932  prev_gap = *pcurr++;
1933  } // for
1934 
1935  return rank;
1936 }
1937 
1938 
1939 
1940 /*!
1941  \brief Counts 1 bits in GAP buffer in the closed [0, right] range.
1942  \param buf - GAP buffer pointer.
1943  \param right- rightmost bit index
1944  \return Number of non-zero bits.
1945  @ingroup gapfunc
1946 */
1947 template<typename T>
1948 unsigned gap_bit_count_to(const T* const buf, T right)
1949 {
1950  const T* pcurr = buf;
1951  const T* pend = pcurr + (*pcurr >> 3);
1952 
1953  unsigned bits_counter = 0;
1954  unsigned is_set = ~((unsigned(*buf) & 1u) - 1u); // 0xFFF.. if true (mask for branchless code)
1955  BM_ASSERT(is_set == 0u || is_set == ~0u);
1956  pcurr = buf + 1;
1957 
1958  if (right <= *pcurr) // we are in the target block right now
1959  {
1960  bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
1961  return bits_counter;
1962  }
1963  bits_counter += (*pcurr + 1u) & is_set;
1964 
1965  unsigned prev_gap = *pcurr++;
1966  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1967  {
1968  bits_counter += (*pcurr - prev_gap) & is_set;
1969  if (pcurr == pend)
1970  return bits_counter;
1971  prev_gap = *pcurr++;
1972  }
1973  bits_counter += (right - prev_gap) & is_set;
1974  return bits_counter;
1975 }
1976 
1977 
1978 /*!
1979  D-GAP block for_each algorithm
1980 
1981  D-Gap Functor is called for each element but last one.
1982 
1983  \param gap_buf - GAP buffer
1984  \param func - functor object
1985 
1986 */
1987 template<class T, class Func>
1988 void for_each_dgap(const T* gap_buf, Func& func)
1989 {
1990  const T* pcurr = gap_buf;
1991  const T* pend = pcurr + (*pcurr >> 3);
1992  ++pcurr;
1993 
1994  T prev = *pcurr;
1995  func((T)(prev + 1)); // first element incremented to avoid 0
1996  ++pcurr;
1997  do
1998  {
1999  func((T)(*pcurr - prev)); // all others are [N] - [N-1]
2000  prev = *pcurr;
2001  } while (++pcurr < pend);
2002 }
2003 
2004 /** d-Gap copy functor
2005  @internal
2006 */
2007 template<typename T> struct d_copy_func
2008 {
2009  d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {}
2010  void operator()(T dgap) { *dgap_buf_++ = dgap; }
2011 
2013 };
2014 
2015 /*!
2016  \brief Convert GAP buffer into D-GAP buffer
2017 
2018  Delta GAP representation is DGAP[N] = GAP[N] - GAP[N-1]
2019 
2020  \param gap_buf - GAP buffer
2021  \param dgap_buf - Delta-GAP buffer
2022  \param copy_head - flag to copy GAP header
2023 
2024  \internal
2025 
2026  @ingroup gapfunc
2027 */
2028 template<typename T>
2029 T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true)
2030 {
2031  if (copy_head) // copy GAP header
2032  {
2033  *dgap_buf++ = *gap_buf;
2034  }
2035 
2036  d_copy_func<T> copy_func(dgap_buf);
2037  for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
2038  return copy_func.dgap_buf_;
2039 }
2040 
2041 /*!
2042  \brief Convert D-GAP buffer into GAP buffer
2043 
2044  GAP representation is GAP[N] = DGAP[N] + DGAP[N-1]
2045 
2046  \param dgap_buf - Delta-GAP buffer
2047  \param gap_header - GAP header word
2048  \param gap_buf - GAP buffer
2049 
2050  \internal
2051  @ingroup gapfunc
2052 */
2053 template<typename T>
2054 void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0)
2055 {
2056  const T* pcurr = dgap_buf;
2057  unsigned len;
2058  if (!gap_header) // GAP header is already part of the stream
2059  {
2060  len = *pcurr >> 3;
2061  *gap_buf++ = *pcurr++; // copy GAP header
2062  }
2063  else // GAP header passed as a parameter
2064  {
2065  len = gap_header >> 3;
2066  *gap_buf++ = gap_header; // assign GAP header
2067  }
2068  --len; // last element is actually not encoded
2069  const T* pend = pcurr + len;
2070 
2071  *gap_buf = *pcurr++; // copy first element
2072  if (*gap_buf == 0)
2073  *gap_buf = 65535; // fix +1 overflow
2074  else
2075  *gap_buf = T(*gap_buf - 1);
2076 
2077  for (++gap_buf; pcurr < pend; ++pcurr)
2078  {
2079  T prev = *(gap_buf-1); // don't remove temp(undef expression!)
2080  *gap_buf++ = T(*pcurr + prev);
2081  }
2082  *gap_buf = 65535; // add missing last element
2083 }
2084 
2085 
2086 /*!
2087  \brief Lexicographical comparison of GAP buffers.
2088  \param buf1 - First GAP buffer pointer.
2089  \param buf2 - Second GAP buffer pointer.
2090  \return <0 - less, =0 - equal, >0 - greater.
2091 
2092  @ingroup gapfunc
2093 */
2094 template<typename T> int gapcmp(const T* buf1, const T* buf2)
2095 {
2096  const T* pcurr1 = buf1;
2097  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2098  unsigned bitval1 = *buf1 & 1;
2099  ++pcurr1;
2100 
2101  const T* pcurr2 = buf2;
2102  unsigned bitval2 = *buf2 & 1;
2103  ++pcurr2;
2104 
2105  while (pcurr1 <= pend1)
2106  {
2107  if (*pcurr1 == *pcurr2)
2108  {
2109  if (bitval1 != bitval2)
2110  {
2111  return (bitval1) ? 1 : -1;
2112  }
2113  }
2114  else
2115  {
2116  if (bitval1 == bitval2)
2117  {
2118  if (bitval1)
2119  {
2120  return (*pcurr1 < *pcurr2) ? -1 : 1;
2121  }
2122  else
2123  {
2124  return (*pcurr1 < *pcurr2) ? 1 : -1;
2125  }
2126  }
2127  else
2128  {
2129  return (bitval1) ? 1 : -1;
2130  }
2131  }
2132 
2133  ++pcurr1; ++pcurr2;
2134 
2135  bitval1 ^= 1;
2136  bitval2 ^= 1;
2137  }
2138 
2139  return 0;
2140 }
2141 
2142 
2143 /*!
2144  \brief Abstract operation for GAP buffers.
2145  Receives functor F as a template argument
2146  \param dest - destination memory buffer.
2147  \param vect1 - operand 1 GAP encoded buffer.
2148  \param vect1_mask - XOR mask for starting bitflag for vector1
2149  can be 0 or 1 (1 inverts the vector)
2150  \param vect2 - operand 2 GAP encoded buffer.
2151  \param vect2_mask - same as vect1_mask
2152  \param f - operation functor.
2153  \param dlen - destination length after the operation
2154 
2155  \note Internal function.
2156  @internal
2157 
2158  @ingroup gapfunc
2159 */
2160 template<typename T, class F>
2161 void gap_buff_op(T* BMRESTRICT dest,
2162  const T* BMRESTRICT vect1,
2163  unsigned vect1_mask,
2164  const T* BMRESTRICT vect2,
2165  unsigned vect2_mask,
2166  F& f,
2167  unsigned& dlen)
2168 {
2169  const T* cur1 = vect1;
2170  const T* cur2 = vect2;
2171 
2172  T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
2173  T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
2174 
2175  T bitval = (T) f(bitval1, bitval2);
2176  T bitval_prev = bitval;
2177 
2178  T* res = dest;
2179  *res = bitval;
2180  ++res;
2181 
2182  T c1 = *cur1; T c2 = *cur2;
2183  while (1)
2184  {
2185  bitval = (T) f(bitval1, bitval2);
2186 
2187  // Check if GAP value changes and we need to
2188  // start the next one
2189  //
2190  res += (bitval != bitval_prev);
2191  bitval_prev = bitval;
2192  if (c1 < c2) // (*cur1 < *cur2)
2193  {
2194  *res = c1;
2195  ++cur1; c1 = *cur1;
2196  bitval1 ^= 1;
2197  }
2198  else // >=
2199  {
2200  *res = c2;
2201  if (c2 < c1) // (*cur2 < *cur1)
2202  {
2203  bitval2 ^= 1;
2204  }
2205  else // equal
2206  {
2207  if (c2 == (bm::gap_max_bits - 1))
2208  break;
2209 
2210  ++cur1; c1 = *cur1;
2211  bitval1 ^= 1; bitval2 ^= 1;
2212  }
2213  ++cur2; c2 = *cur2;
2214  }
2215 
2216  } // while
2217 
2218  dlen = (unsigned)(res - dest);
2219  *dest = (T)((*dest & 7) + (dlen << 3));
2220 }
2221 
2222 /*!
2223  \brief Abstract distance test operation for GAP buffers.
2224  Receives functor F as a template argument
2225  \param vect1 - operand 1 GAP encoded buffer.
2226  \param vect1_mask - XOR mask for starting bitflag for vector1
2227  can be 0 or 1 (1 inverts the vector)
2228  \param vect2 - operand 2 GAP encoded buffer.
2229  \param vect2_mask - same as vect1_mask
2230  \param f - operation functor.
2231  \note Internal function.
2232  \return non zero value if operation result returns any 1 bit
2233 
2234  @ingroup gapfunc
2235 */
2236 template<typename T, class F>
2237 unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
2238  unsigned vect1_mask,
2239  const T* BMRESTRICT vect2,
2240  unsigned vect2_mask,
2241  F f)
2242 {
2243  const T* cur1 = vect1;
2244  const T* cur2 = vect2;
2245 
2246  unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2247  unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2248 
2249  unsigned bitval = f(bitval1, bitval2);
2250  if (bitval)
2251  return bitval;
2252  unsigned bitval_prev = bitval;
2253 
2254  while (1)
2255  {
2256  bitval = f(bitval1, bitval2);
2257  if (bitval)
2258  return bitval;
2259 
2260  if (bitval != bitval_prev)
2261  bitval_prev = bitval;
2262 
2263  if (*cur1 < *cur2)
2264  {
2265  ++cur1;
2266  bitval1 ^= 1;
2267  }
2268  else // >=
2269  {
2270  if (*cur2 < *cur1)
2271  {
2272  bitval2 ^= 1;
2273  }
2274  else // equal
2275  {
2276  if (*cur2 == (bm::gap_max_bits - 1))
2277  {
2278  break;
2279  }
2280 
2281  ++cur1;
2282  bitval1 ^= 1;
2283  bitval2 ^= 1;
2284  }
2285  ++cur2;
2286  }
2287 
2288  } // while
2289 
2290  return 0;
2291 }
2292 
2293 
2294 
2295 /*!
2296  \brief Abstract distance(similarity) operation for GAP buffers.
2297  Receives functor F as a template argument
2298  \param vect1 - operand 1 GAP encoded buffer.
2299  \param vect2 - operand 2 GAP encoded buffer.
2300  \param f - operation functor.
2301  \note Internal function.
2302 
2303  @ingroup gapfunc
2304 */
2305 template<typename T, class F>
2306 unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f)
2307 {
2308  unsigned count;// = 0;
2309  const T* cur1 = vect1;
2310  const T* cur2 = vect2;
2311 
2312  unsigned bitval1 = (*cur1++ & 1);
2313  unsigned bitval2 = (*cur2++ & 1);
2314  unsigned bitval = count = f(bitval1, bitval2);
2315  unsigned bitval_prev = bitval;
2316 
2317  //if (bitval) ++count;
2318 
2319  T res, res_prev;
2320  res = res_prev = 0;
2321 
2322  while (1)
2323  {
2324  bitval = f(bitval1, bitval2);
2325 
2326  // Check if GAP value changes and we need to
2327  // start the next one.
2328  if (bitval != bitval_prev)
2329  {
2330  bitval_prev = bitval;
2331  res_prev = res;
2332  }
2333 
2334  if (*cur1 < *cur2)
2335  {
2336  res = *cur1;
2337  if (bitval)
2338  {
2339  count += res - res_prev;
2340  res_prev = res;
2341  }
2342  ++cur1;
2343  bitval1 ^= 1;
2344  }
2345  else // >=
2346  {
2347  res = *cur2;
2348  if (bitval)
2349  {
2350  count += res - res_prev;
2351  res_prev = res;
2352  }
2353  if (*cur2 < *cur1)
2354  {
2355  bitval2 ^= 1;
2356  }
2357  else // equal
2358  {
2359  if (*cur2 == (bm::gap_max_bits - 1))
2360  {
2361  break;
2362  }
2363 
2364  ++cur1;
2365  bitval1 ^= 1;
2366  bitval2 ^= 1;
2367  }
2368  ++cur2;
2369  }
2370 
2371  } // while
2372 
2373  return count;
2374 }
2375 
2376 
2377 
2378 /*!
2379  \brief Sets or clears bit in the GAP buffer.
2380 
2381  \param val - new bit value
2382  \param buf - GAP buffer.
2383  \param pos - Index of bit to set.
2384  \param is_set - (OUT) flag if bit was actually set.
2385 
2386  \return New GAP buffer length.
2387 
2388  @ingroup gapfunc
2389 */
2390 template<typename T>
2391 unsigned gap_set_value(unsigned val,
2392  T* BMRESTRICT buf,
2393  unsigned pos,
2394  unsigned* BMRESTRICT is_set)
2395 {
2396  BM_ASSERT(pos < bm::gap_max_bits);
2397  unsigned curr = gap_bfind(buf, pos, is_set);
2398 
2399  T end = (T)(*buf >> 3);
2400  if (*is_set == val)
2401  {
2402  *is_set = 0;
2403  return end;
2404  }
2405  *is_set = 1;
2406 
2407  T* pcurr = buf + curr;
2408  T* pprev = pcurr - 1;
2409  T* pend = buf + end;
2410 
2411  // Special case, first bit GAP operation. There is no platform beside it.
2412  // initial flag must be inverted.
2413  if (pos == 0)
2414  {
2415  *buf ^= 1;
2416  if ( buf[1] ) // We need to insert a 1 bit platform here.
2417  {
2418  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2419  buf[1] = 0;
2420  ++end;
2421  }
2422  else // Only 1 bit in the GAP. We need to delete the first GAP.
2423  {
2424  pprev = buf + 1;
2425  pcurr = pprev + 1;
2426  do
2427  {
2428  *pprev++ = *pcurr++;
2429  } while (pcurr < pend);
2430  --end;
2431  }
2432  }
2433  else if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
2434  {
2435  ++(*pprev);
2436  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2437  {
2438  --end;
2439  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2440  {
2441  --end;
2442  ++pcurr;
2443  do
2444  {
2445  *pprev++ = *pcurr++;
2446  } while (pcurr < pend);
2447  }
2448  }
2449  }
2450  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2451  {
2452  --(*pcurr);
2453  if (pcurr == pend)
2454  {
2455  ++end;
2456  }
2457  }
2458  else // Worst case we need to split current block.
2459  {
2460  ::memmove(pcurr+2, pcurr,(end - curr + 1)*sizeof(T));
2461  *pcurr++ = (T)(pos - 1);
2462  *pcurr = (T)pos;
2463  end = (T)(end + 2);
2464  }
2465 
2466  // Set correct length word.
2467  *buf = (T)((*buf & 7) + (end << 3));
2468 
2469  buf[end] = bm::gap_max_bits - 1;
2470  return end;
2471 }
2472 
2473 /*!
2474  \brief Add new value to the end of GAP buffer.
2475 
2476  \param buf - GAP buffer.
2477  \param pos - Index of bit to set.
2478 
2479  \return New GAP buffer length.
2480 
2481  @ingroup gapfunc
2482 */
2483 template<typename T>
2484 unsigned gap_add_value(T* buf, unsigned pos)
2485 {
2486  BM_ASSERT(pos < bm::gap_max_bits);
2487 
2488  T end = (T)(*buf >> 3);
2489  T curr = end;
2490  T* pcurr = buf + end;
2491  T* pend = pcurr;
2492  T* pprev = pcurr - 1;
2493 
2494  // Special case, first bit GAP operation. There is no platform beside it.
2495  // initial flag must be inverted.
2496  if (pos == 0)
2497  {
2498  *buf ^= 1;
2499  if ( buf[1] ) // We need to insert a 1 bit platform here.
2500  {
2501  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2502  buf[1] = 0;
2503  ++end;
2504  }
2505  else // Only 1 bit in the GAP. We need to delete the first GAP.
2506  {
2507  pprev = buf + 1;
2508  pcurr = pprev + 1;
2509  do
2510  {
2511  *pprev++ = *pcurr++;
2512  } while (pcurr < pend);
2513  --end;
2514  }
2515  }
2516  else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
2517  {
2518  ++(*pprev);
2519  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2520  {
2521  --end;
2522  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2523  {
2524  // TODO: should never get here...
2525  --end;
2526  ++pcurr;
2527  do
2528  {
2529  *pprev++ = *pcurr++;
2530  } while (pcurr < pend);
2531  }
2532  }
2533  }
2534  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2535  {
2536  --(*pcurr);
2537  if (pcurr == pend)
2538  {
2539  ++end;
2540  }
2541  }
2542  else // Worst case we need to split current block.
2543  {
2544  *pcurr++ = (T)(pos - 1);
2545  *pcurr = (T)pos;
2546  end = (T)(end+2);
2547  }
2548 
2549  // Set correct length word.
2550  *buf = (T)((*buf & 7) + (end << 3));
2551 
2552  buf[end] = bm::gap_max_bits - 1;
2553  return end;
2554 }
2555 
2556 /*!
2557  @brief Right shift GAP block by 1 bit
2558  @param buf - block pointer
2559  @param co_falg - carry over from the previous block
2560 
2561  @return carry over bit (1 or 0)
2562  @ingroup gapfunc
2563 */
2564 template<typename T>
2565 bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len)
2566 {
2567  BM_ASSERT(new_len);
2568  bool co;
2569  // 1: increment all GAP values by 1
2570  {
2571  unsigned bitval = *buf & 1;
2572  if (buf[1] == bm::gap_max_bits-1) // full GAP block
2573  {
2574  co = bitval;
2575  }
2576  else
2577  {
2578  unsigned len = (*buf >> 3);
2579  unsigned i = 1;
2580  for (; i < len; ++i)
2581  {
2582  buf[i]++;
2583  bitval ^= 1;
2584  } // for i
2585  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
2586  if (buf[i-1] == bm::gap_max_bits-1) // last element shifts out
2587  {
2588  // Set correct length word
2589  --len;
2590  *buf = (T)((*buf & 7) + (len << 3));
2591  }
2592  co = bitval;
2593  }
2594  }
2595  // set bit position 0 with carry-in flag
2596  {
2597  unsigned is_set;
2598  *new_len = bm::gap_set_value(co_flag, buf, 0, &is_set);
2599  }
2600  return co;
2601 }
2602 
2603 /*!
2604  @brief Left shift GAP block by 1 bit
2605  @param buf - block pointer
2606  @param co_falg - carry over from the previous block
2607 
2608  @return carry over bit (1 or 0)
2609  @ingroup gapfunc
2610 */
2611 template<typename T>
2612 bool gap_shift_l1(T* buf, unsigned co_flag, unsigned* new_len)
2613 {
2614  BM_ASSERT(new_len);
2615  unsigned is_set;
2616 
2617  // 1: decrement all GAP values by 1
2618  //
2619  unsigned bitval = *buf & 1;
2620  bool co0 = bitval;
2621 
2622  if (!buf[1]) // cannot decrement (corner case)
2623  {
2624  bitval ^= 1;
2625  *new_len = bm::gap_set_value(bitval, buf, 0, &is_set);
2626 
2627  BM_ASSERT(is_set);
2628  BM_ASSERT(buf[1]);
2629  BM_ASSERT(bitval == unsigned(*buf & 1u));
2630 
2631  if (*new_len == 1)
2632  {
2633  *new_len = bm::gap_set_value(co_flag, buf,
2634  bm::gap_max_bits-1, &is_set);
2635  return co0;
2636  }
2637  }
2638  if (buf[1] != bm::gap_max_bits-1) // full GAP block
2639  {
2640  BM_ASSERT(buf[1]);
2641  unsigned len = (*buf >> 3);
2642  unsigned i = 1;
2643  for (; i < len; ++i)
2644  {
2645  buf[i]--;
2646  bitval ^= 1;
2647  } // for i
2648  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
2649  }
2650  // 2: set last bit position with carry-in flag
2651  //
2652  *new_len = bm::gap_set_value(co_flag, buf, bm::gap_max_bits-1, &is_set);
2653  return co0;
2654 }
2655 
2656 
2657 /*!
2658  \brief Convert array to GAP buffer.
2659 
2660  \param buf - GAP buffer.
2661  \param arr - array of values to set
2662  \param len - length of the array
2663 
2664  \return New GAP buffer length.
2665 
2666  @ingroup gapfunc
2667 */
2668 
2669 template<typename T>
2670 unsigned gap_set_array(T* buf, const T* arr, unsigned len)
2671 {
2672  *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
2673 
2674  T* pcurr = buf + 1;
2675 
2676  unsigned i = 0;
2677  T curr = arr[i];
2678  if (curr != 0) // need to add the first gap: (0 to arr[0]-1)
2679  {
2680  *pcurr = (T)(curr - 1);
2681  ++pcurr;
2682  }
2683  else
2684  {
2685  ++(*buf); // GAP starts with 1
2686  }
2687  T prev = curr;
2688  T acc = prev;
2689 
2690  for (i = 1; i < len; ++i)
2691  {
2692  curr = arr[i];
2693  if (curr == prev + 1)
2694  {
2695  ++acc;
2696  prev = curr;
2697  }
2698  else
2699  {
2700  *pcurr++ = acc;
2701  acc = curr;
2702  *pcurr++ = (T)(curr-1);
2703  }
2704  prev = curr;
2705  }
2706  *pcurr = acc;
2707  if (acc != bm::gap_max_bits - 1)
2708  {
2709  ++pcurr;
2710  *pcurr = bm::gap_max_bits - 1;
2711  }
2712 
2713  unsigned gap_len = unsigned(pcurr - buf);
2714  BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
2715 
2716  *buf = (T)((*buf & 7) + (gap_len << 3));
2717  return gap_len+1;
2718 }
2719 
2720 
2721 //------------------------------------------------------------------------
2722 
2723 /**
2724  \brief Compute number of GAPs in bit-array
2725  \param arr - array of BITs
2726  \param len - array length
2727 
2728  @ingroup gapfunc
2729 */
2730 template<typename T>
2731 unsigned bit_array_compute_gaps(const T* arr,
2732  unsigned len)
2733 {
2734  unsigned gap_count = 1;
2735  T prev = arr[0];
2736  if (prev > 0)
2737  ++gap_count;
2738  for (unsigned i = 1; i < len; ++i)
2739  {
2740  T curr = arr[i];
2741  if (curr != prev + 1)
2742  {
2743  gap_count += 2;
2744  }
2745  prev = curr;
2746  }
2747  return gap_count;
2748 }
2749 
2750 
2751 //------------------------------------------------------------------------
2752 
2753 /**
2754  \brief Searches for the next 1 bit in the GAP block
2755  \param buf - GAP buffer
2756  \param nbit - bit index to start checking from.
2757  \param prev - returns previously checked value
2758 
2759  \return 0 if not found
2760 
2761  @ingroup gapfunc
2762 */
2763 template<typename T>
2764 unsigned gap_block_find(const T* buf,
2765  unsigned nbit,
2766  bm::id_t* prev)
2767 {
2768  BM_ASSERT(nbit < bm::gap_max_bits);
2769 
2770  unsigned bitval;
2771  unsigned gap_idx = bm::gap_bfind(buf, nbit, &bitval);
2772 
2773  if (bitval) // positive block.
2774  {
2775  *prev = nbit;
2776  return 1u;
2777  }
2778 
2779  unsigned val = buf[gap_idx] + 1;
2780  *prev = val;
2781 
2782  return (val != bm::gap_max_bits); // no bug here.
2783 }
2784 
2785 /*!
2786  \brief Set 1 bit in a block
2787  @ingroup bitfunc
2788 */
2790 void set_bit(unsigned* dest, unsigned bitpos)
2791 {
2792  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2793  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2794  nbit &= bm::set_word_mask;
2795  dest[nword] |= unsigned(1u << nbit);
2796 }
2797 
2798 /*!
2799  \brief Set 1 bit in a block
2800  @ingroup bitfunc
2801 */
2803 void clear_bit(unsigned* dest, unsigned bitpos)
2804 {
2805  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2806  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2807  nbit &= bm::set_word_mask;
2808  dest[nword] &= ~(unsigned(1u << nbit));
2809 }
2810 
2811 /*!
2812  \brief Test 1 bit in a block
2813 
2814  @ingroup bitfunc
2815 */
2817 unsigned test_bit(const unsigned* block, unsigned bitpos)
2818 {
2819  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2820  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2821  nbit &= bm::set_word_mask;
2822  return (block[nword] >> nbit) & 1u;
2823 }
2824 
2825 
2826 /*!
2827  \brief Sets bits to 1 in the bitblock.
2828  \param dest - Bitset buffer.
2829  \param bitpos - Offset of the start bit.
2830  \param bitcount - number of bits to set.
2831 
2832  @ingroup bitfunc
2833 */
2834 inline
2835 void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
2836 {
2837  const unsigned maskFF = ~0u;
2838 
2839  dest += unsigned(bitpos >> bm::set_word_shift); // nword
2840  bitpos &= bm::set_word_mask;
2841 
2842  if (bitcount == 1u) // special case (only 1 bit to set)
2843  {
2844  *dest |= (1u << bitpos);
2845  return;
2846  }
2847 
2848  if (bitpos) // starting pos is not aligned
2849  {
2850  unsigned mask_r = maskFF << bitpos;
2851  unsigned right_margin = bitpos + bitcount;
2852  if (right_margin < 32)
2853  {
2854  *dest |= (maskFF >> (32 - right_margin)) & mask_r;
2855  return;
2856  }
2857  *dest++ |= mask_r;
2858  bitcount -= 32 - bitpos;
2859  }
2860  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
2861  dest[0] = dest[1] = maskFF;
2862  if (bitcount >= 32)
2863  {
2864  *dest++ = maskFF; bitcount -= 32;
2865  }
2866  if (bitcount)
2867  {
2868  *dest |= maskFF >> (32 - bitcount);
2869  }
2870 }
2871 
2872 
2873 /*!
2874  \brief SUB (AND NOT) bit interval to 1 in the bitblock.
2875  \param dest - Bitset buffer.
2876  \param bitpos - Offset of the start bit.
2877  \param bitcount - number of bits to set.
2878 
2879  @ingroup bitfunc
2880 */
2881 inline
2882 void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
2883 {
2884  const unsigned maskFF = ~0u;
2885 
2886  dest += unsigned(bitpos >> bm::set_word_shift); // nword
2887  bitpos &= bm::set_word_mask;
2888 
2889  if (bitcount == 1u) // special case (only 1 bit to set)
2890  {
2891  *dest &= ~(1u << bitpos);
2892  return;
2893  }
2894 
2895  if (bitpos) // starting pos is not aligned
2896  {
2897  unsigned mask_r = maskFF << bitpos;
2898  unsigned right_margin = bitpos + bitcount;
2899  if (right_margin < 32)
2900  {
2901  *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
2902  return;
2903  }
2904  *dest++ &= ~mask_r;
2905  bitcount -= 32 - bitpos;
2906  }
2907  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
2908  dest[0] = dest[1] = 0u;
2909  if (bitcount >= 32)
2910  {
2911  *dest++ = 0u; bitcount -= 32;
2912  }
2913  if (bitcount)
2914  {
2915  *dest &= ~(maskFF >> (32 - bitcount));
2916  }
2917 }
2918 
2919 
2920 
2921 /*!
2922  \brief XOR bit interval to 1 in the bitblock.
2923  \param dest - Bitset buffer.
2924  \param bitpos - Offset of the start bit.
2925  \param bitcount - number of bits to set.
2926 
2927  @ingroup bitfunc
2928 */
2929 inline void xor_bit_block(unsigned* dest,
2930  unsigned bitpos,
2931  unsigned bitcount)
2932 {
2933  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2934  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2935  nbit &= bm::set_word_mask;
2936 
2937  bm::word_t* word = dest + nword;
2938 
2939  if (bitcount == 1) // special case (only 1 bit to set)
2940  {
2941  *word ^= unsigned(1 << nbit);
2942  return;
2943  }
2944 
2945  if (nbit) // starting position is not aligned
2946  {
2947  unsigned right_margin = nbit + bitcount;
2948 
2949  // here we checking if we setting bits only in the current
2950  // word. Example: 00111000000000000000000000000000 (32 bits word)
2951 
2952  if (right_margin < 32)
2953  {
2954  unsigned mask =
2956  bm::block_set_table<true>::_left[right_margin-1];
2957  *word ^= mask;
2958  return;
2959  }
2960  *word ^= block_set_table<true>::_right[nbit];
2961  bitcount -= 32 - nbit;
2962  ++word;
2963  }
2964  for ( ;bitcount >= 64; bitcount-=64, word+=2)
2965  {
2966  word[0] ^= ~0u; word[1] ^= ~0u;
2967  }
2968  if (bitcount >= 32)
2969  {
2970  *word++ ^= ~0u; bitcount -= 32;
2971  }
2972  if (bitcount)
2973  *word ^= block_set_table<true>::_left[bitcount-1];
2974 }
2975 
2976 
2977 /*!
2978  \brief SUB (AND NOT) GAP block to bitblock.
2979  \param dest - bitblock buffer pointer.
2980  \param pcurr - GAP buffer pointer.
2981 
2982  @ingroup gapfunc
2983 */
2984 template<typename T>
2985 void gap_sub_to_bitset(unsigned* dest, const T* pcurr)
2986 {
2987  BM_ASSERT(dest && pcurr);
2988 
2989  const T* pend = pcurr + (*pcurr >> 3);
2990  if (*pcurr & 1) // Starts with 1
2991  {
2992  bm::sub_bit_block(dest, 0, 1 + pcurr[1]);
2993  ++pcurr;
2994  }
2995  for (pcurr += 2; pcurr <= pend; pcurr += 2)
2996  {
2997  BM_ASSERT(*pcurr > pcurr[-1]);
2998  bm::sub_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
2999  }
3000 }
3001 
3002 
3003 /*!
3004  \brief SUB (AND NOT) GAP block to bitblock with digest assist
3005 
3006  \param dest - bitblock buffer pointer.
3007  \param pcurr - GAP buffer pointer.
3008  \param digest0 - digest of 0 strides inside bit block
3009 
3010  @ingroup gapfunc
3011 */
3012 template<typename T>
3013 void gap_sub_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0)
3014 {
3015  BM_ASSERT(dest && pcurr);
3016 
3017  const T* pend = pcurr + (*pcurr >> 3);
3018  if (*pcurr & 1) // Starts with 1
3019  {
3020  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
3021  if (!all_zero)
3022  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3023  pcurr += 3;
3024  }
3025  else
3026  pcurr += 2;
3027 
3028  // wind forward to digest start
3029  {
3030  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3031  unsigned start_pos = tz << set_block_digest_pos_shift;
3032  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3033  {
3034  if (*pcurr >= start_pos)
3035  break;
3036  }
3037  }
3038 
3039  unsigned lz = bm::count_leading_zeros_u64(digest0);
3040  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3041 
3042  unsigned bc, pos;
3043  T prev;
3044  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "1" again
3045  {
3046  BM_ASSERT(*pcurr > *(pcurr-1));
3047  prev = pcurr[-1];
3048  bc = *pcurr - prev;
3049  pos = 1u + prev;
3050 
3051  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3052  if (!all_zero)
3053  bm::sub_bit_block(dest, pos, bc);
3054 
3055  if (pos > stop_pos)
3056  break; // early break is possible based on digest tail
3057 
3058  } // for
3059 }
3060 
3061 
3062 
3063 /*!
3064  \brief XOR GAP block to bitblock.
3065  \param dest - bitblock buffer pointer.
3066  \param pcurr - GAP buffer pointer.
3067 
3068  @ingroup gapfunc
3069 */
3070 template<typename T>
3071 void gap_xor_to_bitset(unsigned* dest, const T* pcurr)
3072 {
3073  BM_ASSERT(dest && pcurr);
3074 
3075  const T* pend = pcurr + (*pcurr >> 3);
3076  if (*pcurr & 1) // Starts with 1
3077  {
3078  bm::xor_bit_block(dest, 0, 1 + pcurr[1]);
3079  ++pcurr;
3080  }
3081  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3082  {
3083  BM_ASSERT(*pcurr > pcurr[-1]);
3084  bm::xor_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3085  }
3086 }
3087 
3088 
3089 /*!
3090  \brief Adds(OR) GAP block to bitblock.
3091  \param dest - bitblock buffer pointer.
3092  \param pcurr - GAP buffer pointer.
3093  \param len - gap length
3094 
3095  @ingroup gapfunc
3096 */
3097 template<typename T>
3098 void gap_add_to_bitset(unsigned* dest, const T* pcurr, unsigned len)
3099 {
3100  BM_ASSERT(dest && pcurr);
3101 
3102  const T* pend = pcurr + len;
3103  if (*pcurr & 1) // Starts with 1
3104  {
3105  bm::or_bit_block(dest, 0, 1 + pcurr[1]);
3106  pcurr += 3;
3107  }
3108  else
3109  pcurr += 2;
3110 
3111  unsigned bc, pos;
3112  for (; pcurr <= pend; )
3113  {
3114  BM_ASSERT(*pcurr > pcurr[-1]);
3115  pos = 1u + pcurr[-1];
3116  bc = *pcurr - pcurr[-1];
3117  pcurr += 2;
3118  bm::or_bit_block(dest, pos, bc);
3119  }
3120 }
3121 
3122 
3123 /*!
3124  \brief Adds(OR) GAP block to bitblock.
3125  \param dest - bitblock buffer pointer.
3126  \param pcurr - GAP buffer pointer.
3127 
3128  @ingroup gapfunc
3129 */
3130 template<typename T>
3131 void gap_add_to_bitset(unsigned* dest, const T* pcurr)
3132 {
3133  unsigned len = (*pcurr >> 3);
3134  gap_add_to_bitset(dest, pcurr, len);
3135 }
3136 
3137 
3138 /*!
3139  \brief ANDs GAP block to bitblock.
3140  \param dest - bitblock buffer pointer.
3141  \param pcurr - GAP buffer pointer.
3142 
3143  @ingroup gapfunc
3144 */
3145 template<typename T>
3146 void gap_and_to_bitset(unsigned* dest, const T* pcurr)
3147 {
3148  BM_ASSERT(dest && pcurr);
3149 
3150  const T* pend = pcurr + (*pcurr >> 3);
3151  if (!(*pcurr & 1) ) // Starts with 0
3152  {
3153  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3154  pcurr += 3;
3155  }
3156  else
3157  pcurr += 2;
3158 
3159  unsigned bc, pos;
3160  for (; pcurr <= pend; ) // now we are in GAP "0" again
3161  {
3162  BM_ASSERT(*pcurr > *(pcurr-1));
3163  pos = 1u + pcurr[-1];
3164  bc = *pcurr - pcurr[-1];
3165  pcurr += 2;
3166  bm::sub_bit_block(dest, pos, bc);
3167  }
3168 }
3169 
3170 
3171 /*!
3172  \brief ANDs GAP block to bitblock with digest assist
3173  \param dest - bitblock buffer pointer.
3174  \param pcurr - GAP buffer pointer.
3175  \param digest0 - digest of 0 strides for the destination
3176 
3177  @ingroup gapfunc
3178 */
3179 template<typename T>
3180 void gap_and_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0)
3181 {
3182  BM_ASSERT(dest && pcurr);
3183  if (!digest0)
3184  return;
3185 
3186  const T* pend = pcurr + (*pcurr >> 3);
3187  if (!(*pcurr & 1) ) // Starts with 0
3188  {
3189  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
3190  if (!all_zero)
3191  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3192  pcurr += 3;
3193  }
3194  else
3195  pcurr += 2;
3196 
3197  // wind forward to digest start
3198  {
3199  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3200  unsigned start_pos = tz << set_block_digest_pos_shift;
3201  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3202  {
3203  if (*pcurr >= start_pos)
3204  break;
3205  }
3206  }
3207 
3208  unsigned lz = bm::count_leading_zeros_u64(digest0);
3209  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3210 
3211  unsigned bc, pos;
3212  T prev;
3213  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0" again
3214  {
3215  BM_ASSERT(*pcurr > *(pcurr-1));
3216 
3217  prev = pcurr[-1];
3218  bc = *pcurr - prev;
3219  pos = 1u + prev;
3220 
3221  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3222  if (!all_zero)
3223  bm::sub_bit_block(dest, pos, bc);
3224 
3225  if (pos > stop_pos) // early break is possible based on digest tail
3226  break;
3227 
3228  } // for
3229 }
3230 
3231 
3232 /*!
3233  \brief Compute bitcount of bit block AND masked by GAP block
3234  \param block - bitblock buffer pointer
3235  \param pcurr - GAP buffer pointer
3236  \return bitcount - cardinality of the AND product
3237 
3238  @ingroup gapfunc
3239 */
3240 template<typename T>
3241 bm::id_t gap_bitset_and_count(const unsigned* block, const T* pcurr)
3242 {
3243  BM_ASSERT(block);
3244  const T* pend = pcurr + (*pcurr >> 3);
3245  bm::id_t count = 0;
3246  if (*pcurr & 1) // Starts with 1
3247  {
3248  count += bm::bit_block_calc_count_range(block, 0, pcurr[1]);
3249  ++pcurr;
3250  }
3251  for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
3252  {
3253  count += bm::bit_block_calc_count_range(block, pcurr[-1]+1, *pcurr);
3254  }
3255  return count;
3256 }
3257 
3258 
3259 /*!
3260  \brief Bitcount test of bit block AND masked by GAP block.
3261  \param block - bitblock buffer pointer
3262  \param pcurr - GAP buffer pointer
3263  \return non-zero value if AND produces any result
3264 
3265  @ingroup gapfunc
3266 */
3267 template<typename T>
3268 bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr)
3269 {
3270  BM_ASSERT(block);
3271 
3272  const T* pend = pcurr + (*pcurr >> 3);
3273  bm::id_t count = 0;
3274  if (*pcurr & 1) // Starts with 1
3275  {
3276  count = bm::bit_block_any_range(block, 0, pcurr[1]);
3277  ++pcurr;
3278  }
3279  for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
3280  {
3281  count = bm::bit_block_any_range(block, pcurr[-1]+1, *pcurr);
3282  }
3283  return count;
3284 }
3285 
3286 
3287 
3288 /*!
3289  \brief Compute bitcount of bit block SUB masked by GAP block.
3290  \param block - bitblock buffer pointer.
3291  \param buf - GAP buffer pointer.
3292  \return bit-count result of AND NOT operation
3293 
3294  @ingroup gapfunc
3295 */
3296 template<typename T>
3297 bm::id_t gap_bitset_sub_count(const unsigned* block, const T* buf)
3298 {
3299  BM_ASSERT(block);
3300 
3301  const T* pcurr = buf;
3302  const T* pend = pcurr + (*pcurr >> 3);
3303  ++pcurr;
3304 
3305  bm::id_t count = 0;
3306 
3307  if (!(*buf & 1)) // Starts with 0
3308  {
3309  count += bit_block_calc_count_range(block, 0, *pcurr);
3310  ++pcurr;
3311  }
3312  ++pcurr; // now we are in GAP "0" again
3313 
3314  for (;pcurr <= pend; pcurr+=2)
3315  {
3316  count += bm::bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr);
3317  }
3318  return count;
3319 }
3320 
3321 
3322 /*!
3323  \brief Compute bitcount test of bit block SUB masked by GAP block
3324  \param block - bitblock buffer pointer
3325  \param buf - GAP buffer pointer
3326  \return non-zero value if AND NOT produces any 1 bits
3327 
3328  @ingroup gapfunc
3329 */
3330 template<typename T>
3331 bm::id_t gap_bitset_sub_any(const unsigned* block, const T* buf)
3332 {
3333  BM_ASSERT(block);
3334 
3335  const T* pcurr = buf;
3336  const T* pend = pcurr + (*pcurr >> 3);
3337  ++pcurr;
3338 
3339  bm::id_t count = 0;
3340 
3341  if (!(*buf & 1)) // Starts with 0
3342  {
3343  count += bit_block_any_range(block, 0, *pcurr);
3344  if (count)
3345  return count;
3346  ++pcurr;
3347  }
3348  ++pcurr; // now we are in GAP "0" again
3349 
3350  for (; !count && pcurr <= pend; pcurr+=2)
3351  {
3352  count += bm::bit_block_any_range(block, *(pcurr-1)+1, *pcurr);
3353  }
3354  return count;
3355 }
3356 
3357 
3358 
3359 /*!
3360  \brief Compute bitcount of bit block XOR masked by GAP block
3361  \param block - bitblock buffer pointer
3362  \param buf - GAP buffer pointer
3363  \return bit count value of XOR operation
3364 
3365  @ingroup gapfunc
3366 */
3367 template<typename T>
3368 bm::id_t gap_bitset_xor_count(const unsigned* block, const T* buf)
3369 {
3370  BM_ASSERT(block);
3371 
3372  const T* pcurr = buf;
3373  const T* pend = pcurr + (*pcurr >> 3);
3374  ++pcurr;
3375 
3376  unsigned bitval = *buf & 1;
3377 
3378  bm::id_t count = bm::bit_block_calc_count_range(block, 0, *pcurr);
3379  if (bitval)
3380  {
3381  count = *pcurr + 1 - count;
3382  }
3383 
3384  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3385  {
3386  T prev = (T)(*(pcurr-1)+1);
3387  bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr);
3388 
3389  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3390  c = (*pcurr - prev + 1) - c;
3391  count += c;
3392  }
3393  return count;
3394 }
3395 
3396 /*!
3397  \brief Compute bitcount test of bit block XOR masked by GAP block.
3398  \param block - bitblock buffer pointer
3399  \param buf - GAP buffer pointer
3400  \return non-zero value if XOR returns anything
3401 
3402  @ingroup gapfunc
3403 */
3404 template<typename T>
3405 bm::id_t gap_bitset_xor_any(const unsigned* block, const T* buf)
3406 {
3407  BM_ASSERT(block);
3408 
3409  const T* pcurr = buf;
3410  const T* pend = pcurr + (*pcurr >> 3);
3411  ++pcurr;
3412 
3413  unsigned bitval = *buf & 1;
3414 
3415  bm::id_t count = bit_block_any_range(block, 0, *pcurr);
3416  if (bitval)
3417  count = *pcurr + 1 - count;
3418 
3419  for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
3420  {
3421  T prev = (T)(*(pcurr-1)+1);
3422  bm::id_t c = bit_block_any_range(block, prev, *pcurr);
3423 
3424  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3425  c = (*pcurr - prev + 1) - c;
3426  count += c;
3427  }
3428  return count;
3429 }
3430 
3431 
3432 
3433 /*!
3434  \brief Compute bitcount of bit block OR masked by GAP block.
3435  \param block - bitblock buffer pointer.
3436  \param buf - GAP buffer pointer.
3437  \return bit count of OR
3438 
3439  @ingroup gapfunc
3440 */
3441 template<typename T>
3442 bm::id_t gap_bitset_or_count(const unsigned* block, const T* buf)
3443 {
3444  BM_ASSERT(block);
3445 
3446  const T* pcurr = buf;
3447  const T* pend = pcurr + (*pcurr >> 3);
3448  ++pcurr;
3449 
3450  unsigned bitval = *buf & 1;
3451 
3452  bm::id_t count = bitval ? *pcurr + 1
3453  : bm::bit_block_calc_count_range(block, 0, *pcurr);
3454  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3455  {
3456  T prev = (T)(*(pcurr-1)+1);
3457  bm::id_t c =
3458  bitval ? (*pcurr - prev + 1)
3459  : bm::bit_block_calc_count_range(block, prev, *pcurr);
3460  count += c;
3461  }
3462  return count;
3463 }
3464 
3465 /*!
3466  \brief Compute bitcount test of bit block OR masked by GAP block
3467  \param block - bitblock buffer pointer
3468  \param buf - GAP buffer pointer
3469  \return non zero value if union (OR) returns anything
3470 
3471  @ingroup gapfunc
3472 */
3473 template<typename T>
3474 bm::id_t gap_bitset_or_any(const unsigned* block, const T* buf)
3475 {
3476  bool b = !bm::gap_is_all_zero(buf) ||
3477  !bm::bit_is_all_zero(block);
3478  return b;
3479 }
3480 
3481 
3482 
3483 /*!
3484  \brief Bitblock memset operation.
3485 
3486  \param dst - destination block.
3487  \param value - value to set.
3488 
3489  @ingroup bitfunc
3490 */
3491 inline
3493 {
3494 #ifdef BMVECTOPT
3495  VECT_SET_BLOCK(dst, value);
3496 #else
3497  ::memset(dst, int(value), bm::set_block_size * sizeof(bm::word_t));
3498 #endif
3499 }
3500 
3501 
3502 /*!
3503  \brief GAP block to bitblock conversion.
3504  \param dest - bitblock buffer pointer.
3505  \param buf - GAP buffer pointer.
3506 
3507  @ingroup gapfunc
3508 */
3509 template<typename T>
3510 void gap_convert_to_bitset(unsigned* dest, const T* buf)
3511 {
3512  bm::bit_block_set(dest, 0);
3513  bm::gap_add_to_bitset(dest, buf);
3514 }
3515 
3516 
3517 
3518 /*!
3519  \brief Smart GAP block to bitblock conversion.
3520 
3521  Checks if GAP block is ALL-ZERO or ALL-ON. In those cases returns
3522  pointer on special static bitblocks.
3523 
3524  \param dest - bitblock buffer pointer.
3525  \param buf - GAP buffer pointer.
3526  \param set_max - max possible bitset length
3527 
3528  @ingroup gapfunc
3529 */
3530 template<typename T>
3531 unsigned* gap_convert_to_bitset_smart(unsigned* dest,
3532  const T* buf,
3533  id_t set_max)
3534 {
3535  if (buf[1] == set_max - 1)
3536  return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
3537 
3538  bm::gap_convert_to_bitset(dest, buf);
3539  return dest;
3540 }
3541 
3542 
3543 /*!
3544  \brief Calculates sum of all words in GAP block. (For debugging purposes)
3545  \note For debugging and testing ONLY.
3546  \param buf - GAP buffer pointer.
3547  \return Sum of all words.
3548 
3549  @ingroup gapfunc
3550  @internal
3551 */
3552 template<typename T> unsigned gap_control_sum(const T* buf)
3553 {
3554  unsigned end = *buf >> 3;
3555 
3556  const T* pcurr = buf;
3557  const T* pend = pcurr + (*pcurr >> 3);
3558  ++pcurr;
3559 
3560  if (*buf & 1) // Starts with 1
3561  {
3562  ++pcurr;
3563  }
3564  ++pcurr; // now we are in GAP "1" again
3565 
3566  while (pcurr <= pend)
3567  {
3568  BM_ASSERT(*pcurr > *(pcurr-1));
3569  pcurr += 2;
3570  }
3571  return buf[end];
3572 }
3573 
3574 
3575 /*!
3576  \brief Sets all bits to 0 or 1 (GAP)
3577  \param buf - GAP buffer pointer.
3578  \param set_max - max possible bitset length
3579  \param value - value to set
3580 
3581  @ingroup gapfunc
3582 */
3583 template<class T> void gap_set_all(T* buf,
3584  unsigned set_max,
3585  unsigned value)
3586 {
3587  BM_ASSERT(value == 0 || value == 1);
3588  *buf = (T)((*buf & 6u) + (1u << 3) + value);
3589  *(++buf) = (T)(set_max - 1);
3590 }
3591 
3592 
3593 /*!
3594  \brief Init gap block so it has block in it (can be whole block)
3595  \param buf - GAP buffer pointer.
3596  \param from - one block start
3597  \param to - one block end
3598  \param value - (block value)1 or 0
3599 
3600  @ingroup gapfunc
3601 */
3602 template<class T>
3604  T from,
3605  T to,
3606  T value)
3607  //unsigned set_max)
3608 {
3609  BM_ASSERT(value == 0 || value == 1);
3610  const unsigned set_max = bm::bits_in_block;
3611 
3612  unsigned gap_len;
3613  if (from == 0)
3614  {
3615  if (to == set_max - 1)
3616  {
3617  bm::gap_set_all(buf, set_max, value);
3618  }
3619  else
3620  {
3621  gap_len = 2;
3622  buf[1] = (T)to;
3623  buf[2] = (T)(set_max - 1);
3624  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3625  }
3626  return;
3627  }
3628  // from != 0
3629 
3630  value = !value;
3631  if (to == set_max - 1)
3632  {
3633  gap_len = 2;
3634  buf[1] = (T)(from - 1);
3635  buf[2] = (T)(set_max - 1);
3636  }
3637  else
3638  {
3639  gap_len = 3;
3640  buf[1] = (T) (from - 1);
3641  buf[2] = (T) to;
3642  buf[3] = (T)(set_max - 1);
3643  }
3644  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3645 }
3646 
3647 
3648 /*!
3649  \brief Inverts all bits in the GAP buffer.
3650  \param buf - GAP buffer pointer.
3651 
3652  @ingroup gapfunc
3653 */
3654 template<typename T> void gap_invert(T* buf)
3655 {
3656  *buf ^= 1;
3657 }
3658 
3659 
3660 #ifdef __GNUG__
3661 #pragma GCC diagnostic push
3662 #pragma GCC diagnostic ignored "-Wconversion"
3663 #endif
3664 
3665 /*!
3666  \brief Sets GAP block capacity level.
3667  \param buf - GAP buffer pointer.
3668  \param level new GAP block capacity level.
3669 
3670  @ingroup gapfunc
3671 */
3672 template<typename T>
3673 void set_gap_level(T* buf, int level)
3674 {
3675  BM_ASSERT(level >= 0);
3676  BM_ASSERT(unsigned(level) < bm::gap_levels);
3677 
3678  *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
3679 }
3680 #ifdef __GNUG__
3681 #pragma GCC diagnostic pop
3682 #endif
3683 
3684 
3685 
3686 /*!
3687  \brief Calculates GAP block capacity level.
3688  \param len - GAP buffer length.
3689  \param glevel_len - GAP lengths table
3690  \return GAP block capacity level.
3691  -1 if block does not fit any level.
3692  @ingroup gapfunc
3693 */
3694 template<typename T>
3695 inline int gap_calc_level(unsigned len, const T* glevel_len)
3696 {
3697  if (len <= unsigned(glevel_len[0]-4)) return 0;
3698  if (len <= unsigned(glevel_len[1]-4)) return 1;
3699  if (len <= unsigned(glevel_len[2]-4)) return 2;
3700  if (len <= unsigned(glevel_len[3]-4)) return 3;
3701 
3702  BM_ASSERT(bm::gap_levels == 4);
3703  return -1;
3704 }
3705 
3706 /*! @brief Returns number of free elements in GAP block array.
3707  Difference between GAP block capacity on this level and actual GAP length.
3708 
3709  @param buf - GAP buffer pointer
3710  @param glevel_len - GAP lengths table
3711 
3712  @return Number of free GAP elements
3713  @ingroup gapfunc
3714 */
3715 template<typename T>
3716 inline unsigned gap_free_elements(const T* buf, const T* glevel_len)
3717 {
3718  unsigned len = gap_length(buf);
3719  unsigned capacity = gap_capacity(buf, glevel_len);
3720  return capacity - len;
3721 }
3722 
3723 /*!
3724  \brief Lexicographical comparison of BIT buffers.
3725  \param buf1 - First buffer pointer.
3726  \param buf2 - Second buffer pointer.
3727  \param len - Buffer length in elements (T).
3728  \return <0 - less, =0 - equal, >0 - greater.
3729 
3730  @ingroup bitfunc
3731 */
3732 template<typename T>
3733 int bitcmp(const T* buf1, const T* buf2, unsigned len)
3734 {
3735  BM_ASSERT(len);
3736  const T* pend1 = buf1 + len;
3737  do
3738  {
3739  T w1 = *buf1++;
3740  T w2 = *buf2++;
3741  T diff = w1 ^ w2;
3742  if (diff)
3743  return (w1 & diff & -diff) ? 1 : -1;
3744  } while (buf1 < pend1);
3745  return 0;
3746 }
3747 
3748 #ifndef BMAVX2OPT
3749 
3750 /*!
3751  \brief Converts bit block to GAP.
3752  \param dest - Destinatio GAP buffer.
3753  \param block - Source bitblock buffer.
3754  \param dest_len - length of the dest. buffer.
3755  \return New length of GAP block or 0 if conversion failed
3756  (insufficicent space).
3757 
3758  @ingroup gapfunc
3759 */
3760 inline
3762  const unsigned* BMRESTRICT block,
3763  unsigned dest_len)
3764 {
3765  const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
3766  gap_word_t* BMRESTRICT pcurr = dest;
3767  gap_word_t* BMRESTRICT end = dest + dest_len; (void)end;
3768 
3769  unsigned bitval = (*block) & 1u;
3770  *pcurr++ = bm::gap_word_t(bitval);
3771  *pcurr = 0;
3772  unsigned bit_idx = 0;
3773 
3774  do
3775  {
3776  unsigned val = *block;
3777  while (!val || val == ~0u)
3778  {
3779  if (bitval != unsigned(bool(val)))
3780  {
3781  *pcurr++ = (gap_word_t)(bit_idx-1);
3782  bitval ^= 1u;
3783  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3784  BM_ASSERT(pcurr != end);
3785  }
3786  bit_idx += unsigned(sizeof(*block) * 8);
3787  if (++block >= block_end)
3788  goto complete;
3789  val = *block;
3790  } // while
3791 
3792  // process "0100011" word
3793  //
3794  unsigned bits_consumed = 0;
3795  do
3796  {
3797  unsigned tz = 1u;
3798  if (bitval != (val & 1u))
3799  {
3800  *pcurr++ = (gap_word_t)(bit_idx-1);
3801  bitval ^= 1u;
3802  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3803  BM_ASSERT(pcurr != end);
3804  }
3805  else // match, find the next idx
3806  {
3807  tz = bm::bit_scan_forward32(bitval ? ~val : val);
3808  // possible alternative:
3809  // tz = bm::count_trailing_zeros(bitval ? ~val : val);
3810  }
3811 
3812  bits_consumed += tz;
3813  bit_idx += tz;
3814  val >>= tz;
3815 
3816  if (!val)
3817  {
3818  if (bits_consumed < 32u)
3819  {
3820  *pcurr++ = (gap_word_t)(bit_idx-1);
3821  bitval ^= 1u;
3822  bit_idx += 32u - bits_consumed;
3823  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3824  BM_ASSERT(pcurr != end);
3825  }
3826  break;
3827  }
3828  } while (1);
3829 
3830  } while(++block < block_end);
3831 
3832 complete:
3833  *pcurr = (gap_word_t)(bit_idx-1);
3834  unsigned len = (unsigned)(pcurr - dest);
3835  *dest = (gap_word_t)((*dest & 7) + (len << 3));
3836  return len;
3837 }
3838 #endif
3839 
3840 inline
3842  const unsigned* BMRESTRICT block,
3843  unsigned dest_len)
3844 {
3845 #if defined(VECT_BIT_TO_GAP)
3846  return VECT_BIT_TO_GAP(dest, block, dest_len);
3847 #else
3848  return bm::bit_block_to_gap(dest, block, dest_len);
3849 #endif
3850 }
3851 
3852 
3853 /*!
3854  \brief Iterate gap block as delta-bits with a functor
3855  @ingroup gapfunc
3856 */
3857 template<class T, class F>
3858 void for_each_gap_dbit(const T* buf, F& func)
3859 {
3860  const T* pcurr = buf;
3861  const T* pend = pcurr + (*pcurr >> 3);
3862 
3863  ++pcurr;
3864 
3865  unsigned prev = 0;
3866  unsigned first_inc;
3867 
3868  if (*buf & 1)
3869  {
3870  first_inc = 0;
3871  unsigned to = *pcurr;
3872  for (unsigned i = 0; i <= to; ++i)
3873  {
3874  func(1);
3875  }
3876  prev = to;
3877  ++pcurr;
3878  }
3879  else
3880  {
3881  first_inc = 1;
3882  }
3883  ++pcurr; // set GAP to 1
3884 
3885  while (pcurr <= pend)
3886  {
3887  unsigned from = *(pcurr-1)+1;
3888  unsigned to = *pcurr;
3889  if (first_inc)
3890  {
3891  func(from - prev + first_inc);
3892  first_inc = 0;
3893  }
3894  else
3895  {
3896  func(from - prev);
3897  }
3898 
3899  for (unsigned i = from+1; i <= to; ++i)
3900  {
3901  func(1);
3902  }
3903  prev = to;
3904  pcurr += 2; // jump to the next positive GAP
3905  }
3906 }
3907 
3908 /*!
3909  \brief Convert gap block into array of ints corresponding to 1 bits
3910  @ingroup gapfunc
3911 */
3912 template<typename D, typename T>
3914  const T* BMRESTRICT buf,
3915  unsigned dest_len,
3916  bool invert = false)
3917 {
3918  BMREGISTER const T* BMRESTRICT pcurr = buf;
3919  BMREGISTER const T* pend = pcurr + (*pcurr >> 3);
3920 
3921  D* BMRESTRICT dest_curr = dest;
3922  ++pcurr;
3923 
3924  int bitval = (*buf) & 1;
3925  if (invert)
3926  bitval = !bitval; // invert the GAP buffer
3927 
3928  if (bitval)
3929  {
3930  if (unsigned(*pcurr + 1) >= dest_len)
3931  return 0; // insufficient space
3932  dest_len -= *pcurr;
3933  T to = *pcurr;
3934  for (T i = 0; ;++i)
3935  {
3936  *dest_curr++ = i;
3937  if (i == to) break;
3938  }
3939  ++pcurr;
3940  }
3941  ++pcurr; // set GAP to 1
3942 
3943  while (pcurr <= pend)
3944  {
3945  unsigned pending = *pcurr - *(pcurr-1);
3946  if (pending >= dest_len)
3947  return 0;
3948  dest_len -= pending;
3949  T from = (T)(*(pcurr-1)+1);
3950  T to = *pcurr;
3951  for (T i = from; ;++i)
3952  {
3953  *dest_curr++ = i;
3954  if (i == to) break;
3955  }
3956  pcurr += 2; // jump to the next positive GAP
3957  }
3958  return (D) (dest_curr - dest);
3959 }
3960 
3961 
3962 
3963 /*!
3964  @brief Bitcount for bit block
3965 
3966  Function calculates number of 1 bits in the given array of words.
3967  Make sure the addresses are aligned.
3968 
3969  @ingroup bitfunc
3970 */
3971 inline
3973 {
3974  const bm::word_t* block_end = block + bm::set_block_size;
3975  bm::id_t count = 0;
3976 
3977 #ifdef BMVECTOPT
3978  count = VECT_BITCOUNT(block, block_end);
3979 #else
3980 #ifdef BM64OPT
3981  // 64-bit optimized algorithm. No sparse vect opt.
3982  // instead it uses 4-way parallel pipelined version
3983 
3984  const bm::id64_t* b1 = (bm::id64_t*) block;
3985  const bm::id64_t* b2 = (bm::id64_t*) block_end;
3986  do
3987  {
3988  bm::id64_t x = b1[0];
3989  bm::id64_t y = b1[1];
3990  bm::id64_t u = b1[2];
3991  bm::id64_t v = b1[3];
3992 
3993  if (x | y | u | v)
3994  {
3995  unsigned c = bitcount64_4way(x, y, u, v);
3996  BM_ASSERT(c);
3997  count += c;
3998  }
3999  b1 += 4;
4000  } while (b1 < b2);
4001 #else
4002  // For 32 bit code the fastest method is
4003  // to use bitcount table for each byte in the block.
4004  // As optimization for sparse bitsets used bits accumulator
4005  // to collect ON bits using bitwise OR.
4006  bm::word_t acc = *block++;
4007  do
4008  {
4009  bm::word_t in = *block++;
4010  bm::word_t acc_prev = acc;
4011  acc |= in;
4012  if (acc_prev &= in) // accumulator miss: counting bits
4013  {
4014  BM_INCWORD_BITCOUNT(count, acc);
4015  acc = acc_prev;
4016  }
4017  } while (block < block_end);
4018 
4019  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
4020 
4021 #endif
4022 #endif
4023  return count;
4024 }
4025 
4026 /*!
4027  @brief Bitcount for bit block
4028 
4029  Function calculates number of 1 bits in the given array of words.
4030  uses digest to understand zero areas
4031 
4032  @ingroup bitfunc
4033 */
4034 inline
4035 bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest)
4036 {
4037  bm::id_t count = 0;
4038  bm::id64_t d = digest;
4039  while (d)
4040  {
4041  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4042 
4043  unsigned wave = bm::word_bitcount64(t - 1);
4044  unsigned off = wave * bm::set_block_digest_wave_size;
4045 
4046  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
4047  (const bm::bit_block_t::bunion_t*)(&block[off]);
4048  unsigned j = 0;
4049  do
4050  {
4051  count += bm::word_bitcount64(src_u->w64[j+0]) +
4052  bm::word_bitcount64(src_u->w64[j+1]) +
4053  bm::word_bitcount64(src_u->w64[j+2]) +
4054  bm::word_bitcount64(src_u->w64[j+3]);
4055  j += 4;
4056  } while (j < bm::set_block_digest_wave_size/2);
4057 
4058  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4059  } // while
4060  return count;
4061 }
4062 
4063 
4064 
4065 /*!
4066  @brief Bitcount for bit string
4067 
4068  Added for back-compatibility purposes, not block aligned,
4069  not SIMD accelerated
4070 
4071  @ingroup bitfunc
4072 */
4073 inline
4075  const bm::word_t* block_end)
4076 {
4077  bm::id_t count = 0;
4078  bm::word_t acc = *block++;
4079  do
4080  {
4081  bm::word_t in = *block++;
4082  bm::word_t acc_prev = acc;
4083  acc |= in;
4084  if (acc_prev &= in) // accumulator miss: counting bits
4085  {
4086  BM_INCWORD_BITCOUNT(count, acc);
4087  acc = acc_prev;
4088  }
4089  } while (block < block_end);
4090 
4091  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
4092  return count;
4093 }
4094 
4095 
4096 
4097 /*!
4098  Function calculates number of times when bit value changed
4099  (1-0 or 0-1).
4100 
4101  For 001 result is 2
4102  010 - 3
4103  011 - 2
4104  111 - 1
4105 
4106  @ingroup bitfunc
4107 */
4108 inline
4110 {
4111  unsigned count = 1;
4112  w ^= (w >> 1);
4113 
4114  count += bm::word_bitcount(w);
4115  count -= (w >> ((sizeof(w) * 8) - 1));
4116  return count;
4117 }
4118 
4119 
4120 /*!
4121  Function calculates number of times when bit value changed
4122  @internal
4123 */
4124 inline
4125 unsigned bit_block_change32(const bm::word_t* block)
4126 {
4127  unsigned gap_count = 1;
4128 
4129  bm::word_t w, w0, w_prev, w_l;
4130  w = w0 = *block;
4131 
4132  const int w_shift = int(sizeof(w) * 8 - 1);
4133  w ^= (w >> 1);
4134  BM_INCWORD_BITCOUNT(gap_count, w);
4135  gap_count -= (w_prev = (w0 >> w_shift)); // negative value correction
4136 
4137  const bm::word_t* block_end = block + bm::set_block_size;
4138  for (++block ;block < block_end; ++block)
4139  {
4140  w = w0 = *block;
4141  ++gap_count;
4142  if (!w)
4143  {
4144  gap_count -= !w_prev;
4145  w_prev = 0;
4146  }
4147  else
4148  {
4149  w ^= (w >> 1);
4150  BM_INCWORD_BITCOUNT(gap_count, w);
4151 
4152  w_l = w0 & 1;
4153  gap_count -= (w0 >> w_shift); // negative value correction
4154  gap_count -= !(w_prev ^ w_l); // word border correction
4155 
4156  w_prev = (w0 >> w_shift);
4157  }
4158  } // for
4159  return gap_count;
4160 }
4161 
4162 
4163 /*!
4164  Function calculates number of times when bit value changed
4165  (1-0 or 0-1) in the bit block.
4166 
4167  @param block - bit-block start pointer
4168  @return number of 1-0, 0-1 transitions
4169 
4170  @ingroup bitfunc
4171 */
4172 inline
4173 unsigned bit_block_calc_change(const bm::word_t* block)
4174 {
4175 #if defined(VECT_BLOCK_CHANGE)
4176  return VECT_BLOCK_CHANGE(block);
4177 #else
4178  return bm::bit_block_change32(block);
4179 #endif
4180 }
4181 
4182 
4183 /*!
4184  Function calculates number of 1 bits in the given array of words in
4185  the range between left anf right bits (borders included)
4186  Make sure the addr is aligned.
4187 
4188  @ingroup bitfunc
4189 */
4190 inline
4192  bm::word_t left,
4193  bm::word_t right)
4194 {
4195  BM_ASSERT(left <= right);
4196  BM_ASSERT(right <= bm::gap_max_bits-1);
4197 
4198  unsigned nword, nbit, bitcount, count;
4199  nbit = left & bm::set_word_mask;
4200  const bm::word_t* word =
4201  block + (nword = unsigned(left >> bm::set_word_shift));
4202  if (left == right) // special case (only 1 bit to check)
4203  {
4204  return (*word >> nbit) & 1u;
4205  }
4206 
4207  count = 0;
4208  bitcount = right - left + 1u;
4209  if (nbit) // starting position is not aligned
4210  {
4211  unsigned right_margin = nbit + right - left;
4212  if (right_margin < 32)
4213  {
4214  unsigned mask =
4216  block_set_table<true>::_left[right_margin];
4217  return bm::word_bitcount(*word & mask);
4218  }
4219  count = bm::word_bitcount(*word & block_set_table<true>::_right[nbit]);
4220  bitcount -= 32 - nbit;
4221  ++word;
4222  }
4223 
4224  // now when we are word aligned, we can count bits the usual way
4225  //
4226  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
4227  for ( ;bitcount >= 64; bitcount-=64)
4228  {
4229  bm::id64_t w64 = *((bm::id64_t*)word); // x86 unaligned(!) read
4230  count += unsigned(_mm_popcnt_u64(w64));
4231  word += 2;
4232  }
4233  if (bitcount >= 32)
4234  {
4235  count += bm::word_bitcount(*word++);
4236  bitcount-=32;
4237  }
4238  #else
4239  for ( ;bitcount >= 32; bitcount-=32, ++word)
4240  count += bm::word_bitcount(*word);
4241  #endif
4242  BM_ASSERT(bitcount < 32);
4243 
4244  if (bitcount) // we have a tail to count
4245  {
4246  count += bm::word_bitcount(
4247  *word & block_set_table<true>::_left[bitcount-1]);
4248  }
4249  return count;
4250 }
4251 
4252 /*!
4253  Function calculates number of 1 bits in the given array of words in
4254  the range between 0 anf right bits (borders included)
4255  Make sure the addr is aligned.
4256 
4257  @ingroup bitfunc
4258 */
4259 inline
4261  bm::word_t right)
4262 {
4263  BM_ASSERT(block);
4264  if (!right) // special case, first bit check
4265  return *block & 1u;
4266  bm::id_t count = 0;
4267 
4268  unsigned bitcount = right + 1;
4269 
4270  // AVX2 or 64-bit loop unroll
4271  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
4272  BM_AVX2_POPCNT_PROLOG
4273 
4274  __m256i cnt = _mm256_setzero_si256();
4275  bm::id64_t* cnt64;
4276 
4277  for ( ;bitcount >= 256; bitcount -= 256)
4278  {
4279  const __m256i* src = (__m256i*)block;
4280  __m256i xmm256 = _mm256_load_si256(src);
4281  BM_AVX2_BIT_COUNT(bc, xmm256);
4282  cnt = _mm256_add_epi64(cnt, bc);
4283 
4284  block += 8;
4285  }
4286  cnt64 = (bm::id64_t*)&cnt;
4287  count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
4288  #endif
4289 
4290  for ( ;bitcount >= 64; bitcount -= 64)
4291  {
4292  bm::id64_t* p = (bm::id64_t*)block;
4293  count += bm::word_bitcount64(*p);
4294  block += 2;
4295  }
4296  if (bitcount >= 32)
4297  {
4298  count += bm::word_bitcount(*block++);
4299  bitcount-=32;
4300  }
4301 
4302  if (bitcount) // we have a tail to count
4303  {
4304  count +=
4305  bm::word_bitcount(*block & block_set_table<true>::_left[bitcount-1]);
4306  }
4307  return count;
4308 }
4309 
4310 
4311 
4312 /*!
4313  Cyclic rotation of bit-block left by 1 bit
4314  @ingroup bitfunc
4315 */
4316 inline
4318 {
4319  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4320  for (unsigned i = 0; i < bm::set_block_size-1; ++i)
4321  {
4322  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4323  }
4324  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4325 }
4326 
4327 /*!
4328  @brief Unrolled cyclic rotation of bit-block left by 1 bit
4329  @param block - bit-block pointer
4330  @ingroup bitfunc
4331 */
4332 inline
4334 {
4335  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4336  const unsigned unroll_factor = 4;
4337  bm::word_t w0, w1, w2, w3;
4338 
4339  unsigned i;
4340  for (i = 0; i < bm::set_block_size - unroll_factor; i += unroll_factor)
4341  {
4342  w0 = block[i + 1] >> 31;
4343  w1 = block[i + 2] >> 31;
4344  w2 = block[i + 3] >> 31;
4345  w3 = block[i + 4] >> 31;
4346 
4347  block[0 + i] = (block[0 + i] << 1) | w0;
4348  block[1 + i] = (block[1 + i] << 1) | w1;
4349  block[2 + i] = (block[2 + i] << 1) | w2;
4350  block[3 + i] = (block[3 + i] << 1) | w3;
4351  }
4352  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4353  block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
4354  block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
4355  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4356 }
4357 
4358 /*!
4359  @brief insert bit into position and shift the rest right with carryover
4360 
4361  @param block - bit-block pointer
4362  @param bitpos - bit position to insert
4363  @param value - bit value (0|1) to insert
4364 
4365  @return carry over value
4366  @ingroup bitfunc
4367 */
4368 inline
4369 bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value)
4370 {
4371  BM_ASSERT(block);
4372  BM_ASSERT(bitpos < 65536);
4373 
4374  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
4375  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4376  nbit &= bm::set_word_mask;
4377 
4378  bm::word_t co_flag = 0;
4379  if (nbit)
4380  {
4381  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
4382  bm::word_t w = block[nword] & r_mask;
4383  bm::word_t wl = block[nword] & ~r_mask;
4384  co_flag = w >> 31;
4385  w <<= 1u;
4386  block[nword] = w | (unsigned(value) << nbit) | wl;
4387  ++nword;
4388  }
4389  else
4390  {
4391  co_flag = value;
4392  }
4393 
4394  for (unsigned i = nword; i < bm::set_block_size; ++i)
4395  {
4396  bm::word_t w = block[i];
4397  bm::word_t co_flag1 = w >> 31;
4398  w = (w << 1u) | co_flag;
4399  block[i] = w;
4400  co_flag = co_flag1;
4401  } // for i
4402  return co_flag;
4403 }
4404 
4405 
4406 
4407 /*!
4408  @brief Right bit-shift bitblock by 1 bit (reference)
4409  @param block - bit-block pointer
4410  @param empty_acc - [out] contains 0 if block is empty
4411  @param co_flag - carry over from the previous block
4412 
4413  @return carry over bit (1 or 0)
4414  @ingroup bitfunc
4415 */
4416 inline
4418  bm::word_t* empty_acc, bm::word_t co_flag)
4419 {
4420  BM_ASSERT(block);
4421  BM_ASSERT(empty_acc);
4422 
4423  bm::word_t acc = 0;
4424  for (unsigned i = 0; i < bm::set_block_size; ++i)
4425  {
4426  bm::word_t w = block[i];
4427  bm::word_t co_flag1 = w >> 31;
4428  acc |= w = (w << 1u) | co_flag;
4429  block[i] = w;
4430  co_flag = co_flag1;
4431  }
4432  *empty_acc = acc;
4433  return co_flag;
4434 }
4435 
4436 /*!
4437  @brief Right bit-shift of bit-block by 1 bit (loop unrolled)
4438  @param block - bit-block pointer
4439  @param empty_acc - [out] contains 0 if block is empty
4440  @param co_flag - carry over from the previous block
4441 
4442  @return carry over bit (1 or 0)
4443  @ingroup bitfunc
4444 */
4445 inline
4447  bm::word_t* empty_acc, bm::word_t co_flag)
4448 {
4449  BM_ASSERT(block);
4450  BM_ASSERT(empty_acc);
4451  #if defined(VECT_SHIFT_R1)
4452  return VECT_SHIFT_R1(block, empty_acc, co_flag);
4453  #else
4454  return bm::bit_block_shift_r1(block, empty_acc, co_flag);
4455  #endif
4456 }
4457 
4458 
4459 /*!
4460  @brief Left bit-shift bitblock by 1 bit (reference)
4461  @param block - bit-block pointer
4462  @param empty_acc - [out] contains 0 if block is empty
4463  @param co_flag - carry over from the prev/next block
4464 
4465  @return carry over bit (1 or 0)
4466 
4467  @ingroup bitfunc
4468 */
4469 inline
4471  bm::word_t* empty_acc, bm::word_t co_flag)
4472 {
4473  BM_ASSERT(block);
4474  BM_ASSERT(empty_acc);
4475 
4476  bm::word_t acc = 0;
4477  for (int i = bm::set_block_size-1; i >= 0; --i)
4478  {
4479  bm::word_t w = block[i];
4480  bm::word_t co_flag1 = w & 1u;
4481  acc |= w = (w >> 1u) | (co_flag << 31u);
4482  block[i] = w;
4483  co_flag = co_flag1;
4484  }
4485 
4486  *empty_acc = acc;
4487  return co_flag;
4488 }
4489 
4490 /*!
4491  @brief Left bit-shift of bit-block by 1 bit (loop unrolled)
4492  @param block - bit-block pointer
4493  @param empty_acc - [out] contains 0 if block is empty
4494  @param co_flag - carry over from the prev/next block
4495 
4496  @return carry over bit (1 or 0)
4497  @ingroup bitfunc
4498 */
4499 inline
4501  bm::word_t* empty_acc, bm::word_t co_flag)
4502 {
4503  BM_ASSERT(block);
4504  BM_ASSERT(empty_acc);
4505  #if defined(VECT_SHIFT_L1)
4506  return VECT_SHIFT_L1(block, empty_acc, co_flag);
4507  #else
4508  return bm::bit_block_shift_l1(block, empty_acc, co_flag);
4509  #endif
4510 }
4511 
4512 /*!
4513  @brief erase bit from position and shift the rest right with carryover
4514 
4515  @param block - bit-block pointer
4516  @param bitpos - bit position to insert
4517  @param carry_over - bit value to add to the end (0|1)
4518 
4519  @ingroup bitfunc
4520 */
4521 inline
4522 void bit_block_erase(bm::word_t* block, unsigned bitpos, bool carry_over)
4523 {
4524  BM_ASSERT(block);
4525  BM_ASSERT(bitpos < 65536);
4526 
4527  if (!bitpos)
4528  {
4529  bm::word_t acc;
4530  bit_block_shift_l1_unr(block, &acc, carry_over);
4531  return;
4532  }
4533 
4534  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
4535  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4536  nbit &= bm::set_word_mask;
4537 
4538  bm::word_t co_flag = carry_over;
4539  for (unsigned i = bm::set_block_size-1; i > nword; --i)
4540  {
4541  bm::word_t w = block[i];
4542  bm::word_t co_flag1 = w & 1u;
4543  w = (w >> 1u) | (co_flag << 31u);
4544  block[i] = w;
4545  co_flag = co_flag1;
4546  }
4547 
4548  if (nbit)
4549  {
4550  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
4551  bm::word_t w = block[nword] & r_mask;
4552  bm::word_t wl = block[nword] & ~r_mask;
4553  w &= ~(1 << nbit); // clear the removed bit
4554  w >>= 1u;
4555  w |= wl | (co_flag << 31u);
4556  block[nword] = w;
4557  }
4558  else
4559  {
4560  block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
4561  }
4562 }
4563 
4564 
4565 /*!
4566  @brief Right bit-shift of bit-block by 1 bit (reference) + AND
4567  @param block - bit-block pointer
4568  @param co_flag - carry over from the previous block
4569  @param mask_block - mask bit-block pointer
4570  @param digest - block digest
4571 
4572  @return carry over bit (1 or 0)
4573  @ingroup bitfunc
4574 */
4575 inline
4577  bm::word_t co_flag,
4578  const bm::word_t* BMRESTRICT mask_block,
4579  bm::id64_t* BMRESTRICT digest)
4580 {
4581  BM_ASSERT(block);
4582  BM_ASSERT(mask_block);
4583  BM_ASSERT(digest && *digest);
4584 
4585 
4586  bm::id64_t d = *digest;
4587 
4588  unsigned di = 0;
4589  if (!co_flag)
4590  {
4591  bm::id64_t t = d & -d;
4592  di = bm::word_bitcount64(t - 1); // find start bit-index
4593  }
4594 
4595  for (; di < 64; ++di)
4596  {
4597  const unsigned d_base = di * bm::set_block_digest_wave_size;
4598  bm::id64_t dmask = (1ull << di);
4599  if (d & dmask) // digest stride not empty
4600  {
4601  bm::word_t acc = 0;
4602  for (unsigned i = d_base; i < d_base + bm::set_block_digest_wave_size; ++i)
4603  {
4605 
4606  bm::word_t w = block[i];
4607  bm::word_t co_flag1 = w >> 31;
4608  w = (w << 1u) | co_flag;
4609  acc |= block[i] = w & mask_block[i];
4610  co_flag = co_flag1;
4611  }
4612  if (!acc)
4613  d &= ~dmask; // update digest: clear stride bit
4614  }
4615  else // stride is empty
4616  {
4617  BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
4618  BM_ASSERT(block[d_base]==0);
4619 
4620  if (co_flag) // there is carry-over
4621  {
4622  BM_ASSERT(co_flag == 1);
4623  BM_ASSERT(block[d_base] == 0);
4624 
4625  block[d_base] = co_flag & mask_block[d_base];
4626  if (block[d_base])
4627  d |= dmask; // update d
4628  co_flag = 0;
4629  }
4630  }
4631  } // for di
4632 
4633  *digest = d;
4634  return co_flag;
4635 }
4636 
4637 /*!
4638  @brief Right bit-shift bitblock by 1 bit (reference) + AND
4639  @param block - bit-block pointer
4640  @param co_flag - carry over from the previous block
4641  @param mask_block - mask bit-block pointer
4642  @param digest - block digest
4643 
4644  @return carry over bit (1 or 0)
4645  @ingroup bitfunc
4646 */
4647 inline
4649  bm::word_t co_flag,
4650  const bm::word_t* BMRESTRICT mask_block,
4651  bm::id64_t* BMRESTRICT digest)
4652 {
4653  BM_ASSERT(block);
4654  BM_ASSERT(mask_block);
4655  BM_ASSERT(digest);
4656 
4657  #if defined(VECT_SHIFT_R1_AND)
4658  return VECT_SHIFT_R1_AND(block, co_flag, mask_block, digest);
4659  #else
4660  return bm::bit_block_shift_r1_and(block, co_flag, mask_block, digest);
4661  #endif
4662 }
4663 
4664 
4665 /*!
4666  Function calculates if there is any number of 1 bits
4667  in the given array of words in the range between left anf right bits
4668  (borders included). Make sure the addresses are aligned.
4669 
4670  @ingroup bitfunc
4671 */
4672 inline
4674  bm::word_t left,
4675  bm::word_t right)
4676 {
4677  BM_ASSERT(left <= right);
4678 
4679  unsigned nbit = left; // unsigned(left & bm::set_block_mask);
4680  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4681  nbit &= bm::set_word_mask;
4682 
4683  const bm::word_t* word = block + nword;
4684 
4685  if (left == right) // special case (only 1 bit to check)
4686  {
4687  return (*word >> nbit) & 1;
4688  }
4689  unsigned acc;
4690  unsigned bitcount = right - left + 1;
4691 
4692  if (nbit) // starting position is not aligned
4693  {
4694  unsigned right_margin = nbit + (right - left);
4695  if (right_margin < 32)
4696  {
4697  unsigned mask =
4699  block_set_table<true>::_left[right_margin];
4700  acc = *word & mask;
4701  return acc;
4702  }
4703  else
4704  {
4705  acc = *word & block_set_table<true>::_right[nbit];
4706  if (acc)
4707  return acc;
4708  bitcount -= 32 - nbit;
4709  }
4710  ++word;
4711  }
4712 
4713  // now when we are word aligned, we can check bits the usual way
4714  for ( ;bitcount >= 32; bitcount -= 32)
4715  {
4716  acc = *word++;
4717  if (acc)
4718  return acc;
4719  }
4720 
4721  if (bitcount) // we have a tail to count
4722  {
4723  acc = (*word) & block_set_table<true>::_left[bitcount-1];
4724  if (acc)
4725  return acc;
4726  }
4727 
4728  return 0;
4729 }
4730 
4731 // ----------------------------------------------------------------------
4732 
4733 /*! Function inverts block of bits
4734  @ingroup bitfunc
4735 */
4736 template<typename T> void bit_invert(T* start)
4737 {
4739 #ifdef BMVECTOPT
4740  VECT_INVERT_BLOCK(start);
4741 #else
4742  T* end = (T*)((unsigned*)(start) + bm::set_block_size);
4743  do
4744  {
4745  start[0] = ~start[0];
4746  start[1] = ~start[1];
4747  start[2] = ~start[2];
4748  start[3] = ~start[3];
4749  start+=4;
4750  } while (start < end);
4751 #endif
4752 }
4753 
4754 // ----------------------------------------------------------------------
4755 
4756 /*! @brief Returns "true" if all bits in the block are 1
4757  @ingroup bitfunc
4758 */
4759 inline
4760 bool is_bits_one(const bm::wordop_t* start)
4761 {
4762 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
4763  return VECT_IS_ONE_BLOCK(start);
4764 #else
4765  const bm::word_t* BMRESTRICT src_end = (bm::word_t*)start + bm::set_block_size;
4766  const bm::wordop_t* end = (const bm::wordop_t*)src_end;
4767  do
4768  {
4769  bm::wordop_t tmp =
4770  start[0] & start[1] & start[2] & start[3];
4771  if (tmp != bm::all_bits_mask)
4772  return false;
4773  start += 4;
4774  } while (start < end);
4775  return true;
4776 #endif
4777 }
4778 
4779 // ----------------------------------------------------------------------
4780 
4781 // GAP blocks manipulation functions:
4782 
4783 /*! \brief GAP and functor */
4784 BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2)
4785 {
4786  return v1 & v2;
4787 }
4788 
4789 
4790 /*! \brief GAP xor functor */
4791 BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2)
4792 {
4793  return v1 ^ v2;
4794 }
4795 
4796 
4797 /*! \brief GAP or functor */
4798 BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2)
4799 {
4800  return v1 | v2;
4801 }
4802 
4803 /*! \brief GAP or functor */
4804 BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2)
4805 {
4806  return v1 & ~v2;
4807 }
4808 
4809 
4810 /*!
4811  \brief GAP AND operation.
4812 
4813  Function performs AND logical operation on gap vectors.
4814  If possible function put the result into vect1 and returns this
4815  pointer. Otherwise result is put into tmp_buf, which should be
4816  twice of the vector size.
4817 
4818  \param vect1 - operand 1
4819  \param vect2 - operand 2
4820  \param tmp_buf - pointer on temporary buffer
4821  \param dsize - out size of the destination
4822  \return Result pointer (tmp_buf OR vect1)
4823 
4824  @ingroup gapfunc
4825 */
4828  const gap_word_t* BMRESTRICT vect2,
4829  gap_word_t* BMRESTRICT tmp_buf,
4830  unsigned& dsize)
4831 {
4832  bm::gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::and_op, dsize);
4833  return tmp_buf;
4834 }
4835 
4836 /*!
4837  \brief GAP AND operation test.
4838 
4839  Function performs AND logical operation on gap vectors.
4840  If possible function put the result into vect1 and returns this
4841  pointer. Otherwise result is put into tmp_buf, which should be
4842  twice of the vector size.
4843 
4844  \param vect1 - operand 1
4845  \param vect2 - operand 2
4846  \return non zero value if operation returns any 1 bit
4847 
4848  @ingroup gapfunc
4849 */
4852  const gap_word_t* BMRESTRICT vect2)
4853 {
4854  return gap_buff_any_op(vect1, 0, vect2, 0, and_op);
4855 }
4856 
4857 
4858 /*!
4859  \brief GAP bitcount AND operation test.
4860 
4861  \param vect1 - operand 1
4862  \param vect2 - operand 2
4863  \return bitcount of vect1 AND vect2
4864 
4865  @ingroup gapfunc
4866 */
4868 unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
4869  const gap_word_t* BMRESTRICT vect2)
4870 {
4871  return gap_buff_count_op(vect1, vect2, and_op);
4872 }
4873 
4874 
4875 
4876 /*!
4877  \brief GAP XOR operation.
4878 
4879  Function performs XOR logical operation on gap vectors.
4880  If possible function put the result into vect1 and returns this
4881  pointer. Otherwise result is put into tmp_buf, which should be
4882  twice of the vector size.
4883 
4884  \param vect1 - operand 1
4885  \param vect2 - operand 2
4886  \param tmp_buf - pointer on temporary buffer
4887  \param dsize - out destination size
4888  \return Result pointer (tmp_buf)
4889 
4890  @ingroup gapfunc
4891 */
4894  const gap_word_t* BMRESTRICT vect2,
4895  gap_word_t* BMRESTRICT tmp_buf,
4896  unsigned& dsize)
4897 {
4898  gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::xor_op, dsize);
4899  return tmp_buf;
4900 }
4901 
4902 
4903 /*!
4904  \brief GAP XOR operation test.
4905 
4906  Function performs AND logical operation on gap vectors.
4907  If possible function put the result into vect1 and returns this
4908  pointer. Otherwise result is put into tmp_buf, which should be
4909  twice of the vector size.
4910 
4911  \param vect1 - operand 1
4912  \param vect2 - operand 2
4913  \return non zero value if operation returns any 1 bit
4914 
4915  @ingroup gapfunc
4916 */
4919  const gap_word_t* BMRESTRICT vect2)
4920 {
4921  return gap_buff_any_op(vect1, 0, vect2, 0, bm::xor_op);
4922 }
4923 
4924 /*!
4925  \brief GAP bitcount XOR operation test.
4926 
4927  \param vect1 - operand 1
4928  \param vect2 - operand 2
4929  \return bitcount of vect1 XOR vect2
4930 
4931  @ingroup gapfunc
4932 */
4934 unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
4935  const gap_word_t* BMRESTRICT vect2)
4936 {
4937  return gap_buff_count_op(vect1, vect2, bm::xor_op);
4938 }
4939 
4940 
4941 /*!
4942  \brief GAP OR operation.
4943 
4944  Function performs OR logical oparation on gap vectors.
4945  If possible function put the result into vect1 and returns this
4946  pointer. Otherwise result is put into tmp_buf, which should be
4947  twice of the vector size.
4948 
4949  \param vect1 - operand 1
4950  \param vect2 - operand 2
4951  \param tmp_buf - pointer on temporary buffer
4952  \param dsize - out destination size
4953 
4954  \return Result pointer (tmp_buf)
4955 
4956  @ingroup gapfunc
4957 */
4958 inline
4960  const gap_word_t* BMRESTRICT vect2,
4961  gap_word_t* BMRESTRICT tmp_buf,
4962  unsigned& dsize)
4963 {
4964  gap_buff_op(tmp_buf, vect1, 1, vect2, 1, bm::and_op, dsize);
4965  gap_invert(tmp_buf);
4966  return tmp_buf;
4967 }
4968 
4969 /*!
4970  \brief GAP bitcount OR operation test.
4971 
4972  \param vect1 - operand 1
4973  \param vect2 - operand 2
4974  \return bitcount of vect1 OR vect2
4975 
4976  @ingroup gapfunc
4977 */
4979 unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
4980  const gap_word_t* BMRESTRICT vect2)
4981 {
4982  return gap_buff_count_op(vect1, vect2, bm::or_op);
4983 }
4984 
4985 
4986 
4987 /*!
4988  \brief GAP SUB (AND NOT) operation.
4989 
4990  Function performs SUB logical operation on gap vectors.
4991  If possible function put the result into vect1 and returns this
4992  pointer. Otherwise result is put into tmp_buf, which should be
4993  twice of the vector size.
4994 
4995  \param vect1 - operand 1
4996  \param vect2 - operand 2
4997  \param tmp_buf - pointer on temporary buffer
4998  \param dsize - out destination size
4999 
5000  \return Result pointer (tmp_buf)
5001 
5002  @ingroup gapfunc
5003 */
5005  const gap_word_t* BMRESTRICT vect2,
5006  gap_word_t* BMRESTRICT tmp_buf,
5007  unsigned& dsize)
5008 {
5009  gap_buff_op(tmp_buf, vect1, 0, vect2, 1, and_op, dsize);
5010  return tmp_buf;
5011 }
5012 
5013 
5014 /*!
5015  \brief GAP SUB operation test.
5016 
5017  Function performs AND logical operation on gap vectors.
5018  If possible function put the result into vect1 and returns this
5019  pointer. Otherwise result is put into tmp_buf, which should be
5020  twice of the vector size.
5021 
5022  \param vect1 - operand 1
5023  \param vect2 - operand 2
5024  \return non zero value if operation returns any 1 bit
5025 
5026  @ingroup gapfunc
5027 */
5030  const gap_word_t* BMRESTRICT vect2)
5031 {
5032  return gap_buff_any_op(vect1, 0, vect2, 1, bm::and_op);
5033 }
5034 
5035 
5036 /*!
5037 \brief GAP bitcount SUB (AND NOT) operation test.
5038 
5039 \param vect1 - operand 1
5040 \param vect2 - operand 2
5041 \return bitcount of vect1 SUB (AND NOT) vect2
5042 
5043 @ingroup gapfunc
5044 */
5046 unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
5047  const gap_word_t* BMRESTRICT vect2)
5048 {
5049  return gap_buff_count_op(vect1, vect2, bm::sub_op);
5050 }
5051 
5052 
5053 // ----------------------------------------------------------------------
5054 
5055 // BIT blocks manipulation functions:
5056 
5057 
5058 /*!
5059  \brief Bitblock copy operation.
5060 
5061  \param dst - destination block.
5062  \param src - source block.
5063 
5064  @ingroup bitfunc
5065 */
5066 inline
5068 {
5069 #ifdef BMVECTOPT
5070  VECT_COPY_BLOCK(dst, src);
5071 #else
5072  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
5073 #endif
5074 }
5075 
5076 /*!
5077  \brief Bitblock copy/stream operation.
5078 
5079  \param dst - destination block.
5080  \param src - source block.
5081 
5082  @ingroup bitfunc
5083 */
5084 inline
5086 {
5087 #ifdef VECT_STREAM_BLOCK
5088  VECT_STREAM_BLOCK(dst, src);
5089 #else
5090  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
5091 #endif
5092 }
5093 
5094 
5095 /*!
5096  \brief Plain bitblock AND operation.
5097  Function does not analyse availability of source and destination blocks.
5098 
5099  \param dst - destination block.
5100  \param src - source block.
5101 
5102  \return 0 if AND operation did not produce anything (no 1s in the output)
5103 
5104  @ingroup bitfunc
5105 */
5106 inline
5108 {
5109  BM_ASSERT(dst);
5110  BM_ASSERT(src);
5111  BM_ASSERT(dst != src);
5112 
5113 #ifdef BMVECTOPT
5114  bm::id64_t acc = VECT_AND_BLOCK(dst, src);
5115 #else
5116  unsigned arr_sz = bm::set_block_size / 2;
5117 
5120 
5121  bm::id64_t acc = 0;
5122  for (unsigned i = 0; i < arr_sz; i+=4)
5123  {
5124  acc |= dst_u->w64[i] &= src_u->w64[i];
5125  acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
5126  acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
5127  acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
5128  }
5129 #endif
5130  return acc;
5131 }
5132 
5133 /*!
5134  \brief digest based bit-block AND
5135 
5136  \param dst - destination block.
5137  \param src - source block.
5138  \param digest - known digest of dst block
5139 
5140  \return new digest
5141 
5142  @ingroup bitfunc
5143 */
5144 inline
5146  const bm::word_t* BMRESTRICT src,
5147  bm::id64_t digest)
5148 {
5149  BM_ASSERT(dst);
5150  BM_ASSERT(src);
5151  BM_ASSERT(dst != src);
5152 
5153  const bm::id64_t mask(1ull);
5154  bm::id64_t d = digest;
5155  while (d)
5156  {
5157  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
5158 
5159  unsigned wave = bm::word_bitcount64(t - 1);
5160  unsigned off = wave * bm::set_block_digest_wave_size;
5161 
5162  #if defined(VECT_AND_DIGEST)
5163  bool all_zero = VECT_AND_DIGEST(&dst[off], &src[off]);
5164  if (all_zero)
5165  digest &= ~(mask << wave);
5166  #else
5167  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
5169 
5170  bm::id64_t acc = 0;
5171  unsigned j = 0;
5172  do
5173  {
5174  acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
5175  acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
5176  acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
5177  acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
5178  j+=4;
5179  } while (j < bm::set_block_digest_wave_size/2);
5180 
5181  if (!acc) // all zero
5182  digest &= ~(mask << wave);
5183  #endif
5184 
5185  d = bm::bmi_bslr_u64(d); // d &= d - 1;
5186  } // while
5187 
5188  return digest;
5189 }
5190 
5191 
5192 /*!
5193  \brief digest based bit-block AND 5-way
5194 
5195  \return new digest
5196 
5197  @ingroup bitfunc
5198 */
5199 inline
5201  const bm::word_t* BMRESTRICT src0,
5202  const bm::word_t* BMRESTRICT src1,
5203  const bm::word_t* BMRESTRICT src2,
5204  const bm::word_t* BMRESTRICT src3,
5205  bm::id64_t digest)
5206 {
5207  BM_ASSERT(dst);
5208  BM_ASSERT(src0 && src1 && src2 && src3);
5209 
5210  const bm::id64_t mask(1ull);
5211  bm::id64_t d = digest;
5212  while (d)
5213  {
5214  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
5215 
5216  unsigned wave = bm::word_bitcount64(t - 1);
5217  unsigned off = wave * bm::set_block_digest_wave_size;
5218 
5219 #if defined(VECT_AND_DIGEST_5WAY)
5220  bool all_zero = VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
5221  if (all_zero)
5222  digest &= ~(mask << wave);
5223 #else
5224  const bm::bit_block_t::bunion_t* BMRESTRICT src_u0 = (const bm::bit_block_t::bunion_t*)(&src0[off]);
5225  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 = (const bm::bit_block_t::bunion_t*)(&src1[off]);
5226  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 = (const bm::bit_block_t::bunion_t*)(&src2[off]);
5227  const bm::bit_block_t::bunion_t* BMRESTRICT src_u3 = (const bm::bit_block_t::bunion_t*)(&src3[off]);
5229 
5230  bm::id64_t acc = 0;
5231  unsigned j = 0;
5232  do
5233  {
5234  acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
5235  acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
5236  acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
5237  acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
5238  j += 4;
5239  } while (j < bm::set_block_digest_wave_size / 2);
5240 
5241  if (!acc) // all zero
5242  digest &= ~(mask << wave);
5243 #endif
5244 
5245  d = bm::bmi_bslr_u64(d); // d &= d - 1;
5246  } // while
5247 
5248  return digest;
5249 }
5250 
5251 
5252 /*!
5253  \brief digest based bit-block AND
5254 
5255  dst = src1 AND src2
5256 
5257  \param dst - destination block.
5258  \param src1 - source block.
5259  \param src2 - source block.
5260  \param digest - known initial digest
5261 
5262  \return new digest
5263 
5264  @ingroup bitfunc
5265 */
5266 inline
5268  const bm::word_t* BMRESTRICT src1,
5269  const bm::word_t* BMRESTRICT src2,
5270  bm::id64_t digest)
5271 {
5272  BM_ASSERT(dst);
5273  BM_ASSERT(src1 && src2);
5274  BM_ASSERT(dst != src1 && dst != src2);
5275 
5276  const bm::id64_t mask(1ull);
5277  bm::id64_t d = digest;
5278  while (d)
5279  {
5280  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
5281 
5282  unsigned wave = bm::word_bitcount64(t - 1);
5283  unsigned off = wave * bm::set_block_digest_wave_size;
5284 
5285  #if defined(VECT_AND_DIGEST_2WAY)
5286  bool all_zero = VECT_AND_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
5287  if (all_zero)
5288  digest &= ~(mask << wave);
5289  #else
5290  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
5291  (const bm::bit_block_t::bunion_t*)(&src1[off]);
5292  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
5293  (const bm::bit_block_t::bunion_t*)(&src2[off]);
5295  (bm::bit_block_t::bunion_t*)(&dst[off]);
5296 
5297  bm::id64_t acc = 0;
5298  unsigned j = 0;
5299  do
5300  {
5301  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
5302  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
5303  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
5304  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
5305  j+=4;
5306  } while (j < bm::set_block_digest_wave_size/2);
5307 
5308  if (!acc) // all zero
5309  digest &= ~(mask << wave);
5310  #endif
5311 
5312  d = bm::bmi_bslr_u64(d); // d &= d - 1;
5313  } // while
5314 
5315  return digest;
5316 }
5317 
5318 
5319 
5320 /*!
5321  \brief Function ANDs two bitblocks and computes the bitcount.
5322  Function does not analyse availability of source blocks.
5323 
5324  \param src1 - first bit block
5325  \param src2 - second bit block
5326 
5327  @ingroup bitfunc
5328 */
5329 inline
5331  const bm::word_t* BMRESTRICT src2)
5332 {
5333  unsigned count;
5334  const bm::word_t* src1_end = src1 + bm::set_block_size;
5335 #ifdef BMVECTOPT
5336  count = VECT_BITCOUNT_AND(src1, src1_end, src2);
5337 #else
5338  count = 0;
5339 # ifdef BM64OPT
5340  const bm::id64_t* b1 = (bm::id64_t*) src1;
5341  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5342  const bm::id64_t* b2 = (bm::id64_t*) src2;
5343  do
5344  {
5345  count += bitcount64_4way(b1[0] & b2[0],
5346  b1[1] & b2[1],
5347  b1[2] & b2[2],
5348  b1[3] & b2[3]);
5349  b1 += 4;
5350  b2 += 4;
5351  } while (b1 < b1_end);
5352 # else
5353  do
5354  {
5355  BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]);
5356  BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]);
5357  BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]);
5358  BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]);
5359 
5360  src1+=4;
5361  src2+=4;
5362  } while (src1 < src1_end);
5363 # endif
5364 #endif
5365  return count;
5366 }
5367 
5368 
5369 /*!
5370  \brief Function ANDs two bitblocks and tests for any bit.
5371  Function does not analyse availability of source blocks.
5372 
5373  \param src1 - first bit block
5374  \param src2 - second bit block
5375 
5376  @ingroup bitfunc
5377 */
5378 inline
5379 unsigned bit_block_and_any(const bm::word_t* src1,
5380  const bm::word_t* src2)
5381 {
5382  unsigned count = 0;
5383  const bm::word_t* src1_end = src1 + bm::set_block_size;
5384  do
5385  {
5386  count = (src1[0] & src2[0]) |
5387  (src1[1] & src2[1]) |
5388  (src1[2] & src2[2]) |
5389  (src1[3] & src2[3]);
5390 
5391  src1+=4; src2+=4;
5392  } while ((src1 < src1_end) && !count);
5393  return count;
5394 }
5395 
5396 
5397 
5398 
5399 /*!
5400  \brief Function XORs two bitblocks and computes the bitcount.
5401  Function does not analyse availability of source blocks.
5402 
5403  \param src1 - first bit block
5404  \param src2 - second bit block
5405 
5406  @ingroup bitfunc
5407 */
5408 inline
5410  const bm::word_t* BMRESTRICT src2)
5411 {
5412  unsigned count;
5413  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5414 #ifdef BMVECTOPT
5415  count = VECT_BITCOUNT_XOR(src1, src1_end, src2);
5416 #else
5417  count = 0;
5418 # ifdef BM64OPT
5419  const bm::id64_t* b1 = (bm::id64_t*) src1;
5420  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5421  const bm::id64_t* b2 = (bm::id64_t*) src2;
5422  do
5423  {
5424  count += bitcount64_4way(b1[0] ^ b2[0],
5425  b1[1] ^ b2[1],
5426  b1[2] ^ b2[2],
5427  b1[3] ^ b2[3]);
5428  b1 += 4;
5429  b2 += 4;
5430  } while (b1 < b1_end);
5431 # else
5432  do
5433  {
5434  BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]);
5435  BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]);
5436  BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]);
5437  BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]);
5438 
5439  src1+=4;
5440  src2+=4;
5441  } while (src1 < src1_end);
5442 # endif
5443 #endif
5444  return count;
5445 }
5446 
5447 
5448 /*!
5449  \brief Function XORs two bitblocks and and tests for any bit.
5450  Function does not analyse availability of source blocks.
5451 
5452  \param src1 - first bit block.
5453  \param src2 - second bit block.
5454 
5455  @ingroup bitfunc
5456 */
5457 inline
5459  const bm::word_t* BMRESTRICT src2)
5460 {
5461  unsigned count = 0;
5462  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5463  do
5464  {
5465  count = (src1[0] ^ src2[0]) |
5466  (src1[1] ^ src2[1]) |
5467  (src1[2] ^ src2[2]) |
5468  (src1[3] ^ src2[3]);
5469 
5470  src1+=4; src2+=4;
5471  } while (!count && (src1 < src1_end));
5472  return count;
5473 }
5474 
5475 /*!
5476  \brief Function SUBs two bitblocks and computes the bitcount.
5477  Function does not analyse availability of source blocks.
5478 
5479  \param src1 - first bit block.
5480  \param src2 - second bit block.
5481 
5482  @ingroup bitfunc
5483 */
5484 inline
5486  const bm::word_t* BMRESTRICT src2)
5487 {
5488  unsigned count;
5489  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5490 #ifdef BMVECTOPT
5491  count = VECT_BITCOUNT_SUB(src1, src1_end, src2);
5492 #else
5493  count = 0;
5494 # ifdef BM64OPT
5495  const bm::id64_t* b1 = (bm::id64_t*) src1;
5496  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5497  const bm::id64_t* b2 = (bm::id64_t*) src2;
5498  do
5499  {
5500  count += bitcount64_4way(b1[0] & ~b2[0],
5501  b1[1] & ~b2[1],
5502  b1[2] & ~b2[2],
5503  b1[3] & ~b2[3]);
5504  b1 += 4;
5505  b2 += 4;
5506  } while (b1 < b1_end);
5507 # else
5508  do
5509  {
5510  BM_INCWORD_BITCOUNT(count, src1[0] & ~src2[0]);
5511  BM_INCWORD_BITCOUNT(count, src1[1] & ~src2[1]);
5512  BM_INCWORD_BITCOUNT(count, src1[2] & ~src2[2]);
5513  BM_INCWORD_BITCOUNT(count, src1[3] & ~src2[3]);
5514 
5515  src1+=4;
5516  src2+=4;
5517  } while (src1 < src1_end);
5518 # endif
5519 #endif
5520  return count;
5521 }
5522 
5523 /*!
5524  \brief Function SUBs two bitblocks and and tests for any bit.
5525  Function does not analyse availability of source blocks.
5526 
5527  \param src1 - first bit block.
5528  \param src2 - second bit block.
5529 
5530  @ingroup bitfunc
5531 */
5532 inline
5534  const bm::word_t* BMRESTRICT src2)
5535 {
5536  unsigned count = 0;
5537  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5538 
5539  do
5540  {
5541  count = (src1[0] & ~src2[0]) |
5542  (src1[1] & ~src2[1]) |
5543  (src1[2] & ~src2[2]) |
5544  (src1[3] & ~src2[3]);
5545 
5546  src1+=4; src2+=4;
5547  } while ((src1 < src1_end) && (count == 0));
5548  return count;
5549 }
5550 
5551 
5552 /*!
5553  \brief Function ORs two bitblocks and computes the bitcount.
5554  Function does not analyse availability of source blocks.
5555 
5556  \param src1 - first bit block
5557  \param src2 - second bit block.
5558 
5559  @ingroup bitfunc
5560 */
5561 inline
5562 unsigned bit_block_or_count(const bm::word_t* src1,
5563  const bm::word_t* src2)
5564 {
5565  unsigned count;
5566  const bm::word_t* src1_end = src1 + bm::set_block_size;
5567 #ifdef BMVECTOPT
5568  count = VECT_BITCOUNT_OR(src1, src1_end, src2);
5569 #else
5570  count = 0;
5571 # ifdef BM64OPT
5572  const bm::id64_t* b1 = (bm::id64_t*) src1;
5573  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5574  const bm::id64_t* b2 = (bm::id64_t*) src2;
5575  do
5576  {
5577  count += bitcount64_4way(b1[0] | b2[0],
5578  b1[1] | b2[1],
5579  b1[2] | b2[2],
5580  b1[3] | b2[3]);
5581  b1 += 4;
5582  b2 += 4;
5583  } while (b1 < b1_end);
5584 # else
5585  do
5586  {
5587  BM_INCWORD_BITCOUNT(count, src1[0] | src2[0]);
5588  BM_INCWORD_BITCOUNT(count, src1[1] | src2[1]);
5589  BM_INCWORD_BITCOUNT(count, src1[2] | src2[2]);
5590  BM_INCWORD_BITCOUNT(count, src1[3] | src2[3]);
5591 
5592  src1+=4;
5593  src2+=4;
5594  } while (src1 < src1_end);
5595 # endif
5596 #endif
5597  return count;
5598 }
5599 
5600 /*!
5601  \brief Function ORs two bitblocks and and tests for any bit.
5602  Function does not analyse availability of source blocks.
5603 
5604  \param src1 - first bit block.
5605  \param src2 - second bit block.
5606 
5607  @ingroup bitfunc
5608 */
5609 inline
5611  const bm::word_t* BMRESTRICT src2)
5612 {
5613  unsigned count = 0;
5614  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5615  do
5616  {
5617  count = (src1[0] | src2[0]) |
5618  (src1[1] | src2[1]) |
5619  (src1[2] | src2[2]) |
5620  (src1[3] | src2[3]);
5621 
5622  src1+=4; src2+=4;
5623  } while (!count && (src1 < src1_end));
5624  return count;
5625 }
5626 
5627 
5628 
5629 
5630 /*!
5631  \brief bitblock AND operation.
5632 
5633  \param dst - destination block.
5634  \param src - source block.
5635 
5636  \returns pointer on destination block.
5637  If returned value equal to src means that block mutation requested.
5638  NULL is valid return value.
5639 
5640  @ingroup bitfunc
5641 */
5643  const bm::word_t* BMRESTRICT src)
5644 {
5645  BM_ASSERT(dst || src);
5646 
5647  bm::word_t* ret = dst;
5648 
5649  if (IS_VALID_ADDR(dst)) // The destination block already exists
5650  {
5651  if (!IS_VALID_ADDR(src))
5652  {
5653  if (IS_EMPTY_BLOCK(src))
5654  {
5655  //If the source block is zero
5656  //just clean the destination block
5657  return 0;
5658  }
5659  }
5660  else
5661  {
5662  // Regular operation AND on the whole block.
5663  //
5664  auto any = bm::bit_block_and(dst, src);
5665  if (!any)
5666  ret = 0;
5667  }
5668  }
5669  else // The destination block does not exist yet
5670  {
5671  if(!IS_VALID_ADDR(src))
5672  {
5673  if(IS_EMPTY_BLOCK(src))
5674  {
5675  // The source block is empty.
5676  // One argument empty - all result is empty.
5677  return 0;
5678  }
5679  // Here we have nothing to do.
5680  // Src block is all ON, dst block remains as it is
5681  }
5682  else // destination block does not exists, src - valid block
5683  {
5684  if (IS_FULL_BLOCK(dst))
5685  return const_cast<bm::word_t*>(src);
5686  // Nothng to do.
5687  // Dst block is all ZERO no combination required.
5688  }
5689  }
5690 
5691  return ret;
5692 }
5693 
5694 
5695 /*!
5696  \brief Performs bitblock AND operation and calculates bitcount of the result.
5697 
5698  \param src1 - first bit block.
5699  \param src2 - second bit block.
5700 
5701  \returns bitcount value
5702 
5703  @ingroup bitfunc
5704 */
5705 inline
5707  const bm::word_t* BMRESTRICT src2)
5708 {
5709  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
5710  return 0;
5711  if (src1 == FULL_BLOCK_FAKE_ADDR)
5712  src1 = FULL_BLOCK_REAL_ADDR;
5713  if (src2 == FULL_BLOCK_FAKE_ADDR)
5714  src2 = FULL_BLOCK_REAL_ADDR;
5715 
5716  return bit_block_and_count(src1, src2);
5717 }
5718 
5719 /*!
5720  \brief Performs bitblock AND operation test.
5721 
5722  \param src1 - first bit block.
5723  \param src2 - second bit block.
5724 
5725  \returns non zero if there is any value
5726 
5727  @ingroup bitfunc
5728 */
5729 inline
5731  const bm::word_t* BMRESTRICT src2)
5732 {
5733  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
5734  return 0;
5735  if (src1 == FULL_BLOCK_FAKE_ADDR)
5736  src1 = FULL_BLOCK_REAL_ADDR;
5737  if (src2 == FULL_BLOCK_FAKE_ADDR)
5738  src2 = FULL_BLOCK_REAL_ADDR;
5739  return bit_block_and_any(src1, src2);
5740 }
5741 
5742 
5743 
5744 /*!
5745  \brief Performs bitblock SUB operation and calculates bitcount of the result.
5746 
5747  \param src1 - first bit block.
5748  \param src2 - second bit block
5749 
5750  \returns bitcount value
5751 
5752  @ingroup bitfunc
5753 */
5754 inline
5756  const bm::word_t* BMRESTRICT src2)
5757 {
5758  if (src1 == src2)
5759  return 0;
5760  if (IS_EMPTY_BLOCK(src1))
5761  return 0;
5762  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
5763  {
5764  if (IS_FULL_BLOCK(src1))
5765  return bm::gap_max_bits;
5766  return bm::bit_block_count(src1);
5767  }
5768  if (IS_FULL_BLOCK(src2))
5769  return 0;
5770 
5771  if (src1 == FULL_BLOCK_FAKE_ADDR)
5772  src1 = FULL_BLOCK_REAL_ADDR;
5773  if (src2 == FULL_BLOCK_FAKE_ADDR)
5774  src2 = FULL_BLOCK_REAL_ADDR;
5775 
5776  return bm::bit_block_sub_count(src1, src2);
5777 }
5778 
5779 
5780 /*!
5781  \brief Performs inverted bitblock SUB operation and calculates
5782  bitcount of the result.
5783 
5784  \param src1 - first bit block.
5785  \param src2 - second bit block
5786 
5787  \returns bitcount value
5788 
5789  @ingroup bitfunc
5790 */
5791 inline
5793  const bm::word_t* BMRESTRICT src2)
5794 {
5795  return bit_operation_sub_count(src2, src1);
5796 }
5797 
5798 
5799 /*!
5800  \brief Performs bitblock test of SUB operation.
5801 
5802  \param src1 - first bit block.
5803  \param src2 - second bit block
5804 
5805  \returns non zero value if there are any bits
5806 
5807  @ingroup bitfunc
5808 */
5809 inline
5811  const bm::word_t* BMRESTRICT src2)
5812 {
5813  if (IS_EMPTY_BLOCK(src1))
5814  return 0;
5815  if (src1 == src2)
5816  return 0;
5817 
5818  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
5819  return !bit_is_all_zero(src1);
5820 
5821  if (IS_FULL_BLOCK(src2))
5822  return 0;
5823 
5824  if (src1 == FULL_BLOCK_FAKE_ADDR)
5825  src1 = FULL_BLOCK_REAL_ADDR;
5826  if (src2 == FULL_BLOCK_FAKE_ADDR)
5827  src2 = FULL_BLOCK_REAL_ADDR;
5828 
5829  return bm::bit_block_sub_any(src1, src2);
5830 }
5831 
5832 
5833 
5834 /*!
5835  \brief Performs bitblock OR operation and calculates bitcount of the result.
5836 
5837  \param src1 - first bit block.
5838  \param src2 - second bit block.
5839 
5840  \returns bitcount value
5841 
5842  @ingroup bitfunc
5843 */
5844 inline
5846  const bm::word_t* BMRESTRICT src2)
5847 {
5848  if (IS_FULL_BLOCK(src1) || IS_FULL_BLOCK(src2))
5849  return bm::gap_max_bits;
5850 
5851  if (IS_EMPTY_BLOCK(src1))
5852  {
5853  if (!IS_EMPTY_BLOCK(src2))
5854  return bm::bit_block_count(src2);
5855  else
5856  return 0; // both blocks are empty
5857  }
5858  else
5859  {
5860  if (IS_EMPTY_BLOCK(src2))
5861  return bm::bit_block_count(src1);
5862  }
5863  if (src1 == FULL_BLOCK_FAKE_ADDR)
5864  src1 = FULL_BLOCK_REAL_ADDR;
5865  if (src2 == FULL_BLOCK_FAKE_ADDR)
5866  src2 = FULL_BLOCK_REAL_ADDR;
5867 
5868  return bm::bit_block_or_count(src1, src2);
5869 }
5870 
5871 /*!
5872  \brief Performs bitblock OR operation test.
5873 
5874  \param src1 - first bit block.
5875  \param src2 - second bit block.
5876 
5877  \returns non zero value if there are any bits
5878 
5879  @ingroup bitfunc
5880 */
5881 inline
5883  const bm::word_t* BMRESTRICT src2)
5884 {
5885  if (IS_EMPTY_BLOCK(src1))
5886  {
5887  if (!IS_EMPTY_BLOCK(src2))
5888  return !bit_is_all_zero(src2);
5889  else
5890  return 0; // both blocks are empty
5891  }
5892  else
5893  {
5894  if (IS_EMPTY_BLOCK(src2))
5895  return !bit_is_all_zero(src1);
5896  }
5897  if (src1 == FULL_BLOCK_FAKE_ADDR)
5898  src1 = FULL_BLOCK_REAL_ADDR;
5899  if (src2 == FULL_BLOCK_FAKE_ADDR)
5900  src2 = FULL_BLOCK_REAL_ADDR;
5901 
5902  return bit_block_or_any(src1, src2);
5903 }
5904 
5905 /*!
5906  \brief Plain bitblock OR operation.
5907  Function does not analyse availability of source and destination blocks.
5908 
5909  \param dst - destination block.
5910  \param src - source block.
5911 
5912  @ingroup bitfunc
5913 */
5914 inline
5916  const bm::word_t* BMRESTRICT src)
5917 {
5918 #ifdef BMVECTOPT
5919  return VECT_OR_BLOCK(dst, src);
5920 #else
5921  const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*)src;
5922  const bm::wordop_t* BMRESTRICT wrd_end = (wordop_t*)(src + bm::set_block_size);
5923  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5924 
5925  bm::wordop_t acc = 0;
5926  const bm::wordop_t not_acc = acc = ~acc;
5927 
5928  do
5929  {
5930  acc &= (dst_ptr[0] |= wrd_ptr[0]);
5931  acc &= (dst_ptr[1] |= wrd_ptr[1]);
5932  acc &= (dst_ptr[2] |= wrd_ptr[2]);
5933  acc &= (dst_ptr[3] |= wrd_ptr[3]);
5934 
5935  dst_ptr+=4;wrd_ptr+=4;
5936 
5937  } while (wrd_ptr < wrd_end);
5938  return acc == not_acc;
5939 #endif
5940 }
5941 
5942 /*!
5943  \brief 2 way (target := source1 | source2) bitblock OR operation.
5944  \param dst - dest block [out]
5945  \param src1 - source 1
5946  \param src2 - source 2
5947 
5948  @return 1 if produced block of ALL ones
5949  @ingroup bitfunc
5950 */
5951 inline
5953  const bm::word_t* BMRESTRICT src1,
5954  const bm::word_t* BMRESTRICT src2)
5955 {
5956 #ifdef BMVECTOPT
5957  return VECT_OR_BLOCK_2WAY(dst, src1, src2);
5958 #else
5959  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
5960  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
5961  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
5962  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5963 
5964  bm::wordop_t acc = 0;
5965  const bm::wordop_t not_acc = acc = ~acc;
5966  do
5967  {
5968  acc &= (dst_ptr[0] = wrd_ptr1[0] | wrd_ptr2[0]);
5969  acc &= (dst_ptr[1] = wrd_ptr1[1] | wrd_ptr2[1]);
5970  acc &= (dst_ptr[2] = wrd_ptr1[2] | wrd_ptr2[2]);
5971  acc &= (dst_ptr[3] = wrd_ptr1[3] | wrd_ptr2[3]);
5972 
5973  dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
5974 
5975  } while (wrd_ptr1 < wrd_end1);
5976  return acc == not_acc;
5977 #endif
5978 }
5979 
5980 
5981 /*!
5982  \brief 2 way (target := source1 ^ source2) bitblock XOR operation.
5983  \param dst - dest block [out]
5984  \param src1 - source 1
5985  \param src2 - source 2
5986 
5987  @return OR accumulator
5988  @ingroup bitfunc
5989 */
5990 inline
5992  const bm::word_t* BMRESTRICT src1,
5993  const bm::word_t* BMRESTRICT src2)
5994 {
5995 #ifdef BMVECTOPT
5996  return VECT_XOR_BLOCK_2WAY(dst, src1, src2);
5997 #else
5998  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
5999  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
6000  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
6001  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
6002 
6003  bm::wordop_t acc = 0;
6004  do
6005  {
6006  acc |= (dst_ptr[0] = wrd_ptr1[0] ^ wrd_ptr2[0]);
6007  acc |= (dst_ptr[1] = wrd_ptr1[1] ^ wrd_ptr2[1]);
6008  acc |= (dst_ptr[2] = wrd_ptr1[2] ^ wrd_ptr2[2]);
6009  acc |= (dst_ptr[3] = wrd_ptr1[3] ^ wrd_ptr2[3]);
6010 
6011  dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
6012 
6013  } while (wrd_ptr1 < wrd_end1);
6014  return acc;
6015 #endif
6016 }
6017 
6018 
6019 /*!
6020  \brief 3 way (target | source1 | source2) bitblock OR operation.
6021  Function does not analyse availability of source and destination blocks.
6022 
6023  \param dst - sst-dest block [in,out]
6024  \param src1 - source 1
6025  \param src2 - source 2
6026 
6027  @return 1 if produced block of ALL ones
6028 
6029  @ingroup bitfunc
6030 */
6031 inline
6033  const bm::word_t* BMRESTRICT src1,
6034  const bm::word_t* BMRESTRICT src2)
6035 {
6036 #ifdef BMVECTOPT
6037  return VECT_OR_BLOCK_3WAY(dst, src1, src2);
6038 #else
6039  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
6040  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
6041  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
6042  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
6043 
6044  bm::wordop_t acc = 0;
6045  const bm::wordop_t not_acc = acc = ~acc;
6046  do
6047  {
6048  acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0]);
6049  acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1]);
6050  acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2]);
6051  acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3]);
6052 
6053  dst_ptr+=4; wrd_ptr1+=4;wrd_ptr2+=4;
6054 
6055  } while (wrd_ptr1 < wrd_end1);
6056  return acc == not_acc;
6057 #endif
6058 }
6059 
6060 
6061 /*!
6062  \brief 5 way (target, source1, source2) bitblock OR operation.
6063  Function does not analyse availability of source and destination blocks.
6064 
6065  \param dst - destination block.
6066  \param src1 - source1, etc
6067 
6068  @return 1 if produced block of ALL ones
6069 
6070  @ingroup bitfunc
6071 */
6072 inline
6074  const bm::word_t* BMRESTRICT src1,
6075  const bm::word_t* BMRESTRICT src2,
6076  const bm::word_t* BMRESTRICT src3,
6077  const bm::word_t* BMRESTRICT src4)
6078 {
6079 #ifdef BMVECTOPT
6080  return VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4);
6081 #else
6082  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
6083  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
6084  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
6085  const bm::wordop_t* BMRESTRICT wrd_ptr3 = (wordop_t*)src3;
6086  const bm::wordop_t* BMRESTRICT wrd_ptr4 = (wordop_t*)src4;
6087  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
6088 
6089  bm::wordop_t acc = 0;
6090  const bm::wordop_t not_acc = acc = ~acc;
6091  do
6092  {
6093  acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0] | wrd_ptr3[0] | wrd_ptr4[0]);
6094  acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1] | wrd_ptr3[1] | wrd_ptr4[1]);
6095  acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2] | wrd_ptr3[2] | wrd_ptr4[2]);
6096  acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3] | wrd_ptr3[3] | wrd_ptr4[3]);
6097 
6098  dst_ptr+=4;
6099  wrd_ptr1+=4;wrd_ptr2+=4;wrd_ptr3+=4;wrd_ptr4+=4;
6100 
6101  } while (wrd_ptr1 < wrd_end1);
6102  return acc == not_acc;
6103 #endif
6104 }
6105 
6106 
6107 
6108 
6109 /*!
6110  \brief Block OR operation. Makes analysis if block is 0 or FULL.
6111 
6112  \param dst - destination block.
6113  \param src - source block.
6114 
6115  \returns pointer on destination block.
6116  If returned value equal to src means that block mutation requested.
6117  NULL is valid return value.
6118 
6119  @ingroup bitfunc
6120 */
6121 inline
6123  const bm::word_t* BMRESTRICT src)
6124 {
6125  BM_ASSERT(dst || src);
6126 
6127  bm::word_t* ret = dst;
6128 
6129  if (IS_VALID_ADDR(dst)) // The destination block already exists
6130  {
6131  if (!IS_VALID_ADDR(src))
6132  {
6133  if (IS_FULL_BLOCK(src))
6134  {
6135  // if the source block is all set
6136  // just set the destination block
6137  ::memset(dst, 0xFF, bm::set_block_size * sizeof(bm::word_t));
6138  }
6139  }
6140  else
6141  {
6142  // Regular operation OR on the whole block
6143  bm::bit_block_or(dst, src);
6144  }
6145  }
6146  else // The destination block does not exist yet
6147  {
6148  if (!IS_VALID_ADDR(src))
6149  {
6150  if (IS_FULL_BLOCK(src))
6151  {
6152  // The source block is all set, because dst does not exist
6153  // we can simply replace it.
6154  return const_cast<bm::word_t*>(FULL_BLOCK_FAKE_ADDR);
6155  }
6156  }
6157  else
6158  {
6159  if (dst == 0)
6160  {
6161  // The only case when we have to allocate the new block:
6162  // Src is all zero and Dst does not exist
6163  return const_cast<bm::word_t*>(src);
6164  }
6165  }
6166  }
6167  return ret;
6168 }
6169 
6170 /*!
6171  \brief Plain bitblock SUB (AND NOT) operation.
6172  Function does not analyse availability of source and destination blocks.
6173 
6174  \param dst - destination block.
6175  \param src - source block.
6176 
6177  \return 0 if SUB operation did not produce anything (no 1s in the output)
6178 
6179  @ingroup bitfunc
6180 */
6181 inline
6183  const bm::word_t* BMRESTRICT src)
6184 {
6185 #ifdef BMVECTOPT
6186  bm::id64_t acc = VECT_SUB_BLOCK(dst, src);
6187  return acc;
6188 #else
6189  unsigned arr_sz = bm::set_block_size / 2;
6190 
6193 
6194  bm::id64_t acc = 0;
6195  for (unsigned i = 0; i < arr_sz; i+=4)
6196  {
6197  acc |= dst_u->w64[i] &= ~src_u->w64[i];
6198  acc |= dst_u->w64[i+1] &= ~src_u->w64[i+1];
6199  acc |= dst_u->w64[i+2] &= ~src_u->w64[i+2];
6200  acc |= dst_u->w64[i+3] &= ~src_u->w64[i+3];
6201  }
6202  return acc;
6203 #endif
6204 }
6205 
6206 /*!
6207  \brief digest based bitblock SUB (AND NOT) operation
6208 
6209  \param dst - destination block.
6210  \param src - source block.
6211  \param digest - known digest of dst block
6212 
6213  \return new digest
6214 
6215  @ingroup bitfunc
6216 */
6217 inline
6219  const bm::word_t* BMRESTRICT src,
6220  bm::id64_t digest)
6221 {
6222  BM_ASSERT(dst);
6223  BM_ASSERT(src);
6224  BM_ASSERT(dst != src);
6225 
6226  const bm::id64_t mask(1ull);
6227 
6228  bm::id64_t d = digest;
6229  while (d)
6230  {
6231  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6232 
6233  unsigned wave = bm::word_bitcount64(t - 1);
6234  unsigned off = wave * bm::set_block_digest_wave_size;
6235 
6236  #if defined(VECT_SUB_DIGEST)
6237  bool all_zero = VECT_SUB_DIGEST(&dst[off], &src[off]);
6238  if (all_zero)
6239  digest &= ~(mask << wave);
6240  #else
6241  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
6243 
6244  bm::id64_t acc = 0;
6245  unsigned j = 0;
6246  do
6247  {
6248  acc |= dst_u->w64[j+0] &= ~src_u->w64[j+0];
6249  acc |= dst_u->w64[j+1] &= ~src_u->w64[j+1];
6250  acc |= dst_u->w64[j+2] &= ~src_u->w64[j+2];
6251  acc |= dst_u->w64[j+3] &= ~src_u->w64[j+3];
6252  j+=4;
6253  } while (j < bm::set_block_digest_wave_size/2);
6254 
6255  if (!acc) // all zero
6256  digest &= ~(mask << wave);
6257  #endif
6258 
6259  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6260  } // while
6261 
6262  return digest;
6263 }
6264 
6265 /*!
6266  \brief digest based bitblock SUB (AND NOT) operation (3 operand)
6267 
6268  \param dst - destination block.
6269  \param src1 - source block.
6270  \param src1 - source block.
6271  \param digest - known digest of dst block
6272 
6273  \return new digest
6274 
6275  @ingroup bitfunc
6276 */
6277 inline
6279  const bm::word_t* BMRESTRICT src1,
6280  const bm::word_t* BMRESTRICT src2,
6281  bm::id64_t digest)
6282 {
6283  BM_ASSERT(dst);
6284  BM_ASSERT(src1 && src2);
6285  BM_ASSERT(dst != src1 && dst != src2);
6286 
6287  const bm::id64_t mask(1ull);
6288 
6289  bm::id64_t d = digest;
6290  while (d)
6291  {
6292  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6293 
6294  unsigned wave = bm::word_bitcount64(t - 1);
6295  unsigned off = wave * bm::set_block_digest_wave_size;
6296 
6297  #if defined(VECT_SUB_DIGEST_2WAY)
6298  bool all_zero = VECT_SUB_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
6299  if (all_zero)
6300  digest &= ~(mask << wave);
6301  #else
6302  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
6303  (const bm::bit_block_t::bunion_t*)(&src1[off]);
6304  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
6305  (const bm::bit_block_t::bunion_t*)(&src2[off]);
6307  (bm::bit_block_t::bunion_t*)(&dst[off]);
6308 
6309  bm::id64_t acc = 0;
6310  unsigned j = 0;
6311  do
6312  {
6313  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & ~src_u2->w64[j+0];
6314  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & ~src_u2->w64[j+1];
6315  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & ~src_u2->w64[j+2];
6316  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & ~src_u2->w64[j+3];
6317  j+=4;
6318  } while (j < bm::set_block_digest_wave_size/2);
6319 
6320  if (!acc) // all zero
6321  digest &= ~(mask << wave);
6322  #endif
6323 
6324  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6325  } // while
6326 
6327  return digest;
6328 }
6329 
6330 
6331 
6332 /*!
6333  \brief bitblock SUB operation.
6334 
6335  \param dst - destination block.
6336  \param src - source block.
6337 
6338  \returns pointer on destination block.
6339  If returned value equal to src means that block mutation requested.
6340  NULL is valid return value.
6341 
6342  @ingroup bitfunc
6343 */
6344 inline
6346  const bm::word_t* BMRESTRICT src)
6347 {
6348  BM_ASSERT(dst || src);
6349 
6350  bm::word_t* ret = dst;
6351  if (IS_VALID_ADDR(dst)) // The destination block already exists
6352  {
6353  if (!IS_VALID_ADDR(src))
6354  {
6355  if (IS_FULL_BLOCK(src))
6356  {
6357  // If the source block is all set
6358  // just clean the destination block
6359  return 0;
6360  }
6361  }
6362  else
6363  {
6364  auto any = bm::bit_block_sub(dst, src);
6365  if (!any)
6366  ret = 0;
6367  }
6368  }
6369  else // The destination block does not exist yet
6370  {
6371  if (!IS_VALID_ADDR(src))
6372  {
6373  if (IS_FULL_BLOCK(src))
6374  {
6375  // The source block is full
6376  return 0;
6377  }
6378  }
6379  else
6380  {
6381  if (IS_FULL_BLOCK(dst))
6382  {
6383  // The only case when we have to allocate the new block:
6384  // dst is all set and src exists
6385  return const_cast<bm::word_t*>(src);
6386  }
6387  }
6388  }
6389  return ret;
6390 }
6391 
6392 
6393 /*!
6394  \brief Plain bitblock XOR operation.
6395  Function does not analyse availability of source and destination blocks.
6396 
6397  \param dst - destination block.
6398  \param src - source block.
6399 
6400  @ingroup bitfunc
6401 */
6402 inline
6404  const bm::word_t* BMRESTRICT src)
6405 {
6406  BM_ASSERT(dst);
6407  BM_ASSERT(src);
6408  BM_ASSERT(dst != src);
6409 
6410 #ifdef BMVECTOPT
6411  bm::id64_t acc = VECT_XOR_BLOCK(dst, src);
6412 #else
6413  unsigned arr_sz = bm::set_block_size / 2;
6414 
6417 
6418  bm::id64_t acc = 0;
6419  for (unsigned i = 0; i < arr_sz; i+=4)
6420  {
6421  acc |= dst_u->w64[i] ^= src_u->w64[i];
6422  acc |= dst_u->w64[i+1] ^= src_u->w64[i+1];
6423  acc |= dst_u->w64[i+2] ^= src_u->w64[i+2];
6424  acc |= dst_u->w64[i+3] ^= src_u->w64[i+3];
6425  }
6426 #endif
6427  return acc;
6428 }
6429 
6430 /*!
6431  \brief bitblock AND NOT with constant ~0 mask operation.
6432 
6433  \param dst - destination block.
6434  \param src - source block.
6435 
6436  @ingroup bitfunc
6437 */
6438 inline
6440  const bm::word_t* BMRESTRICT src)
6441 {
6442  const bm::word_t* BMRESTRICT src_end = src + bm::set_block_size;
6443 #ifdef BMVECTOPT
6444  VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, ~0u);
6445 #else
6446  bm::wordop_t* dst_ptr = (wordop_t*)dst;
6447  const bm::wordop_t* wrd_ptr = (wordop_t*) src;
6448  const bm::wordop_t* wrd_end = (wordop_t*) src_end;
6449 
6450  do
6451  {
6452  dst_ptr[0] = bm::all_bits_mask & ~wrd_ptr[0];
6453  dst_ptr[1] = bm::all_bits_mask & ~wrd_ptr[1];
6454  dst_ptr[2] = bm::all_bits_mask & ~wrd_ptr[2];
6455  dst_ptr[3] = bm::all_bits_mask & ~wrd_ptr[3];
6456  dst_ptr+=4; wrd_ptr+=4;
6457  } while (wrd_ptr < wrd_end);
6458 #endif
6459 }
6460 
6461 /*!
6462  \brief bitblock XOR operation.
6463 
6464  \param dst - destination block.
6465  \param src - source block.
6466 
6467  \returns pointer on destination block.
6468  If returned value equal to src means that block mutation requested.
6469  NULL is valid return value.
6470 
6471  @ingroup bitfunc
6472 */
6473 inline
6475  const bm::word_t* BMRESTRICT src)
6476 {
6477  BM_ASSERT(dst || src);
6478  if (src == dst) return 0; // XOR rule
6479 
6480  bm::word_t* ret = dst;
6481 
6482  if (IS_VALID_ADDR(dst)) // The destination block already exists
6483  {
6484  if (!src) return dst;
6485 
6486  bit_block_xor(dst, src);
6487  }
6488  else // The destination block does not exist yet
6489  {
6490  if (!src) return dst; // 1 xor 0 = 1
6491 
6492  // Here we have two cases:
6493  // if dest block is full or zero if zero we need to copy the source
6494  // otherwise XOR loop against 0xFF...
6495  //BM_ASSERT(dst == 0);
6496  return const_cast<bm::word_t*>(src); // src is the final result
6497  }
6498  return ret;
6499 }
6500 
6501 /*!
6502  \brief Performs bitblock XOR operation and calculates bitcount of the result.
6503 
6504  \param src1 - bit block start ptr
6505  \param src1_end - bit block end ptr
6506  \param src2 - second bit block
6507 
6508  \returns bitcount value
6509 
6510  @ingroup bitfunc
6511 */
6512 inline
6514  const bm::word_t* BMRESTRICT src2)
6515 {
6516  if (src1 == src2)
6517  return 0;
6518 
6519  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6520  {
6521  const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1;
6522  if (IS_FULL_BLOCK(block))
6523  return bm::gap_max_bits;
6524  return bm::bit_block_count(block);
6525  }
6526  if (src1 == FULL_BLOCK_FAKE_ADDR)
6527  src1 = FULL_BLOCK_REAL_ADDR;
6528  if (src2 == FULL_BLOCK_FAKE_ADDR)
6529  src2 = FULL_BLOCK_REAL_ADDR;
6530 
6531  return bit_block_xor_count(src1, src2);
6532 }
6533 
6534 /*!
6535  \brief Performs bitblock XOR operation test.
6536 
6537  \param src1 - bit block start ptr
6538  \param src2 - second bit block ptr
6539 
6540  \returns non zero value if there are bits
6541 
6542  @ingroup bitfunc
6543 */
6544 inline
6546  const bm::word_t* BMRESTRICT src2)
6547 {
6548  if (src1 == src2)
6549  return 0;
6550  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6551  {
6552  const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1;
6553  return !bit_is_all_zero(block);
6554  }
6555  return bm::bit_block_xor_any(src1, src2);
6556 }
6557 
6558 
6559 
6560 /**
6561  \brief Inspects block for full zero words
6562 
6563  \param blk - bit block pointer
6564  \param data_size - data size
6565 
6566  \return size of all non-zero words
6567 
6568  @ingroup bitfunc
6569 */
6570 template<class T>
6571 unsigned bit_count_nonzero_size(const T* blk, unsigned data_size)
6572 {
6573  BM_ASSERT(blk && data_size);
6574  unsigned count = 0;
6575  const T* blk_end = blk + data_size - 2;
6576  do
6577  {
6578  if (*blk == 0)
6579  {
6580  // scan fwd to find 0 island length
6581  const T* blk_j = blk + 1;
6582  for (; blk_j < blk_end; ++blk_j)
6583  {
6584  if (*blk_j != 0)
6585  break;
6586  }
6587  blk = blk_j-1;
6588  count += (unsigned)sizeof(gap_word_t);
6589  }
6590  else
6591  {
6592  // scan fwd to find non-0 island length
6593  const T* blk_j = blk + 1;
6594  for ( ; blk_j < blk_end; ++blk_j)
6595  {
6596  if (*blk_j == 0)
6597  {
6598  // look ahead to identify and ignore short 0-run
6599  if (blk_j[1] | blk_j[2])
6600  {
6601  // skip zero word
6602  ++blk_j;
6603  continue;
6604  }
6605  break;
6606  }
6607  }
6608  count += unsigned(sizeof(gap_word_t));
6609  // count all bit-words now
6610  count += unsigned(blk_j - blk) * unsigned(sizeof(T));
6611  blk = blk_j;
6612  }
6613  ++blk;
6614  }
6615  while(blk < blk_end);
6616  return count + unsigned(2 * sizeof(T));
6617 }
6618 
6619 
6620 /**
6621  \brief Searches for the next 1 bit in the BIT block
6622  \param block - BIT buffer
6623  \param nbit - bit index to start checking from
6624  \param pos - [out] found value
6625 
6626  \return 0 if not found
6627 
6628  @ingroup bitfunc
6629 */
6630 inline
6631 unsigned bit_block_find(const bm::word_t* block, unsigned nbit, unsigned* pos)
6632 {
6633  BM_ASSERT(block);
6634  BM_ASSERT(pos);
6635 
6636  unsigned nword = unsigned(nbit >> bm::set_word_shift);
6637  unsigned bit_pos = (nbit & bm::set_word_mask);
6638 
6639  bm::word_t w = block[nword];
6640  w &= (1u << bit_pos);
6641  if (w)
6642  {
6643  *pos = nbit;
6644  return 1;
6645  }
6646  w = block[nword] >> bit_pos;
6647  w <<= bit_pos; // clear the trailing bits
6648  if (w)
6649  {
6650  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
6651  *pos = unsigned(bit_pos + (nword * 8u * sizeof(bm::word_t)));
6652  return 1;
6653  }
6654 
6655  for (unsigned i = nword+1; i < bm::set_block_size; ++i)
6656  {
6657  w = block[i];
6658  if (w)
6659  {
6660  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
6661  *pos = unsigned(bit_pos + (i * 8u * sizeof(bm::word_t)));
6662  return w;
6663  }
6664  } // for i
6665  return 0u;
6666 }
6667 
6668 
6669 /*!
6670  \brief BIT block find the last set bit (backward search)
6671 
6672  \param block - bit block buffer pointer
6673  \param last - index of the last 1 bit (out)
6674  \return 0 is not found
6675 
6676  @ingroup bitfunc
6677 */
6678 inline
6679 unsigned bit_find_last(const bm::word_t* block, unsigned* last)
6680 {
6681  BM_ASSERT(block);
6682  BM_ASSERT(last);
6683 
6684  // TODO: SIMD version
6685 
6686  for (unsigned i = bm::set_block_size-1; true; --i)
6687  {
6688  bm::word_t w = block[i];
6689  if (w)
6690  {
6691  unsigned idx = bm::bit_scan_reverse(w);
6692  *last = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6693  return w;
6694  }
6695  if (i == 0)
6696  break;
6697  } // for i
6698  return 0u;
6699 }
6700 
6701 /*!
6702  \brief BIT block find the first set bit
6703 
6704  \param block - bit block buffer pointer
6705  \param first - index of the first 1 bit (out)
6706  \return 0 if not found
6707 
6708  @ingroup bitfunc
6709 */
6710 inline
6711 unsigned bit_find_first(const bm::word_t* block, unsigned* first)
6712 {
6713  BM_ASSERT(block);
6714  BM_ASSERT(first);
6715 
6716  // TODO: SIMD version
6717 
6718  for (unsigned i = 0; i < bm::set_block_size; ++i)
6719  {
6720  bm::word_t w = block[i];
6721  if (w)
6722  {
6723  unsigned idx = bit_scan_forward32(w); // trailing zeros
6724  *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6725  return w;
6726  }
6727  } // for i
6728  return 0u;
6729 }
6730 
6731 /*!
6732  \brief BIT block find the first set bit
6733 
6734  \param block - bit block buffer pointer
6735  \param first - index of the first 1 bit (out)
6736  \param digest - known digest of dst block
6737 
6738  \return 0 if not found
6739 
6740  @ingroup bitfunc
6741 */
6742 inline
6743 unsigned bit_find_first(const bm::word_t* block,
6744  unsigned* first,
6745  bm::id64_t digest)
6746 {
6747  BM_ASSERT(block);
6748  BM_ASSERT(first);
6749  BM_ASSERT(digest);
6750 
6751  bm::id64_t t = bm::bmi_blsi_u64(digest); // d & -d;
6752 
6753  unsigned wave = bm::word_bitcount64(t - 1);
6754  unsigned off = wave * bm::set_block_digest_wave_size;
6755  for (unsigned i = off; i < bm::set_block_size; ++i)
6756  {
6757  bm::word_t w = block[i];
6758  if (w)
6759  {
6760  unsigned idx = bit_scan_forward32(w); // trailing zeros
6761  *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6762  return w;
6763  }
6764  } // for i
6765  return 0u;
6766 }
6767 
6768 
6769 /*!
6770  \brief BIT block find the first set bit if only 1 bit is set
6771 
6772  \param block - bit block buffer pointer
6773  \param first - index of the first 1 bit (out)
6774  \param digest - known digest of dst block
6775 
6776  \return 0 if not found
6777 
6778  @ingroup bitfunc
6779 */
6780 inline
6782  unsigned* first,
6783  bm::id64_t digest)
6784 {
6785  BM_ASSERT(block);
6786  BM_ASSERT(first);
6787 
6788  unsigned bc = bm::word_bitcount64(digest);
6789  if (bc != 1)
6790  return false;
6791 
6792  bool found = false;
6793  bm::id64_t t = bm::bmi_blsi_u64(digest); // d & -d;
6794 
6795  unsigned wave = bm::word_bitcount64(t - 1);
6796  unsigned off = wave * bm::set_block_digest_wave_size;
6797  unsigned i;
6798  for (i = off; i < off + bm::set_block_digest_wave_size; ++i)
6799  {
6800  bm::word_t w = block[i];
6801  if (w)
6802  {
6803  bc = bm::word_bitcount(w);
6804  if (bc != 1)
6805  return false;
6806 
6807  unsigned idx = bit_scan_forward32(w); // trailing zeros
6808  *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6809  found = true;
6810  break;
6811  }
6812  } // for i
6813 
6814  // check if all other bits are zero
6815  for (++i; i < off + bm::set_block_digest_wave_size; ++i)
6816  {
6817  if (block[i])
6818  return false;
6819  }
6820  return found;
6821 }
6822 
6823 
6824 /*!
6825  \brief BIT block find position for the rank
6826 
6827  \param block - bit block buffer pointer
6828  \param rank - rank to find (must be > 0)
6829  \param nbit_from - start bit position in block
6830  \param nbit_pos - (out)found position
6831 
6832  \return 0 if position with rank was found, or
6833  the remaining rank (rank - population count)
6834 
6835  @ingroup bitfunc
6836 */
6837 template<typename SIZE_TYPE>
6838 SIZE_TYPE bit_find_rank(const bm::word_t* const block,
6839  SIZE_TYPE rank,
6840  unsigned nbit_from,
6841  unsigned& nbit_pos)
6842 {
6843  BM_ASSERT(block);
6844  BM_ASSERT(rank);
6845 
6846  unsigned nword = nbit_from >> bm::set_word_shift;
6847  BM_ASSERT(nword < bm::set_block_size);
6848 
6849  unsigned pos = nbit_from;
6850  bm::id_t nbit = (nbit_from & bm::set_word_mask);
6851 
6852  if (nbit)
6853  {
6854  bm::id_t w = block[nword];
6855  w >>= nbit;
6856  unsigned bc = bm::word_bitcount(w);
6857  if (bc < rank) // skip this
6858  {
6859  rank -= bc; pos += unsigned(32u - nbit);
6860  ++nword;
6861  }
6862  else // target word
6863  {
6864  unsigned idx = bm::word_select64(w, unsigned(rank));
6865  nbit_pos = pos + idx;
6866  return 0;
6867  }
6868  }
6869 
6870  if (bm::conditional<sizeof(void*) == 8>::test()) // 64-bit fast scan
6871  {
6872  for (; nword < bm::set_block_size-1; nword+=2)
6873  {
6874  bm::id64_t w = (bm::id64_t(block[nword+1]) << 32) | bm::id64_t(block[nword]);
6875  bm::id_t bc = bm::word_bitcount64(w);
6876  if (bc >= rank) // target
6877  {
6878