BitMagic-C++
bmfunc.h
Go to the documentation of this file.
1 #ifndef BMFUNC__H__INCLUDED__
2 #define BMFUNC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmfunc.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include <memory.h>
26 
27 #include "bmdef.h"
28 #include "bmutil.h"
29 
30 
31 #ifdef _MSC_VER
32 # pragma warning( disable: 4146 )
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 inline
41  bm::word_t left,
42  bm::word_t right) BMNOEXCEPT;
43 
44 inline
46  bm::word_t left,
47  bm::word_t right) BMNOEXCEPT;
48 
49 /*!
50  @brief Structure with statistical information about memory
51  allocation footprint, serialization projection, number of vectors
52  @ingroup bvector
53 */
55 {
56  size_t bit_blocks; ///< Number of bit blocks
57  size_t gap_blocks; ///< Number of GAP blocks
58  size_t ptr_sub_blocks; ///< Number of sub-blocks
59  size_t bv_count; ///< Number of bit-vectors
60  size_t max_serialize_mem; ///< estimated maximum memory for serialization
61  size_t memory_used; ///< memory usage for all blocks and service tables
62  size_t gap_cap_overhead; ///< gap memory overhead between length and capacity
63  gap_word_t gap_levels[bm::gap_levels]; ///< GAP block lengths in the bvect
64  unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level
65 
66  /// cound bit block
68  {
69  ++bit_blocks;
70  size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
71  memory_used += mem_used;
72  max_serialize_mem += mem_used;
73  }
74 
75  /// count gap block
76  void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT
77  {
78  ++gap_blocks;
79  size_t mem_used = (capacity * sizeof(gap_word_t));
80  memory_used += mem_used;
81  max_serialize_mem += (unsigned)(length * sizeof(gap_word_t));
82  BM_ASSERT(length <= capacity);
83  gap_cap_overhead += (capacity - length) * sizeof(gap_word_t);
84  for (unsigned i = 0; i < bm::gap_levels; ++i)
85  {
86  if (capacity == gap_levels[i])
87  {
88  gaps_by_level[i]++;
89  return;
90  }
91  }
92  BM_ASSERT(0);
93  }
94 
95  /// Reset statisctics
97  {
100  for (unsigned i = 0; i < bm::gap_levels; ++i)
101  gaps_by_level[i] = 0ull;
102  }
103 
104  /// Sum data from another sttructure
105  void add(const bv_statistics& st) BMNOEXCEPT
106  {
107  bit_blocks += st.bit_blocks;
108  gap_blocks += st.gap_blocks;
109  ptr_sub_blocks += st.ptr_sub_blocks;
110  bv_count += st.bv_count;
111  max_serialize_mem += st.max_serialize_mem + 8;
112  memory_used += st.memory_used;
113  gap_cap_overhead += st.gap_cap_overhead;
114  }
115 };
116 
117 /**
118  @brief Pair type
119 */
120 template<typename First, typename Second>
121 struct pair
122 {
123  First first;
124  Second second;
125 
126  pair(First f, Second s) : first(f), second(s) {}
127 };
128 
129 /**
130  \brief bit-decode cache structure
131 */
133 {
134  unsigned short bits[65]; //< decoded bits
135  unsigned bcnt; //< length of bits array
136  bm::id64_t cvalue; //< cache decoded value
137 
139 };
140 
141 
142 /**
143  \brief Recalc linear bvector block index into 2D matrix coordinates
144  \internal
145 */
146 template<typename BI_TYPE>
148 void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT
149 {
150  i = unsigned(nb >> bm::set_array_shift); // top block address
151  j = unsigned(nb & bm::set_array_mask); // address in sub-block
152 }
153 
154 /**
155  Compute bit address of the first bit in a superblock
156  \internal
157 */
158 template<typename RTYPE>
160 {
161  return RTYPE(i) * bm::set_sub_total_bits;
162 }
163 
164 /**
165  Compute bit address of the first bit in a block
166  \internal
167 */
168 template<typename RTYPE>
169 BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT
170 {
171  RTYPE idx = bm::get_super_block_start<RTYPE>(i);
172  idx += (j) * bm::gap_max_bits;
173  return idx;
174 }
175 
176 
177 /*!
178  @defgroup gapfunc GAP functions
179  GAP functions implement different opereations on GAP compressed blocks (internals)
180  and serve as a minimal building blocks.
181  \internal
182  @ingroup bvector
183  */
184 
185 /*!
186  @defgroup bitfunc BIT functions
187  Bit functions implement different opereations on bit blocks (internals)
188  and serve as a minimal building blocks.
189  \internal
190  @ingroup bvector
191  */
192 
193 
194 /*!
195  Returns bit count
196  @ingroup bitfunc
197 */
200 {
201 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
202  return bm::id_t(_mm_popcnt_u32(w));
203 #else
204  #if defined(BM_USE_GCC_BUILD)
205  return (bm::id_t)__builtin_popcount(w);
206  #else
207  return
208  bm::bit_count_table<true>::_count[(unsigned char)(w)] +
209  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
210  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
211  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
212  #endif
213 #endif
214 }
215 
216 inline
217 int parallel_popcnt_32(unsigned int n) BMNOEXCEPT
218 {
219  unsigned int tmp;
220 
221  tmp = n - ((n >> 1) & 033333333333)
222  - ((n >> 2) & 011111111111);
223  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
224 }
225 
226 
227 /*!
228  Function calculates number of 1 bits in 64-bit word.
229  @ingroup bitfunc
230 */
233 {
234 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
235 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
236  return unsigned(_mm_popcnt_u64(x));
237 #else
238  return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
239 #endif
240 #else
241  #if defined(BM_USE_GCC_BUILD)
242  return (unsigned)__builtin_popcountll(x);
243  #else
244  x = x - ((x >> 1) & 0x5555555555555555);
245  x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
246  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
247  x = x + (x >> 8);
248  x = x + (x >> 16);
249  x = x + (x >> 32);
250  return x & 0xFF;
251  #endif
252 #endif
253 }
254 
255 inline
258 {
259  const bm::id64_t m1 = 0x5555555555555555U;
260  const bm::id64_t m2 = 0x3333333333333333U;
261  const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0FU;
262  const bm::id64_t m4 = 0x000000FF000000FFU;
263 
264  x = x - ((x >> 1) & m1);
265  y = y - ((y >> 1) & m1);
266  u = u - ((u >> 1) & m1);
267  v = v - ((v >> 1) & m1);
268  x = (x & m2) + ((x >> 2) & m2);
269  y = (y & m2) + ((y >> 2) & m2);
270  u = (u & m2) + ((u >> 2) & m2);
271  v = (v & m2) + ((v >> 2) & m2);
272  x = x + y;
273  u = u + v;
274  x = (x & m3) + ((x >> 4) & m3);
275  u = (u & m3) + ((u >> 4) & m3);
276  x = x + u;
277  x = x + (x >> 8);
278  x = x + (x >> 16);
279  x = x & m4;
280  x = x + (x >> 32);
281  return x & 0x000001FFU;
282 }
283 
284 
285 // --------------------------------------------------------------
286 // Functions for bit-scanning
287 // --------------------------------------------------------------
288 
289 /*!
290  \brief Templated algorithm to unpacks octet based word into list of ON bit indexes
291  \param w - value
292  \param func - bit functor
293 
294  @ingroup bitfunc
295 */
296 template<typename T, typename F>
297 void bit_for_each_4(T w, F& func)
298 {
299  for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
300  {
301  switch (w & 15) // 1111
302  {
303  case 0: // 0000
304  break;
305  case 1: // 0001
306  func(sub_octet);
307  break;
308  case 2: // 0010
309  func(sub_octet + 1);
310  break;
311  case 3: // 0011
312  func(sub_octet, sub_octet + 1);
313  break;
314  case 4: // 0100
315  func(sub_octet + 2);
316  break;
317  case 5: // 0101
318  func(sub_octet, sub_octet + 2);
319  break;
320  case 6: // 0110
321  func(sub_octet + 1, sub_octet + 2);
322  break;
323  case 7: // 0111
324  func(sub_octet, sub_octet + 1, sub_octet + 2);
325  break;
326  case 8: // 1000
327  func(sub_octet + 3);
328  break;
329  case 9: // 1001
330  func(sub_octet, sub_octet + 3);
331  break;
332  case 10: // 1010
333  func(sub_octet + 1, sub_octet + 3);
334  break;
335  case 11: // 1011
336  func(sub_octet, sub_octet + 1, sub_octet + 3);
337  break;
338  case 12: // 1100
339  func(sub_octet + 2, sub_octet + 3);
340  break;
341  case 13: // 1101
342  func(sub_octet, sub_octet + 2, sub_octet + 3);
343  break;
344  case 14: // 1110
345  func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
346  break;
347  case 15: // 1111
348  func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
349  break;
350  default:
351  BM_ASSERT(0);
352  break;
353  }
354 
355  } // for
356 }
357 
358 
359 /*!
360  \brief Templated algorithm to unpacks word into list of ON bit indexes
361  \param w - value
362  \param func - bit functor
363 
364  @ingroup bitfunc
365 */
366 template<typename T, typename F>
367 void bit_for_each(T w, F& func)
368 {
369  // Note: 4-bit table method works slower than plain check approach
370  for (unsigned octet = 0; w != 0; w >>= 8, octet += 8)
371  {
372  if (w & 1) func(octet + 0);
373  if (w & 2) func(octet + 1);
374  if (w & 4) func(octet + 2);
375  if (w & 8) func(octet + 3);
376  if (w & 16) func(octet + 4);
377  if (w & 32) func(octet + 5);
378  if (w & 64) func(octet + 6);
379  if (w & 128) func(octet + 7);
380 
381  } // for
382 }
383 
384 /*! @brief Adaptor to copy 1 bits to array
385  @internal
386 */
387 template<typename B>
389 {
390 public:
391  copy_to_array_functor(B* bits): bp_(bits)
392  {}
393 
394  B* ptr() { return bp_; }
395 
396  void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
397 
398  void operator()(unsigned bit_idx0,
399  unsigned bit_idx1) BMNOEXCEPT
400  {
401  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
402  bp_+=2;
403  }
404 
405  void operator()(unsigned bit_idx0,
406  unsigned bit_idx1,
407  unsigned bit_idx2) BMNOEXCEPT
408  {
409  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
410  bp_+=3;
411  }
412 
413  void operator()(unsigned bit_idx0,
414  unsigned bit_idx1,
415  unsigned bit_idx2,
416  unsigned bit_idx3) BMNOEXCEPT
417  {
418  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
419  bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
420  bp_+=4;
421  }
422 
423 private:
425  copy_to_array_functor& operator=(const copy_to_array_functor&);
426 private:
427  B* bp_;
428 };
429 
430 
431 /*!
432  \brief Unpacks word into list of ON bit indexes
433  \param w - value
434  \param bits - pointer on the result array
435  \return number of bits in the list
436 
437  @ingroup bitfunc
438 */
439 template<typename T,typename B>
440 unsigned bit_list(T w, B* bits) BMNOEXCEPT
441 {
442  copy_to_array_functor<B> func(bits);
443  bit_for_each(w, func);
444  return (unsigned)(func.ptr() - bits);
445 }
446 
447 
448 
449 /*!
450  \brief Unpacks word into list of ON bit indexes (quad-bit based)
451  \param w - value
452  \param bits - pointer on the result array
453  \return number of bits in the list
454 
455  @ingroup bitfunc
456 */
457 template<typename T,typename B>
458 unsigned bit_list_4(T w, B* bits) BMNOEXCEPT
459 {
460  copy_to_array_functor<B> func(bits);
461  bit_for_each_4(w, func);
462  return (unsigned)(func.ptr() - bits);
463 }
464 
465 /*!
466  \brief Unpacks word into list of ON bit indexes using popcnt method
467  \param w - value
468  \param bits - pointer on the result array
469  \param offs - value to add to bit position (programmed shift)
470  \return number of bits in the list
471 
472  @ingroup bitfunc
473  @internal
474 */
475 template<typename B>
476 unsigned short
477 bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT
478 {
479  unsigned pos = 0;
480  while (w)
481  {
482  bm::id_t t = w & -w;
483  bits[pos++] = (B)(bm::word_bitcount(t - 1) + offs);
484  w &= w - 1;
485  }
486  return (unsigned short)pos;
487 }
488 
489 /*!
490  \brief Unpacks word into list of ON bit indexes using popcnt method
491  \param w - value
492  \param bits - pointer on the result array
493  \return number of bits in the list
494 
495  @ingroup bitfunc
496  @internal
497 */
498 template<typename B>
499 unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT
500 {
501  unsigned pos = 0;
502  while (w)
503  {
504  bm::id_t t = w & -w;
505  bits[pos++] = (B)(bm::word_bitcount(t - 1));
506  w &= w - 1;
507  }
508  return (unsigned short)pos;
509 }
510 
511 
512 /*!
513  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
514  \param w - value
515  \param bits - pointer on the result array
516  \return number of bits in the list
517  @ingroup bitfunc
518 */
519 template<typename B>
520 unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT
521 {
522  unsigned short pos = 0;
523  while (w)
524  {
525  bm::id64_t t = bmi_blsi_u64(w); // w & -w;
526  bits[pos++] = (B) bm::word_bitcount64(t - 1);
527  w = bmi_bslr_u64(w); // w &= w - 1;
528  }
529  return pos;
530 }
531 
532 /*!
533  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
534  \param w - value
535  \param bits - pointer on the result array
536  \param offs - value to add to bit position (programmed shift)
537  \return number of bits in the list
538  @ingroup bitfunc
539 */
540 template<typename B>
541 unsigned short
542 bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT
543 {
544  unsigned short pos = 0;
545  while (w)
546  {
547  bm::id64_t t = bmi_blsi_u64(w); // w & -w;
548  bits[pos++] = B(bm::word_bitcount64(t - 1) + offs);
549  w = bmi_bslr_u64(w); // w &= w - 1;
550  }
551  return pos;
552 }
553 
554 
555 template<typename V, typename B>
556 unsigned short bitscan(V w, B* bits) BMNOEXCEPT
557 {
558  if (bm::conditional<sizeof(V) == 8>::test())
559  return bm::bitscan_popcnt64(w, bits);
560  else
561  return bm::bitscan_popcnt((bm::word_t)w, bits);
562 }
563 
564 // --------------------------------------------------------------
565 // Functions for select
566 // --------------------------------------------------------------
567 
568 /**
569  \brief word find index of the rank-th bit set by bit-testing
570  \param w - 64-bit work to search
571  \param rank - rank to select (should be > 0)
572 
573  \return selected value (inxed of bit set)
574 */
575 inline
576 unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT
577 {
578  BM_ASSERT(w);
579  BM_ASSERT(rank);
580 
581  for (unsigned count = 0; w; w >>=1ull, ++count)
582  {
583  rank -= unsigned(w & 1ull);
584  if (!rank)
585  return count;
586  }
587  BM_ASSERT(0); // shoud not be here if rank is achievable
588  return ~0u;
589 }
590 
591 /**
592  \brief word find index of the rank-th bit set by bit-testing
593  \param w - 64-bit work to search
594  \param rank - rank to select (should be > 0)
595 
596  \return selected value (inxed of bit set)
597 */
598 inline
599 unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) BMNOEXCEPT
600 {
601  BM_ASSERT(w);
602  BM_ASSERT(rank);
603  BM_ASSERT(rank <= bm::word_bitcount64(w));
604 
605  do
606  {
607  --rank;
608  if (!rank)
609  break;
610  w &= w - 1;
611  } while (1);
612 
613  bm::id64_t t = w & -w;
614  unsigned count = bm::word_bitcount64(t - 1);
615  return count;
616 }
617 
618 /**
619  \brief word find index of the rank-th bit set by bit-testing
620  \param w - 64-bit work to search
621  \param rank - rank to select (should be > 0)
622 
623  \return selected value (inxed of bit set)
624 */
625 inline
626 unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT
627 {
628 #if defined(BMI2_SELECT64)
629  return BMI2_SELECT64(w, rank);
630 #else
631  #if defined(BMI1_SELECT64)
632  return BMI2_SELECT64(w, rank);
633  #else
634  return bm::word_select64_bitscan(w, rank);
635  #endif
636 #endif
637 }
638 
639 // --------------------------------------------------------------
640 // Functions for bit-block digest calculation
641 // --------------------------------------------------------------
642 
643 /*!
644  \brief Compute digest mask for word address in block
645 
646  @return digest bit-mask
647 
648  @ingroup bitfunc
649  @internal
650 */
653 {
654  bm::id64_t mask(1ull);
655  return mask << (w_idx / bm::set_block_digest_wave_size);
656 }
657 
658 /**
659  \brief Compute digest mask for [from..to] positions
660  \param from - range from (in bit-block coordinates)
661  \param to - range to (in bit-block coordinates)
662 
663  @ingroup bitfunc
664  @internal
665 */
667 bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
668 {
669  BM_ASSERT(from <= to);
670 
671  bm::id64_t digest_from = from >> bm::set_block_digest_pos_shift;
672  bm::id64_t digest_to = to >> bm::set_block_digest_pos_shift;;
673  const bm::id64_t maskFF(~0ull);
674  return ((maskFF) >> (63 - (digest_to - digest_from))) << digest_from;
675 }
676 
677 /*!
678  \brief check if all digest bits for the range [from..to] are 0
679 
680  \param digest - current digest
681  \param bitpos_from - range from (in bit-block coordinates)
682  \param bitpos_to - range to (in bit-block coordinates)
683 
684  @return true if all range is zero
685 
686  @ingroup bitfunc
687  @internal
688 */
689 inline
691  unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT
692 {
693  bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
694  return !(digest & mask);
695 }
696 
697 /*!
698  \brief Init block with 000111000 pattren based on digest
699  \param block - Bit block [out]
700  \param digest - digest (used for block initialization)
701 
702  @ingroup bitfunc
703  @internal
704 */
705 inline
707 {
708  unsigned off;
709  for (unsigned i = 0; i < 64; ++i)
710  {
712  bm::word_t mask = (digest & 1) ? ~0u : 0u;
713 #if defined(VECT_BLOCK_SET_DIGEST)
714  VECT_BLOCK_SET_DIGEST(&block[off], mask);
715 #else
716  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
717  {
718  block[off+j+0] = block[off+j+1] =
719  block[off+j+2] = block[off+j+3] = mask;
720  } // for j
721 #endif
722  digest >>= 1ull;
723  } // for
724 }
725 
726 /*!
727  \brief Compute digest for 64 non-zero areas
728  \param block - Bit block
729 
730  @return digest bit-mask (0 means all block is empty)
731 
732  @ingroup bitfunc
733  @internal
734 */
735 inline
737 {
738  bm::id64_t digest0 = 0;
739  unsigned off;
740 
741  for (unsigned i = 0; i < 64; ++i)
742  {
744  #if defined(VECT_IS_DIGEST_ZERO)
745  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
746  digest0 |= bm::id64_t(!all_zero) << i;
747  #else
748  const bm::id64_t mask(1ull);
749  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
750  {
751  bm::word_t w =
752  block[off+j+0] | block[off+j+1] | block[off+j+2] | block[off+j+3];
753  if (w)
754  {
755  digest0 |= (mask << i);
756  break;
757  }
758  } // for j
759  #endif
760 
761  } // for i
762  return digest0;
763 }
764 
765 /*!
766  \brief Compute digest for 64 non-zero areas based on existing digest
767  (function revalidates zero areas)
768  \param block - bit block
769  \param digest - start digest
770 
771  @return digest bit-mask (0 means all block is empty)
772 
773  @ingroup bitfunc
774  @internal
775 */
776 inline
779 {
780  const bm::id64_t mask(1ull);
781  bm::id64_t d = digest;
782 
783  while (d)
784  {
785  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
786 
787  unsigned wave = bm::word_bitcount64(t - 1);
788  unsigned off = wave * bm::set_block_digest_wave_size;
789 
790  #if defined(VECT_IS_DIGEST_ZERO)
791  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
792  digest &= all_zero ? ~(mask << wave) : digest;
793  #else
795  (const bm::bit_block_t::bunion_t*)(&block[off]);
796  bm::id64_t w64 = 0;
797  unsigned j = 0;
798  do
799  {
800  w64 |=
801  src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
802  j += 4;
803  } while ((j < bm::set_block_digest_wave_size/2) & !w64);
804  digest &= w64 ? digest : ~(mask << wave);
805  #endif
806 
807  d = bm::bmi_bslr_u64(d); // d &= d - 1;
808  } // while
809 
810  BM_ASSERT(bm::calc_block_digest0(block) == digest);
811  return digest;
812 }
813 
814 // --------------------------------------------------------------
815 
816 
817 /// Returns true if set operation is constant (bitcount)
818 inline
820 {
821  return (int(op) >= int(set_COUNT));
822 }
823 
824 /**
825  Convert set operation to operation
826 */
827 inline
829 {
830  BM_ASSERT(op == set_AND ||
831  op == set_OR ||
832  op == set_SUB ||
833  op == set_XOR);
834  return (bm::operation) op;
835 }
836 
837 //---------------------------------------------------------------------
838 
839 /**
840  Structure carries pointer on bit block with all bits 1
841  @ingroup bitfunc
842  @internal
843 */
844 template<bool T> struct all_set
845 {
847  {
851 
853  {
854  ::memset(_p, 0xFF, sizeof(_p)); // set FULL BLOCK content (all 1s)
855  if (bm::conditional<sizeof(void*) == 8>::test())
856  {
857  const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
858  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
859  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
860  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
861  }
862  else
863  {
864  const unsigned magic_mask = 0xFFFFfffe;
865  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
866  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
867  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
868  }
869  }
870  };
871 
872  // version with minimal branching, super-scalar friendly
873  //
874  inline
876  {
877  bm::id64_t type;
878  if (bm::conditional<sizeof(void*) == 8>::test())
879  {
880  bm::id64_t w = reinterpret_cast<unsigned long long>(bp);
881  type = (w & 3) | // FULL BLOCK or GAP
882  ((bp == _block._p) << 1);
883  type = type ? type : w;
884  }
885  else
886  {
887  unsigned w = reinterpret_cast<unsigned long>(bp);
888  type = (w & 3) | // FULL BLOCK or GAP
889  ((bp == _block._p) << 1);
890  type = type ? type : w;
891  }
892  return type;
893  }
894 
896  static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT
897  { return (bp == _block._p || bp == _block._p_fullp); }
898 
901  { return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
902 
904 };
905 
906 
907 template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
908 
909 /// XOR swap two scalar variables
910 template<typename W>
911 void xor_swap(W& x, W& y) BMNOEXCEPT
912 {
913  BM_ASSERT(&x != &y);
914  x ^= y;
915  y ^= x;
916  x ^= y;
917 }
918 
919 /*!
920  Fini not NULL position
921  @return index of not NULL pointer
922  @internal
923 */
924 template<typename N>
925 bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT
926 {
927  BM_ASSERT(pos);
928 // BM_ASSERT(start < size);
929 //#if defined(BM64_AVX2) || defined(BM64_AVX512)
930 // TODO: optimization for SIMD based next ptr scan
931 #if 0
932  const unsigned unroll_factor = 4;
933  const unsigned len = (size - start);
934  const unsigned len_unr = len - (len % unroll_factor);
935  unsigned k;
936 
937  arr += start;
938  for (k = 0; k < len_unr; k+=unroll_factor)
939  {
940  if (!avx2_test_all_zero_wave(arr+k))
941  {
942  if (arr[k])
943  {
944  *pos = k + start;
945  return true;
946  }
947  if (arr[k+1])
948  {
949  *pos = k + start + 1;
950  return true;
951  }
952  if (arr[k+2])
953  {
954  *pos = k + start + 2;
955  return true;
956  }
957  if (arr[k+3])
958  {
959  *pos = k + start + 3;
960  return true;
961  }
962  }
963  } // for k
964 
965  for (; k < len; ++k)
966  {
967  if (arr[k])
968  {
969  *pos = k + start;
970  return true;
971  }
972  } // for k
973 #else
974  for (; start < size; ++start)
975  {
976  if (arr[start])
977  {
978  *pos = start;
979  return true;
980  }
981  } // for i
982 #endif
983  return false;
984 }
985 
986 
987 
988 
989 //---------------------------------------------------------------------
990 
991 /*!
992  \brief Lexicographical comparison of two words as bit strings (reference)
993  Auxiliary implementation for testing and reference purposes.
994  \param w1 - First word.
995  \param w2 - Second word.
996  \return <0 - less, =0 - equal, >0 - greater.
997 
998  @ingroup bitfunc
999 */
1000 template<typename T> int wordcmp0(T w1, T w2)
1001 {
1002  while (w1 != w2)
1003  {
1004  int res = (w1 & 1) - (w2 & 1);
1005  if (res != 0) return res;
1006  w1 >>= 1;
1007  w2 >>= 1;
1008  }
1009  return 0;
1010 }
1011 
1012 
1013 /*
1014 template<typename T> int wordcmp(T w1, T w2)
1015 {
1016  T diff = w1 ^ w2;
1017  return diff ? ((w1 & diff & (diff ^ (diff - 1)))? 1 : -1) : 0;
1018 }
1019 */
1020 /*!
1021  \brief Lexicographical comparison of two words as bit strings.
1022  Auxiliary implementation for testing and reference purposes.
1023  \param a - First word.
1024  \param b - Second word.
1025  \return <0 - less, =0 - equal, >0 - greater.
1026 
1027  @ingroup bitfunc
1028 */
1029 template<typename T> int wordcmp(T a, T b)
1030 {
1031  T diff = a ^ b;
1032  return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
1033 }
1034 
1035 
1036 // Low bit extraction
1037 // x & (x ^ (x-1))
1038 
1039 
1040 // ----------------------------------------------------------------------
1041 
1042 
1043 /*! @brief Returns "true" if all bits in the block are 0
1044  @ingroup bitfunc
1045 */
1046 inline
1048 {
1049 #if defined(VECT_IS_ZERO_BLOCK)
1050  return VECT_IS_ZERO_BLOCK(start);
1051 #else
1052  const bm::wordop_t* BMRESTRICT blk = (bm::wordop_t*) (start);
1053  const bm::wordop_t* BMRESTRICT blk_end = (bm::wordop_t*)(start + bm::set_block_size);
1054  do
1055  {
1056  if (blk[0] | blk[1] | blk[2] | blk[3])
1057  return false;
1058  blk += 4;
1059  } while (blk < blk_end);
1060  return true;
1061 #endif
1062 }
1063 
1064 // ----------------------------------------------------------------------
1065 
1066 /*!
1067  \brief Checks if GAP block is all-zero.
1068  \param buf - GAP buffer pointer.
1069  \returns true if all-zero.
1070 
1071  @ingroup gapfunc
1072 */
1075 {
1076  // (almost) branchless variant:
1077  return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
1078 }
1079 
1080 /*!
1081  \brief Checks if GAP block is all-one.
1082  \param buf - GAP buffer pointer.
1083  \returns true if all-one.
1084  @ingroup gapfunc
1085 */
1088 {
1089  return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
1090 }
1091 
1092 /*!
1093  \brief Returs GAP block length.
1094  \param buf - GAP buffer pointer.
1095  \returns GAP block length.
1096 
1097  @ingroup gapfunc
1098 */
1101 {
1102  return (bm::gap_word_t)((*buf >> 3) + 1);
1103 }
1104 
1105 
1106 /*!
1107  \brief Returs GAP block capacity
1108  \param buf - GAP buffer pointer
1109  \param glevel_len - pointer on GAP header word
1110  \returns GAP block capacity.
1111 
1112  @ingroup gapfunc
1113 */
1114 template<typename T>
1115 unsigned
1116 gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
1117 {
1118  return glevel_len[(*buf >> 1) & 3];
1119 }
1120 
1121 
1122 /*!
1123  \brief Returs GAP block capacity limit.
1124  \param buf - GAP buffer pointer.
1125  \param glevel_len - GAP lengths table (gap_len_table)
1126  \returns GAP block limit.
1127 
1128  @ingroup gapfunc
1129 */
1130 template<typename T>
1131 unsigned
1132 gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
1133 {
1134  return glevel_len[(*buf >> 1) & 3]-4;
1135 }
1136 
1137 
1138 /*!
1139  \brief Returs GAP blocks capacity level.
1140  \param buf - GAP buffer pointer.
1141  \returns GAP block capacity level.
1142 
1143  @ingroup gapfunc
1144 */
1145 template<typename T>
1147 {
1148  return T((*buf >> 1) & 3u);
1149 }
1150 
1151 
1152 /*!
1153  \brief GAP block find the last set bit
1154 
1155  \param buf - GAP buffer pointer.
1156  \param last - index of the last 1 bit
1157 
1158  \return 0 if 1 bit was NOT found
1159 
1160  @ingroup gapfunc
1161 */
1162 template<typename T>
1163 unsigned
1164 gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT
1165 {
1166  BM_ASSERT(last);
1167 
1168  T is_set = (*buf) & 1u;
1169  T end = T((*buf) >> 3u);
1170 
1171  BM_ASSERT(buf[end] == bm::gap_max_bits - 1);
1172 
1173  is_set ^= T((end-1) & 1u);
1174  if (is_set)
1175  {
1176  *last = buf[end];
1177  return is_set;
1178  }
1179  *last = buf[--end];
1180  return end;
1181 }
1182 
1183 /*!
1184  \brief GAP block find the first set bit
1185 
1186  \param buf - GAP buffer pointer.
1187  \param first - index of the first 1 bit
1188 
1189  \return 0 if 1 bit was NOT found
1190 
1191  @ingroup gapfunc
1192 */
1193 template<typename T>
1194 unsigned
1195 gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT
1196 {
1197  BM_ASSERT(first);
1198 
1199  T is_set = (*buf) & 1u;
1200  if (is_set)
1201  {
1202  *first = 0;
1203  return is_set;
1204  }
1205  if (buf[1] == bm::gap_max_bits - 1)
1206  return 0;
1207  *first = buf[1] + 1;
1208  return 1;
1209 }
1210 
1211 
1212 
1213 /*
1214  \brief Binary search for the block where bit = pos located.
1215  \param buf - GAP buffer pointer.
1216  \param pos - index of the element.
1217  \param is_set - output. GAP value (0 or 1).
1218  \return GAP index.
1219  @ingroup gapfunc
1220 */
1221 template<typename T>
1222 unsigned gap_bfind(const T* BMRESTRICT buf,
1223  unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT
1224 {
1225  BM_ASSERT(pos < bm::gap_max_bits);
1226  #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation
1227  #ifdef VECT_GAP_BFIND
1228  return VECT_GAP_BFIND(buf, pos, is_set);
1229  #else
1230  *is_set = (*buf) & 1;
1231 
1232  unsigned start = 1;
1233  unsigned end = 1 + ((*buf) >> 3);
1234 
1235  while ( start != end )
1236  {
1237  unsigned curr = (start + end) >> 1;
1238  if ( buf[curr] < pos )
1239  start = curr + 1;
1240  else
1241  end = curr;
1242  }
1243  *is_set ^= ((start-1) & 1);
1244  return start;
1245  #endif
1246 }
1247 
1248 
1249 /*!
1250  \brief Tests if bit = pos is true.
1251  \param buf - GAP buffer pointer.
1252  \param pos - index of the element.
1253  \return true if position is in "1" gap
1254  @ingroup gapfunc
1255 */
1256 template<typename T>
1257 unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT
1258 {
1259  BM_ASSERT(pos < bm::gap_max_bits);
1260 
1261  unsigned start = 1;
1262  unsigned end = 1 + ((*buf) >> 3);
1263  if (end - start < 10)
1264  {
1265  unsigned sv = *buf & 1;
1266  unsigned sv1= sv ^ 1;
1267  if (buf[1] >= pos) return sv;
1268  if (buf[2] >= pos) return sv1;
1269  if (buf[3] >= pos) return sv;
1270  if (buf[4] >= pos) return sv1;
1271  if (buf[5] >= pos) return sv;
1272  if (buf[6] >= pos) return sv1;
1273  if (buf[7] >= pos) return sv;
1274  if (buf[8] >= pos) return sv1;
1275  if (buf[9] >= pos) return sv;
1276  BM_ASSERT(0);
1277  }
1278  else
1279  {
1280  while (start != end)
1281  {
1282  unsigned curr = (start + end) >> 1;
1283  if (buf[curr] < pos)
1284  start = curr + 1;
1285  else
1286  end = curr;
1287  }
1288  }
1289  return ((*buf) & 1) ^ ((--start) & 1);
1290 }
1291 
1292 /*!
1293  \brief Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
1294  \param buf - GAP buffer pointer.
1295  \param pos - index of the element.
1296  \return true if position is in "1" gap
1297  @ingroup gapfunc
1298 */
1299 template<typename T>
1300 unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
1301 {
1302  BM_ASSERT(pos < bm::gap_max_bits);
1303 
1304  if (pos == 0) // quick answer possible
1305  {
1306  return (*buf) & 1;
1307  }
1308 #if defined(BMSSE2OPT)
1309  unsigned res = bm::sse2_gap_test(buf, pos);
1310  BM_ASSERT(res == bm::gap_test(buf, pos));
1311 #elif defined(BMSSE42OPT)
1312  unsigned res = bm::sse42_gap_test(buf, pos);
1313  BM_ASSERT(res == bm::gap_test(buf, pos));
1314 #elif defined(BMAVX2OPT)
1315  unsigned res = bm::avx2_gap_test(buf, pos);
1316  BM_ASSERT(res == bm::gap_test(buf, pos));
1317 #else
1318  unsigned res = bm::gap_test(buf, pos);
1319 #endif
1320  return res;
1321 }
1322 
1323 /*! For each non-zero block in [from, to] executes supplied functor
1324  \internal
1325 */
1326 template<typename T, typename N, typename F>
1327 void for_each_nzblock_range(T*** root,
1328  N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
1329 {
1330  BM_ASSERT(top_size);
1331  if (nb_from > nb_to)
1332  return;
1333  unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
1334  unsigned j_from = unsigned(nb_from & bm::set_array_mask);
1335  unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
1336  unsigned j_to = unsigned(nb_to & bm::set_array_mask);
1337 
1338  if (i_from >= top_size)
1339  return;
1340  if (i_to >= top_size)
1341  {
1342  i_to = unsigned(top_size-1);
1343  j_to = bm::set_sub_array_size-1;
1344  }
1345 
1346  for (unsigned i = i_from; i <= i_to; ++i)
1347  {
1348  T** blk_blk = root[i];
1349  if (!blk_blk)
1350  continue;
1351  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1352  {
1353  unsigned j = (i == i_from) ? j_from : 0;
1354  if (!j && (i != i_to)) // full sub-block
1355  f.add_full(bm::set_sub_total_bits);
1356  else
1357  {
1358  do
1359  {
1360  f.add_full(bm::gap_max_bits);
1361  if ((i == i_to) && (j == j_to))
1362  return;
1363  } while (++j < bm::set_sub_array_size);
1364  }
1365  }
1366  else
1367  {
1368  unsigned j = (i == i_from) ? j_from : 0;
1369  do
1370  {
1371  if (blk_blk[j])
1372  f(blk_blk[j]);
1373  if ((i == i_to) && (j == j_to))
1374  return;
1375  } while (++j < bm::set_sub_array_size);
1376  }
1377  } // for i
1378 }
1379 
1380 /*! For each non-zero block executes supplied function.
1381  \internal
1382 */
1383 template<class T, class F>
1384 void for_each_nzblock(T*** root, unsigned size1, F& f)
1385 {
1386  typedef typename F::size_type size_type;
1387  for (unsigned i = 0; i < size1; ++i)
1388  {
1389  T** blk_blk = root[i];
1390  if (!blk_blk)
1391  {
1392  f.on_empty_top(i);
1393  continue;
1394  }
1395  f.on_non_empty_top(i);
1396  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1397  {
1398  size_type r = i * bm::set_sub_array_size;
1399  unsigned j = 0;
1400  do
1401  {
1402  f(FULL_BLOCK_FAKE_ADDR, r + j);
1403  } while (++j < bm::set_sub_array_size);
1404  continue;
1405  }
1406 
1407  unsigned non_empty_top = 0;
1408  size_type r = i * bm::set_sub_array_size;
1409  unsigned j = 0;
1410  do
1411  {
1412 #if defined(BM64_AVX2) || defined(BM64_AVX512)
1413  if (!avx2_test_all_zero_wave(blk_blk + j))
1414  {
1415  non_empty_top = 1;
1416  T* blk0 = blk_blk[j + 0];
1417  T* blk1 = blk_blk[j + 1];
1418  T* blk2 = blk_blk[j + 2];
1419  T* blk3 = blk_blk[j + 3];
1420 
1421  size_type block_idx = r + j + 0;
1422  if (blk0)
1423  f(blk0, block_idx);
1424  else
1425  f.on_empty_block(block_idx);
1426 
1427  if (blk1)
1428  f(blk1, block_idx + 1);
1429  else
1430  f.on_empty_block(block_idx + 1);
1431 
1432  if (blk2)
1433  f(blk2, block_idx + 2);
1434  else
1435  f.on_empty_block(block_idx + 2);
1436 
1437  if (blk3)
1438  f(blk3, block_idx + 3);
1439  else
1440  f.on_empty_block(block_idx + 3);
1441  }
1442  else
1443  {
1444  f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1445  f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1446  }
1447  j += 4;
1448 #elif defined(BM64_SSE4)
1449  if (!sse42_test_all_zero_wave((blk_blk + j)))
1450  {
1451  non_empty_top = 1;
1452  T* blk0 = blk_blk[j + 0];
1453  T* blk1 = blk_blk[j + 1];
1454 
1455  size_type block_idx = r + j + 0;
1456  if (blk0)
1457  f(blk0, block_idx);
1458  else
1459  f.on_empty_block(block_idx);
1460 
1461  ++block_idx;
1462  if (blk1)
1463  f(blk1, block_idx);
1464  else
1465  f.on_empty_block(block_idx);
1466  }
1467  else
1468  {
1469  f.on_empty_block(r + j + 0);
1470  f.on_empty_block(r + j + 1);
1471  }
1472  j += 2;
1473 #else
1474  if (blk_blk[j])
1475  {
1476  f(blk_blk[j], r + j);
1477  non_empty_top = 1;
1478  }
1479  else
1480  f.on_empty_block(r + j);
1481  ++j;
1482 #endif
1483  } while (j < bm::set_sub_array_size);
1484 
1485  if (non_empty_top == 0)
1486  f.on_empty_top(i);
1487  } // for i
1488 }
1489 
1490 /*! For each non-zero block executes supplied function.
1491 */
1492 template<class T, class F>
1493 void for_each_nzblock2(T*** root, unsigned size1, F& f)
1494 {
1495 #ifdef BM64_SSE4
1496  for (unsigned i = 0; i < size1; ++i)
1497  {
1498  T** blk_blk;
1499  if ((blk_blk = root[i])!=0)
1500  {
1501  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1502  {
1503  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1504  {
1506  }
1507  continue;
1508  }
1509  {
1510  __m128i w0;
1511  for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
1512  {
1513  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1514  if (!_mm_testz_si128(w0, w0))
1515  {
1516  T* blk0 = blk_blk[j + 0];
1517  T* blk1 = blk_blk[j + 1];
1518 
1519  if (blk0)
1520  f(blk0);
1521  if (blk1)
1522  f(blk1);
1523  }
1524  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1525  if (!_mm_testz_si128(w0, w0))
1526  {
1527  T* blk0 = blk_blk[j + 2];
1528  T* blk1 = blk_blk[j + 3];
1529  if (blk0)
1530  f(blk0);
1531  if (blk1)
1532  f(blk1);
1533  }
1534  } // for j
1535  }
1536  }
1537  } // for i
1538 #elif defined(BM64_AVX2) || defined(BM64_AVX512)
1539  for (unsigned i = 0; i < size1; ++i)
1540  {
1541  T** blk_blk;
1542  if ((blk_blk = root[i]) != 0)
1543  {
1544  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1545  {
1546  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1547  {
1549  }
1550  continue;
1551  }
1552  {
1553  for (unsigned j = 0; j < bm::set_sub_array_size; j += 4)
1554  {
1555  __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1556  if (!_mm256_testz_si256(w0, w0))
1557  {
1558  // as a variant could use: blk0 = (T*)_mm256_extract_epi64(w0, 0);
1559  // but it measures marginally slower
1560  T* blk0 = blk_blk[j + 0];
1561  T* blk1 = blk_blk[j + 1];
1562  T* blk2 = blk_blk[j + 2];
1563  T* blk3 = blk_blk[j + 3];
1564  if (blk0)
1565  f(blk0);
1566  if (blk1)
1567  f(blk1);
1568  if (blk2)
1569  f(blk2);
1570  if (blk3)
1571  f(blk3);
1572  }
1573  } // for j
1574  }
1575  }
1576  } // for i
1577 
1578 #else // non-SIMD mode
1579  for (unsigned i = 0; i < size1; ++i)
1580  {
1581  T** blk_blk;
1582  if ((blk_blk = root[i])!=0)
1583  {
1584  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1585  {
1586  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1587  {
1589  }
1590  continue;
1591  }
1592  unsigned j = 0;
1593  do
1594  {
1595  if (blk_blk[j])
1596  f(blk_blk[j]);
1597  if (blk_blk[j+1])
1598  f(blk_blk[j+1]);
1599  if (blk_blk[j+2])
1600  f(blk_blk[j+2]);
1601  if (blk_blk[j+3])
1602  f(blk_blk[j+3]);
1603  j += 4;
1604  } while (j < bm::set_sub_array_size);
1605  }
1606  } // for i
1607 #endif
1608 }
1609 
1610 
1611 /*! For each non-zero block executes supplied function-predicate.
1612  Function returns if function-predicate returns true
1613 */
1614 template<typename T, typename BI, typename F>
1615 bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT
1616 {
1617  BI block_idx = 0;
1618  for (BI i = 0; i < size1; ++i)
1619  {
1620  T** blk_blk = root[i];
1621  if (!blk_blk)
1622  {
1623  block_idx += bm::set_sub_array_size;
1624  continue;
1625  }
1626  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1627  {
1628  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
1629  {
1630  if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
1631  return true;
1632  } // for j
1633  continue;
1634  }
1635 
1636  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1637  {
1638  if (blk_blk[j])
1639  if (f(blk_blk[j], block_idx))
1640  return true;
1641  } // for j
1642  } // for i
1643  return false;
1644 }
1645 
1646 /*! For each block executes supplied function.
1647 */
1648 template<class T, class F, typename BLOCK_IDX>
1649 void for_each_block(T*** root, unsigned size1, F& f, BLOCK_IDX start)
1650 {
1651  BLOCK_IDX block_idx = start;
1652  for (unsigned i = 0; i < size1; ++i)
1653  {
1654  T** blk_blk = root[i];
1655  if (blk_blk)
1656  {
1657  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1658  {
1659  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
1660  {
1661  f(FULL_BLOCK_FAKE_ADDR, block_idx);
1662  }
1663  continue;
1664  }
1665  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1666  {
1667  f(blk_blk[j], block_idx);
1668  }
1669  }
1670  else
1671  {
1672  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1673  {
1674  f(0, block_idx);
1675  }
1676  }
1677  }
1678 }
1679 
1680 
1681 
1682 /*! Special BM optimized analog of STL for_each
1683 */
1684 template<class T, class F> F bmfor_each(T first, T last, F f)
1685 {
1686  do
1687  {
1688  f(*first);
1689  ++first;
1690  } while (first < last);
1691  return f;
1692 }
1693 
1694 /*! Computes SUM of all elements of the sequence
1695 */
1696 template<typename T>
1697 bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT
1698 {
1699  bm::id64_t sum = 0;
1700  for (;first < last; ++first)
1701  sum += *first;
1702  return sum;
1703 }
1704 
1705 /*!
1706  Extract short (len=1) exceptions from the GAP block
1707  \param buf - GAP buffer to split
1708  \param arr0 - [OUT] list of isolates 0 positions (clear list)
1709  \param arr1 - [OUT] list of isolated 1 positions (set list)
1710  \param arr0_cnt - [OUT] arr0 size
1711  \param arr1_cnt -
1712  @ingroup gapfunc
1713 */
1714 template<typename T>
1715 void gap_split(const T* buf,
1716  T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
1717 {
1718  const T* pcurr = buf;
1719  unsigned len = (*pcurr >> 3);
1720  const T* pend = pcurr + len;
1721 
1722  T cnt0, cnt1;
1723  cnt0 = cnt1 = 0;
1724  unsigned is_set = (*buf & 1);
1725 
1726  if (*pcurr == 0)
1727  {
1728  if (is_set)
1729  {
1730  arr1[cnt1] = *pcurr;
1731  ++cnt1;
1732  }
1733  else
1734  {
1735  arr0[cnt0] = *pcurr;
1736  ++cnt0;
1737  }
1738  }
1739  T prev = *pcurr;
1740  ++pcurr;
1741 
1742  while (pcurr <= pend)
1743  {
1744  is_set ^= 1;
1745  T delta = *pcurr - prev;
1746  if (delta == 1)
1747  {
1748  if (is_set)
1749  {
1750  arr1[cnt1] = prev;
1751  ++cnt1;
1752  }
1753  else
1754  {
1755  arr0[cnt0] = prev;
1756  ++cnt0;
1757  }
1758  }
1759  prev = *pcurr++;
1760  } // while
1761 
1762  arr0_cnt = cnt0;
1763  arr1_cnt = cnt1;
1764 }
1765 
1766 
1767 /*!
1768  \brief Calculates number of bits ON in GAP buffer.
1769  \param buf - GAP buffer pointer.
1770  \param dsize - buffer size
1771  \return Number of non-zero bits.
1772  @ingroup gapfunc
1773 */
1774 template<typename T>
1775 unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT
1776 {
1777  const T* pcurr = buf;
1778  if (dsize == 0)
1779  dsize = (*pcurr >> 3);
1780 
1781  const T* pend = pcurr + dsize;
1782 
1783  unsigned bits_counter = 0;
1784  ++pcurr;
1785 
1786  if (*buf & 1)
1787  {
1788  bits_counter += *pcurr + 1;
1789  ++pcurr;
1790  }
1791  for (++pcurr; pcurr <= pend; pcurr += 2)
1792  bits_counter += *pcurr - *(pcurr-1);
1793  return bits_counter;
1794 }
1795 
1796 /*!
1797  \brief Calculates number of bits ON in GAP buffer. Loop unrolled version.
1798  \param buf - GAP buffer pointer.
1799  \return Number of non-zero bits.
1800  @ingroup gapfunc
1801 */
1802 template<typename T>
1803 unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT
1804 {
1805  const T* pcurr = buf;
1806  unsigned dsize = (*pcurr >> 3);
1807 
1808  unsigned cnt = 0;
1809  pcurr = buf + 1; // set up start position
1810  T first_one = *buf & 1;
1811  if (first_one)
1812  {
1813  cnt += *pcurr + 1;
1814  ++pcurr;
1815  }
1816  ++pcurr; // set GAP to 1
1817 
1818  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
1819  if (dsize > 34)
1820  {
1821  const unsigned unr_factor = 32;
1822  unsigned waves = (dsize-2) / unr_factor;
1823  pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
1824  }
1825  #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
1826  if (dsize > 18)
1827  {
1828  const unsigned unr_factor = 16;
1829  unsigned waves = (dsize - 2) / unr_factor;
1830  pcurr = sse2_gap_sum_arr(pcurr, waves, &cnt);
1831  }
1832  #else
1833  if (dsize > 10)
1834  {
1835  const unsigned unr_factor = 8;
1836  unsigned waves = (dsize - 2) / unr_factor;
1837  for (unsigned i = 0; i < waves; i += unr_factor)
1838  {
1839  cnt += pcurr[0] - pcurr[0 - 1];
1840  cnt += pcurr[2] - pcurr[2 - 1];
1841  cnt += pcurr[4] - pcurr[4 - 1];
1842  cnt += pcurr[6] - pcurr[6 - 1];
1843 
1844  pcurr += unr_factor;
1845  } // for
1846  }
1847  #endif
1848 
1849  const T* pend = buf + dsize;
1850  for ( ; pcurr <= pend ; pcurr+=2)
1851  {
1852  cnt += *pcurr - *(pcurr - 1);
1853  }
1854  BM_ASSERT(cnt == bm::gap_bit_count(buf));
1855  return cnt;
1856 }
1857 
1858 
1859 
1860 /*!
1861  \brief Counts 1 bits in GAP buffer in the closed [left, right] range.
1862  \param buf - GAP buffer pointer.
1863  \param left - leftmost bit index to start from
1864  \param right- rightmost bit index
1865  \return Number of non-zero bits.
1866  @ingroup gapfunc
1867 */
1868 template<typename T>
1869 unsigned gap_bit_count_range(const T* const buf,
1870  unsigned left, unsigned right) BMNOEXCEPT
1871 {
1872  BM_ASSERT(left <= right);
1873  BM_ASSERT(right < bm::gap_max_bits);
1874 
1875  const T* pcurr = buf;
1876  const T* pend = pcurr + (*pcurr >> 3);
1877 
1878  unsigned bits_counter = 0;
1879  unsigned is_set;
1880  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1881  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
1882 
1883  pcurr = buf + start_pos;
1884  if (right <= *pcurr) // we are in the target gap right now
1885  {
1886  bits_counter = unsigned(right - left + 1u) & is_set; // & is_set == if(is_set)
1887  return bits_counter;
1888  }
1889  bits_counter += unsigned(*pcurr - left + 1u) & is_set;
1890 
1891  unsigned prev_gap = *pcurr++;
1892  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1893  {
1894  bits_counter += (*pcurr - prev_gap) & is_set;
1895  if (pcurr == pend)
1896  return bits_counter;
1897  prev_gap = *pcurr++;
1898  }
1899  bits_counter += unsigned(right - prev_gap) & is_set;
1900  return bits_counter;
1901 }
1902 
1903 /*!
1904  \brief Test if all bits are 1 in GAP buffer in the [left, right] range.
1905  \param buf - GAP buffer pointer.
1906  \param left - leftmost bit index to start from
1907  \param right- rightmost bit index
1908  \return true if all bits are "11111"
1909  @ingroup gapfunc
1910 */
1911 template<typename T>
1912 bool gap_is_all_one_range(const T* const BMRESTRICT buf,
1913  unsigned left, unsigned right) BMNOEXCEPT
1914 {
1915  BM_ASSERT(left <= right);
1916  BM_ASSERT(right < bm::gap_max_bits);
1917 
1918  unsigned is_set;
1919  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1920  if (!is_set) // GAP is 0
1921  return false;
1922  const T* const pcurr = buf + start_pos;
1923  return (right <= *pcurr);
1924 }
1925 
1926 /*!
1927  \brief Test if any bits are 1 in GAP buffer in the [left, right] range.
1928  \param buf - GAP buffer pointer.
1929  \param left - leftmost bit index to start from
1930  \param right- rightmost bit index
1931  \return true if at least 1 "00010"
1932  @ingroup gapfunc
1933 */
1934 template<typename T>
1935 bool gap_any_range(const T* const BMRESTRICT buf,
1936  unsigned left, unsigned right) BMNOEXCEPT
1937 {
1938  BM_ASSERT(left <= right);
1939  BM_ASSERT(right < bm::gap_max_bits);
1940 
1941  unsigned is_set;
1942  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1943  const T* const pcurr = buf + start_pos;
1944 
1945  if (!is_set) // start GAP is 0 ...
1946  {
1947  if (right <= *pcurr) // ...bit if the interval goes into at least 1 blk
1948  return false; // .. nope
1949  return true;
1950  }
1951  return true;
1952 }
1953 
1954 /*!
1955  \brief Test if any bits are 1 in GAP buffer in the [left, right] range
1956  and flanked with 0s
1957  \param buf - GAP buffer pointer.
1958  \param left - leftmost bit index to start from
1959  \param right- rightmost bit index
1960  \return true if "011110"
1961  @ingroup gapfunc
1962 */
1963 template<typename T>
1964 bool gap_is_interval(const T* const BMRESTRICT buf,
1965  unsigned left, unsigned right) BMNOEXCEPT
1966 {
1967  BM_ASSERT(left <= right);
1968  BM_ASSERT(left > 0); // cannot check left-1 otherwise
1969  BM_ASSERT(right < bm::gap_max_bits-1); // cannot check right+1 otherwise
1970 
1971  unsigned is_set;
1972  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1973 
1974  const T* pcurr = buf + start_pos;
1975  if (!is_set || (right != *pcurr) || (start_pos <= 1))
1976  return false;
1977  --pcurr;
1978  if (*pcurr != left-1)
1979  return false;
1980  return true;
1981 }
1982 
1983 /**
1984  \brief Searches for the last 1 bit in the 111 interval of a GAP block
1985  \param buf - BIT block buffer
1986  \param nbit - bit index to start checking from
1987  \param pos - [out] found value
1988 
1989  \return false if not found
1990  @ingroup gapfunc
1991 */
1992 template<typename T>
1993 bool gap_find_interval_end(const T* const BMRESTRICT buf,
1994  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
1995 {
1996  BM_ASSERT(pos);
1997  BM_ASSERT(nbit < bm::gap_max_bits);
1998 
1999  unsigned is_set;
2000  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2001  if (!is_set)
2002  return false;
2003  *pos = buf[start_pos];
2004  return true;
2005 }
2006 
2007 
2008 /**
2009  \brief Searches for the first 1 bit in the 111 interval of a GAP block
2010  \param buf - GAP block buffer
2011  \param nbit - bit index to start checking from
2012  \param pos - [out] found value
2013 
2014  \return false if not found
2015  @ingroup gapfunc
2016 */
2017 template<typename T>
2018 bool gap_find_interval_start(const T* const BMRESTRICT buf,
2019  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
2020 {
2021  BM_ASSERT(pos);
2022  BM_ASSERT(nbit < bm::gap_max_bits);
2023 
2024  unsigned is_set;
2025  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2026  if (!is_set)
2027  return false;
2028  --start_pos;
2029  if (!start_pos)
2030  *pos = 0;
2031  else
2032  *pos = buf[start_pos]+1;
2033  return true;
2034 }
2035 
2036 /**
2037  \brief reverse search for the first 1 bit of a GAP block
2038  \param buf - GAP block buffer
2039  \param nbit - bit index to start checking from
2040  \param pos - [out] found value
2041 
2042  \return false if not found
2043  @ingroup gapfunc
2044 */
2045 template<typename T>
2046 bool gap_find_prev(const T* const BMRESTRICT buf,
2047  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
2048 {
2049  BM_ASSERT(pos);
2050  BM_ASSERT(nbit < bm::gap_max_bits);
2051 
2052  unsigned is_set;
2053  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2054  if (is_set)
2055  {
2056  *pos = nbit;
2057  return true;
2058  }
2059  --start_pos;
2060  if (!start_pos)
2061  return false;
2062  else
2063  *pos = buf[start_pos];
2064  return true;
2065 }
2066 
2067 
2068 
2069 /*!
2070  \brief GAP block find position for the rank
2071 
2072  \param block - bit block buffer pointer
2073  \param rank - rank to find (must be > 0)
2074  \param nbit_from - start bit position in block
2075  \param nbit_pos - found position
2076 
2077  \return 0 if position with rank was found, or
2078  the remaining rank (rank - population count)
2079 
2080  @ingroup gapfunc
2081 */
2082 template<typename T, typename SIZE_TYPE>
2083 SIZE_TYPE gap_find_rank(const T* const block,
2084  SIZE_TYPE rank,
2085  unsigned nbit_from,
2086  unsigned& nbit_pos) BMNOEXCEPT
2087 {
2088  BM_ASSERT(block);
2089  BM_ASSERT(rank);
2090 
2091  const T* pcurr = block;
2092  const T* pend = pcurr + (*pcurr >> 3);
2093 
2094  unsigned bits_counter = 0;
2095  unsigned is_set;
2096  unsigned start_pos = bm::gap_bfind(block, nbit_from, &is_set);
2097  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
2098 
2099  pcurr = block + start_pos;
2100  bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
2101  if (bits_counter >= rank) // found!
2102  {
2103  nbit_pos = nbit_from + unsigned(rank) - 1u;
2104  return 0;
2105  }
2106  rank -= bits_counter;
2107  unsigned prev_gap = *pcurr++;
2108  for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
2109  {
2110  bits_counter = (*pcurr - prev_gap) & is_set;
2111  if (bits_counter >= rank) // found!
2112  {
2113  nbit_pos = prev_gap + unsigned(rank);
2114  return 0;
2115  }
2116  rank -= bits_counter;
2117  prev_gap = *pcurr++;
2118  } // for
2119 
2120  return rank;
2121 }
2122 
2123 
2124 
2125 /*!
2126  \brief Counts 1 bits in GAP buffer in the closed [0, right] range.
2127  \param buf - GAP buffer pointer.
2128  \param right- rightmost bit index
2129  \param is_corrected - if true the result will be rank corrected
2130  if right bit == true count=count-1
2131  \return Number of non-zero bits
2132  @ingroup gapfunc
2133 */
2134 template<typename T>
2135 unsigned gap_bit_count_to(const T* const buf, T right,
2136  bool is_corrected=false) BMNOEXCEPT
2137 {
2138  const T* pcurr = buf;
2139  const T* pend = pcurr + (*pcurr >> 3);
2140 
2141  unsigned bits_counter = 0;
2142  unsigned is_set = ~((unsigned(*buf) & 1u) - 1u); // 0xFFF.. if true (mask for branchless code)
2143  BM_ASSERT(is_set == 0u || is_set == ~0u);
2144  pcurr = buf + 1;
2145 
2146  if (right <= *pcurr) // we are in the target block right now
2147  {
2148  bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
2149  bits_counter -= (is_set & unsigned(is_corrected));
2150  return bits_counter;
2151  }
2152  bits_counter += (*pcurr + 1u) & is_set;
2153 
2154  unsigned prev_gap = *pcurr++;
2155  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
2156  {
2157  bits_counter += (*pcurr - prev_gap) & is_set;
2158  if (pcurr == pend)
2159  {
2160  bits_counter -= (is_set & unsigned(is_corrected));
2161  return bits_counter;
2162  }
2163  prev_gap = *pcurr++;
2164  }
2165  bits_counter += (right - prev_gap) & is_set;
2166  bits_counter -= (is_set & unsigned(is_corrected));
2167  return bits_counter;
2168 }
2169 
2170 
2171 /*!
2172  D-GAP block for_each algorithm
2173 
2174  D-Gap Functor is called for each element but last one.
2175 
2176  \param gap_buf - GAP buffer
2177  \param func - functor object
2178 
2179 */
2180 template<class T, class Func>
2181 void for_each_dgap(const T* gap_buf, Func& func)
2182 {
2183  const T* pcurr = gap_buf;
2184  const T* pend = pcurr + (*pcurr >> 3);
2185  ++pcurr;
2186 
2187  T prev = *pcurr;
2188  func((T)(prev + 1)); // first element incremented to avoid 0
2189  ++pcurr;
2190  do
2191  {
2192  func((T)(*pcurr - prev)); // all others are [N] - [N-1]
2193  prev = *pcurr;
2194  } while (++pcurr < pend);
2195 }
2196 
2197 /** d-Gap copy functor
2198  @internal
2199 */
2200 template<typename T> struct d_copy_func
2201 {
2202  d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {}
2203  void operator()(T dgap) { *dgap_buf_++ = dgap; }
2204 
2206 };
2207 
2208 /*!
2209  \brief Convert GAP buffer into D-GAP buffer
2210 
2211  Delta GAP representation is DGAP[N] = GAP[N] - GAP[N-1]
2212 
2213  \param gap_buf - GAP buffer
2214  \param dgap_buf - Delta-GAP buffer
2215  \param copy_head - flag to copy GAP header
2216 
2217  \internal
2218 
2219  @ingroup gapfunc
2220 */
2221 template<typename T>
2222 T* gap_2_dgap(const T* BMRESTRICT gap_buf,
2223  T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT
2224 {
2225  if (copy_head) // copy GAP header
2226  {
2227  *dgap_buf++ = *gap_buf;
2228  }
2229 
2230  d_copy_func<T> copy_func(dgap_buf);
2231  for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
2232  return copy_func.dgap_buf_;
2233 }
2234 
2235 /*!
2236  \brief Convert D-GAP buffer into GAP buffer
2237 
2238  GAP representation is GAP[N] = DGAP[N] + DGAP[N-1]
2239 
2240  \param dgap_buf - Delta-GAP buffer
2241  \param gap_header - GAP header word
2242  \param gap_buf - GAP buffer
2243 
2244  \internal
2245  @ingroup gapfunc
2246 */
2247 template<typename T>
2248 void dgap_2_gap(const T* BMRESTRICT dgap_buf,
2249  T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
2250 {
2251  const T* pcurr = dgap_buf;
2252  unsigned len;
2253  if (!gap_header) // GAP header is already part of the stream
2254  {
2255  len = *pcurr >> 3;
2256  *gap_buf++ = *pcurr++; // copy GAP header
2257  }
2258  else // GAP header passed as a parameter
2259  {
2260  len = gap_header >> 3;
2261  *gap_buf++ = gap_header; // assign GAP header
2262  }
2263  --len; // last element is actually not encoded
2264  const T* pend = pcurr + len;
2265 
2266  *gap_buf = *pcurr++; // copy first element
2267  if (*gap_buf == 0)
2268  *gap_buf = 65535; // fix +1 overflow
2269  else
2270  *gap_buf = T(*gap_buf - 1);
2271 
2272  for (++gap_buf; pcurr < pend; ++pcurr)
2273  {
2274  T prev = *(gap_buf-1); // don't remove temp(undef expression!)
2275  *gap_buf++ = T(*pcurr + prev);
2276  }
2277  *gap_buf = 65535; // add missing last element
2278 }
2279 
2280 
2281 /*!
2282  \brief Lexicographical comparison of GAP buffers.
2283  \param buf1 - First GAP buffer pointer.
2284  \param buf2 - Second GAP buffer pointer.
2285  \return <0 - less, =0 - equal, >0 - greater.
2286 
2287  @ingroup gapfunc
2288 */
2289 template<typename T>
2290 int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT
2291 {
2292  const T* pcurr1 = buf1;
2293  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2294  unsigned bitval1 = *buf1 & 1;
2295  ++pcurr1;
2296 
2297  const T* pcurr2 = buf2;
2298  unsigned bitval2 = *buf2 & 1;
2299  ++pcurr2;
2300 
2301  while (pcurr1 <= pend1)
2302  {
2303  if (*pcurr1 == *pcurr2)
2304  {
2305  if (bitval1 != bitval2)
2306  {
2307  return (bitval1) ? 1 : -1;
2308  }
2309  }
2310  else
2311  {
2312  if (bitval1 == bitval2)
2313  {
2314  if (bitval1)
2315  {
2316  return (*pcurr1 < *pcurr2) ? -1 : 1;
2317  }
2318  else
2319  {
2320  return (*pcurr1 < *pcurr2) ? 1 : -1;
2321  }
2322  }
2323  else
2324  {
2325  return (bitval1) ? 1 : -1;
2326  }
2327  }
2328  ++pcurr1; ++pcurr2;
2329  bitval1 ^= 1;
2330  bitval2 ^= 1;
2331  }
2332 
2333  return 0;
2334 }
2335 
2336 /*!
2337  \brief Find first bit which is different between two GAP-blocks
2338  \param buf1 - block 1
2339  \param buf2 - block 2
2340  \param pos - out - position of difference (undefined if blocks are equal)
2341  \return true if difference was found
2342 
2343  @ingroup gapfunc
2344 */
2345 template<typename T>
2346 bool gap_find_first_diff(const T* BMRESTRICT buf1,
2347  const T* BMRESTRICT buf2,
2348  unsigned* BMRESTRICT pos) BMNOEXCEPT
2349 {
2350  BM_ASSERT(buf1 && buf2 && pos);
2351 
2352  const T* pcurr1 = buf1;
2353  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2354  const T* pcurr2 = buf2;
2355  for (++pcurr1, ++pcurr2; pcurr1 <= pend1; ++pcurr1, ++pcurr2)
2356  {
2357  if (*pcurr1 != *pcurr2)
2358  {
2359  *pos = 1 + ((*pcurr1 < *pcurr2) ? *pcurr1 : *pcurr2);
2360  return true;
2361  }
2362  } // for
2363  return false;
2364 }
2365 
2366 // -------------------------------------------------------------------------
2367 //
2368 
2369 /*!
2370  \brief Abstract operation for GAP buffers.
2371  Receives functor F as a template argument
2372  \param dest - destination memory buffer.
2373  \param vect1 - operand 1 GAP encoded buffer.
2374  \param vect1_mask - XOR mask for starting bitflag for vector1
2375  can be 0 or 1 (1 inverts the vector)
2376  \param vect2 - operand 2 GAP encoded buffer.
2377  \param vect2_mask - same as vect1_mask
2378  \param dlen - destination length after the operation
2379 
2380  \note Internal function.
2381  @internal
2382 
2383  @ingroup gapfunc
2384 */
2385 template<typename T, class F>
2386 void gap_buff_op(T* BMRESTRICT dest,
2387  const T* BMRESTRICT vect1,
2388  unsigned vect1_mask,
2389  const T* BMRESTRICT vect2,
2390  unsigned vect2_mask,
2391  unsigned& dlen) BMNOEXCEPT2
2392 {
2393  const T* cur1 = vect1;
2394  const T* cur2 = vect2;
2395 
2396  T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
2397  T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
2398 
2399  T bitval = (T) F::op(bitval1, bitval2);
2400  T bitval_prev = bitval;
2401 
2402  T* res = dest;
2403  *res = bitval;
2404  ++res;
2405 
2406  T c1 = *cur1; T c2 = *cur2;
2407  while (1)
2408  {
2409  bitval = (T) F::op(bitval1, bitval2);
2410 
2411  // Check if GAP value changes and we need to
2412  // start the next one
2413  //
2414  res += (bitval != bitval_prev);
2415  bitval_prev = bitval;
2416  if (c1 < c2) // (*cur1 < *cur2)
2417  {
2418  *res = c1;
2419  ++cur1; c1 = *cur1;
2420  bitval1 ^= 1;
2421  }
2422  else // >=
2423  {
2424  *res = c2;
2425  if (c2 < c1) // (*cur2 < *cur1)
2426  {
2427  bitval2 ^= 1;
2428  }
2429  else // equal
2430  {
2431  if (c2 == (bm::gap_max_bits - 1))
2432  break;
2433 
2434  ++cur1; c1 = *cur1;
2435  bitval1 ^= 1; bitval2 ^= 1;
2436  }
2437  ++cur2; c2 = *cur2;
2438  }
2439  } // while
2440 
2441  dlen = (unsigned)(res - dest);
2442  *dest = (T)((*dest & 7) + (dlen << 3));
2443 }
2444 
2445 
2446 /*!
2447  \brief Abstract operation for GAP buffers (predicts legth)
2448  Receives functor F as a template argument
2449  \param vect1 - operand 1 GAP encoded buffer.
2450  \param vect2 - operand 2 GAP encoded buffer.
2451  \param dlen - destination length after the operation
2452  \param limit - maximum target length limit,
2453  returns false if limit is reached
2454  \return true if operation would be successfull or
2455  false if limit reached
2456 
2457  \note Internal function.
2458  @internal
2459 
2460  @ingroup gapfunc
2461 */
2462 template<typename T, class F>
2463 bool gap_buff_dry_op(const T* BMRESTRICT vect1,
2464  const T* BMRESTRICT vect2,
2465  unsigned& dlen,
2466  unsigned limit) BMNOEXCEPT2
2467 {
2468  const T* cur1 = vect1;
2469  const T* cur2 = vect2;
2470 
2471  T bitval1 = (T)((*cur1++ & 1));
2472  T bitval2 = (T)((*cur2++ & 1));
2473 
2474  T bitval = (T) F::op(bitval1, bitval2);
2475  T bitval_prev = bitval;
2476 
2477  unsigned len = 1;
2478 
2479  T c1 = *cur1; T c2 = *cur2;
2480  while (1)
2481  {
2482  bitval = (T) F::op(bitval1, bitval2);
2483 
2484  // Check if GAP value changes and we need to
2485  // start the next one
2486  //
2487  len += (bitval != bitval_prev);
2488  if (len > limit)
2489  return false;
2490  bitval_prev = bitval;
2491  if (c1 < c2)
2492  {
2493  ++cur1; c1 = *cur1;
2494  bitval1 ^= 1;
2495  }
2496  else // >=
2497  {
2498  if (c2 < c1) // (*cur2 < *cur1)
2499  {
2500  bitval2 ^= 1;
2501  }
2502  else // equal
2503  {
2504  if (c2 == (bm::gap_max_bits - 1))
2505  break;
2506 
2507  ++cur1; c1 = *cur1;
2508  bitval1 ^= 1; bitval2 ^= 1;
2509  }
2510  ++cur2; c2 = *cur2;
2511  }
2512 
2513  } // while
2514 
2515  dlen = len;
2516  return true;
2517 }
2518 
2519 
2520 /*!
2521  \brief Abstract distance test operation for GAP buffers.
2522  Receives functor F as a template argument
2523  \param vect1 - operand 1 GAP encoded buffer.
2524  \param vect1_mask - XOR mask for starting bitflag for vector1
2525  can be 0 or 1 (1 inverts the vector)
2526  \param vect2 - operand 2 GAP encoded buffer.
2527  \param vect2_mask - same as vect1_mask
2528  \note Internal function.
2529  \return non zero value if operation result returns any 1 bit
2530 
2531  @ingroup gapfunc
2532 */
2533 template<typename T, class F>
2534 unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
2535  unsigned vect1_mask,
2536  const T* BMRESTRICT vect2,
2537  unsigned vect2_mask) BMNOEXCEPT2
2538 {
2539  const T* cur1 = vect1;
2540  const T* cur2 = vect2;
2541 
2542  unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2543  unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2544 
2545  unsigned bitval = F::op(bitval1, bitval2);
2546  if (bitval)
2547  return bitval;
2548  unsigned bitval_prev = bitval;
2549 
2550  while (1)
2551  {
2552  bitval = F::op(bitval1, bitval2);
2553  if (bitval)
2554  return bitval;
2555 
2556  if (bitval != bitval_prev)
2557  bitval_prev = bitval;
2558 
2559  if (*cur1 < *cur2)
2560  {
2561  ++cur1;
2562  bitval1 ^= 1;
2563  }
2564  else // >=
2565  {
2566  if (*cur2 < *cur1)
2567  {
2568  bitval2 ^= 1;
2569  }
2570  else // equal
2571  {
2572  if (*cur2 == (bm::gap_max_bits - 1))
2573  {
2574  break;
2575  }
2576  ++cur1;
2577  bitval1 ^= 1; bitval2 ^= 1;
2578  }
2579  ++cur2;
2580  }
2581 
2582  } // while
2583 
2584  return 0;
2585 }
2586 
2587 
2588 
2589 /*!
2590  \brief Abstract distance(similarity) operation for GAP buffers.
2591  Receives functor F as a template argument
2592  \param vect1 - operand 1 GAP encoded buffer.
2593  \param vect2 - operand 2 GAP encoded buffer.
2594  \note Internal function.
2595 
2596  @ingroup gapfunc
2597 */
2598 template<typename T, class F>
2599 unsigned gap_buff_count_op(const T* vect1, const T* vect2) BMNOEXCEPT2
2600 {
2601  unsigned count;// = 0;
2602  const T* cur1 = vect1;
2603  const T* cur2 = vect2;
2604 
2605  unsigned bitval1 = (*cur1++ & 1);
2606  unsigned bitval2 = (*cur2++ & 1);
2607  unsigned bitval = count = F::op(bitval1, bitval2);
2608  unsigned bitval_prev = bitval;
2609 
2610  T res, res_prev;
2611  res = res_prev = 0;
2612 
2613  while (1)
2614  {
2615  bitval = F::op(bitval1, bitval2);
2616  // Check if GAP value changes and we need to
2617  // start the next one.
2618  if (bitval != bitval_prev)
2619  {
2620  bitval_prev = bitval;
2621  res_prev = res;
2622  }
2623 
2624  if (*cur1 < *cur2)
2625  {
2626  res = *cur1;
2627  if (bitval)
2628  {
2629  count += res - res_prev;
2630  res_prev = res;
2631  }
2632  ++cur1; bitval1 ^= 1;
2633  }
2634  else // >=
2635  {
2636  res = *cur2;
2637  if (bitval)
2638  {
2639  count += res - res_prev;
2640  res_prev = res;
2641  }
2642  if (*cur2 < *cur1)
2643  {
2644  bitval2 ^= 1;
2645  }
2646  else // equal
2647  {
2648  if (*cur2 == (bm::gap_max_bits - 1))
2649  break;
2650 
2651  ++cur1;
2652  bitval1 ^= 1; bitval2 ^= 1;
2653  }
2654  ++cur2;
2655  }
2656 
2657  } // while
2658 
2659  return count;
2660 }
2661 
2662 
2663 #ifdef __GNUG__
2664 #pragma GCC diagnostic push
2665 #pragma GCC diagnostic ignored "-Wconversion"
2666 #endif
2667 
2668 /*!
2669  \brief Sets or clears bit in the GAP buffer.
2670 
2671  \param val - new bit value
2672  \param buf - GAP buffer.
2673  \param pos - Index of bit to set.
2674  \param is_set - (OUT) flag if bit was actually set.
2675 
2676  \return New GAP buffer length.
2677 
2678  @ingroup gapfunc
2679 */
2680 template<typename T>
2681 unsigned gap_set_value(unsigned val,
2682  T* BMRESTRICT buf,
2683  unsigned pos,
2684  unsigned* BMRESTRICT is_set) BMNOEXCEPT
2685 {
2686  BM_ASSERT(pos < bm::gap_max_bits);
2687 
2688  unsigned curr = bm::gap_bfind(buf, pos, is_set);
2689  T end = (T)(*buf >> 3);
2690  if (*is_set == val)
2691  {
2692  *is_set = 0;
2693  return end;
2694  }
2695  *is_set = 1;
2696 
2697  T* pcurr = buf + curr;
2698  T* pprev = pcurr - 1;
2699  T* pend = buf + end;
2700 
2701  // Special case, first bit GAP operation. There is no platform beside it.
2702  // initial flag must be inverted.
2703  if (!pos)
2704  {
2705  *buf ^= 1;
2706  if (buf[1]) // We need to insert a 1 bit GAP here
2707  {
2708  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2709  buf[1] = 0;
2710  ++end;
2711  }
2712  else // Only 1 bit in the GAP. We need to delete the first GAP.
2713  {
2714  pprev = buf + 1; pcurr = pprev + 1;
2715  goto copy_gaps;
2716  }
2717  }
2718  else
2719  if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
2720  {
2721  ++(*pprev);
2722  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2723  {
2724  --end;
2725  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2726  {
2727  ++pcurr;
2728  copy_gaps:
2729  --end;
2730  do { *pprev++ = *pcurr++; } while (pcurr < pend);
2731  }
2732  }
2733  }
2734  else
2735  if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2736  {
2737  --(*pcurr);
2738  end += (pcurr == pend);
2739  }
2740  else // Worst case: split current GAP
2741  {
2742  if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
2743  ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
2744  end += 2;
2745  pcurr[0] = (T)(pos-1);
2746  pcurr[1] = (T)pos;
2747  }
2748 
2749  // Set correct length word and last border word
2750  *buf = (T)((*buf & 7) + (end << 3));
2751  buf[end] = bm::gap_max_bits-1;
2752  return end;
2753 }
2754 
2755 /*!
2756  \brief Sets or clears bit in the GAP buffer.
2757 
2758  \param val - new bit value
2759  \param buf - GAP buffer.
2760  \param pos - Index of bit to set.
2761 
2762  \return New GAP buffer length.
2763 
2764  @ingroup gapfunc
2765 */
2766 template<typename T>
2767 unsigned gap_set_value(unsigned val,
2768  T* BMRESTRICT buf,
2769  unsigned pos) BMNOEXCEPT
2770 {
2771  BM_ASSERT(pos < bm::gap_max_bits);
2772  unsigned is_set;
2773  unsigned curr = bm::gap_bfind(buf, pos, &is_set);
2774  T end = (T)(*buf >> 3);
2775  if (is_set == val)
2776  return end;
2777 
2778  T* pcurr = buf + curr;
2779  T* pprev = pcurr - 1;
2780  T* pend = buf + end;
2781 
2782  // Special case, first bit GAP operation. There is no platform beside it.
2783  // initial flag must be inverted.
2784  if (!pos)
2785  {
2786  *buf ^= 1;
2787  if (buf[1]) // We need to insert a 1 bit GAP here
2788  {
2789  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2790  buf[1] = 0;
2791  ++end;
2792  }
2793  else // Only 1 bit in the GAP. We need to delete the first GAP.
2794  {
2795  pprev = buf + 1; pcurr = pprev + 1;
2796  goto copy_gaps;
2797  }
2798  }
2799  else
2800  if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
2801  {
2802  ++(*pprev);
2803  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2804  {
2805  --end;
2806  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2807  {
2808  ++pcurr;
2809  copy_gaps:
2810  --end;
2811  do { *pprev++ = *pcurr++; } while (pcurr < pend);
2812  }
2813  }
2814  }
2815  else
2816  if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2817  {
2818  --(*pcurr);
2819  end += (pcurr == pend);
2820  }
2821  else // Worst case: split current GAP
2822  {
2823  if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
2824  ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
2825  end += 2;
2826  pcurr[0] = (T)(pos-1);
2827  pcurr[1] = (T)pos;
2828  }
2829 
2830  // Set correct length word and last border word
2831  *buf = (T)((*buf & 7) + (end << 3));
2832  buf[end] = bm::gap_max_bits-1;
2833  return end;
2834 }
2835 
2836 /*!
2837  \brief Add new value to the end of GAP buffer.
2838 
2839  \param buf - GAP buffer.
2840  \param pos - Index of bit to set.
2841 
2842  \return New GAP buffer length.
2843 
2844  @ingroup gapfunc
2845 */
2846 template<typename T>
2847 unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT
2848 {
2849  BM_ASSERT(pos < bm::gap_max_bits);
2850 
2851  T end = (T)(*buf >> 3);
2852  T curr = end;
2853  T* pcurr = buf + end;
2854  T* pend = pcurr;
2855  T* pprev = pcurr - 1;
2856 
2857  // Special case, first bit GAP operation. There is no platform beside it.
2858  // initial flag must be inverted.
2859  if (!pos)
2860  {
2861  *buf ^= 1;
2862  if ( buf[1] ) // We need to insert a 1 bit platform here.
2863  {
2864  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2865  buf[1] = 0;
2866  ++end;
2867  }
2868  else // Only 1 bit in the GAP. We need to delete the first GAP.
2869  {
2870  pprev = buf + 1; pcurr = pprev + 1;
2871  --end;
2872  do { *pprev++ = *pcurr++; } while (pcurr < pend);
2873  }
2874  }
2875  else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
2876  {
2877  ++(*pprev);
2878  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2879  {
2880  --end;
2881  BM_ASSERT(pcurr == pend);
2882  }
2883  }
2884  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2885  {
2886  --(*pcurr);
2887  end += (pcurr == pend);
2888  }
2889  else // Worst case we need to split current block.
2890  {
2891  pcurr[0] = (T)(pos-1);
2892  pcurr[1] = (T)pos;
2893  end = (T)(end+2);
2894  }
2895 
2896  // Set correct length word.
2897  *buf = (T)((*buf & 7) + (end << 3));
2898  buf[end] = bm::gap_max_bits - 1;
2899  return end;
2900 }
2901 
2902 #ifdef __GNUG__
2903 #pragma GCC diagnostic pop
2904 #endif
2905 
2906 
2907 /*!
2908  @brief Right shift GAP block by 1 bit
2909  @param buf - block pointer
2910  @param co_flag - carry over from the previous block
2911  @param new_len - output length of the GAP block after the operation
2912 
2913  @return carry over bit (1 or 0)
2914  @ingroup gapfunc
2915 */
2916 template<typename T>
2918  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
2919 {
2920  BM_ASSERT(new_len);
2921  bool co;
2922  // 1: increment all GAP values by 1
2923  {
2924  unsigned bitval = *buf & 1;
2925  if (buf[1] == bm::gap_max_bits-1) // full GAP block
2926  {
2927  co = bitval;
2928  }
2929  else
2930  {
2931  unsigned len = (*buf >> 3);
2932  unsigned i = 1;
2933  for (; i < len; ++i)
2934  {
2935  buf[i]++;
2936  bitval ^= 1;
2937  } // for i
2938  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
2939  if (buf[i-1] == bm::gap_max_bits-1) // last element shifts out
2940  {
2941  // Set correct length word
2942  --len;
2943  *buf = (T)((*buf & 7) + (len << 3));
2944  }
2945  co = bitval;
2946  }
2947  }
2948  // set bit position 0 with carry-in flag
2949  {
2950  unsigned is_set;
2951  *new_len = bm::gap_set_value(co_flag, buf, 0, &is_set);
2952  }
2953  return co;
2954 }
2955 
2956 /*!
2957  @brief Left shift GAP block by 1 bit
2958  @param buf - block pointer
2959  @param co_flag - carry over from the previous block
2960  @param new_len - new length of the block
2961 
2962  @return carry over bit (1 or 0)
2963  @ingroup gapfunc
2964 */
2965 template<typename T>
2967  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
2968 {
2969  BM_ASSERT(new_len);
2970  unsigned is_set;
2971 
2972  // 1: decrement all GAP values by 1
2973  //
2974  unsigned bitval = *buf & 1;
2975  bool co0 = bitval;
2976 
2977  if (!buf[1]) // cannot decrement (corner case)
2978  {
2979  bitval ^= 1;
2980  *new_len = bm::gap_set_value(bitval, buf, 0, &is_set);
2981 
2982  BM_ASSERT(is_set);
2983  BM_ASSERT(buf[1]);
2984  BM_ASSERT(bitval == unsigned(*buf & 1u));
2985 
2986  if (*new_len == 1)
2987  {
2988  *new_len = bm::gap_set_value(co_flag, buf,
2989  bm::gap_max_bits-1, &is_set);
2990  return co0;
2991  }
2992  }
2993  if (buf[1] != bm::gap_max_bits-1) // full GAP block
2994  {
2995  BM_ASSERT(buf[1]);
2996  unsigned len = (*buf >> 3);
2997  unsigned i = 1;
2998  for (; i < len; ++i)
2999  {
3000  buf[i]--;
3001  bitval ^= 1;
3002  } // for i
3003  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
3004  }
3005  // 2: set last bit position with carry-in flag
3006  //
3007  *new_len = bm::gap_set_value(co_flag, buf, bm::gap_max_bits-1, &is_set);
3008  return co0;
3009 }
3010 
3011 
3012 /*!
3013  \brief Convert array to GAP buffer.
3014 
3015  \param buf - GAP buffer.
3016  \param arr - array of values to set
3017  \param len - length of the array
3018 
3019  \return New GAP buffer length.
3020 
3021  @ingroup gapfunc
3022 */
3023 
3024 template<typename T>
3025 unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT
3026 {
3027  *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
3028 
3029  T* pcurr = buf + 1;
3030 
3031  unsigned i = 0;
3032  T curr = arr[i];
3033  if (curr != 0) // need to add the first gap: (0 to arr[0]-1)
3034  {
3035  *pcurr = (T)(curr - 1);
3036  ++pcurr;
3037  }
3038  else
3039  {
3040  ++(*buf); // GAP starts with 1
3041  }
3042  T prev = curr;
3043  T acc = prev;
3044 
3045  for (i = 1; i < len; ++i)
3046  {
3047  curr = arr[i];
3048  if (curr == prev + 1)
3049  {
3050  ++acc;
3051  prev = curr;
3052  }
3053  else
3054  {
3055  *pcurr++ = acc;
3056  acc = curr;
3057  *pcurr++ = (T)(curr-1);
3058  }
3059  prev = curr;
3060  }
3061  *pcurr = acc;
3062  if (acc != bm::gap_max_bits - 1)
3063  {
3064  ++pcurr;
3065  *pcurr = bm::gap_max_bits - 1;
3066  }
3067 
3068  unsigned gap_len = unsigned(pcurr - buf);
3069  BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
3070 
3071  *buf = (T)((*buf & 7) + (gap_len << 3));
3072  return gap_len+1;
3073 }
3074 
3075 
3076 //------------------------------------------------------------------------
3077 
3078 /**
3079  \brief Compute number of GAPs in bit-array
3080  \param arr - array of BITs
3081  \param len - array length
3082 
3083  @ingroup gapfunc
3084 */
3085 template<typename T>
3086 unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT
3087 {
3088  unsigned gap_count = 1;
3089  T prev = arr[0];
3090  if (prev > 0)
3091  ++gap_count;
3092  for (unsigned i = 1; i < len; ++i)
3093  {
3094  T curr = arr[i];
3095  if (curr != prev + 1)
3096  {
3097  gap_count += 2;
3098  }
3099  prev = curr;
3100  }
3101  return gap_count;
3102 }
3103 
3104 
3105 //------------------------------------------------------------------------
3106 
3107 /**
3108  \brief Searches for the next 1 bit in the GAP block
3109  \param buf - GAP buffer
3110  \param nbit - bit index to start checking from.
3111  \param prev - returns previously checked value
3112 
3113  \return 0 if not found
3114 
3115  @ingroup gapfunc
3116 */
3117 template<typename T>
3118 unsigned gap_block_find(const T* BMRESTRICT buf,
3119  unsigned nbit,
3121 {
3122  BM_ASSERT(nbit < bm::gap_max_bits);
3123 
3124  unsigned bitval;
3125  unsigned gap_idx = bm::gap_bfind(buf, nbit, &bitval);
3126 
3127  if (bitval) // positive block.
3128  {
3129  *prev = nbit;
3130  return 1u;
3131  }
3132  unsigned val = buf[gap_idx] + 1;
3133  *prev = val;
3134  return (val != bm::gap_max_bits); // no bug here.
3135 }
3136 
3137 //------------------------------------------------------------------------
3138 
3139 
3140 /*!
3141  \brief Set 1 bit in a block
3142  @ingroup bitfunc
3143 */
3145 void set_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
3146 {
3147  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3148  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3149  nbit &= bm::set_word_mask;
3150  dest[nword] |= unsigned(1u << nbit);
3151 }
3152 
3153 /*!
3154  \brief Set 1 bit in a block
3155  @ingroup bitfunc
3156 */
3158 void clear_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
3159 {
3160  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3161  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3162  nbit &= bm::set_word_mask;
3163  dest[nword] &= ~(unsigned(1u << nbit));
3164 }
3165 
3166 /*!
3167  \brief Test 1 bit in a block
3168 
3169  @ingroup bitfunc
3170 */
3172 unsigned test_bit(const unsigned* block, unsigned bitpos) BMNOEXCEPT
3173 {
3174  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3175  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3176  nbit &= bm::set_word_mask;
3177  return (block[nword] >> nbit) & 1u;
3178 }
3179 
3180 
3181 /*!
3182  \brief Sets bits to 1 in the bitblock.
3183  \param dest - Bitset buffer.
3184  \param bitpos - Offset of the start bit.
3185  \param bitcount - number of bits to set.
3186 
3187  @ingroup bitfunc
3188 */
3189 inline
3190 void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
3191 {
3192  const unsigned maskFF = ~0u;
3193 
3194  dest += unsigned(bitpos >> bm::set_word_shift); // nword
3195  bitpos &= bm::set_word_mask;
3196 
3197  if (bitcount == 1u) // special case (only 1 bit to set)
3198  {
3199  *dest |= (1u << bitpos);
3200  return;
3201  }
3202 
3203  if (bitpos) // starting pos is not aligned
3204  {
3205  unsigned mask_r = maskFF << bitpos;
3206  unsigned right_margin = bitpos + bitcount;
3207  if (right_margin < 32)
3208  {
3209  *dest |= (maskFF >> (32 - right_margin)) & mask_r;
3210  return;
3211  }
3212  *dest++ |= mask_r;
3213  bitcount -= 32 - bitpos;
3214  }
3215  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3216  dest[0] = dest[1] = maskFF;
3217  if (bitcount >= 32)
3218  {
3219  *dest++ = maskFF; bitcount -= 32;
3220  }
3221  if (bitcount)
3222  {
3223  *dest |= maskFF >> (32 - bitcount);
3224  }
3225 }
3226 
3227 
3228 /*!
3229  \brief SUB (AND NOT) bit interval to 1 in the bitblock.
3230  \param dest - Bitset buffer.
3231  \param bitpos - Offset of the start bit.
3232  \param bitcount - number of bits to set.
3233 
3234  @ingroup bitfunc
3235 */
3236 inline
3237 void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
3238 {
3239  const unsigned maskFF = ~0u;
3240 
3241  dest += unsigned(bitpos >> bm::set_word_shift); // nword
3242  bitpos &= bm::set_word_mask;
3243 
3244  if (bitcount == 1u) // special case (only 1 bit to set)
3245  {
3246  *dest &= ~(1u << bitpos);
3247  return;
3248  }
3249 
3250  if (bitpos) // starting pos is not aligned
3251  {
3252  unsigned mask_r = maskFF << bitpos;
3253  unsigned right_margin = bitpos + bitcount;
3254  if (right_margin < 32)
3255  {
3256  *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
3257  return;
3258  }
3259  *dest++ &= ~mask_r;
3260  bitcount -= 32 - bitpos;
3261  }
3262  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3263  dest[0] = dest[1] = 0u;
3264  if (bitcount >= 32)
3265  {
3266  *dest++ = 0u; bitcount -= 32;
3267  }
3268  if (bitcount)
3269  {
3270  *dest &= ~(maskFF >> (32 - bitcount));
3271  }
3272 }
3273 
3274 
3275 
3276 /*!
3277  \brief XOR bit interval to 1 in the bitblock.
3278  \param dest - Bitset buffer.
3279  \param bitpos - Offset of the start bit.
3280  \param bitcount - number of bits to set.
3281 
3282  @ingroup bitfunc
3283 */
3284 inline void xor_bit_block(unsigned* dest,
3285  unsigned bitpos,
3286  unsigned bitcount) BMNOEXCEPT
3287 {
3288  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3289  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3290  nbit &= bm::set_word_mask;
3291 
3292  bm::word_t* word = dest + nword;
3293 
3294  if (bitcount == 1) // special case (only 1 bit to set)
3295  {
3296  *word ^= unsigned(1 << nbit);
3297  return;
3298  }
3299 
3300  if (nbit) // starting position is not aligned
3301  {
3302  unsigned right_margin = nbit + bitcount;
3303 
3304  // here we checking if we setting bits only in the current
3305  // word. Example: 00111000000000000000000000000000 (32 bits word)
3306 
3307  if (right_margin < 32)
3308  {
3309  unsigned mask =
3311  bm::block_set_table<true>::_left[right_margin-1];
3312  *word ^= mask;
3313  return;
3314  }
3315  *word ^= block_set_table<true>::_right[nbit];
3316  bitcount -= 32 - nbit;
3317  ++word;
3318  }
3319  for ( ;bitcount >= 64; bitcount-=64, word+=2)
3320  {
3321  word[0] ^= ~0u; word[1] ^= ~0u;
3322  }
3323  if (bitcount >= 32)
3324  {
3325  *word++ ^= ~0u; bitcount -= 32;
3326  }
3327  if (bitcount)
3328  *word ^= block_set_table<true>::_left[bitcount-1];
3329 }
3330 
3331 
3332 /*!
3333  \brief SUB (AND NOT) GAP block to bitblock.
3334  \param dest - bitblock buffer pointer.
3335  \param pcurr - GAP buffer pointer.
3336 
3337  @ingroup gapfunc
3338 */
3339 template<typename T>
3340 void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
3341  const T* BMRESTRICT pcurr) BMNOEXCEPT
3342 {
3343  BM_ASSERT(dest && pcurr);
3344 
3345  const T* pend = pcurr + (*pcurr >> 3);
3346  if (*pcurr & 1) // Starts with 1
3347  {
3348  bm::sub_bit_block(dest, 0, 1 + pcurr[1]);
3349  ++pcurr;
3350  }
3351  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3352  {
3353  BM_ASSERT(*pcurr > pcurr[-1]);
3354  bm::sub_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3355  }
3356 }
3357 
3358 
3359 /*!
3360  \brief SUB (AND NOT) GAP block to bitblock with digest assist
3361 
3362  \param dest - bitblock buffer pointer.
3363  \param pcurr - GAP buffer pointer.
3364  \param digest0 - digest of 0 strides inside bit block
3365 
3366  @ingroup gapfunc
3367 */
3368 template<typename T>
3369 void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
3370  const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
3371 {
3372  BM_ASSERT(dest && pcurr);
3373 
3374  const T* pend = pcurr + (*pcurr >> 3);
3375  if (*pcurr & 1) // Starts with 1
3376  {
3377  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
3378  if (!all_zero)
3379  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3380  pcurr += 3;
3381  }
3382  else
3383  pcurr += 2;
3384 
3385  // wind forward to digest start
3386  {
3387  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3388  unsigned start_pos = tz << set_block_digest_pos_shift;
3389  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3390  {
3391  if (*pcurr >= start_pos)
3392  break;
3393  }
3394  }
3395 
3396  unsigned lz = bm::count_leading_zeros_u64(digest0);
3397  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3398 
3399  unsigned bc, pos;
3400  T prev;
3401  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "1" again
3402  {
3403  BM_ASSERT(*pcurr > *(pcurr-1));
3404  prev = pcurr[-1];
3405  bc = *pcurr - prev;
3406  pos = 1u + prev;
3407 
3408  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3409  if (!all_zero)
3410  bm::sub_bit_block(dest, pos, bc);
3411 
3412  if (pos > stop_pos)
3413  break; // early break is possible based on digest tail
3414 
3415  } // for
3416 }
3417 
3418 
3419 
3420 /*!
3421  \brief XOR GAP block to bitblock.
3422  \param dest - bitblock buffer pointer.
3423  \param pcurr - GAP buffer pointer.
3424 
3425  @ingroup gapfunc
3426 */
3427 template<typename T>
3428 void gap_xor_to_bitset(unsigned* BMRESTRICT dest,
3429  const T* BMRESTRICT pcurr) BMNOEXCEPT
3430 {
3431  BM_ASSERT(dest && pcurr);
3432 
3433  const T* pend = pcurr + (*pcurr >> 3);
3434  if (*pcurr & 1) // Starts with 1
3435  {
3436  bm::xor_bit_block(dest, 0, 1 + pcurr[1]);
3437  ++pcurr;
3438  }
3439  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3440  {
3441  BM_ASSERT(*pcurr > pcurr[-1]);
3442  bm::xor_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3443  }
3444 }
3445 
3446 
3447 /*!
3448  \brief Adds(OR) GAP block to bitblock.
3449  \param dest - bitblock buffer pointer.
3450  \param pcurr - GAP buffer pointer.
3451  \param len - gap length
3452 
3453  @ingroup gapfunc
3454 */
3455 template<typename T>
3456 void gap_add_to_bitset(unsigned* BMRESTRICT dest,
3457  const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
3458 {
3459  BM_ASSERT(dest && pcurr);
3460 
3461  const T* pend = pcurr + len;
3462  if (*pcurr & 1) // Starts with 1
3463  {
3464  bm::or_bit_block(dest, 0, 1 + pcurr[1]);
3465  pcurr += 3;
3466  }
3467  else
3468  pcurr += 2;
3469 
3470  unsigned bc, pos;
3471  for (; pcurr <= pend; )
3472  {
3473  BM_ASSERT(*pcurr > pcurr[-1]);
3474  pos = 1u + pcurr[-1];
3475  bc = *pcurr - pcurr[-1];
3476  pcurr += 2;
3477  bm::or_bit_block(dest, pos, bc);
3478  }
3479 }
3480 
3481 
3482 /*!
3483  \brief Adds(OR) GAP block to bitblock.
3484  \param dest - bitblock buffer pointer.
3485  \param pcurr - GAP buffer pointer.
3486 
3487  @ingroup gapfunc
3488 */
3489 template<typename T>
3490 void gap_add_to_bitset(unsigned* BMRESTRICT dest,
3491  const T* BMRESTRICT pcurr) BMNOEXCEPT
3492 {
3493  unsigned len = (*pcurr >> 3);
3494  gap_add_to_bitset(dest, pcurr, len);
3495 }
3496 
3497 
3498 /*!
3499  \brief ANDs GAP block to bitblock.
3500  \param dest - bitblock buffer pointer.
3501  \param pcurr - GAP buffer pointer.
3502 
3503  @ingroup gapfunc
3504 */
3505 template<typename T>
3506 void gap_and_to_bitset(unsigned* BMRESTRICT dest,
3507  const T* BMRESTRICT pcurr) BMNOEXCEPT
3508 {
3509  BM_ASSERT(dest && pcurr);
3510 
3511  const T* pend = pcurr + (*pcurr >> 3);
3512  if (!(*pcurr & 1) ) // Starts with 0
3513  {
3514  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3515  pcurr += 3;
3516  }
3517  else
3518  pcurr += 2;
3519 
3520  unsigned bc, pos;
3521  for (; pcurr <= pend; ) // now we are in GAP "0" again
3522  {
3523  BM_ASSERT(*pcurr > *(pcurr-1));
3524  pos = 1u + pcurr[-1];
3525  bc = *pcurr - pcurr[-1];
3526  pcurr += 2;
3527  bm::sub_bit_block(dest, pos, bc);
3528  }
3529 }
3530 
3531 
3532 /*!
3533  \brief ANDs GAP block to bitblock with digest assist
3534  \param dest - bitblock buffer pointer.
3535  \param pcurr - GAP buffer pointer.
3536  \param digest0 - digest of 0 strides for the destination
3537 
3538  @ingroup gapfunc
3539 */
3540 template<typename T>
3541 void gap_and_to_bitset(unsigned* BMRESTRICT dest,
3542  const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
3543 {
3544  BM_ASSERT(dest && pcurr);
3545  if (!digest0)
3546  return;
3547 
3548  const T* pend = pcurr + (*pcurr >> 3);
3549  if (!(*pcurr & 1) ) // Starts with 0
3550  {
3551  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
3552  if (!all_zero)
3553  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3554  pcurr += 3;
3555  }
3556  else
3557  pcurr += 2;
3558 
3559  // wind forward to digest start
3560  {
3561  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3562  unsigned start_pos = tz << set_block_digest_pos_shift;
3563  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3564  {
3565  if (*pcurr >= start_pos)
3566  break;
3567  }
3568  }
3569 
3570  unsigned lz = bm::count_leading_zeros_u64(digest0);
3571  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3572 
3573  unsigned bc, pos;
3574  T prev;
3575  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0" again
3576  {
3577  BM_ASSERT(*pcurr > *(pcurr-1));
3578 
3579  prev = pcurr[-1];
3580  bc = *pcurr - prev;
3581  pos = 1u + prev;
3582 
3583  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3584  if (!all_zero)
3585  bm::sub_bit_block(dest, pos, bc);
3586 
3587  if (pos > stop_pos) // early break is possible based on digest tail
3588  break;
3589 
3590  } // for
3591 }
3592 
3593 
3594 /*!
3595  \brief Compute bitcount of bit block AND masked by GAP block
3596  \param block - bitblock buffer pointer
3597  \param pcurr - GAP buffer pointer
3598  \return bitcount - cardinality of the AND product
3599 
3600  @ingroup gapfunc
3601 */
3602 template<typename T>
3604  const T* BMRESTRICT pcurr) BMNOEXCEPT
3605 {
3606  BM_ASSERT(block);
3607  const T* pend = pcurr + (*pcurr >> 3);
3608  bm::id_t count = 0;
3609  if (*pcurr & 1) // Starts with 1
3610  {
3611  count += bm::bit_block_calc_count_range(block, 0, pcurr[1]);
3612  ++pcurr;
3613  }
3614  for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
3615  {
3616  count += bm::bit_block_calc_count_range(block, pcurr[-1]+1, *pcurr);
3617  }
3618  return count;
3619 }
3620 
3621 
3622 /*!
3623  \brief Bitcount test of bit block AND masked by GAP block.
3624  \param block - bitblock buffer pointer
3625  \param pcurr - GAP buffer pointer
3626  \return non-zero value if AND produces any result
3627 
3628  @ingroup gapfunc
3629 */
3630 template<typename T>
3632  const T* BMRESTRICT pcurr) BMNOEXCEPT
3633 {
3634  BM_ASSERT(block);
3635 
3636  const T* pend = pcurr + (*pcurr >> 3);
3637  bm::id_t count = 0;
3638  if (*pcurr & 1) // Starts with 1
3639  {
3640  count = bm::bit_block_any_range(block, 0, pcurr[1]);
3641  ++pcurr;
3642  }
3643  for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
3644  {
3645  count = bm::bit_block_any_range(block, pcurr[-1]+1, *pcurr);
3646  }
3647  return count;
3648 }
3649 
3650 
3651 
3652 /*!
3653  \brief Compute bitcount of bit block SUB masked by GAP block.
3654  \param block - bitblock buffer pointer.
3655  \param buf - GAP buffer pointer.
3656  \return bit-count result of AND NOT operation
3657 
3658  @ingroup gapfunc
3659 */
3660 template<typename T>
3662  const T* BMRESTRICT buf) BMNOEXCEPT
3663 {
3664  BM_ASSERT(block);
3665 
3666  const T* pcurr = buf;
3667  const T* pend = pcurr + (*pcurr >> 3);
3668  ++pcurr;
3669 
3670  bm::id_t count = 0;
3671 
3672  if (!(*buf & 1)) // Starts with 0
3673  {
3674  count += bit_block_calc_count_range(block, 0, *pcurr);
3675  ++pcurr;
3676  }
3677  ++pcurr; // now we are in GAP "0" again
3678 
3679  for (;pcurr <= pend; pcurr+=2)
3680  {
3681  count += bm::bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr);
3682  }
3683  return count;
3684 }
3685 
3686 
3687 /*!
3688  \brief Compute bitcount test of bit block SUB masked by GAP block
3689  \param block - bitblock buffer pointer
3690  \param buf - GAP buffer pointer
3691  \return non-zero value if AND NOT produces any 1 bits
3692 
3693  @ingroup gapfunc
3694 */
3695 template<typename T>
3697  const T* BMRESTRICT buf) BMNOEXCEPT
3698 {
3699  BM_ASSERT(block);
3700 
3701  const T* pcurr = buf;
3702  const T* pend = pcurr + (*pcurr >> 3);
3703  ++pcurr;
3704 
3705  bm::id_t count = 0;
3706 
3707  if (!(*buf & 1)) // Starts with 0
3708  {
3709  count += bit_block_any_range(block, 0, *pcurr);
3710  if (count)
3711  return count;
3712  ++pcurr;
3713  }
3714  ++pcurr; // now we are in GAP "0" again
3715 
3716  for (; !count && pcurr <= pend; pcurr+=2)
3717  {
3718  count += bm::bit_block_any_range(block, *(pcurr-1)+1, *pcurr);
3719  }
3720  return count;
3721 }
3722 
3723 
3724 
3725 /*!
3726  \brief Compute bitcount of bit block XOR masked by GAP block
3727  \param block - bitblock buffer pointer
3728  \param buf - GAP buffer pointer
3729  \return bit count value of XOR operation
3730 
3731  @ingroup gapfunc
3732 */
3733 template<typename T>
3735  const T* BMRESTRICT buf) BMNOEXCEPT
3736 {
3737  BM_ASSERT(block);
3738 
3739  const T* pcurr = buf;
3740  const T* pend = pcurr + (*pcurr >> 3);
3741  ++pcurr;
3742 
3743  unsigned bitval = *buf & 1;
3744 
3745  bm::id_t count = bm::bit_block_calc_count_range(block, 0, *pcurr);
3746  if (bitval)
3747  {
3748  count = *pcurr + 1 - count;
3749  }
3750 
3751  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3752  {
3753  T prev = (T)(*(pcurr-1)+1);
3754  bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr);
3755 
3756  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3757  c = (*pcurr - prev + 1) - c;
3758  count += c;
3759  }
3760  return count;
3761 }
3762 
3763 /*!
3764  \brief Compute bitcount test of bit block XOR masked by GAP block.
3765  \param block - bitblock buffer pointer
3766  \param buf - GAP buffer pointer
3767  \return non-zero value if XOR returns anything
3768 
3769  @ingroup gapfunc
3770 */
3771 template<typename T>
3773  const T* BMRESTRICT buf) BMNOEXCEPT
3774 {
3775  BM_ASSERT(block);
3776 
3777  const T* pcurr = buf;
3778  const T* pend = pcurr + (*pcurr >> 3);
3779  ++pcurr;
3780 
3781  unsigned bitval = *buf & 1;
3782 
3783  bm::id_t count = bit_block_any_range(block, 0, *pcurr);
3784  if (bitval)
3785  count = *pcurr + 1 - count;
3786 
3787  for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
3788  {
3789  T prev = (T)(*(pcurr-1)+1);
3790  bm::id_t c = bit_block_any_range(block, prev, *pcurr);
3791 
3792  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3793  c = (*pcurr - prev + 1) - c;
3794  count += c;
3795  }
3796  return count;
3797 }
3798 
3799 
3800 
3801 /*!
3802  \brief Compute bitcount of bit block OR masked by GAP block.
3803  \param block - bitblock buffer pointer.
3804  \param buf - GAP buffer pointer.
3805  \return bit count of OR
3806 
3807  @ingroup gapfunc
3808 */
3809 template<typename T>
3811  const T* BMRESTRICT buf) BMNOEXCEPT
3812 {
3813  BM_ASSERT(block);
3814  const T* pcurr = buf;
3815  const T* pend = pcurr + (*pcurr >> 3);
3816  ++pcurr;
3817 
3818  unsigned bitval = *buf & 1;
3819 
3820  bm::id_t count = bitval ? *pcurr + 1
3821  : bm::bit_block_calc_count_range(block, 0, *pcurr);
3822  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3823  {
3824  T prev = (T)(*(pcurr-1)+1);
3825  bm::id_t c =
3826  bitval ? (*pcurr - prev + 1)
3827  : bm::bit_block_calc_count_range(block, prev, *pcurr);
3828  count += c;
3829  }
3830  return count;
3831 }
3832 
3833 /*!
3834  \brief Compute bitcount test of bit block OR masked by GAP block
3835  \param block - bitblock buffer pointer
3836  \param buf - GAP buffer pointer
3837  \return non zero value if union (OR) returns anything
3838 
3839  @ingroup gapfunc
3840 */
3841 template<typename T>
3842 bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block,
3843  const T* BMRESTRICT buf) BMNOEXCEPT
3844 {
3845  bool b = !bm::gap_is_all_zero(buf) ||
3846  !bm::bit_is_all_zero(block);
3847  return b;
3848 }
3849 
3850 
3851 
3852 /*!
3853  \brief Bitblock memset operation.
3854 
3855  \param dst - destination block.
3856  \param value - value to set.
3857 
3858  @ingroup bitfunc
3859 */
3860 inline
3862 {
3863 #ifdef BMVECTOPT
3864  VECT_SET_BLOCK(dst, value);
3865 #else
3866  ::memset(dst, int(value), bm::set_block_size * sizeof(bm::word_t));
3867 #endif
3868 }
3869 
3870 
3871 /*!
3872  \brief GAP block to bitblock conversion.
3873  \param dest - bitblock buffer pointer.
3874  \param buf - GAP buffer pointer.
3875 
3876  @ingroup gapfunc
3877 */
3878 template<typename T>
3879 void gap_convert_to_bitset(unsigned* BMRESTRICT dest,
3880  const T* BMRESTRICT buf) BMNOEXCEPT
3881 {
3882  bm::bit_block_set(dest, 0);
3883  bm::gap_add_to_bitset(dest, buf);
3884 }
3885 
3886 
3887 
3888 /*!
3889  \brief Smart GAP block to bitblock conversion.
3890 
3891  Checks if GAP block is ALL-ZERO or ALL-ON. In those cases returns
3892  pointer on special static bitblocks.
3893 
3894  \param dest - bitblock buffer pointer.
3895  \param buf - GAP buffer pointer.
3896  \param set_max - max possible bitset length
3897 
3898  @ingroup gapfunc
3899 */
3900 template<typename T>
3901 unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest,
3902  const T* BMRESTRICT buf,
3903  id_t set_max) BMNOEXCEPT
3904 {
3905  if (buf[1] == set_max - 1)
3906  return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
3907  bm::gap_convert_to_bitset(dest, buf);
3908  return dest;
3909 }
3910 
3911 
3912 /*!
3913  \brief Calculates sum of all words in GAP block. (For debugging purposes)
3914  \note For debugging and testing ONLY.
3915  \param buf - GAP buffer pointer.
3916  \return Sum of all words.
3917 
3918  @ingroup gapfunc
3919  @internal
3920 */
3921 template<typename T>
3922 unsigned gap_control_sum(const T* buf) BMNOEXCEPT
3923 {
3924  unsigned end = *buf >> 3;
3925 
3926  const T* pcurr = buf;
3927  const T* pend = pcurr + (*pcurr >> 3);
3928  ++pcurr;
3929 
3930  if (*buf & 1) // Starts with 1
3931  {
3932  ++pcurr;
3933  }
3934  ++pcurr; // now we are in GAP "1" again
3935  while (pcurr <= pend)
3936  {
3937  BM_ASSERT(*pcurr > *(pcurr-1));
3938  pcurr += 2;
3939  }
3940  return buf[end];
3941 }
3942 
3943 
3944 /*!
3945  \brief Sets all bits to 0 or 1 (GAP)
3946  \param buf - GAP buffer pointer.
3947  \param set_max - max possible bitset length
3948  \param value - value to set
3949 
3950  @ingroup gapfunc
3951 */
3952 template<class T>
3953 void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT
3954 {
3955  BM_ASSERT(value == 0 || value == 1);
3956  *buf = (T)((*buf & 6u) + (1u << 3) + value);
3957  *(++buf) = (T)(set_max - 1);
3958 }
3959 
3960 
3961 /*!
3962  \brief Init gap block so it has block in it (can be whole block)
3963  \param buf - GAP buffer pointer.
3964  \param from - one block start
3965  \param to - one block end
3966  \param value - (block value)1 or 0
3967 
3968  @ingroup gapfunc
3969 */
3970 template<class T>
3972  T from,
3973  T to,
3974  T value) BMNOEXCEPT
3975 {
3976  BM_ASSERT(value == 0 || value == 1);
3977  const unsigned set_max = bm::bits_in_block;
3978 
3979  unsigned gap_len;
3980  if (from == 0)
3981  {
3982  if (to == set_max - 1)
3983  {
3984  bm::gap_set_all(buf, set_max, value);
3985  }
3986  else
3987  {
3988  gap_len = 2;
3989  buf[1] = (T)to;
3990  buf[2] = (T)(set_max - 1);
3991  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3992  }
3993  return;
3994  }
3995  // from != 0
3996 
3997  value = !value;
3998  if (to == set_max - 1)
3999  {
4000  gap_len = 2;
4001  buf[1] = (T)(from - 1);
4002  buf[2] = (T)(set_max - 1);
4003  }
4004  else
4005  {
4006  gap_len = 3;
4007  buf[1] = (T) (from - 1);
4008  buf[2] = (T) to;
4009  buf[3] = (T)(set_max - 1);
4010  }
4011  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
4012 }
4013 
4014 
4015 /*!
4016  \brief Inverts all bits in the GAP buffer.
4017  \param buf - GAP buffer pointer.
4018 
4019  @ingroup gapfunc
4020 */
4021 template<typename T> void gap_invert(T* buf) BMNOEXCEPT
4022 {
4023  *buf ^= 1;
4024 }
4025 
4026 
4027 #ifdef __GNUG__
4028 #pragma GCC diagnostic push
4029 #pragma GCC diagnostic ignored "-Wconversion"
4030 #endif
4031 
4032 /*!
4033  \brief Sets GAP block capacity level.
4034  \param buf - GAP buffer pointer.
4035  \param level new GAP block capacity level.
4036 
4037  @ingroup gapfunc
4038 */
4039 template<typename T>
4040 void set_gap_level(T* buf, int level) BMNOEXCEPT
4041 {
4042  BM_ASSERT(level >= 0);
4043  BM_ASSERT(unsigned(level) < bm::gap_levels);
4044 
4045  *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
4046 }
4047 #ifdef __GNUG__
4048 #pragma GCC diagnostic pop
4049 #endif
4050 
4051 
4052 
4053 /*!
4054  \brief Calculates GAP block capacity level.
4055  \param len - GAP buffer length.
4056  \param glevel_len - GAP lengths table
4057  \return GAP block capacity level.
4058  -1 if block does not fit any level.
4059  @ingroup gapfunc
4060 */
4061 template<typename T>
4062 int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT
4063 {
4064  if (len <= unsigned(glevel_len[0]-4)) return 0;
4065  if (len <= unsigned(glevel_len[1]-4)) return 1;
4066  if (len <= unsigned(glevel_len[2]-4)) return 2;
4067  if (len <= unsigned(glevel_len[3]-4)) return 3;
4068 
4069  BM_ASSERT(bm::gap_levels == 4);
4070  return -1;
4071 }
4072 
4073 /*! @brief Returns number of free elements in GAP block array.
4074  Difference between GAP block capacity on this level and actual GAP length.
4075 
4076  @param buf - GAP buffer pointer
4077  @param glevel_len - GAP lengths table
4078 
4079  @return Number of free GAP elements
4080  @ingroup gapfunc
4081 */
4082 template<typename T>
4083 inline unsigned gap_free_elements(const T* BMRESTRICT buf,
4084  const T* BMRESTRICT glevel_len) BMNOEXCEPT
4085 {
4086  unsigned len = bm::gap_length(buf);
4087  unsigned capacity = bm::gap_capacity(buf, glevel_len);
4088  return capacity - len;
4089 }
4090 
4091 /*!
4092  \brief Lexicographical comparison of BIT buffers.
4093  \param buf1 - First buffer pointer.
4094  \param buf2 - Second buffer pointer.
4095  \param len - Buffer length in elements (T).
4096  \return <0 - less, =0 - equal, >0 - greater.
4097 
4098  @ingroup bitfunc
4099 */
4100 template<typename T>
4101 int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT
4102 {
4103  BM_ASSERT(len);
4104  const T* pend1 = buf1 + len;
4105  do
4106  {
4107  T w1 = *buf1++;
4108  T w2 = *buf2++;
4109  T diff = w1 ^ w2;
4110  if (diff)
4111  return (w1 & diff & -diff) ? 1 : -1;
4112  } while (buf1 < pend1);
4113  return 0;
4114 }
4115 
4116 /*!
4117  \brief Find first bit which is different between two bit-blocks
4118  \param blk1 - block 1
4119  \param blk2 - block 2
4120  \param pos - out - position of difference (undefined if blocks are equal)
4121  \return true if difference was found
4122 
4123  @ingroup bitfunc
4124 */
4125 inline
4127  const bm::word_t* BMRESTRICT blk2,
4128  unsigned* BMRESTRICT pos) BMNOEXCEPT
4129 {
4130  BM_ASSERT(blk1 && blk2 && pos);
4131 #ifdef VECT_BIT_FIND_DIFF
4132  bool f = VECT_BIT_FIND_DIFF(blk1, blk2, pos);
4133  return f;
4134 #else
4135 #ifdef BM64OPT
4136  BM_ASSERT(sizeof(bm::wordop_t) == 8);
4137 
4138  const bm::wordop_t* b1 = (const bm::wordop_t*) blk1;
4139  const bm::wordop_t* b2 = (const bm::wordop_t*) blk2;
4140 
4141  for (unsigned i = 0; i < bm::set_block_size/2; ++i)
4142  {
4143  bm::wordop_t w1 = b1[i]; bm::wordop_t w2 = b2[i];
4144  bm::wordop_t diff = w1 ^ w2;
4145  if (diff)
4146  {
4147  unsigned idx = bm::count_trailing_zeros_u64(diff);
4148  *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t))));
4149  return true;
4150  }
4151  } // for
4152 #else
4153  for (unsigned i = 0; i < bm::set_block_size; ++i)
4154  {
4155  bm::word_t w1 = blk1[i]; bm::word_t w2 = blk2[i];
4156  bm::word_t diff = w1 ^ w2;
4157  if (diff)
4158  {
4159  unsigned idx = bm::bit_scan_forward32(diff); // trailing zeros
4160  *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
4161  return true;
4162  }
4163  } // for
4164 #endif
4165 #endif
4166  return false;
4167 }
4168 
4169 
4170 #ifndef BMAVX2OPT
4171 
4172 /*!
4173  \brief Converts bit block to GAP.
4174  \param dest - Destinatio GAP buffer.
4175  \param block - Source bitblock buffer.
4176  \param dest_len - length of the destination buffer.
4177  \return New length of GAP block or 0 if conversion failed
4178  (insufficicent space).
4179 
4180  @ingroup gapfunc
4181 */
4182 inline
4184  const unsigned* BMRESTRICT block,
4185  unsigned dest_len) BMNOEXCEPT
4186 {
4187  const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
4188  gap_word_t* BMRESTRICT pcurr = dest;
4189  gap_word_t* BMRESTRICT end = dest + dest_len; (void)end;
4190 
4191  unsigned bitval = (*block) & 1u;
4192  *pcurr++ = bm::gap_word_t(bitval);
4193  *pcurr = 0;
4194  unsigned bit_idx = 0;
4195 
4196  do
4197  {
4198  unsigned val = *block;
4199  while (!val || val == ~0u)
4200  {
4201  if (bitval != unsigned(bool(val)))
4202  {
4203  *pcurr++ = (gap_word_t)(bit_idx-1);
4204  bitval ^= 1u;
4205  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4206  BM_ASSERT(pcurr != end);
4207  }
4208  bit_idx += unsigned(sizeof(*block) * 8);
4209  if (++block >= block_end)
4210  goto complete;
4211  val = *block;
4212  } // while
4213 
4214  // process "0100011" word
4215  //
4216  unsigned bits_consumed = 0;
4217  do
4218  {
4219  unsigned tz = 1u;
4220  if (bitval != (val & 1u))
4221  {
4222  *pcurr++ = (gap_word_t)(bit_idx-1);
4223  bitval ^= 1u;
4224  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4225  BM_ASSERT(pcurr != end);
4226  }
4227  else // match, find the next idx
4228  {
4229  tz = bm::bit_scan_forward32(bitval ? ~val : val);
4230  // possible alternative:
4231  // tz = bm::count_trailing_zeros(bitval ? ~val : val);
4232  }
4233 
4234  bits_consumed += tz;
4235  bit_idx += tz;
4236  val >>= tz;
4237 
4238  if (!val)
4239  {
4240  if (bits_consumed < 32u)
4241  {
4242  *pcurr++ = (gap_word_t)(bit_idx-1);
4243  bitval ^= 1u;
4244  bit_idx += 32u - bits_consumed;
4245  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4246  BM_ASSERT(pcurr != end);
4247  }
4248  break;
4249  }
4250  } while (1);
4251 
4252  } while(++block < block_end);
4253 
4254 complete:
4255  *pcurr = (gap_word_t)(bit_idx-1);
4256  unsigned len = (unsigned)(pcurr - dest);
4257  *dest = (gap_word_t)((*dest & 7) + (len << 3));
4258  return len;
4259 }
4260 #endif
4261 
4262 /**
4263  Convert bit block to GAP representation
4264  @internal
4265  @ingroup bitfunc
4266 */
4267 inline
4269  const unsigned* BMRESTRICT block,
4270  unsigned dest_len) BMNOEXCEPT
4271 {
4272 #if defined(VECT_BIT_TO_GAP)
4273  return VECT_BIT_TO_GAP(dest, block, dest_len);
4274 #else
4275  return bm::bit_block_to_gap(dest, block, dest_len);
4276 #endif
4277 }
4278 
4279 
4280 /*!
4281  \brief Iterate gap block as delta-bits with a functor
4282  @ingroup gapfunc
4283 */
4284 template<class T, class F>
4285 void for_each_gap_dbit(const T* buf, F& func)
4286 {
4287  const T* pcurr = buf;
4288  const T* pend = pcurr + (*pcurr >> 3);
4289 
4290  ++pcurr;
4291 
4292  unsigned prev = 0;
4293  unsigned first_inc;
4294 
4295  if (*buf & 1)
4296  {
4297  first_inc = 0;
4298  unsigned to = *pcurr;
4299  for (unsigned i = 0; i <= to; ++i)
4300  {
4301  func(1);
4302  }
4303  prev = to;
4304  ++pcurr;
4305  }
4306  else
4307  {
4308  first_inc = 1;
4309  }
4310  ++pcurr; // set GAP to 1
4311 
4312  while (pcurr <= pend)
4313  {
4314  unsigned from = *(pcurr-1)+1;
4315  unsigned to = *pcurr;
4316  if (first_inc)
4317  {
4318  func(from - prev + first_inc);
4319  first_inc = 0;
4320  }
4321  else
4322  {
4323  func(from - prev);
4324  }
4325 
4326  for (unsigned i = from+1; i <= to; ++i)
4327  {
4328  func(1);
4329  }
4330  prev = to;
4331  pcurr += 2; // jump to the next positive GAP
4332  }
4333 }
4334 
4335 /*!
4336  \brief Convert gap block into array of ints corresponding to 1 bits
4337  @ingroup gapfunc
4338 */
4339 template<typename D, typename T>
4341  const T* BMRESTRICT buf,
4342  unsigned dest_len,
4343  bool invert = false) BMNOEXCEPT
4344 {
4345  const T* BMRESTRICT pcurr = buf;
4346  const T* pend = pcurr + (*pcurr >> 3);
4347 
4348  D* BMRESTRICT dest_curr = dest;
4349  ++pcurr;
4350 
4351  int bitval = (*buf) & 1;
4352  if (invert)
4353  bitval = !bitval; // invert the GAP buffer
4354 
4355  if (bitval)
4356  {
4357  if (unsigned(*pcurr + 1) >= dest_len)
4358  return 0; // insufficient space
4359  dest_len -= *pcurr;
4360  T to = *pcurr;
4361  for (T i = 0; ;++i)
4362  {
4363  *dest_curr++ = i;
4364  if (i == to) break;
4365  }
4366  ++pcurr;
4367  }
4368  ++pcurr; // set GAP to 1
4369 
4370  while (pcurr <= pend)
4371  {
4372  unsigned pending = *pcurr - *(pcurr-1);
4373  if (pending >= dest_len)
4374  return 0;
4375  dest_len -= pending;
4376  T from = (T)(*(pcurr-1)+1);
4377  T to = *pcurr;
4378  for (T i = from; ;++i)
4379  {
4380  *dest_curr++ = i;
4381  if (i == to) break;
4382  }
4383  pcurr += 2; // jump to the next positive GAP
4384  }
4385  return (D) (dest_curr - dest);
4386 }
4387 
4388 
4389 
4390 /*!
4391  @brief Bitcount for bit block
4392 
4393  Function calculates number of 1 bits in the given array of words.
4394  Make sure the addresses are aligned.
4395 
4396  @ingroup bitfunc
4397 */
4398 inline
4400 {
4401  const bm::word_t* block_end = block + bm::set_block_size;
4402  bm::id_t count = 0;
4403 
4404 #ifdef BMVECTOPT
4405  count = VECT_BITCOUNT(block, block_end);
4406 #else
4407 #ifdef BM64OPT
4408  // 64-bit optimized algorithm. No sparse vect opt.
4409  // instead it uses 4-way parallel pipelined version
4410 
4411  const bm::id64_t* b1 = (bm::id64_t*) block;
4412  const bm::id64_t* b2 = (bm::id64_t*) block_end;
4413  do
4414  {
4415  bm::id64_t x = b1[0];
4416  bm::id64_t y = b1[1];
4417  bm::id64_t u = b1[2];
4418  bm::id64_t v = b1[3];
4419 
4420  if (x | y | u | v)
4421  {
4422  unsigned c = bitcount64_4way(x, y, u, v);
4423  BM_ASSERT(c);
4424  count += c;
4425  }
4426  b1 += 4;
4427  } while (b1 < b2);
4428 #else
4429  // For 32 bit code the fastest method is
4430  // to use bitcount table for each byte in the block.
4431  // As optimization for sparse bitsets used bits accumulator
4432  // to collect ON bits using bitwise OR.
4433  bm::word_t acc = *block++;
4434  do
4435  {
4436  bm::word_t in = *block++;
4437  bm::word_t acc_prev = acc;
4438  acc |= in;
4439  if (acc_prev &= in) // accumulator miss: counting bits
4440  {
4441  BM_INCWORD_BITCOUNT(count, acc);
4442  acc = acc_prev;
4443  }
4444  } while (block < block_end);
4445 
4446  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
4447 
4448 #endif
4449 #endif
4450  return count;
4451 }
4452 
4453 /*!
4454  @brief Bitcount for bit block
4455 
4456  Function calculates number of 1 bits in the given array of words.
4457  uses digest to understand zero areas
4458 
4459  @ingroup bitfunc
4460 */
4461 inline
4463  bm::id64_t digest) BMNOEXCEPT
4464 {
4465 #ifdef VECT_BIT_COUNT_DIGEST
4466  return VECT_BIT_COUNT_DIGEST(block, digest);
4467 #else
4468  bm::id_t count = 0;
4469  bm::id64_t d = digest;
4470  while (d)
4471  {
4472  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4473 
4474  unsigned wave = bm::word_bitcount64(t - 1);
4475  unsigned off = wave * bm::set_block_digest_wave_size;
4476 
4477  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
4478  (const bm::bit_block_t::bunion_t*)(&block[off]);
4479  unsigned j = 0;
4480  do
4481  {
4482  count += bm::word_bitcount64(src_u->w64[j+0]) +
4483  bm::word_bitcount64(src_u->w64[j+1]) +
4484  bm::word_bitcount64(src_u->w64[j+2]) +
4485  bm::word_bitcount64(src_u->w64[j+3]);
4486  j += 4;
4487  } while (j < bm::set_block_digest_wave_size/2);
4488 
4489  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4490  } // while
4491  return count;
4492 #endif
4493 }
4494 
4495 
4496 
4497 /*!
4498  @brief Bitcount for bit string
4499 
4500  Added for back-compatibility purposes, not block aligned,
4501  not SIMD accelerated
4502 
4503  @ingroup bitfunc
4504 */
4505 inline
4507  const bm::word_t* block_end) BMNOEXCEPT
4508 {
4509  bm::id_t count = 0;
4510  bm::word_t acc = *block++;
4511  do
4512  {
4513  bm::word_t in = *block++;
4514  bm::word_t acc_prev = acc;
4515  acc |= in;
4516  if (acc_prev &= in) // accumulator miss: counting bits
4517  {
4518  BM_INCWORD_BITCOUNT(count, acc);
4519  acc = acc_prev;
4520  }
4521  } while (block < block_end);
4522 
4523  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
4524  return count;
4525 }
4526 
4527 
4528 
4529 /*!
4530  Function calculates number of times when bit value changed
4531  (1-0 or 0-1).
4532 
4533  For 001 result is 2
4534  010 - 3
4535  011 - 2
4536  111 - 1
4537 
4538  @ingroup bitfunc
4539 */
4540 inline
4542 {
4543  unsigned count = 1;
4544  w ^= (w >> 1);
4545 
4546  count += bm::word_bitcount(w);
4547  count -= (w >> ((sizeof(w) * 8) - 1));
4548  return count;
4549 }
4550 
4551 
4552 /*!
4553  Function calculates number of times when bit value changed
4554  @internal
4555 */
4556 inline
4557 unsigned bit_block_change32(const bm::word_t* block, unsigned size) BMNOEXCEPT
4558 {
4559  unsigned gap_count = 1;
4560 
4561  bm::word_t w, w0, w_prev, w_l;
4562  w = w0 = *block;
4563 
4564  const int w_shift = int(sizeof(w) * 8 - 1);
4565  w ^= (w >> 1);
4566  BM_INCWORD_BITCOUNT(gap_count, w);
4567  gap_count -= (w_prev = (w0 >> w_shift)); // negative value correction
4568 
4569  const bm::word_t* block_end = block + size; // bm::set_block_size;
4570  for (++block; block < block_end; ++block)
4571  {
4572  w = w0 = *block;
4573  ++gap_count;
4574  if (!w)
4575  {
4576  gap_count -= !w_prev;
4577  w_prev = 0;
4578  }
4579  else
4580  {
4581  w ^= (w >> 1);
4582  BM_INCWORD_BITCOUNT(gap_count, w);
4583 
4584  w_l = w0 & 1;
4585  gap_count -= (w0 >> w_shift); // negative value correction
4586  gap_count -= !(w_prev ^ w_l); // word border correction
4587 
4588  w_prev = (w0 >> w_shift);
4589  }
4590  } // for
4591  return gap_count;
4592 }
4593 
4594 
4595 /*!
4596  Function calculates basic bit-block statistics
4597  number of times when bit value changed (GAPS)
4598  and population count
4599  @param block - bit-block pointer
4600  @param gc - [output] gap_count
4601  @param bc - [output] bit count
4602  @internal
4603 */
4604 inline
4606  unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT
4607 {
4608  BM_ASSERT(gc);
4609  BM_ASSERT(bc);
4610 
4611  #ifdef VECT_BLOCK_CHANGE_BC
4612  VECT_BLOCK_CHANGE_BC(block, gc, bc);
4613  #else
4615  *bc = bm::bit_block_count(block);
4616  #endif
4617 }
4618 
4619 
4620 
4621 /*!
4622  Function calculates number of times when bit value changed
4623  (1-0 or 0-1) in the bit block.
4624 
4625  @param block - bit-block start pointer
4626  @return number of 1-0, 0-1 transitions
4627 
4628  @ingroup bitfunc
4629 */
4630 inline
4632 {
4633 #if defined(VECT_BLOCK_CHANGE)
4634  return VECT_BLOCK_CHANGE(block, bm::set_block_size);
4635 #else
4637 #endif
4638 }
4639 
4640 /*!
4641  Check if all bits are 1 in [left, right] range
4642  @ingroup bitfunc
4643 */
4644 inline
4646  bm::word_t left,
4647  bm::word_t right) BMNOEXCEPT
4648 {
4649  BM_ASSERT(left <= right);
4650  BM_ASSERT(right <= bm::gap_max_bits-1);
4651 
4652  unsigned nword, nbit, bitcount, temp;
4653  nbit = left & bm::set_word_mask;
4654  const bm::word_t* word =
4655  block + (nword = unsigned(left >> bm::set_word_shift));
4656  if (left == right) // special case (only 1 bit to check)
4657  return (*word >> nbit) & 1u;
4658 
4659  if (nbit) // starting position is not aligned
4660  {
4661  unsigned right_margin = nbit + right - left;
4662  if (right_margin < 32)
4663  {
4664  unsigned mask =
4666  block_set_table<true>::_left[right_margin];
4667  return mask == (*word & mask);
4668  }
4669  temp = *word & block_set_table<true>::_right[nbit];
4670  if (temp != block_set_table<true>::_right[nbit])
4671  return false;
4672  bitcount = (right - left + 1u) - (32 - nbit);
4673  ++word;
4674  }
4675  else
4676  {
4677  bitcount = right - left + 1u;
4678  }
4679 
4680  // now when we are word aligned, we can scan the bit-stream
4681  const bm::id64_t maskFF64 = ~0ull;
4682  const bm::word_t maskFF = ~0u;
4683  // loop unrolled to evaluate 4 words at a time
4684  // SIMD showed no advantage, unless evaluate sub-wave intervals
4685  //
4686  for ( ;bitcount >= 128; bitcount-=128, word+=4)
4687  {
4688  bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32);
4689  bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32);
4690  if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64))
4691  return false;
4692  } // for
4693 
4694  for ( ;bitcount >= 32; bitcount-=32, ++word)
4695  {
4696  if (*word != maskFF)
4697  return false;
4698  } // for
4699  BM_ASSERT(bitcount < 32);
4700 
4701  if (bitcount) // we have a tail to count
4702  {
4703  temp = *word & block_set_table<true>::_left[bitcount-1];
4704  if (temp != block_set_table<true>::_left[bitcount-1])
4705  return false;
4706  }
4707 
4708  return true;
4709 }
4710 
4711 
4712 
4713 
4714 /*!
4715  Function calculates number of 1 bits in the given array of words in
4716  the range between left anf right bits (borders included)
4717  Make sure the addr is aligned.
4718 
4719  @ingroup bitfunc
4720 */
4721 inline
4723  bm::word_t left,
4724  bm::word_t right) BMNOEXCEPT
4725 {
4726  BM_ASSERT(left <= right);
4727  BM_ASSERT(right <= bm::gap_max_bits-1);
4728 
4729  unsigned nword, nbit, bitcount, count;
4730  nbit = left & bm::set_word_mask;
4731  const bm::word_t* word =
4732  block + (nword = unsigned(left >> bm::set_word_shift));
4733  if (left == right) // special case (only 1 bit to check)
4734  {
4735  return (*word >> nbit) & 1u;
4736  }
4737 
4738  count = 0;
4739  bitcount = right - left + 1u;
4740  if (nbit) // starting position is not aligned
4741  {
4742  unsigned right_margin = nbit + right - left;
4743  if (right_margin < 32)
4744  {
4745  unsigned mask =
4747  block_set_table<true>::_left[right_margin];
4748  return bm::word_bitcount(*word & mask);
4749  }
4750  count = bm::word_bitcount(*word & block_set_table<true>::_right[nbit]);
4751  bitcount -= 32 - nbit;
4752  ++word;
4753  }
4754 
4755  // now when we are word aligned, we can count bits the usual way
4756  //
4757  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
4758  for ( ;bitcount >= 64; bitcount-=64)
4759  {
4760  bm::id64_t w64 = *((bm::id64_t*)word); // x86 unaligned(!) read
4761  count += unsigned(_mm_popcnt_u64(w64));
4762  word += 2;
4763  }
4764  if (bitcount >= 32)
4765  {
4766  count += bm::word_bitcount(*word++);
4767  bitcount-=32;
4768  }
4769  #else
4770  for ( ;bitcount >= 32; bitcount-=32, ++word)
4771  count += bm::word_bitcount(*word);
4772  #endif
4773  BM_ASSERT(bitcount < 32);
4774 
4775  if (bitcount) // we have a tail to count
4776  {
4777  count += bm::word_bitcount(
4778  *word & block_set_table<true>::_left[bitcount-1]);
4779  }
4780  return count;
4781 }
4782 
4783 /*!
4784  Function calculates number of 1 bits in the given array of words in
4785  the range between 0 anf right bits (borders included)
4786  Make sure the addr is aligned.
4787 
4788  @ingroup bitfunc
4789 */
4790 inline
4792  bm::word_t right) BMNOEXCEPT
4793 {
4794  BM_ASSERT(block);
4795  if (!right) // special case, first bit check
4796  return *block & 1u;
4797  bm::id_t count = 0;
4798 
4799  unsigned bitcount = right + 1;
4800 
4801  // AVX2 or 64-bit loop unroll
4802  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
4803  BM_AVX2_POPCNT_PROLOG
4804 
4805  __m256i cnt = _mm256_setzero_si256();
4806  bm::id64_t* cnt64;
4807 
4808  for ( ;bitcount >= 256; bitcount -= 256)
4809  {
4810  const __m256i* src = (__m256i*)block;
4811  __m256i xmm256 = _mm256_load_si256(src);
4812  BM_AVX2_BIT_COUNT(bc, xmm256);
4813  cnt = _mm256_add_epi64(cnt, bc);
4814 
4815  block += 8;
4816  }
4817  cnt64 = (bm::id64_t*)&cnt;
4818  count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
4819  #endif
4820 
4821  for ( ;bitcount >= 64; bitcount -= 64)
4822  {
4823  bm::id64_t* p = (bm::id64_t*)block;
4824  count += bm::word_bitcount64(*p);
4825  block += 2;
4826  }
4827  if (bitcount >= 32)
4828  {
4829  count += bm::word_bitcount(*block++);
4830  bitcount-=32;
4831  }
4832 
4833  if (bitcount) // we have a tail to count
4834  {
4835  count +=
4836  bm::word_bitcount(*block & block_set_table<true>::_left[bitcount-1]);
4837  }
4838  return count;
4839 }
4840 
4841 
4842 
4843 /*!
4844  Cyclic rotation of bit-block left by 1 bit
4845  @ingroup bitfunc
4846 */
4847 inline
4849 {
4850  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4851  for (unsigned i = 0; i < bm::set_block_size-1; ++i)
4852  {
4853  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4854  }
4855  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4856 }
4857 
4858 /*!
4859  @brief Unrolled cyclic rotation of bit-block left by 1 bit
4860  @param block - bit-block pointer
4861  @ingroup bitfunc
4862 */
4863 inline
4865 {
4866  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4867  const unsigned unroll_factor = 4;
4868  bm::word_t w0, w1, w2, w3;
4869 
4870  unsigned i;
4871  for (i = 0; i < bm::set_block_size - unroll_factor; i += unroll_factor)
4872  {
4873  w0 = block[i + 1] >> 31;
4874  w1 = block[i + 2] >> 31;
4875  w2 = block[i + 3] >> 31;
4876  w3 = block[i + 4] >> 31;
4877 
4878  block[0 + i] = (block[0 + i] << 1) | w0;
4879  block[1 + i] = (block[1 + i] << 1) | w1;
4880  block[2 + i] = (block[2 + i] << 1) | w2;
4881  block[3 + i] = (block[3 + i] << 1) | w3;
4882  }
4883  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4884  block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
4885  block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
4886  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4887 }
4888 
4889 /*!
4890  @brief insert bit into position and shift the rest right with carryover
4891 
4892  @param block - bit-block pointer
4893  @param bitpos - bit position to insert
4894  @param value - bit value (0|1) to insert
4895 
4896  @return carry over value
4897  @ingroup bitfunc
4898 */
4899 inline
4901  unsigned bitpos, bool value) BMNOEXCEPT
4902 {
4903  BM_ASSERT(block);
4904  BM_ASSERT(bitpos < 65536);
4905 
4906  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
4907  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4908  nbit &= bm::set_word_mask;
4909 
4910  bm::word_t co_flag = 0;
4911  if (nbit)
4912  {
4913  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
4914  bm::word_t w = block[nword] & r_mask;
4915  bm::word_t wl = block[nword] & ~r_mask;
4916  co_flag = w >> 31;
4917  w <<= 1u;
4918  block[nword] = w | (unsigned(value) << nbit) | wl;
4919  ++nword;
4920  }
4921  else
4922  {
4923  co_flag = value;
4924  }
4925 
4926  for (unsigned i = nword; i < bm::set_block_size; ++i)
4927  {
4928  bm::word_t w = block[i];
4929  bm::word_t co_flag1 = w >> 31;
4930  w = (w << 1u) | co_flag;
4931  block[i] = w;
4932  co_flag = co_flag1;
4933  } // for i
4934  return co_flag;
4935 }
4936 
4937 
4938 
4939 /*!
4940  @brief Right bit-shift bitblock by 1 bit (reference)
4941  @param block - bit-block pointer
4942  @param empty_acc - [out] contains 0 if block is empty
4943  @param co_flag - carry over from the previous block
4944 
4945  @return carry over bit (1 or 0)
4946  @ingroup bitfunc
4947 */
4948 inline
4950  bm::word_t* BMRESTRICT empty_acc,
4951  bm::word_t co_flag) BMNOEXCEPT
4952 {
4953  BM_ASSERT(block);
4954  BM_ASSERT(empty_acc);
4955 
4956  bm::word_t acc = 0;
4957  for (unsigned i = 0; i < bm::set_block_size; ++i)
4958  {
4959  bm::word_t w = block[i];
4960  bm::word_t co_flag1 = w >> 31;
4961  acc |= w = (w << 1u) | co_flag;
4962  block[i] = w;
4963  co_flag = co_flag1;
4964  }
4965  *empty_acc = acc;
4966  return co_flag;
4967 }
4968 
4969 /*!
4970  @brief Right bit-shift of bit-block by 1 bit (loop unrolled)
4971  @param block - bit-block pointer
4972  @param empty_acc - [out] contains 0 if block is empty
4973  @param co_flag - carry over from the previous block
4974 
4975  @return carry over bit (1 or 0)
4976  @ingroup bitfunc
4977 */
4978 inline
4980  bm::word_t* BMRESTRICT empty_acc,
4981  bm::word_t co_flag) BMNOEXCEPT
4982 {
4983  BM_ASSERT(block);
4984  BM_ASSERT(empty_acc);
4985  #if defined(VECT_SHIFT_R1)
4986  return VECT_SHIFT_R1(block, empty_acc, co_flag);
4987  #else
4988  return bm::bit_block_shift_r1(block, empty_acc, co_flag);
4989  #endif
4990 }
4991 
4992 
4993 /*!
4994  @brief Left bit-shift bitblock by 1 bit (reference)
4995  @param block - bit-block pointer
4996  @param empty_acc - [out] contains 0 if block is empty
4997  @param co_flag - carry over from the prev/next block
4998 
4999  @return carry over bit (1 or 0)
5000 
5001  @ingroup bitfunc
5002 */
5003 inline
5005  bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT
5006 {
5007  BM_ASSERT(block);
5008  BM_ASSERT(empty_acc);
5009 
5010  bm::word_t acc = 0;
5011  for (int i = bm::set_block_size-1; i >= 0; --i)
5012  {
5013  bm::word_t w = block[i];
5014  bm::word_t co_flag1 = w & 1u;
5015  acc |= w = (w >> 1u) | (co_flag << 31u);
5016  block[i] = w;
5017  co_flag = co_flag1;
5018  }
5019 
5020  *empty_acc = acc;
5021  return co_flag;
5022 }
5023 
5024 /*!
5025  @brief Left bit-shift of bit-block by 1 bit (loop unrolled)
5026  @param block - bit-block pointer
5027  @param empty_acc - [out] contains 0 if block is empty
5028  @param co_flag - carry over from the prev/next block
5029 
5030  @return carry over bit (1 or 0)
5031  @ingroup bitfunc
5032 */
5033 inline
5035  bm::word_t* empty_acc,
5036  bm::word_t co_flag) BMNOEXCEPT
5037 {
5038  BM_ASSERT(block);
5039  BM_ASSERT(empty_acc);
5040  #if defined(VECT_SHIFT_L1)
5041  return VECT_SHIFT_L1(block, empty_acc, co_flag);
5042  #else
5043  return bm::bit_block_shift_l1(block, empty_acc, co_flag);
5044  #endif
5045 }
5046 
5047 /*!
5048  @brief erase bit from position and shift the rest right with carryover
5049 
5050  @param block - bit-block pointer
5051  @param bitpos - bit position to insert
5052  @param carry_over - bit value to add to the end (0|1)
5053 
5054  @ingroup bitfunc
5055 */
5056 inline
5058  unsigned bitpos,
5059  bool carry_over) BMNOEXCEPT
5060 {
5061  BM_ASSERT(block);
5062  BM_ASSERT(bitpos < 65536);
5063 
5064  if (!bitpos)
5065  {
5066  bm::word_t acc;
5067  bm::bit_block_shift_l1_unr(block, &acc, carry_over);
5068  return;
5069  }
5070 
5071  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
5072  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5073  nbit &= bm::set_word_mask;
5074 
5075  bm::word_t co_flag = carry_over;
5076  for (unsigned i = bm::set_block_size-1; i > nword; --i)
5077  {
5078  bm::word_t w = block[i];
5079  bm::word_t co_flag1 = w & 1u;
5080  w = (w >> 1u) | (co_flag << 31u);
5081  block[i] = w;
5082  co_flag = co_flag1;
5083  }
5084 
5085  if (nbit)
5086  {
5087  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
5088  bm::word_t w = block[nword] & r_mask;
5089  bm::word_t wl = block[nword] & ~r_mask;
5090  w &= ~(1 << nbit); // clear the removed bit
5091  w >>= 1u;
5092  w |= wl | (co_flag << 31u);
5093  block[nword] = w;
5094  }
5095  else
5096  {
5097  block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
5098  }
5099 }
5100 
5101 
5102 /*!
5103  @brief Right bit-shift of bit-block by 1 bit (reference) + AND
5104  @param block - bit-block pointer
5105  @param co_flag - carry over from the previous block
5106  @param mask_block - mask bit-block pointer
5107  @param digest - block digest
5108 
5109  @return carry over bit (1 or 0)
5110  @ingroup bitfunc
5111 */
5112 inline
5114  bm::word_t co_flag,
5115  const bm::word_t* BMRESTRICT mask_block,
5117 {
5118  BM_ASSERT(block);
5119  BM_ASSERT(mask_block);
5120  BM_ASSERT(digest && *digest);
5121 
5122 
5123  bm::id64_t d = *digest;
5124 
5125  unsigned di = 0;
5126  if (!co_flag)
5127  {
5128  bm::id64_t t = d & -d;
5129  di = bm::word_bitcount64(t - 1); // find start bit-index
5130  }
5131 
5132  for (; di < 64; ++di)
5133  {
5134  const unsigned d_base = di * bm::set_block_digest_wave_size;
5135  bm::id64_t dmask = (1ull << di);
5136  if (d & dmask) // digest stride not empty
5137  {
5138  bm::word_t acc = 0;
5139  for (unsigned i = d_base; i < d_base + bm::set_block_digest_wave_size; ++i)
5140  {
5142 
5143  bm::word_t w = block[i];
5144  bm::word_t co_flag1 = w >> 31;
5145  w = (w << 1u) | co_flag;
5146  acc |= block[i] = w & mask_block[i];
5147  co_flag = co_flag1;
5148  }
5149  if (!acc)
5150  d &= ~dmask; // update digest: clear stride bit
5151  }
5152  else // stride is empty
5153  {
5154  BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
5155  BM_ASSERT(block[d_base]==0);
5156 
5157  if (co_flag) // there is carry-over
5158  {
5159  BM_ASSERT(co_flag == 1);
5160  BM_ASSERT(block[d_base] == 0);
5161 
5162  block[d_base] = co_flag & mask_block[d_base];
5163  if (block[d_base])
5164  d |= dmask; // update digest
5165  co_flag = 0;
5166  }
5167  }
5168  } // for di
5169 
5170  *digest = d;
5171  return co_flag;
5172 }
5173 
5174 /*!
5175  @brief Right bit-shift bitblock by 1 bit (reference) + AND
5176  @param block - bit-block pointer
5177  @param co_flag - carry over from the previous block
5178  @param mask_block - mask bit-block pointer
5179  @param digest - block digest
5180 
5181  @return carry over bit (1 or 0)
5182  @ingroup bitfunc
5183 */
5184 inline
5186  bm::word_t co_flag,
5187  const bm::word_t* BMRESTRICT mask_block,
5189 {
5190  BM_ASSERT(block);
5191  BM_ASSERT(mask_block);
5192  BM_ASSERT(digest);
5193 
5194  #if defined(VECT_SHIFT_R1_AND)
5195  return VECT_SHIFT_R1_AND(block, co_flag, mask_block, digest);
5196  #else
5197  return bm::bit_block_shift_r1_and(block, co_flag, mask_block, digest);
5198  #endif
5199 }
5200 
5201 
5202 /*!
5203  Function calculates if there is any number of 1 bits
5204  in the given array of words in the range between left anf right bits
5205  (borders included). Make sure the addresses are aligned.
5206 
5207  @ingroup bitfunc
5208 */
5209 inline
5211  bm::word_t left,
5212  bm::word_t right) BMNOEXCEPT
5213 {
5214  BM_ASSERT(left <= right);
5215 
5216  unsigned nbit = left; // unsigned(left & bm::set_block_mask);
5217  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5218  nbit &= bm::set_word_mask;
5219 
5220  const bm::word_t* word = block + nword;
5221 
5222  if (left == right) // special case (only 1 bit to check)
5223  {
5224  return (*word >> nbit) & 1;
5225  }
5226  unsigned acc;
5227  unsigned bitcount = right - left + 1;
5228 
5229  if (nbit) // starting position is not aligned
5230  {
5231  unsigned right_margin = nbit + (right - left);
5232  if (right_margin < 32)
5233  {
5234  unsigned mask =
5236  block_set_table<true>::_left[right_margin];
5237  return *word & mask;
5238  }
5239  else
5240  {
5241  acc = *word & block_set_table<true>::_right[nbit];
5242  if (acc)
5243  return acc;
5244  bitcount -= 32 - nbit;
5245  }
5246  ++word;
5247  }
5248 
5249  // loop unrolled to evaluate 4 words at a time
5250  // SIMD showed no advantage, unless evaluate sub-wave intervals
5251  //
5252  for ( ;bitcount >= 128; bitcount-=128, word+=4)
5253  {
5254  acc = word[0] | word[1] | word[2] | word[3];
5255  if (acc)
5256  return acc;
5257  } // for
5258 
5259  acc = 0;
5260  for ( ;bitcount >= 32; bitcount -= 32)
5261  {
5262  acc |= *word++;
5263  } // for
5264 
5265  if (bitcount) // we have a tail to count
5266  acc |= (*word) & block_set_table<true>::_left[bitcount-1];
5267 
5268  return acc;
5269 }
5270 
5271 // ----------------------------------------------------------------------
5272 
5273 /*! Function inverts block of bits
5274  @ingroup bitfunc
5275 */
5276 template<typename T>
5277 void bit_invert(T* start) BMNOEXCEPT
5278 {
5280 #ifdef BMVECTOPT
5281  VECT_INVERT_BLOCK(start);
5282 #else
5283  T* end = (T*)((unsigned*)(start) + bm::set_block_size);
5284  do
5285  {
5286  start[0] = ~start[0];
5287  start[1] = ~start[1];
5288  start[2] = ~start[2];
5289  start[3] = ~start[3];
5290  start+=4;
5291  } while (start < end);
5292 #endif
5293 }
5294 
5295 // ----------------------------------------------------------------------
5296 
5297 /*! @brief Returns "true" if all bits in the block are 1
5298  @ingroup bitfunc
5299 */
5300 inline
5302 {
5303 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
5304  return VECT_IS_ONE_BLOCK(start);
5305 #else
5306  const bm::word_t* BMRESTRICT src_end = (bm::word_t*)start + bm::set_block_size;
5307  const bm::wordop_t* end = (const bm::wordop_t*)src_end;
5308  do
5309  {
5310  bm::wordop_t tmp =
5311  start[0] & start[1] & start[2] & start[3];
5312  if (tmp != bm::all_bits_mask)
5313  return false;
5314  start += 4;
5315  } while (start < end);
5316  return true;
5317 #endif
5318 }
5319 
5320 // ----------------------------------------------------------------------
5321 
5322 /*! @brief Returns "true" if all bits are 1 in the block [left, right]
5323  Function check for block varieties
5324  @internal
5325 */
5326 inline
5328  unsigned left, unsigned right) BMNOEXCEPT
5329 {
5330  BM_ASSERT(left <= right);
5331  BM_ASSERT(right < bm::gap_max_bits);
5332  if (block)
5333  {
5334  if (BM_IS_GAP(block))
5335  return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right);
5336  if (block == FULL_BLOCK_FAKE_ADDR)
5337  return true;
5338  return bm::bit_block_is_all_one_range(block, left, right);
5339  }
5340  return false;
5341 }
5342 
5343 /*! @brief Returns "true" if all bits are 1 in the block [left, right]
5344  and border bits are 0
5345  @internal
5346 */
5347 inline
5348 bool block_is_interval(const bm::word_t* const BMRESTRICT block,
5349  unsigned left, unsigned right) BMNOEXCEPT
5350 {
5351  BM_ASSERT(left <= right);
5352  BM_ASSERT(right < bm::gap_max_bits-1);
5353 
5354  if (block)
5355  {
5356  bool is_left, is_right, all_one;
5357  if (BM_IS_GAP(block))
5358  {
5359  const bm::gap_word_t* gap = BMGAP_PTR(block);
5360  all_one = bm::gap_is_interval(gap, left, right);
5361  return all_one;
5362  }
5363  else // bit-block
5364  {
5365  if (block == FULL_BLOCK_FAKE_ADDR)
5366  return false;
5367  unsigned nword = ((left-1) >> bm::set_word_shift);
5368  is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask));
5369  if (is_left == false)
5370  {
5371  nword = ((right + 1) >> bm::set_word_shift);
5372  is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask));
5373  if (is_right == false)
5374  {
5375  all_one = bm::bit_block_is_all_one_range(block, left, right);
5376  return all_one;
5377  }
5378  }
5379  }
5380  }
5381 
5382  return false;
5383 }
5384 
5385 // ----------------------------------------------------------------------
5386 
5387 /**
5388  \brief Searches for the last 1 bit in the 111 interval of a BIT block
5389  \param block - BIT buffer
5390  \param nbit - bit index to start checking from
5391  \param pos - [out] found value
5392 
5393  \return false if not found
5394  @ingroup bitfunc
5395 */
5396 inline
5398  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5399 {
5400  BM_ASSERT(block);
5401  BM_ASSERT(pos);
5402 
5403  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5404  unsigned bit_pos = (nbit & bm::set_word_mask);
5405  bm::word_t w = block[nword];
5406  w &= (1u << bit_pos);
5407  if (!w)
5408  return false;
5409 
5410  if (nbit == bm::gap_max_bits-1)
5411  {
5412  *pos = bm::gap_max_bits-1;
5413  return true;
5414  }
5415  *pos = nbit;
5416 
5417  ++nbit;
5418  nword = unsigned(nbit >> bm::set_word_shift);
5419  bit_pos = (nbit & bm::set_word_mask);
5420 
5421  w = (~block[nword]) >> bit_pos;
5422  w <<= bit_pos; // clear the trailing bits
5423  if (w)
5424  {
5425  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
5426  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
5427  return true;
5428  }
5429 
5430  for (++nword; nword < bm::set_block_size; ++nword)
5431  {
5432  w = ~block[nword];
5433  if (w)
5434  {
5435  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
5436  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
5437  return true;
5438  }
5439  } // for nword
5440 
5441  // 0 not found, all block is 1s...
5442  *pos = bm::gap_max_bits-1;
5443  return true;
5444 }
5445 
5446 
5447 /*! @brief Find end of the current 111 interval
5448  @return search result code 0 - not found, 1 found, 2 - found at the end
5449  @internal
5450 */
5451 inline
5453  unsigned nbit_from,
5454  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
5455 {
5456  BM_ASSERT(block && found_nbit);
5457  BM_ASSERT(nbit_from < bm::gap_max_bits);
5458 
5459  bool b;
5460  if (BM_IS_GAP(block))
5461  {
5462  const bm::gap_word_t* gap = BMGAP_PTR(block);
5463  b = bm::gap_find_interval_end(gap, nbit_from, found_nbit);
5464  if (b && *found_nbit == bm::gap_max_bits-1)
5465  return 2; // end of block, keep searching
5466  }
5467  else // bit-block
5468  {
5469  if (IS_FULL_BLOCK(block))
5470  {
5471  *found_nbit = bm::gap_max_bits-1;
5472  return 2;
5473  }
5474  b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit);
5475  if (b && *found_nbit == bm::gap_max_bits-1)
5476  return 2; // end of block, keep searching
5477  }
5478  return b;
5479 }
5480 
5481 // ----------------------------------------------------------------------
5482 
5483 /**
5484  \brief Searches for the first 1 bit in the 111 interval of a BIT block
5485  \param block - BIT buffer
5486  \param nbit - bit index to start checking from
5487  \param pos - [out] found value
5488 
5489  \return false if not found
5490  @ingroup bitfunc
5491 */
5492 inline
5494  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5495 {
5496  BM_ASSERT(block);
5497  BM_ASSERT(pos);
5498 
5499  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5500  unsigned bit_pos = (nbit & bm::set_word_mask);
5501  bm::word_t w = block[nword];
5502  w &= (1u << bit_pos);
5503  if (!w)
5504  return false;
5505 
5506  if (nbit == 0)
5507  {
5508  *pos = 0;
5509  return true;
5510  }
5511  *pos = nbit;
5512 
5513  --nbit;
5514  nword = unsigned(nbit >> bm::set_word_shift);
5515  bit_pos = (nbit & bm::set_word_mask);
5516 
5517  w = (~block[nword]) & block_set_table<true>::_left[bit_pos];
5518  if (w)
5519  {
5520  bit_pos = bm::bit_scan_reverse32(w);
5521  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
5522  return true;
5523  }
5524 
5525  if (nword)
5526  {
5527  for (--nword; true; --nword)
5528  {
5529  w = ~block[nword];
5530  if (w)
5531  {
5532  bit_pos = bm::bit_scan_reverse32(w); // trailing zeros
5533  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
5534  return true;
5535  }
5536  if (!nword)
5537  break;
5538  } // for nword
5539  }
5540 
5541  // 0 not found, all block is 1s...
5542  *pos = 0;
5543  return true;
5544 }
5545 
5546 /**
5547  \brief Reverse search for the previous 1 bit
5548  \param block - BIT buffer
5549  \param nbit - bit index to start checking from
5550  \param pos - [out] found value
5551 
5552  \return false if not found
5553  @ingroup bitfunc
5554 */
5555 inline
5557  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5558 {
5559  BM_ASSERT(block);
5560  BM_ASSERT(pos);
5561 
5562  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5563  unsigned bit_pos = (nbit & bm::set_word_mask);
5564  bm::word_t w = block[nword];
5565  w &= (1u << bit_pos);
5566  if (w)
5567  {
5568  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
5569  return true;
5570  }
5571 
5572  if (!nbit)
5573  return false;
5574 
5575  --nbit;
5576  nword = unsigned(nbit >> bm::set_word_shift);
5577  bit_pos = (nbit & bm::set_word_mask);
5578 
5579  w = block[nword] & block_set_table<true>::_left[bit_pos];
5580  if (w)
5581  {
5582  bit_pos = bm::bit_scan_reverse32(w);
5583  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
5584  return true;
5585  }
5586 
5587  if (nword)
5588  {
5589  for (--nword; true; --nword)
5590  {
5591  w = block[nword];
5592  if (w)
5593  {
5594  bit_pos = bm::bit_scan_reverse32(w); // trailing zeros
5595  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
5596  return true;
5597  }
5598  if (!nword)
5599  break;
5600  } // for nword
5601  }
5602  return false;
5603 }
5604 
5605 
5606 // ----------------------------------------------------------------------
5607 
5608 /*! @brief Find start of the current 111 interval
5609  @return search result code 0 - not found, 1 found, 2 - found at the start
5610  @internal
5611 */
5612 inline
5614  unsigned nbit_from,
5615  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
5616 {
5617  BM_ASSERT(block && found_nbit);
5618  BM_ASSERT(nbit_from < bm::gap_max_bits);
5619  bool b;
5620  if (BM_IS_GAP(block))
5621  {
5622  const bm::gap_word_t* gap = BMGAP_PTR(block);
5623  b = bm::gap_find_interval_start(gap, nbit_from, found_nbit);
5624  if (b && *found_nbit == 0)
5625  return 2; // start of block, keep searching
5626  }
5627  else // bit-block
5628  {
5629  if (IS_FULL_BLOCK(block))
5630  {
5631  *found_nbit = 0;
5632  return 2;
5633  }
5634  b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit);
5635  if (b && *found_nbit == 0)
5636  return 2; // start of block, keep searching
5637  }
5638  return b;
5639 }
5640 
5641 // ----------------------------------------------------------------------
5642 
5643 /*! @brief Reverse find 1
5644  @return search result code 0 - not found, 1 found
5645  @internal
5646 */
5647 inline
5649  unsigned nbit_from,
5650  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
5651 {
5652  BM_ASSERT(block && found_nbit);
5653  BM_ASSERT(nbit_from < bm::gap_max_bits);
5654  bool b;
5655  if (BM_IS_GAP(block))
5656  {
5657  b = bm::gap_find_prev(BMGAP_PTR(block), nbit_from, found_nbit);
5658  }
5659  else // bit-block
5660  {
5661  if (IS_FULL_BLOCK(block))
5662  {
5663  *found_nbit = nbit_from;
5664  return true;
5665  }
5666  b = bm::bit_block_find_prev(block, nbit_from, found_nbit);
5667  }
5668  return b;
5669 }
5670 
5671 // ----------------------------------------------------------------------
5672 
5673 /*! @brief Returns "true" if one bit is set in the block [left, right]
5674  Function check for block varieties
5675  @internal
5676 */
5677 inline
5678 bool block_any_range(const bm::word_t* const BMRESTRICT block,
5679  unsigned left, unsigned right) BMNOEXCEPT
5680 {
5681  BM_ASSERT(left <= right);
5682  BM_ASSERT(right < bm::gap_max_bits);
5683  if (!block)
5684  return false;
5685  if (BM_IS_GAP(block))
5686  return bm::gap_any_range(BMGAP_PTR(block), left, right);
5687  if (IS_FULL_BLOCK(block))
5688  return true;
5689  return bm::bit_block_any_range(block, left, right);
5690 }
5691 
5692 // ----------------------------------------------------------------------
5693 
5694 /*! @brief Returns "true" if one bit is set in the block
5695  Function check for block varieties
5696  @internal
5697 */
5698 inline
5699 bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT
5700 {
5701  if (!block)
5702  return false;
5703  if (IS_FULL_BLOCK(block))
5704  return true;
5705  bool all_zero = (BM_IS_GAP(block)) ?
5707  : bm::bit_is_all_zero(block);
5708  return !all_zero;
5709 }
5710 
5711 
5712 
5713 // ----------------------------------------------------------------------
5714 
5715 // GAP blocks manipulation functions:
5716 
5717 
5718 /*!
5719  \brief GAP AND operation.
5720 
5721  Function performs AND logical operation on gap vectors.
5722  If possible function put the result into vect1 and returns this
5723  pointer. Otherwise result is put into tmp_buf, which should be
5724  twice of the vector size.
5725 
5726  \param vect1 - operand 1
5727  \param vect2 - operand 2
5728  \param tmp_buf - pointer on temporary buffer
5729  \param dsize - out size of the destination
5730  \return Result pointer (tmp_buf OR vect1)
5731 
5732  @ingroup gapfunc
5733 */
5734 inline
5736  const gap_word_t* BMRESTRICT vect2,
5737  gap_word_t* BMRESTRICT tmp_buf,
5738  unsigned& dsize) BMNOEXCEPT
5739 {
5740  bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
5741  tmp_buf, vect1, 0, vect2, 0, dsize);
5742  return tmp_buf;
5743 }
5744 
5745 /*!
5746  \brief GAP AND operation test.
5747 
5748  Function performs AND logical operation on gap vectors.
5749  If possible function put the result into vect1 and returns this
5750  pointer. Otherwise result is put into tmp_buf, which should be
5751  twice of the vector size.
5752 
5753  \param vect1 - operand 1
5754  \param vect2 - operand 2
5755  \return non zero value if operation returns any 1 bit
5756 
5757  @ingroup gapfunc
5758 */
5759 inline
5761  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5762 {
5763  return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0);
5764 }
5765 
5766 
5767 /*!
5768  \brief GAP bitcount AND operation test.
5769 
5770  \param vect1 - operand 1
5771  \param vect2 - operand 2
5772  \return bitcount of vect1 AND vect2
5773 
5774  @ingroup gapfunc
5775 */
5776 inline
5777 unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
5778  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5779 {
5780  return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2);
5781 }
5782 
5783 
5784 
5785 /*!
5786  \brief GAP XOR operation.
5787 
5788  Function performs XOR logical operation on gap vectors.
5789  If possible function put the result into vect1 and returns this
5790  pointer. Otherwise result is put into tmp_buf, which should be
5791  twice of the vector size.
5792 
5793  \param vect1 - operand 1
5794  \param vect2 - operand 2
5795  \param tmp_buf - pointer on temporary buffer
5796  \param dsize - out destination size
5797  \return Result pointer (tmp_buf)
5798 
5799  @ingroup gapfunc
5800 */
5801 inline
5803  const gap_word_t* BMRESTRICT vect2,
5804  gap_word_t* BMRESTRICT tmp_buf,
5805  unsigned& dsize) BMNOEXCEPT
5806 {
5807  bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
5808  tmp_buf, vect1, 0, vect2, 0, dsize);
5809  return tmp_buf;
5810 }
5811 
5812 /*! Light weight gap_operation_xor for len prediction
5813  @ingroup gapfunc
5814 */
5815 inline
5817  const gap_word_t* BMRESTRICT vect2,
5818  unsigned& dsize,
5819  unsigned limit) BMNOEXCEPT
5820 {
5821  return
5822  bm::gap_buff_dry_op<bm::gap_word_t, bm::xor_func>(vect1, vect2, dsize, limit);
5823 }
5824 
5825 
5826 /*!
5827  \brief GAP XOR operation test.
5828 
5829  Function performs AND logical operation on gap vectors.
5830  If possible function put the result into vect1 and returns this
5831  pointer. Otherwise result is put into tmp_buf, which should be
5832  twice of the vector size.
5833 
5834  \param vect1 - operand 1
5835  \param vect2 - operand 2
5836  \return non zero value if operation returns any 1 bit
5837 
5838  @ingroup gapfunc
5839 */
5842  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5843 {
5844  return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0);
5845 }
5846 
5847 /*!
5848  \brief GAP bitcount XOR operation test.
5849 
5850  \param vect1 - operand 1
5851  \param vect2 - operand 2
5852  \return bitcount of vect1 XOR vect2
5853 
5854  @ingroup gapfunc
5855 */
5857 unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
5858  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5859 {
5860  return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2);
5861 }
5862 
5863 
5864 /*!
5865  \brief GAP OR operation.
5866 
5867  Function performs OR logical oparation on gap vectors.
5868  If possible function put the result into vect1 and returns this
5869  pointer. Otherwise result is put into tmp_buf, which should be
5870  twice of the vector size.
5871 
5872  \param vect1 - operand 1
5873  \param vect2 - operand 2
5874  \param tmp_buf - pointer on temporary buffer
5875  \param dsize - out destination size
5876 
5877  \return Result pointer (tmp_buf)
5878 
5879  @ingroup gapfunc
5880 */
5881 inline
5883  const gap_word_t* BMRESTRICT vect2,
5884  gap_word_t* BMRESTRICT tmp_buf,
5885  unsigned& dsize) BMNOEXCEPT
5886 {
5887  bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
5888  bm::gap_invert(tmp_buf);
5889  return tmp_buf;
5890 }
5891 
5892 /*!
5893  \brief GAP bitcount OR operation test.
5894 
5895  \param vect1 - operand 1
5896  \param vect2 - operand 2
5897  \return bitcount of vect1 OR vect2
5898 
5899  @ingroup gapfunc
5900 */
5902 unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
5903  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5904 {
5905  return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2);
5906 }
5907 
5908 
5909 
5910 /*!
5911  \brief GAP SUB (AND NOT) operation.
5912 
5913  Function performs SUB logical operation on gap vectors.
5914  If possible function put the result into vect1 and returns this
5915  pointer. Otherwise result is put into tmp_buf, which should be
5916  twice of the vector size.
5917 
5918  \param vect1 - operand 1
5919  \param vect2 - operand 2
5920  \param tmp_buf - pointer on temporary buffer
5921  \param dsize - out destination size
5922 
5923  \return Result pointer (tmp_buf)
5924 
5925  @ingroup gapfunc
5926 */
5927 inline
5929  const gap_word_t* BMRESTRICT vect2,
5930  gap_word_t* BMRESTRICT tmp_buf,
5931  unsigned& dsize) BMNOEXCEPT
5932 {
5933  bm::gap_buff_op<bm::gap_word_t, bm::and_func>( // no bug here
5934  tmp_buf, vect1, 0, vect2, 1, dsize);
5935  return tmp_buf;
5936 }
5937 
5938 
5939 /*!
5940  \brief GAP SUB operation test.
5941 
5942  Function performs AND logical operation on gap vectors.
5943  If possible function put the result into vect1 and returns this
5944  pointer. Otherwise result is put into tmp_buf, which should be
5945  twice of the vector size.
5946 
5947  \param vect1 - operand 1
5948  \param vect2 - operand 2
5949  \return non zero value if operation returns any 1 bit
5950 
5951  @ingroup gapfunc
5952 */
5953 inline
5955  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5956 {
5957  return
5958  bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>( // no bug here
5959  vect1, 0, vect2, 1);
5960 }
5961 
5962 
5963 /*!
5964 \brief GAP bitcount SUB (AND NOT) operation test.
5965 
5966 \param vect1 - operand 1
5967 \param vect2 - operand 2
5968 \return bitcount of vect1 SUB (AND NOT) vect2
5969 
5970 @ingroup gapfunc
5971 */
5973 unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
5974  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5975 {
5976  return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2);
5977 }
5978 
5979 
5980 // ----------------------------------------------------------------------
5981 
5982 // BIT blocks manipulation functions:
5983 
5984 
5985 /*!
5986  \brief Bitblock copy operation.
5987 
5988  \param dst - destination block.
5989  \param src - source block.
5990 
5991  @ingroup bitfunc
5992 */
5993 inline
5995  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
5996 {
5997 #ifdef BMVECTOPT
5998  VECT_COPY_BLOCK(dst, src);
5999 #else
6000  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
6001 #endif
6002 }
6003 
6004 /*!
6005  \brief Bitblock copy/stream operation.
6006 
6007  \param dst - destination block.
6008  \param src - source block.
6009 
6010  @ingroup bitfunc
6011 */
6012 inline
6014  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6015 {
6016 #ifdef VECT_STREAM_BLOCK
6017  VECT_STREAM_BLOCK(dst, src);
6018 #else
6019  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
6020 #endif
6021 }
6022 
6023 
6024 /*!
6025  \brief Plain bitblock AND operation.
6026  Function does not analyse availability of source and destination blocks.
6027 
6028  \param dst - destination block.
6029  \param src - source block.
6030 
6031  \return 0 if AND operation did not produce anything (no 1s in the output)
6032 
6033  @ingroup bitfunc
6034 */
6035 inline
6037  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6038 {
6039  BM_ASSERT(dst);
6040  BM_ASSERT(src);
6041  BM_ASSERT(dst != src);
6042 
6043 #ifdef BMVECTOPT
6044  bm::id64_t acc = VECT_AND_BLOCK(dst, src);
6045 #else
6046  unsigned arr_sz = bm::set_block_size / 2;
6047 
6050 
6051  bm::id64_t acc = 0;
6052  for (unsigned i = 0; i < arr_sz; i+=4)
6053  {
6054  acc |= dst_u->w64[i] &= src_u->w64[i];
6055  acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
6056  acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
6057  acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
6058  }
6059 #endif
6060  return acc;
6061 }
6062 
6063 /*!
6064  \brief digest based bit-block AND
6065 
6066  \param dst - destination block.
6067  \param src - source block.
6068  \param digest - known digest of dst block
6069 
6070  \return new digest
6071 
6072  @ingroup bitfunc
6073 */
6074 inline
6076  const bm::word_t* BMRESTRICT src,
6077  bm::id64_t digest) BMNOEXCEPT
6078 {
6079  BM_ASSERT(dst);
6080  BM_ASSERT(src);
6081  BM_ASSERT(dst != src);
6082 
6083  const bm::id64_t mask(1ull);
6084  bm::id64_t d = digest;
6085  while (d)
6086  {
6087  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6088 
6089  unsigned wave = bm::word_bitcount64(t - 1);
6090  unsigned off = wave * bm::set_block_digest_wave_size;
6091 
6092  #if defined(VECT_AND_DIGEST)
6093  bool all_zero = VECT_AND_DIGEST(&dst[off], &src[off]);
6094  if (all_zero)
6095  digest &= ~(mask << wave);
6096  #else
6097  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
6099 
6100  bm::id64_t acc = 0;
6101  unsigned j = 0;
6102  do
6103  {
6104  acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
6105  acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
6106  acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
6107  acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
6108  j+=4;
6109  } while (j < bm::set_block_digest_wave_size/2);
6110 
6111  if (!acc) // all zero
6112  digest &= ~(mask << wave);
6113  #endif
6114 
6115  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6116  } // while
6117 
6118  return digest;
6119 }
6120 
6121 
6122 /*!
6123  \brief digest based bit-block AND 5-way
6124 
6125  \return new digest
6126 
6127  @ingroup bitfunc
6128 */
6129 inline
6131  const bm::word_t* BMRESTRICT src0,
6132  const bm::word_t* BMRESTRICT src1,
6133  const bm::word_t* BMRESTRICT src2,
6134  const bm::word_t* BMRESTRICT src3,
6135  bm::id64_t digest) BMNOEXCEPT
6136 {
6137  BM_ASSERT(dst);
6138  BM_ASSERT(src0 && src1 && src2 && src3);
6139 
6140  const bm::id64_t mask(1ull);
6141  bm::id64_t d = digest;
6142  while (d)
6143  {
6144  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6145 
6146  unsigned wave = bm::word_bitcount64(t - 1);
6147  unsigned off = wave * bm::set_block_digest_wave_size;
6148 
6149 #if defined(VECT_AND_DIGEST_5WAY)
6150  bool all_zero = VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
6151  if (all_zero)
6152  digest &= ~(mask << wave);
6153 #else
6154  const bm::bit_block_t::bunion_t* BMRESTRICT src_u0 = (const bm::bit_block_t::bunion_t*)(&src0[off]);
6155  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 = (const bm::bit_block_t::bunion_t*)(&src1[off]);
6156  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 = (const bm::bit_block_t::bunion_t*)(&src2[off]);
6157  const bm::bit_block_t::bunion_t* BMRESTRICT src_u3 = (const bm::bit_block_t::bunion_t*)(&src3[off]);
6159 
6160  bm::id64_t acc = 0;
6161  unsigned j = 0;
6162  do
6163  {
6164  acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
6165  acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
6166  acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
6167  acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
6168  j += 4;
6169  } while (j < bm::set_block_digest_wave_size / 2);
6170 
6171  if (!acc) // all zero
6172  digest &= ~(mask << wave);
6173 #endif
6174 
6175  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6176  } // while
6177 
6178  return digest;
6179 }
6180 
6181 
6182 /*!
6183  \brief digest based bit-block AND
6184 
6185  dst = src1 AND src2
6186 
6187  \param dst - destination block.
6188  \param src1 - source block.
6189  \param src2 - source block.
6190  \param digest - known initial digest
6191 
6192  \return new digest
6193 
6194  @ingroup bitfunc
6195 */
6196 inline
6198  const bm::word_t* BMRESTRICT src1,
6199  const bm::word_t* BMRESTRICT src2,
6200  bm::id64_t digest) BMNOEXCEPT
6201 {
6202  BM_ASSERT(dst);
6203  BM_ASSERT(src1 && src2);
6204  BM_ASSERT(dst != src1 && dst != src2);
6205 
6206  const bm::id64_t mask(1ull);
6207  bm::id64_t d = digest;
6208  while (d)
6209  {
6210  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6211 
6212  unsigned wave = bm::word_bitcount64(t - 1);
6213  unsigned off = wave * bm::set_block_digest_wave_size;
6214 
6215  #if defined(VECT_AND_DIGEST_2WAY)
6216  bool all_zero = VECT_AND_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
6217  if (all_zero)
6218  digest &= ~(mask << wave);
6219  #else
6220  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
6221  (const bm::bit_block_t::bunion_t*)(&src1[off]);
6222  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
6223  (const bm::bit_block_t::bunion_t*)(&src2[off]);
6225  (bm::bit_block_t::bunion_t*)(&dst[off]);
6226 
6227  bm::id64_t acc = 0;
6228  unsigned j = 0;
6229  do
6230  {
6231  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
6232  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
6233  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
6234  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
6235  j+=4;
6236  } while (j < bm::set_block_digest_wave_size/2);
6237 
6238  if (!acc) // all zero
6239  digest &= ~(mask << wave);
6240  #endif
6241 
6242  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6243  } // while
6244 
6245  return digest;
6246 }
6247 
6248 
6249 
6250 /*!
6251  \brief Function ANDs two bitblocks and computes the bitcount.
6252  Function does not analyse availability of source blocks.
6253 
6254  \param src1 - first bit block
6255  \param src2 - second bit block
6256 
6257  @ingroup bitfunc
6258 */
6259 inline
6261  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6262 {
6263  unsigned count;
6264  const bm::word_t* src1_end = src1 + bm::set_block_size;
6265 #ifdef BMVECTOPT
6266  count = VECT_BITCOUNT_AND(src1, src1_end, src2);
6267 #else
6268  count = 0;
6269 # ifdef BM64OPT
6270  const bm::id64_t* b1 = (bm::id64_t*) src1;
6271  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6272  const bm::id64_t* b2 = (bm::id64_t*) src2;
6273  do
6274  {
6275  count += bitcount64_4way(b1[0] & b2[0],
6276  b1[1] & b2[1],
6277  b1[2] & b2[2],
6278  b1[3] & b2[3]);
6279  b1 += 4;
6280  b2 += 4;
6281  } while (b1 < b1_end);
6282 # else
6283  do
6284  {
6285  BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]);
6286  BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]);
6287  BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]);
6288  BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]);
6289 
6290  src1+=4;
6291  src2+=4;
6292  } while (src1 < src1_end);
6293 # endif
6294 #endif
6295  return count;
6296 }
6297 
6298 
6299 /*!
6300  \brief Function ANDs two bitblocks and tests for any bit.
6301  Function does not analyse availability of source blocks.
6302 
6303  \param src1 - first bit block
6304  \param src2 - second bit block
6305 
6306  @ingroup bitfunc
6307 */
6308 inline
6309 unsigned bit_block_and_any(const bm::word_t* src1,
6310  const bm::word_t* src2) BMNOEXCEPT
6311 {
6312  unsigned count = 0;
6313  const bm::word_t* src1_end = src1 + bm::set_block_size;
6314  do
6315  {
6316  count = (src1[0] & src2[0]) |
6317  (src1[1] & src2[1]) |
6318  (src1[2] & src2[2]) |
6319  (src1[3] & src2[3]);
6320 
6321  src1+=4; src2+=4;
6322  } while ((src1 < src1_end) && !count);
6323  return count;
6324 }
6325 
6326 
6327 
6328 
6329 /*!
6330  \brief Function XORs two bitblocks and computes the bitcount.
6331  Function does not analyse availability of source blocks.
6332 
6333  \param src1 - first bit block
6334  \param src2 - second bit block
6335 
6336  @ingroup bitfunc
6337 */
6338 inline
6340  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6341 {
6342  unsigned count;
6343  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6344 #ifdef BMVECTOPT
6345  count = VECT_BITCOUNT_XOR(src1, src1_end, src2);
6346 #else
6347  count = 0;
6348 # ifdef BM64OPT
6349  const bm::id64_t* b1 = (bm::id64_t*) src1;
6350  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6351  const bm::id64_t* b2 = (bm::id64_t*) src2;
6352  do
6353  {
6354  count += bitcount64_4way(b1[0] ^ b2[0],
6355  b1[1] ^ b2[1],
6356  b1[2] ^ b2[2],
6357  b1[3] ^ b2[3]);
6358  b1 += 4;
6359  b2 += 4;
6360  } while (b1 < b1_end);
6361 # else
6362  do
6363  {
6364  BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]);
6365  BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]);
6366  BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]);
6367  BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]);
6368 
6369  src1+=4;
6370  src2+=4;
6371  } while (src1 < src1_end);
6372 # endif
6373 #endif
6374  return count;
6375 }
6376 
6377 
6378 /*!
6379  \brief Function XORs two bitblocks and and tests for any bit.
6380  Function does not analyse availability of source blocks.
6381 
6382  \param src1 - first bit block.
6383  \param src2 - second bit block.
6384 
6385  @ingroup bitfunc
6386 */
6387 inline
6389  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6390 {
6391  unsigned count = 0;
6392  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6393  do
6394  {
6395  count = (src1[0] ^ src2[0]) |
6396  (src1[1] ^ src2[1]) |
6397  (src1[2] ^ src2[2]) |
6398  (src1[3] ^ src2[3]);
6399 
6400  src1+=4; src2+=4;
6401  } while (!count && (src1 < src1_end));
6402  return count;
6403 }
6404 
6405 /*!
6406  \brief Function SUBs two bitblocks and computes the bitcount.
6407  Function does not analyse availability of source blocks.
6408 
6409  \param src1 - first bit block.
6410  \param src2 - second bit block.
6411 
6412  @ingroup bitfunc
6413 */
6414 inline
6416  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6417 {
6418  unsigned count;
6419  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6420 #ifdef BMVECTOPT
6421  count = VECT_BITCOUNT_SUB(src1, src1_end, src2);
6422 #else
6423  count = 0;
6424 # ifdef BM64OPT
6425  const bm::id64_t* b1 = (bm::id64_t*) src1;
6426  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6427  const bm::id64_t* b2 = (bm::id64_t*) src2;
6428  do
6429  {
6430  count += bitcount64_4way(b1[0] & ~b2[0],
6431  b1[1] & ~b2[1],
6432  b1[2] & ~b2[2],
6433  b1[3] & ~b2[3]);
6434  b1 += 4;
6435  b2 += 4;
6436  } while (b1 < b1_end);
6437 # else
6438  do
6439  {
6440  BM_INCWORD_BITCOUNT(count, src1[0] & ~src2[0]);
6441  BM_INCWORD_BITCOUNT(count, src1[1] & ~src2[1]);
6442  BM_INCWORD_BITCOUNT(count, src1[2] & ~src2[2]);
6443  BM_INCWORD_BITCOUNT(count, src1[3] & ~src2[3]);
6444 
6445  src1+=4;
6446  src2+=4;
6447  } while (src1 < src1_end);
6448 # endif
6449 #endif
6450  return count;
6451 }
6452 
6453 /*!
6454  \brief Function SUBs two bitblocks and and tests for any bit.
6455  Function does not analyse availability of source blocks.
6456 
6457  \param src1 - first bit block.
6458  \param src2 - second bit block.
6459 
6460  @ingroup bitfunc
6461 */
6462 inline
6464  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6465 {
6466  unsigned count = 0;
6467  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6468 
6469  do
6470  {
6471  count = (src1[0] & ~src2[0]) |
6472  (src1[1] & ~src2[1]) |
6473  (src1[2] & ~src2[2]) |
6474  (src1[3] & ~src2[3]);
6475 
6476  src1+=4; src2+=4;
6477  } while ((src1 < src1_end) && (count == 0));
6478  return count;
6479 }
6480 
6481 
6482 /*!
6483  \brief Function ORs two bitblocks and computes the bitcount.
6484  Function does not analyse availability of source blocks.
6485 
6486  \param src1 - first bit block
6487  \param src2 - second bit block.
6488 
6489  @ingroup bitfunc
6490 */
6491 inline
6492 unsigned bit_block_or_count(const bm::word_t* src1,
6493  const bm::word_t* src2) BMNOEXCEPT
6494 {
6495  unsigned count;
6496  const bm::word_t* src1_end = src1 + bm::set_block_size;
6497 #ifdef BMVECTOPT
6498  count = VECT_BITCOUNT_OR(src1, src1_end, src2);
6499 #else
6500  count = 0;
6501 # ifdef BM64OPT
6502  const bm::id64_t* b1 = (bm::id64_t*) src1;
6503  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6504  const bm::id64_t* b2 = (bm::id64_t*) src2;
6505  do
6506  {
6507  count += bitcount64_4way(b1[0] | b2[0],
6508  b1[1] | b2[1],
6509  b1[2] | b2[2],
6510  b1[3] | b2[3]);
6511  b1 += 4;
6512  b2 += 4;
6513  } while (b1 < b1_end);
6514 # else
6515  do
6516  {
6517  BM_INCWORD_BITCOUNT(count, src1[0] | src2[0]);
6518  BM_INCWORD_BITCOUNT(count, src1[1] | src2[1]);
6519  BM_INCWORD_BITCOUNT(count, src1[2] | src2[2]);
6520  BM_INCWORD_BITCOUNT(count, src1[3] | src2[3]);
6521 
6522  src1+=4;
6523  src2+=4;
6524  } while (src1 < src1_end);
6525 # endif
6526 #endif
6527  return count;
6528 }
6529 
6530 /*!
6531  \brief Function ORs two bitblocks and and tests for any bit.
6532  Function does not analyse availability of source blocks.
6533 
6534  \param src1 - first bit block.
6535  \param src2 - second bit block.
6536 
6537  @ingroup bitfunc
6538 */
6539 inline
6541  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6542 {
6543  unsigned count = 0;
6544  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6545  do
6546  {
6547  count = (src1[0] | src2[0]) |
6548  (src1[1] | src2[1]) |
6549  (src1[2] | src2[2]) |
6550  (src1[3] | src2[3]);
6551 
6552  src1+=4; src2+=4;
6553  } while (!count && (src1 < src1_end));
6554  return count;
6555 }
6556 
6557 
6558 
6559 
6560 /*!
6561  \brief bitblock AND operation.
6562 
6563  \param dst - destination block.
6564  \param src - source block.
6565 
6566  \returns pointer on destination block.
6567  If returned value equal to src means that block mutation requested.
6568  NULL is valid return value.
6569 
6570  @ingroup bitfunc
6571 */
6573  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6574 {
6575  BM_ASSERT(dst || src);
6576 
6577  bm::word_t* ret = dst;
6578 
6579  if (IS_VALID_ADDR(dst)) // The destination block already exists
6580  {
6581  if (!IS_VALID_ADDR(src))
6582  {
6583  if (IS_EMPTY_BLOCK(src))
6584  {
6585  //If the source block is zero
6586  //just clean the destination block
6587  return 0;
6588  }
6589  }
6590  else
6591  {
6592  // Regular operation AND on the whole block.
6593  //
6594  auto any = bm::bit_block_and(dst, src);
6595  if (!any)
6596  ret = 0;
6597  }
6598  }
6599  else // The destination block does not exist yet
6600  {
6601  if(!IS_VALID_ADDR(src))
6602  {
6603  if(IS_EMPTY_BLOCK(src))
6604  {
6605  // The source block is empty.
6606  // One argument empty - all result is empty.
6607  return 0;
6608  }
6609  // Here we have nothing to do.
6610  // Src block is all ON, dst block remains as it is
6611  }
6612  else // destination block does not exists, src - valid block
6613  {
6614  if (IS_FULL_BLOCK(dst))
6615  return const_cast<bm::word_t*>(src);
6616  // Nothng to do.
6617  // Dst block is all ZERO no combination required.
6618  }
6619  }
6620 
6621  return ret;
6622 }
6623 
6624 
6625 /*!
6626  \brief Performs bitblock AND operation and calculates bitcount of the result.
6627 
6628  \param src1 - first bit block.
6629  \param src2 - second bit block.
6630 
6631  \returns bitcount value
6632 
6633  @ingroup bitfunc
6634 */
6635 inline
6637  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6638 {
6639  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6640  return 0;
6641  if (src1 == FULL_BLOCK_FAKE_ADDR)
6642  src1 = FULL_BLOCK_REAL_ADDR;
6643  if (src2 == FULL_BLOCK_FAKE_ADDR)
6644  src2 = FULL_BLOCK_REAL_ADDR;
6645 
6646  return bit_block_and_count(src1, src2);
6647 }
6648 
6649 /*!
6650  \brief Performs bitblock AND operation test.
6651 
6652  \param src1 - first bit block.
6653  \param src2 - second bit block.
6654 
6655  \returns non zero if there is any value
6656 
6657  @ingroup bitfunc
6658 */
6659 inline
6661  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6662 {
6663  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6664  return 0;
6665  if (src1 == FULL_BLOCK_FAKE_ADDR)
6666  src1 = FULL_BLOCK_REAL_ADDR;
6667  if (src2 == FULL_BLOCK_FAKE_ADDR)
6668  src2 = FULL_BLOCK_REAL_ADDR;
6669  return bit_block_and_any(src1, src2);
6670 }
6671 
6672 
6673 
6674 /*!
6675  \brief Performs bitblock SUB operation and calculates bitcount of the result.
6676 
6677  \param src1 - first bit block.
6678  \param src2 - second bit block
6679 
6680  \returns bitcount value
6681 
6682  @ingroup bitfunc
6683 */
6684 inline
6686  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6687 {
6688  if (src1 == src2)
6689  return 0;
6690  if (IS_EMPTY_BLOCK(src1))
6691  return 0;
6692  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
6693  {
6694  if (IS_FULL_BLOCK(src1))
6695  return bm::gap_max_bits;
6696  return bm::bit_block_count(src1);
6697  }
6698  if (IS_FULL_BLOCK(src2))
6699  return 0;
6700 
6701  if (src1 == FULL_BLOCK_FAKE_ADDR)
6702  src1 = FULL_BLOCK_REAL_ADDR;
6703  if (src2 == FULL_BLOCK_FAKE_ADDR)
6704  src2 = FULL_BLOCK_REAL_ADDR;
6705 
6706  return bm::bit_block_sub_count(src1, src2);
6707 }
6708 
6709 
6710 /*!
6711  \brief Performs inverted bitblock SUB operation and calculates
6712  bitcount of the result.
6713 
6714  \param src1 - first bit block.
6715  \param src2 - second bit block
6716 
6717  \returns bitcount value
6718 
6719  @ingroup bitfunc
6720 */