BitMagic-C++
bmfunc.h
Go to the documentation of this file.
1 #ifndef BMFUNC__H__INCLUDED__
2 #define BMFUNC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmfunc.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include <memory.h>
26 
27 #include "bmdef.h"
28 #include "bmutil.h"
29 
30 
31 #ifdef _MSC_VER
32 # pragma warning( disable: 4146 )
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 inline
41  bm::word_t left,
42  bm::word_t right) BMNOEXCEPT;
43 
44 inline
46  bm::word_t left,
47  bm::word_t right) BMNOEXCEPT;
48 
49 /*!
50  @brief Structure with statistical information about memory
51  allocation footprint, serialization projection, number of vectors
52  @ingroup bvector
53 */
55 {
56  size_t bit_blocks; ///< Number of bit blocks
57  size_t gap_blocks; ///< Number of GAP blocks
58  size_t ptr_sub_blocks; ///< Number of sub-blocks
59  size_t bv_count; ///< Number of bit-vectors
60  size_t max_serialize_mem; ///< estimated maximum memory for serialization
61  size_t memory_used; ///< memory usage for all blocks and service tables
62  size_t gap_cap_overhead; ///< gap memory overhead between length and capacity
63  gap_word_t gap_levels[bm::gap_levels]; ///< GAP block lengths in the bvect
64  unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level
65 
66  /// cound bit block
68  {
69  ++bit_blocks;
70  size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
71  memory_used += mem_used;
72  max_serialize_mem += mem_used;
73  }
74 
75  /// count gap block
76  void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT
77  {
78  ++gap_blocks;
79  size_t mem_used = (capacity * sizeof(gap_word_t));
80  memory_used += mem_used;
81  max_serialize_mem += (unsigned)(length * sizeof(gap_word_t));
82  BM_ASSERT(length <= capacity);
83  gap_cap_overhead += (capacity - length) * sizeof(gap_word_t);
84  for (unsigned i = 0; i < bm::gap_levels; ++i)
85  {
86  if (capacity == gap_levels[i])
87  {
88  gaps_by_level[i]++;
89  return;
90  }
91  }
92  BM_ASSERT(0);
93  }
94 
95  /// Reset statisctics
97  {
100  for (unsigned i = 0; i < bm::gap_levels; ++i)
101  gaps_by_level[i] = 0ull;
102  }
103 
104  /// Sum data from another sttructure
105  void add(const bv_statistics& st) BMNOEXCEPT
106  {
107  bit_blocks += st.bit_blocks;
108  gap_blocks += st.gap_blocks;
109  ptr_sub_blocks += st.ptr_sub_blocks;
110  bv_count += st.bv_count;
111  max_serialize_mem += st.max_serialize_mem + 8;
112  memory_used += st.memory_used;
113  gap_cap_overhead += st.gap_cap_overhead;
114  }
115 };
116 
117 /**
118  @brief Pair type
119 */
120 template<typename First, typename Second>
121 struct pair
122 {
123  First first;
124  Second second;
125 
126  pair(First f, Second s) : first(f), second(s) {}
127 };
128 
129 /**
130  \brief bit-decode cache structure
131 */
133 {
134  unsigned short bits[65]; //< decoded bits
135  unsigned bcnt; //< length of bits array
136  bm::id64_t cvalue; //< cache decoded value
137 
139 };
140 
141 
142 /**
143  \brief Recalc linear bvector block index into 2D matrix coordinates
144  \internal
145 */
146 template<typename BI_TYPE>
148 void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT
149 {
150  i = unsigned(nb >> bm::set_array_shift); // top block address
151  j = unsigned(nb & bm::set_array_mask); // address in sub-block
152 }
153 
154 /**
155  Compute bit address of the first bit in a superblock
156  \internal
157 */
158 template<typename RTYPE>
160 {
161  return RTYPE(i) * bm::set_sub_total_bits;
162 }
163 
164 /**
165  Compute bit address of the first bit in a block
166  \internal
167 */
168 template<typename RTYPE>
169 BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT
170 {
171  RTYPE idx = bm::get_super_block_start<RTYPE>(i);
172  idx += (j) * bm::gap_max_bits;
173  return idx;
174 }
175 
176 
177 /*!
178  @defgroup gapfunc GAP functions
179  GAP functions implement different opereations on GAP compressed blocks (internals)
180  and serve as a minimal building blocks.
181  \internal
182  @ingroup bvector
183  */
184 
185 /*!
186  @defgroup bitfunc BIT functions
187  Bit functions implement different opereations on bit blocks (internals)
188  and serve as a minimal building blocks.
189  \internal
190  @ingroup bvector
191  */
192 
193 
194 /*!
195  Returns bit count
196  @ingroup bitfunc
197 */
200 {
201 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
202  return bm::id_t(_mm_popcnt_u32(w));
203 #else
204  #if defined(BM_USE_GCC_BUILD)
205  return (bm::id_t)__builtin_popcount(w);
206  #else
207  return
208  bm::bit_count_table<true>::_count[(unsigned char)(w)] +
209  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
210  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
211  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
212  #endif
213 #endif
214 }
215 
216 inline
217 int parallel_popcnt_32(unsigned int n) BMNOEXCEPT
218 {
219  unsigned int tmp;
220 
221  tmp = n - ((n >> 1) & 033333333333)
222  - ((n >> 2) & 011111111111);
223  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
224 }
225 
226 
227 /*!
228  Function calculates number of 1 bits in 64-bit word.
229  @ingroup bitfunc
230 */
233 {
234 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
235 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
236  return unsigned(_mm_popcnt_u64(x));
237 #else
238  return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
239 #endif
240 #else
241  #if defined(BM_USE_GCC_BUILD)
242  return (unsigned)__builtin_popcountll(x);
243  #else
244  x = x - ((x >> 1) & 0x5555555555555555);
245  x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
246  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
247  x = x + (x >> 8);
248  x = x + (x >> 16);
249  x = x + (x >> 32);
250  return x & 0xFF;
251  #endif
252 #endif
253 }
254 
255 inline
258 {
259  const bm::id64_t m1 = 0x5555555555555555U;
260  const bm::id64_t m2 = 0x3333333333333333U;
261  const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0FU;
262  const bm::id64_t m4 = 0x000000FF000000FFU;
263 
264  x = x - ((x >> 1) & m1);
265  y = y - ((y >> 1) & m1);
266  u = u - ((u >> 1) & m1);
267  v = v - ((v >> 1) & m1);
268  x = (x & m2) + ((x >> 2) & m2);
269  y = (y & m2) + ((y >> 2) & m2);
270  u = (u & m2) + ((u >> 2) & m2);
271  v = (v & m2) + ((v >> 2) & m2);
272  x = x + y;
273  u = u + v;
274  x = (x & m3) + ((x >> 4) & m3);
275  u = (u & m3) + ((u >> 4) & m3);
276  x = x + u;
277  x = x + (x >> 8);
278  x = x + (x >> 16);
279  x = x & m4;
280  x = x + (x >> 32);
281  return x & 0x000001FFU;
282 }
283 
284 
285 // --------------------------------------------------------------
286 // Functions for bit-scanning
287 // --------------------------------------------------------------
288 
289 /*!
290  \brief Templated algorithm to unpacks octet based word into list of ON bit indexes
291  \param w - value
292  \param func - bit functor
293 
294  @ingroup bitfunc
295 */
296 template<typename T, typename F>
297 void bit_for_each_4(T w, F& func)
298 {
299  for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
300  {
301  switch (w & 15) // 1111
302  {
303  case 0: // 0000
304  break;
305  case 1: // 0001
306  func(sub_octet);
307  break;
308  case 2: // 0010
309  func(sub_octet + 1);
310  break;
311  case 3: // 0011
312  func(sub_octet, sub_octet + 1);
313  break;
314  case 4: // 0100
315  func(sub_octet + 2);
316  break;
317  case 5: // 0101
318  func(sub_octet, sub_octet + 2);
319  break;
320  case 6: // 0110
321  func(sub_octet + 1, sub_octet + 2);
322  break;
323  case 7: // 0111
324  func(sub_octet, sub_octet + 1, sub_octet + 2);
325  break;
326  case 8: // 1000
327  func(sub_octet + 3);
328  break;
329  case 9: // 1001
330  func(sub_octet, sub_octet + 3);
331  break;
332  case 10: // 1010
333  func(sub_octet + 1, sub_octet + 3);
334  break;
335  case 11: // 1011
336  func(sub_octet, sub_octet + 1, sub_octet + 3);
337  break;
338  case 12: // 1100
339  func(sub_octet + 2, sub_octet + 3);
340  break;
341  case 13: // 1101
342  func(sub_octet, sub_octet + 2, sub_octet + 3);
343  break;
344  case 14: // 1110
345  func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
346  break;
347  case 15: // 1111
348  func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
349  break;
350  default:
351  BM_ASSERT(0);
352  break;
353  }
354 
355  } // for
356 }
357 
358 
359 /*!
360  \brief Templated algorithm to unpacks word into list of ON bit indexes
361  \param w - value
362  \param func - bit functor
363 
364  @ingroup bitfunc
365 */
366 template<typename T, typename F>
367 void bit_for_each(T w, F& func)
368 {
369  // Note: 4-bit table method works slower than plain check approach
370  for (unsigned octet = 0; w != 0; w >>= 8, octet += 8)
371  {
372  if (w & 1) func(octet + 0);
373  if (w & 2) func(octet + 1);
374  if (w & 4) func(octet + 2);
375  if (w & 8) func(octet + 3);
376  if (w & 16) func(octet + 4);
377  if (w & 32) func(octet + 5);
378  if (w & 64) func(octet + 6);
379  if (w & 128) func(octet + 7);
380 
381  } // for
382 }
383 
384 /*! @brief Adaptor to copy 1 bits to array
385  @internal
386 */
387 template<typename B>
389 {
390 public:
391  copy_to_array_functor(B* bits): bp_(bits)
392  {}
393 
394  B* ptr() { return bp_; }
395 
396  void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
397 
398  void operator()(unsigned bit_idx0,
399  unsigned bit_idx1) BMNOEXCEPT
400  {
401  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
402  bp_+=2;
403  }
404 
405  void operator()(unsigned bit_idx0,
406  unsigned bit_idx1,
407  unsigned bit_idx2) BMNOEXCEPT
408  {
409  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
410  bp_+=3;
411  }
412 
413  void operator()(unsigned bit_idx0,
414  unsigned bit_idx1,
415  unsigned bit_idx2,
416  unsigned bit_idx3) BMNOEXCEPT
417  {
418  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
419  bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
420  bp_+=4;
421  }
422 
423 private:
425  copy_to_array_functor& operator=(const copy_to_array_functor&);
426 private:
427  B* bp_;
428 };
429 
430 
431 /*!
432  \brief Unpacks word into list of ON bit indexes
433  \param w - value
434  \param bits - pointer on the result array
435  \return number of bits in the list
436 
437  @ingroup bitfunc
438 */
439 template<typename T,typename B>
440 unsigned bit_list(T w, B* bits) BMNOEXCEPT
441 {
442  copy_to_array_functor<B> func(bits);
443  bit_for_each(w, func);
444  return (unsigned)(func.ptr() - bits);
445 }
446 
447 
448 
449 /*!
450  \brief Unpacks word into list of ON bit indexes (quad-bit based)
451  \param w - value
452  \param bits - pointer on the result array
453  \return number of bits in the list
454 
455  @ingroup bitfunc
456 */
457 template<typename T,typename B>
458 unsigned bit_list_4(T w, B* bits) BMNOEXCEPT
459 {
460  copy_to_array_functor<B> func(bits);
461  bit_for_each_4(w, func);
462  return (unsigned)(func.ptr() - bits);
463 }
464 
465 /*!
466  \brief Unpacks word into list of ON bit indexes using popcnt method
467  \param w - value
468  \param bits - pointer on the result array
469  \param offs - value to add to bit position (programmed shift)
470  \return number of bits in the list
471 
472  @ingroup bitfunc
473  @internal
474 */
475 template<typename B>
476 unsigned short
477 bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT
478 {
479  unsigned pos = 0;
480  while (w)
481  {
482  bm::id_t t = w & -w;
483  bits[pos++] = (B)(bm::word_bitcount(t - 1) + offs);
484  w &= w - 1;
485  }
486  return (unsigned short)pos;
487 }
488 
489 /*!
490  \brief Unpacks word into list of ON bit indexes using popcnt method
491  \param w - value
492  \param bits - pointer on the result array
493  \return number of bits in the list
494 
495  @ingroup bitfunc
496  @internal
497 */
498 template<typename B>
499 unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT
500 {
501  unsigned pos = 0;
502  while (w)
503  {
504  bm::id_t t = w & -w;
505  bits[pos++] = (B)(bm::word_bitcount(t - 1));
506  w &= w - 1;
507  }
508  return (unsigned short)pos;
509 }
510 
511 
512 /*!
513  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
514  \param w - value
515  \param bits - pointer on the result array
516  \return number of bits in the list
517  @ingroup bitfunc
518 */
519 template<typename B>
520 unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT
521 {
522  unsigned short pos = 0;
523  while (w)
524  {
525  bm::id64_t t = bmi_blsi_u64(w); // w & -w;
526  bits[pos++] = (B) bm::word_bitcount64(t - 1);
527  w = bmi_bslr_u64(w); // w &= w - 1;
528  }
529  return pos;
530 }
531 
532 /*!
533  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
534  \param w - value
535  \param bits - pointer on the result array
536  \param offs - value to add to bit position (programmed shift)
537  \return number of bits in the list
538  @ingroup bitfunc
539 */
540 template<typename B>
541 unsigned short
542 bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT
543 {
544  unsigned short pos = 0;
545  while (w)
546  {
547  bm::id64_t t = bmi_blsi_u64(w); // w & -w;
548  bits[pos++] = B(bm::word_bitcount64(t - 1) + offs);
549  w = bmi_bslr_u64(w); // w &= w - 1;
550  }
551  return pos;
552 }
553 
554 
555 template<typename V, typename B>
556 unsigned short bitscan(V w, B* bits) BMNOEXCEPT
557 {
558  if (bm::conditional<sizeof(V) == 8>::test())
559  return bm::bitscan_popcnt64(w, bits);
560  else
561  return bm::bitscan_popcnt((bm::word_t)w, bits);
562 }
563 
564 // --------------------------------------------------------------
565 // Functions for select
566 // --------------------------------------------------------------
567 
568 /**
569  \brief word find index of the rank-th bit set by bit-testing
570  \param w - 64-bit work to search
571  \param rank - rank to select (should be > 0)
572 
573  \return selected value (inxed of bit set)
574 */
575 inline
576 unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT
577 {
578  BM_ASSERT(w);
579  BM_ASSERT(rank);
580 
581  for (unsigned count = 0; w; w >>=1ull, ++count)
582  {
583  rank -= unsigned(w & 1ull);
584  if (!rank)
585  return count;
586  }
587  BM_ASSERT(0); // shoud not be here if rank is achievable
588  return ~0u;
589 }
590 
591 /**
592  \brief word find index of the rank-th bit set by bit-testing
593  \param w - 64-bit work to search
594  \param rank - rank to select (should be > 0)
595 
596  \return selected value (inxed of bit set)
597 */
598 inline
599 unsigned word_select64_bitscan(bm::id64_t w, unsigned rank) BMNOEXCEPT
600 {
601  BM_ASSERT(w);
602  BM_ASSERT(rank);
603  BM_ASSERT(rank <= bm::word_bitcount64(w));
604 
605  do
606  {
607  --rank;
608  if (!rank)
609  break;
610  w &= w - 1;
611  } while (1);
612 
613  bm::id64_t t = w & -w;
614  unsigned count = bm::word_bitcount64(t - 1);
615  return count;
616 }
617 
618 /**
619  \brief word find index of the rank-th bit set by bit-testing
620  \param w - 64-bit work to search
621  \param rank - rank to select (should be > 0)
622 
623  \return selected value (inxed of bit set)
624 */
625 inline
626 unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT
627 {
628 #if defined(BMI2_SELECT64)
629  return BMI2_SELECT64(w, rank);
630 #else
631  #if defined(BMI1_SELECT64)
632  return BMI2_SELECT64(w, rank);
633  #else
634  return bm::word_select64_bitscan(w, rank);
635  #endif
636 #endif
637 }
638 
639 // --------------------------------------------------------------
640 // Functions for bit-block digest calculation
641 // --------------------------------------------------------------
642 
643 /*!
644  \brief Compute digest mask for word address in block
645 
646  @return digest bit-mask
647 
648  @ingroup bitfunc
649  @internal
650 */
653 {
654  bm::id64_t mask(1ull);
655  return mask << (w_idx / bm::set_block_digest_wave_size);
656 }
657 
658 /**
659  \brief Compute digest mask for [from..to] positions
660  \param from - range from (in bit-block coordinates)
661  \param to - range to (in bit-block coordinates)
662 
663  @ingroup bitfunc
664  @internal
665 */
667 bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
668 {
669  BM_ASSERT(from <= to);
670 
671  bm::id64_t digest_from = from >> bm::set_block_digest_pos_shift;
672  bm::id64_t digest_to = to >> bm::set_block_digest_pos_shift;;
673  const bm::id64_t maskFF(~0ull);
674  return ((maskFF) >> (63 - (digest_to - digest_from))) << digest_from;
675 }
676 
677 /*!
678  \brief check if all digest bits for the range [from..to] are 0
679 
680  \param digest - current digest
681  \param bitpos_from - range from (in bit-block coordinates)
682  \param bitpos_to - range to (in bit-block coordinates)
683 
684  @return true if all range is zero
685 
686  @ingroup bitfunc
687  @internal
688 */
689 inline
691  unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT
692 {
693  bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
694  return !(digest & mask);
695 }
696 
697 /*!
698  \brief Init block with 000111000 pattren based on digest
699  \param block - Bit block [out]
700  \param digest - digest (used for block initialization)
701 
702  @ingroup bitfunc
703  @internal
704 */
705 inline
707 {
708  unsigned off;
709  for (unsigned i = 0; i < 64; ++i)
710  {
712  bm::word_t mask = (digest & 1) ? ~0u : 0u;
713 #if defined(VECT_BLOCK_SET_DIGEST)
714  VECT_BLOCK_SET_DIGEST(&block[off], mask);
715 #else
716  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
717  {
718  block[off+j+0] = block[off+j+1] =
719  block[off+j+2] = block[off+j+3] = mask;
720  } // for j
721 #endif
722  digest >>= 1ull;
723  } // for
724 }
725 
726 /*!
727  \brief Compute digest for 64 non-zero areas
728  \param block - Bit block
729 
730  @return digest bit-mask (0 means all block is empty)
731 
732  @ingroup bitfunc
733  @internal
734 */
735 inline
737 {
738  bm::id64_t digest0 = 0;
739  unsigned off;
740 
741  for (unsigned i = 0; i < 64; ++i)
742  {
744  #if defined(VECT_IS_DIGEST_ZERO)
745  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
746  digest0 |= bm::id64_t(!all_zero) << i;
747  #else
748  const bm::id64_t mask(1ull);
749  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
750  {
751  bm::word_t w =
752  block[off+j+0] | block[off+j+1] | block[off+j+2] | block[off+j+3];
753  if (w)
754  {
755  digest0 |= (mask << i);
756  break;
757  }
758  } // for j
759  #endif
760 
761  } // for i
762  return digest0;
763 }
764 
765 /*!
766  \brief Compute digest for 64 non-zero areas based on existing digest
767  (function revalidates zero areas)
768  \param block - bit block
769  \param digest - start digest
770 
771  @return digest bit-mask (0 means all block is empty)
772 
773  @ingroup bitfunc
774  @internal
775 */
776 inline
779 {
780  const bm::id64_t mask(1ull);
781  bm::id64_t d = digest;
782 
783  while (d)
784  {
785  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
786 
787  unsigned wave = bm::word_bitcount64(t - 1);
788  unsigned off = wave * bm::set_block_digest_wave_size;
789 
790  #if defined(VECT_IS_DIGEST_ZERO)
791  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
792  digest &= all_zero ? ~(mask << wave) : digest;
793  #else
795  (const bm::bit_block_t::bunion_t*)(&block[off]);
796  bm::id64_t w64 = 0;
797  unsigned j = 0;
798  do
799  {
800  w64 |=
801  src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
802  j += 4;
803  } while ((j < bm::set_block_digest_wave_size/2) & !w64);
804  digest &= w64 ? digest : ~(mask << wave);
805  #endif
806 
807  d = bm::bmi_bslr_u64(d); // d &= d - 1;
808  } // while
809 
810  BM_ASSERT(bm::calc_block_digest0(block) == digest);
811  return digest;
812 }
813 
814 // --------------------------------------------------------------
815 
816 
817 /// Returns true if set operation is constant (bitcount)
818 inline
820 {
821  return (int(op) >= int(set_COUNT));
822 }
823 
824 /**
825  Convert set operation to operation
826 */
827 inline
829 {
830  BM_ASSERT(op == set_AND ||
831  op == set_OR ||
832  op == set_SUB ||
833  op == set_XOR);
834  return (bm::operation) op;
835 }
836 
837 //---------------------------------------------------------------------
838 
839 /**
840  Structure carries pointer on bit block with all bits 1
841  @ingroup bitfunc
842  @internal
843 */
844 template<bool T> struct all_set
845 {
847  {
851 
853  {
854  ::memset(_p, 0xFF, sizeof(_p)); // set FULL BLOCK content (all 1s)
855  if (bm::conditional<sizeof(void*) == 8>::test())
856  {
857  const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
858  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
859  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
860  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
861  }
862  else
863  {
864  const unsigned magic_mask = 0xFFFFfffe;
865  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
866  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
867  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
868  }
869  }
870  };
871 
872  // version with minimal branching, super-scalar friendly
873  //
874  inline
876  {
877  bm::id64_t type;
878  if (bm::conditional<sizeof(void*) == 8>::test())
879  {
880  bm::id64_t w = reinterpret_cast<unsigned long long>(bp);
881  type = (w & 3) | // FULL BLOCK or GAP
882  ((bp == _block._p) << 1);
883  type = type ? type : w;
884  }
885  else
886  {
887  unsigned w = reinterpret_cast<unsigned long>(bp);
888  type = (w & 3) | // FULL BLOCK or GAP
889  ((bp == _block._p) << 1);
890  type = type ? type : w;
891  }
892  return type;
893  }
894 
896  static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT
897  { return (bp == _block._p || bp == _block._p_fullp); }
898 
901  { return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
902 
904 };
905 
906 
907 template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
908 
909 /// XOR swap two scalar variables
910 template<typename W>
911 void xor_swap(W& x, W& y) BMNOEXCEPT
912 {
913  BM_ASSERT(&x != &y);
914  x ^= y;
915  y ^= x;
916  x ^= y;
917 }
918 
919 /*!
920  Fini not NULL position
921  @return index of not NULL pointer
922  @internal
923 */
924 template<typename N>
925 bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT
926 {
927  BM_ASSERT(pos);
928 // BM_ASSERT(start < size);
929 //#if defined(BM64_AVX2) || defined(BM64_AVX512)
930 // TODO: optimization for SIMD based next ptr scan
931 #if 0
932  const unsigned unroll_factor = 4;
933  const unsigned len = (size - start);
934  const unsigned len_unr = len - (len % unroll_factor);
935  unsigned k;
936 
937  arr += start;
938  for (k = 0; k < len_unr; k+=unroll_factor)
939  {
940  if (!avx2_test_all_zero_wave(arr+k))
941  {
942  if (arr[k])
943  {
944  *pos = k + start;
945  return true;
946  }
947  if (arr[k+1])
948  {
949  *pos = k + start + 1;
950  return true;
951  }
952  if (arr[k+2])
953  {
954  *pos = k + start + 2;
955  return true;
956  }
957  if (arr[k+3])
958  {
959  *pos = k + start + 3;
960  return true;
961  }
962  }
963  } // for k
964 
965  for (; k < len; ++k)
966  {
967  if (arr[k])
968  {
969  *pos = k + start;
970  return true;
971  }
972  } // for k
973 #else
974  for (; start < size; ++start)
975  {
976  if (arr[start])
977  {
978  *pos = start;
979  return true;
980  }
981  } // for i
982 #endif
983  return false;
984 }
985 
986 
987 
988 
989 //---------------------------------------------------------------------
990 
991 /*!
992  \brief Lexicographical comparison of two words as bit strings (reference)
993  Auxiliary implementation for testing and reference purposes.
994  \param w1 - First word.
995  \param w2 - Second word.
996  \return <0 - less, =0 - equal, >0 - greater.
997 
998  @ingroup bitfunc
999 */
1000 template<typename T> int wordcmp0(T w1, T w2)
1001 {
1002  while (w1 != w2)
1003  {
1004  int res = (w1 & 1) - (w2 & 1);
1005  if (res != 0) return res;
1006  w1 >>= 1;
1007  w2 >>= 1;
1008  }
1009  return 0;
1010 }
1011 
1012 
1013 /*
1014 template<typename T> int wordcmp(T w1, T w2)
1015 {
1016  T diff = w1 ^ w2;
1017  return diff ? ((w1 & diff & (diff ^ (diff - 1)))? 1 : -1) : 0;
1018 }
1019 */
1020 /*!
1021  \brief Lexicographical comparison of two words as bit strings.
1022  Auxiliary implementation for testing and reference purposes.
1023  \param a - First word.
1024  \param b - Second word.
1025  \return <0 - less, =0 - equal, >0 - greater.
1026 
1027  @ingroup bitfunc
1028 */
1029 template<typename T> int wordcmp(T a, T b)
1030 {
1031  T diff = a ^ b;
1032  return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
1033 }
1034 
1035 
1036 // Low bit extraction
1037 // x & (x ^ (x-1))
1038 
1039 
1040 // ----------------------------------------------------------------------
1041 
1042 
1043 /*! @brief Returns "true" if all bits in the block are 0
1044  @ingroup bitfunc
1045 */
1046 inline
1048 {
1049 #if defined(VECT_IS_ZERO_BLOCK)
1050  return VECT_IS_ZERO_BLOCK(start);
1051 #else
1052  const bm::wordop_t* BMRESTRICT blk = (bm::wordop_t*) (start);
1053  const bm::wordop_t* BMRESTRICT blk_end = (bm::wordop_t*)(start + bm::set_block_size);
1054  do
1055  {
1056  if (blk[0] | blk[1] | blk[2] | blk[3])
1057  return false;
1058  blk += 4;
1059  } while (blk < blk_end);
1060  return true;
1061 #endif
1062 }
1063 
1064 // ----------------------------------------------------------------------
1065 
1066 /*!
1067  \brief Checks if GAP block is all-zero.
1068  \param buf - GAP buffer pointer.
1069  \returns true if all-zero.
1070 
1071  @ingroup gapfunc
1072 */
1075 {
1076  // (almost) branchless variant:
1077  return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
1078 }
1079 
1080 /*!
1081  \brief Checks if GAP block is all-one.
1082  \param buf - GAP buffer pointer.
1083  \returns true if all-one.
1084  @ingroup gapfunc
1085 */
1088 {
1089  return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
1090 }
1091 
1092 /*!
1093  \brief Returs GAP block length.
1094  \param buf - GAP buffer pointer.
1095  \returns GAP block length.
1096 
1097  @ingroup gapfunc
1098 */
1101 {
1102  return (bm::gap_word_t)((*buf >> 3) + 1);
1103 }
1104 
1105 
1106 /*!
1107  \brief Returs GAP block capacity
1108  \param buf - GAP buffer pointer
1109  \param glevel_len - pointer on GAP header word
1110  \returns GAP block capacity.
1111 
1112  @ingroup gapfunc
1113 */
1114 template<typename T>
1115 unsigned
1116 gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
1117 {
1118  return glevel_len[(*buf >> 1) & 3];
1119 }
1120 
1121 
1122 /*!
1123  \brief Returs GAP block capacity limit.
1124  \param buf - GAP buffer pointer.
1125  \param glevel_len - GAP lengths table (gap_len_table)
1126  \returns GAP block limit.
1127 
1128  @ingroup gapfunc
1129 */
1130 template<typename T>
1131 unsigned
1132 gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
1133 {
1134  return glevel_len[(*buf >> 1) & 3]-4;
1135 }
1136 
1137 
1138 /*!
1139  \brief Returs GAP blocks capacity level.
1140  \param buf - GAP buffer pointer.
1141  \returns GAP block capacity level.
1142 
1143  @ingroup gapfunc
1144 */
1145 template<typename T>
1147 {
1148  return T((*buf >> 1) & 3u);
1149 }
1150 
1151 
1152 /*!
1153  \brief GAP block find the last set bit
1154 
1155  \param buf - GAP buffer pointer.
1156  \param last - index of the last 1 bit
1157 
1158  \return 0 if 1 bit was NOT found
1159 
1160  @ingroup gapfunc
1161 */
1162 template<typename T>
1163 unsigned
1164 gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT
1165 {
1166  BM_ASSERT(last);
1167 
1168  T is_set = (*buf) & 1u;
1169  T end = T((*buf) >> 3u);
1170 
1171  BM_ASSERT(buf[end] == bm::gap_max_bits - 1);
1172 
1173  is_set ^= T((end-1) & 1u);
1174  if (is_set)
1175  {
1176  *last = buf[end];
1177  return is_set;
1178  }
1179  *last = buf[--end];
1180  return end;
1181 }
1182 
1183 /*!
1184  \brief GAP block find the first set bit
1185 
1186  \param buf - GAP buffer pointer.
1187  \param first - index of the first 1 bit
1188 
1189  \return 0 if 1 bit was NOT found
1190 
1191  @ingroup gapfunc
1192 */
1193 template<typename T>
1194 unsigned
1195 gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT
1196 {
1197  BM_ASSERT(first);
1198 
1199  T is_set = (*buf) & 1u;
1200  if (is_set)
1201  {
1202  *first = 0;
1203  return is_set;
1204  }
1205  if (buf[1] == bm::gap_max_bits - 1)
1206  return 0;
1207  *first = buf[1] + 1;
1208  return 1;
1209 }
1210 
1211 
1212 
1213 /*
1214  \brief Binary search for the block where bit = pos located.
1215  \param buf - GAP buffer pointer.
1216  \param pos - index of the element.
1217  \param is_set - output. GAP value (0 or 1).
1218  \return GAP index.
1219  @ingroup gapfunc
1220 */
1221 template<typename T>
1222 unsigned gap_bfind(const T* BMRESTRICT buf,
1223  unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT
1224 {
1225  BM_ASSERT(pos < bm::gap_max_bits);
1226  #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation
1227  #ifdef VECT_GAP_BFIND
1228  return VECT_GAP_BFIND(buf, pos, is_set);
1229  #else
1230  *is_set = (*buf) & 1;
1231 
1232  unsigned start = 1;
1233  unsigned end = 1 + ((*buf) >> 3);
1234 
1235  while ( start != end )
1236  {
1237  unsigned curr = (start + end) >> 1;
1238  if ( buf[curr] < pos )
1239  start = curr + 1;
1240  else
1241  end = curr;
1242  }
1243  *is_set ^= ((start-1) & 1);
1244  return start;
1245  #endif
1246 }
1247 
1248 
1249 /*!
1250  \brief Tests if bit = pos is true.
1251  \param buf - GAP buffer pointer.
1252  \param pos - index of the element.
1253  \return true if position is in "1" gap
1254  @ingroup gapfunc
1255 */
1256 template<typename T>
1257 unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT
1258 {
1259  BM_ASSERT(pos < bm::gap_max_bits);
1260 
1261  unsigned start = 1;
1262  unsigned end = 1 + ((*buf) >> 3);
1263  if (end - start < 10)
1264  {
1265  unsigned sv = *buf & 1;
1266  unsigned sv1= sv ^ 1;
1267  if (buf[1] >= pos) return sv;
1268  if (buf[2] >= pos) return sv1;
1269  if (buf[3] >= pos) return sv;
1270  if (buf[4] >= pos) return sv1;
1271  if (buf[5] >= pos) return sv;
1272  if (buf[6] >= pos) return sv1;
1273  if (buf[7] >= pos) return sv;
1274  if (buf[8] >= pos) return sv1;
1275  if (buf[9] >= pos) return sv;
1276  BM_ASSERT(0);
1277  }
1278  else
1279  {
1280  while (start != end)
1281  {
1282  unsigned curr = (start + end) >> 1;
1283  if (buf[curr] < pos)
1284  start = curr + 1;
1285  else
1286  end = curr;
1287  }
1288  }
1289  return ((*buf) & 1) ^ ((--start) & 1);
1290 }
1291 
1292 /*!
1293  \brief Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
1294  \param buf - GAP buffer pointer.
1295  \param pos - index of the element.
1296  \return true if position is in "1" gap
1297  @ingroup gapfunc
1298 */
1299 template<typename T>
1300 unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
1301 {
1302  BM_ASSERT(pos < bm::gap_max_bits);
1303 
1304  if (pos == 0) // quick answer possible
1305  {
1306  return (*buf) & 1;
1307  }
1308 #if defined(BMSSE2OPT)
1309  unsigned res = bm::sse2_gap_test(buf, pos);
1310  BM_ASSERT(res == bm::gap_test(buf, pos));
1311 #elif defined(BMSSE42OPT)
1312  unsigned res = bm::sse42_gap_test(buf, pos);
1313  BM_ASSERT(res == bm::gap_test(buf, pos));
1314 #elif defined(BMAVX2OPT)
1315  unsigned res = bm::avx2_gap_test(buf, pos);
1316  BM_ASSERT(res == bm::gap_test(buf, pos));
1317 #else
1318  unsigned res = bm::gap_test(buf, pos);
1319 #endif
1320  return res;
1321 }
1322 
1323 /*! For each non-zero block in [from, to] executes supplied functor
1324  \internal
1325 */
1326 template<typename T, typename N, typename F>
1327 void for_each_nzblock_range(T*** root,
1328  N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
1329 {
1330  BM_ASSERT(top_size);
1331  if (nb_from > nb_to)
1332  return;
1333  unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
1334  unsigned j_from = unsigned(nb_from & bm::set_array_mask);
1335  unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
1336  unsigned j_to = unsigned(nb_to & bm::set_array_mask);
1337 
1338  if (i_from >= top_size)
1339  return;
1340  if (i_to >= top_size)
1341  {
1342  i_to = unsigned(top_size-1);
1343  j_to = bm::set_sub_array_size-1;
1344  }
1345 
1346  for (unsigned i = i_from; i <= i_to; ++i)
1347  {
1348  T** blk_blk = root[i];
1349  if (!blk_blk)
1350  continue;
1351  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1352  {
1353  unsigned j = (i == i_from) ? j_from : 0;
1354  if (!j && (i != i_to)) // full sub-block
1355  f.add_full(bm::set_sub_total_bits);
1356  else
1357  {
1358  do
1359  {
1360  f.add_full(bm::gap_max_bits);
1361  if ((i == i_to) && (j == j_to))
1362  return;
1363  } while (++j < bm::set_sub_array_size);
1364  }
1365  }
1366  else
1367  {
1368  unsigned j = (i == i_from) ? j_from : 0;
1369  do
1370  {
1371  if (blk_blk[j])
1372  f(blk_blk[j]);
1373  if ((i == i_to) && (j == j_to))
1374  return;
1375  } while (++j < bm::set_sub_array_size);
1376  }
1377  } // for i
1378 }
1379 
1380 /*! For each non-zero block executes supplied function.
1381  \internal
1382 */
1383 template<class T, class F>
1384 void for_each_nzblock(T*** root, unsigned size1, F& f)
1385 {
1386  typedef typename F::size_type size_type;
1387  for (unsigned i = 0; i < size1; ++i)
1388  {
1389  T** blk_blk = root[i];
1390  if (!blk_blk)
1391  {
1392  f.on_empty_top(i);
1393  continue;
1394  }
1395  f.on_non_empty_top(i);
1396  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1397  {
1398  size_type r = i * bm::set_sub_array_size;
1399  unsigned j = 0;
1400  do
1401  {
1402  f(FULL_BLOCK_FAKE_ADDR, r + j);
1403  } while (++j < bm::set_sub_array_size);
1404  continue;
1405  }
1406 
1407  unsigned non_empty_top = 0;
1408  size_type r = i * bm::set_sub_array_size;
1409  unsigned j = 0;
1410  do
1411  {
1412 #if defined(BM64_AVX2) || defined(BM64_AVX512)
1413  if (!avx2_test_all_zero_wave(blk_blk + j))
1414  {
1415  non_empty_top = 1;
1416  T* blk0 = blk_blk[j + 0];
1417  T* blk1 = blk_blk[j + 1];
1418  T* blk2 = blk_blk[j + 2];
1419  T* blk3 = blk_blk[j + 3];
1420 
1421  size_type block_idx = r + j + 0;
1422  if (blk0)
1423  f(blk0, block_idx);
1424  else
1425  f.on_empty_block(block_idx);
1426 
1427  if (blk1)
1428  f(blk1, block_idx + 1);
1429  else
1430  f.on_empty_block(block_idx + 1);
1431 
1432  if (blk2)
1433  f(blk2, block_idx + 2);
1434  else
1435  f.on_empty_block(block_idx + 2);
1436 
1437  if (blk3)
1438  f(blk3, block_idx + 3);
1439  else
1440  f.on_empty_block(block_idx + 3);
1441  }
1442  else
1443  {
1444  f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1445  f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1446  }
1447  j += 4;
1448 #elif defined(BM64_SSE4)
1449  if (!sse42_test_all_zero_wave((blk_blk + j)))
1450  {
1451  non_empty_top = 1;
1452  T* blk0 = blk_blk[j + 0];
1453  T* blk1 = blk_blk[j + 1];
1454 
1455  size_type block_idx = r + j + 0;
1456  if (blk0)
1457  f(blk0, block_idx);
1458  else
1459  f.on_empty_block(block_idx);
1460 
1461  ++block_idx;
1462  if (blk1)
1463  f(blk1, block_idx);
1464  else
1465  f.on_empty_block(block_idx);
1466  }
1467  else
1468  {
1469  f.on_empty_block(r + j + 0);
1470  f.on_empty_block(r + j + 1);
1471  }
1472  j += 2;
1473 #else
1474  if (blk_blk[j])
1475  {
1476  f(blk_blk[j], r + j);
1477  non_empty_top = 1;
1478  }
1479  else
1480  f.on_empty_block(r + j);
1481  ++j;
1482 #endif
1483  } while (j < bm::set_sub_array_size);
1484 
1485  if (non_empty_top == 0)
1486  f.on_empty_top(i);
1487  } // for i
1488 }
1489 
1490 /*! For each non-zero block executes supplied function.
1491 */
1492 template<class T, class F>
1493 void for_each_nzblock2(T*** root, unsigned size1, F& f)
1494 {
1495 #ifdef BM64_SSE4
1496  for (unsigned i = 0; i < size1; ++i)
1497  {
1498  T** blk_blk;
1499  if ((blk_blk = root[i])!=0)
1500  {
1501  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1502  {
1503  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1504  {
1506  }
1507  continue;
1508  }
1509  {
1510  __m128i w0;
1511  for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
1512  {
1513  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1514  if (!_mm_testz_si128(w0, w0))
1515  {
1516  T* blk0 = blk_blk[j + 0];
1517  T* blk1 = blk_blk[j + 1];
1518 
1519  if (blk0)
1520  f(blk0);
1521  if (blk1)
1522  f(blk1);
1523  }
1524  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1525  if (!_mm_testz_si128(w0, w0))
1526  {
1527  T* blk0 = blk_blk[j + 2];
1528  T* blk1 = blk_blk[j + 3];
1529  if (blk0)
1530  f(blk0);
1531  if (blk1)
1532  f(blk1);
1533  }
1534  } // for j
1535  }
1536  }
1537  } // for i
1538 #elif defined(BM64_AVX2) || defined(BM64_AVX512)
1539  for (unsigned i = 0; i < size1; ++i)
1540  {
1541  T** blk_blk;
1542  if ((blk_blk = root[i]) != 0)
1543  {
1544  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1545  {
1546  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1547  {
1549  }
1550  continue;
1551  }
1552  {
1553  for (unsigned j = 0; j < bm::set_sub_array_size; j += 4)
1554  {
1555  __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1556  if (!_mm256_testz_si256(w0, w0))
1557  {
1558  // as a variant could use: blk0 = (T*)_mm256_extract_epi64(w0, 0);
1559  // but it measures marginally slower
1560  T* blk0 = blk_blk[j + 0];
1561  T* blk1 = blk_blk[j + 1];
1562  T* blk2 = blk_blk[j + 2];
1563  T* blk3 = blk_blk[j + 3];
1564  if (blk0)
1565  f(blk0);
1566  if (blk1)
1567  f(blk1);
1568  if (blk2)
1569  f(blk2);
1570  if (blk3)
1571  f(blk3);
1572  }
1573  } // for j
1574  }
1575  }
1576  } // for i
1577 
1578 #else // non-SIMD mode
1579  for (unsigned i = 0; i < size1; ++i)
1580  {
1581  T** blk_blk;
1582  if ((blk_blk = root[i])!=0)
1583  {
1584  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1585  {
1586  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1587  {
1589  }
1590  continue;
1591  }
1592  unsigned j = 0;
1593  do
1594  {
1595  if (blk_blk[j])
1596  f(blk_blk[j]);
1597  if (blk_blk[j+1])
1598  f(blk_blk[j+1]);
1599  if (blk_blk[j+2])
1600  f(blk_blk[j+2]);
1601  if (blk_blk[j+3])
1602  f(blk_blk[j+3]);
1603  j += 4;
1604  } while (j < bm::set_sub_array_size);
1605  }
1606  } // for i
1607 #endif
1608 }
1609 
1610 
1611 /*! For each non-zero block executes supplied function-predicate.
1612  Function returns if function-predicate returns true
1613 */
1614 template<typename T, typename BI, typename F>
1615 bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT
1616 {
1617  BI block_idx = 0;
1618  for (BI i = 0; i < size1; ++i)
1619  {
1620  T** blk_blk = root[i];
1621  if (!blk_blk)
1622  {
1623  block_idx += bm::set_sub_array_size;
1624  continue;
1625  }
1626  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1627  {
1628  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
1629  {
1630  if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
1631  return true;
1632  } // for j
1633  continue;
1634  }
1635 
1636  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1637  {
1638  if (blk_blk[j])
1639  if (f(blk_blk[j], block_idx))
1640  return true;
1641  } // for j
1642  } // for i
1643  return false;
1644 }
1645 
1646 /*! For each block executes supplied function.
1647 */
1648 template<class T, class F, typename BLOCK_IDX>
1649 void for_each_block(T*** root, unsigned size1, F& f, BLOCK_IDX start)
1650 {
1651  BLOCK_IDX block_idx = start;
1652  for (unsigned i = 0; i < size1; ++i)
1653  {
1654  T** blk_blk = root[i];
1655  if (blk_blk)
1656  {
1657  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1658  {
1659  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
1660  {
1661  f(FULL_BLOCK_FAKE_ADDR, block_idx);
1662  }
1663  continue;
1664  }
1665  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1666  {
1667  f(blk_blk[j], block_idx);
1668  }
1669  }
1670  else
1671  {
1672  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
1673  {
1674  f(0, block_idx);
1675  }
1676  }
1677  }
1678 }
1679 
1680 
1681 
1682 /*! Special BM optimized analog of STL for_each
1683 */
1684 template<class T, class F> F bmfor_each(T first, T last, F f)
1685 {
1686  do
1687  {
1688  f(*first);
1689  ++first;
1690  } while (first < last);
1691  return f;
1692 }
1693 
1694 /*! Computes SUM of all elements of the sequence
1695 */
1696 template<typename T>
1697 bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT
1698 {
1699  bm::id64_t sum = 0;
1700  for (;first < last; ++first)
1701  sum += *first;
1702  return sum;
1703 }
1704 
1705 /*!
1706  Extract short (len=1) exceptions from the GAP block
1707  \param buf - GAP buffer to split
1708  \param arr0 - [OUT] list of isolates 0 positions (clear list)
1709  \param arr1 - [OUT] list of isolated 1 positions (set list)
1710  \param arr0_cnt - [OUT] arr0 size
1711  \param arr1_cnt -
1712  @ingroup gapfunc
1713 */
1714 template<typename T>
1715 void gap_split(const T* buf,
1716  T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
1717 {
1718  const T* pcurr = buf;
1719  unsigned len = (*pcurr >> 3);
1720  const T* pend = pcurr + len;
1721 
1722  T cnt0, cnt1;
1723  cnt0 = cnt1 = 0;
1724  unsigned is_set = (*buf & 1);
1725 
1726  if (*pcurr == 0)
1727  {
1728  if (is_set)
1729  {
1730  arr1[cnt1] = *pcurr;
1731  ++cnt1;
1732  }
1733  else
1734  {
1735  arr0[cnt0] = *pcurr;
1736  ++cnt0;
1737  }
1738  }
1739  T prev = *pcurr;
1740  ++pcurr;
1741 
1742  while (pcurr <= pend)
1743  {
1744  is_set ^= 1;
1745  T delta = *pcurr - prev;
1746  if (delta == 1)
1747  {
1748  if (is_set)
1749  {
1750  arr1[cnt1] = prev;
1751  ++cnt1;
1752  }
1753  else
1754  {
1755  arr0[cnt0] = prev;
1756  ++cnt0;
1757  }
1758  }
1759  prev = *pcurr++;
1760  } // while
1761 
1762  arr0_cnt = cnt0;
1763  arr1_cnt = cnt1;
1764 }
1765 
1766 
1767 /*!
1768  \brief Calculates number of bits ON in GAP buffer.
1769  \param buf - GAP buffer pointer.
1770  \param dsize - buffer size
1771  \return Number of non-zero bits.
1772  @ingroup gapfunc
1773 */
1774 template<typename T>
1775 unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT
1776 {
1777  const T* pcurr = buf;
1778  if (dsize == 0)
1779  dsize = (*pcurr >> 3);
1780 
1781  const T* pend = pcurr + dsize;
1782 
1783  unsigned bits_counter = 0;
1784  ++pcurr;
1785 
1786  if (*buf & 1)
1787  {
1788  bits_counter += *pcurr + 1;
1789  ++pcurr;
1790  }
1791  for (++pcurr; pcurr <= pend; pcurr += 2)
1792  bits_counter += *pcurr - *(pcurr-1);
1793  return bits_counter;
1794 }
1795 
1796 /*!
1797  \brief Calculates number of bits ON in GAP buffer. Loop unrolled version.
1798  \param buf - GAP buffer pointer.
1799  \return Number of non-zero bits.
1800  @ingroup gapfunc
1801 */
1802 template<typename T>
1803 unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT
1804 {
1805  const T* pcurr = buf;
1806  unsigned dsize = (*pcurr >> 3);
1807 
1808  unsigned cnt = 0;
1809  pcurr = buf + 1; // set up start position
1810  T first_one = *buf & 1;
1811  if (first_one)
1812  {
1813  cnt += *pcurr + 1;
1814  ++pcurr;
1815  }
1816  ++pcurr; // set GAP to 1
1817 
1818  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
1819  if (dsize > 34)
1820  {
1821  const unsigned unr_factor = 32;
1822  unsigned waves = (dsize-2) / unr_factor;
1823  pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
1824  }
1825  #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
1826  if (dsize > 18)
1827  {
1828  const unsigned unr_factor = 16;
1829  unsigned waves = (dsize - 2) / unr_factor;
1830  pcurr = sse2_gap_sum_arr(pcurr, waves, &cnt);
1831  }
1832  #else
1833  if (dsize > 10)
1834  {
1835  const unsigned unr_factor = 8;
1836  unsigned waves = (dsize - 2) / unr_factor;
1837  for (unsigned i = 0; i < waves; i += unr_factor)
1838  {
1839  cnt += pcurr[0] - pcurr[0 - 1];
1840  cnt += pcurr[2] - pcurr[2 - 1];
1841  cnt += pcurr[4] - pcurr[4 - 1];
1842  cnt += pcurr[6] - pcurr[6 - 1];
1843 
1844  pcurr += unr_factor;
1845  } // for
1846  }
1847  #endif
1848 
1849  const T* pend = buf + dsize;
1850  for ( ; pcurr <= pend ; pcurr+=2)
1851  {
1852  cnt += *pcurr - *(pcurr - 1);
1853  }
1854  BM_ASSERT(cnt == bm::gap_bit_count(buf));
1855  return cnt;
1856 }
1857 
1858 
1859 
1860 /*!
1861  \brief Counts 1 bits in GAP buffer in the closed [left, right] range.
1862  \param buf - GAP buffer pointer.
1863  \param left - leftmost bit index to start from
1864  \param right- rightmost bit index
1865  \return Number of non-zero bits.
1866  @ingroup gapfunc
1867 */
1868 template<typename T>
1869 unsigned gap_bit_count_range(const T* const buf,
1870  unsigned left, unsigned right) BMNOEXCEPT
1871 {
1872  BM_ASSERT(left <= right);
1873  BM_ASSERT(right < bm::gap_max_bits);
1874 
1875  const T* pcurr = buf;
1876  const T* pend = pcurr + (*pcurr >> 3);
1877 
1878  unsigned bits_counter = 0;
1879  unsigned is_set;
1880  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1881  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
1882 
1883  pcurr = buf + start_pos;
1884  if (right <= *pcurr) // we are in the target gap right now
1885  {
1886  bits_counter = unsigned(right - left + 1u) & is_set; // & is_set == if(is_set)
1887  return bits_counter;
1888  }
1889  bits_counter += unsigned(*pcurr - left + 1u) & is_set;
1890 
1891  unsigned prev_gap = *pcurr++;
1892  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1893  {
1894  bits_counter += (*pcurr - prev_gap) & is_set;
1895  if (pcurr == pend)
1896  return bits_counter;
1897  prev_gap = *pcurr++;
1898  }
1899  bits_counter += unsigned(right - prev_gap) & is_set;
1900  return bits_counter;
1901 }
1902 
1903 /*!
1904  \brief Test if all bits are 1 in GAP buffer in the [left, right] range.
1905  \param buf - GAP buffer pointer.
1906  \param left - leftmost bit index to start from
1907  \param right- rightmost bit index
1908  \return true if all bits are "11111"
1909  @ingroup gapfunc
1910 */
1911 template<typename T>
1912 bool gap_is_all_one_range(const T* const BMRESTRICT buf,
1913  unsigned left, unsigned right) BMNOEXCEPT
1914 {
1915  BM_ASSERT(left <= right);
1916  BM_ASSERT(right < bm::gap_max_bits);
1917 
1918  unsigned is_set;
1919  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1920  if (!is_set) // GAP is 0
1921  return false;
1922  const T* const pcurr = buf + start_pos;
1923  return (right <= *pcurr);
1924 }
1925 
1926 /*!
1927  \brief Test if any bits are 1 in GAP buffer in the [left, right] range.
1928  \param buf - GAP buffer pointer.
1929  \param left - leftmost bit index to start from
1930  \param right- rightmost bit index
1931  \return true if at least 1 "00010"
1932  @ingroup gapfunc
1933 */
1934 template<typename T>
1935 bool gap_any_range(const T* const BMRESTRICT buf,
1936  unsigned left, unsigned right) BMNOEXCEPT
1937 {
1938  BM_ASSERT(left <= right);
1939  BM_ASSERT(right < bm::gap_max_bits);
1940 
1941  unsigned is_set;
1942  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1943  const T* const pcurr = buf + start_pos;
1944 
1945  if (!is_set) // start GAP is 0 ...
1946  {
1947  if (right <= *pcurr) // ...bit if the interval goes into at least 1 blk
1948  return false; // .. nope
1949  return true;
1950  }
1951  return true;
1952 }
1953 
1954 /*!
1955  \brief Test if any bits are 1 in GAP buffer in the [left, right] range
1956  and flanked with 0s
1957  \param buf - GAP buffer pointer.
1958  \param left - leftmost bit index to start from
1959  \param right- rightmost bit index
1960  \return true if "011110"
1961  @ingroup gapfunc
1962 */
1963 template<typename T>
1964 bool gap_is_interval(const T* const BMRESTRICT buf,
1965  unsigned left, unsigned right) BMNOEXCEPT
1966 {
1967  BM_ASSERT(left <= right);
1968  BM_ASSERT(left > 0); // cannot check left-1 otherwise
1969  BM_ASSERT(right < bm::gap_max_bits-1); // cannot check right+1 otherwise
1970 
1971  unsigned is_set;
1972  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1973 
1974  const T* pcurr = buf + start_pos;
1975  if (!is_set || (right != *pcurr) || (start_pos <= 1))
1976  return false;
1977  --pcurr;
1978  if (*pcurr != left-1)
1979  return false;
1980  return true;
1981 }
1982 
1983 /**
1984  \brief Searches for the last 1 bit in the 111 interval of a GAP block
1985  \param buf - BIT block buffer
1986  \param nbit - bit index to start checking from
1987  \param pos - [out] found value
1988 
1989  \return false if not found
1990  @ingroup gapfunc
1991 */
1992 template<typename T>
1993 bool gap_find_interval_end(const T* const BMRESTRICT buf,
1994  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
1995 {
1996  BM_ASSERT(pos);
1997  BM_ASSERT(nbit < bm::gap_max_bits);
1998 
1999  unsigned is_set;
2000  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2001  if (!is_set)
2002  return false;
2003  *pos = buf[start_pos];
2004  return true;
2005 }
2006 
2007 
2008 /**
2009  \brief Searches for the first 1 bit in the 111 interval of a GAP block
2010  \param buf - BIT block buffer
2011  \param nbit - bit index to start checking from
2012  \param pos - [out] found value
2013 
2014  \return false if not found
2015  @ingroup gapfunc
2016 */
2017 template<typename T>
2018 bool gap_find_interval_start(const T* const BMRESTRICT buf,
2019  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
2020 {
2021  BM_ASSERT(pos);
2022  BM_ASSERT(nbit < bm::gap_max_bits);
2023 
2024  unsigned is_set;
2025  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2026  if (!is_set)
2027  return false;
2028  --start_pos;
2029  if (!start_pos)
2030  *pos = 0;
2031  else
2032  *pos = buf[start_pos]+1;
2033  return true;
2034 }
2035 
2036 
2037 /*!
2038  \brief GAP block find position for the rank
2039 
2040  \param block - bit block buffer pointer
2041  \param rank - rank to find (must be > 0)
2042  \param nbit_from - start bit position in block
2043  \param nbit_pos - found position
2044 
2045  \return 0 if position with rank was found, or
2046  the remaining rank (rank - population count)
2047 
2048  @ingroup gapfunc
2049 */
2050 template<typename T, typename SIZE_TYPE>
2051 SIZE_TYPE gap_find_rank(const T* const block,
2052  SIZE_TYPE rank,
2053  unsigned nbit_from,
2054  unsigned& nbit_pos) BMNOEXCEPT
2055 {
2056  BM_ASSERT(block);
2057  BM_ASSERT(rank);
2058 
2059  const T* pcurr = block;
2060  const T* pend = pcurr + (*pcurr >> 3);
2061 
2062  unsigned bits_counter = 0;
2063  unsigned is_set;
2064  unsigned start_pos = bm::gap_bfind(block, nbit_from, &is_set);
2065  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
2066 
2067  pcurr = block + start_pos;
2068  bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
2069  if (bits_counter >= rank) // found!
2070  {
2071  nbit_pos = nbit_from + unsigned(rank) - 1u;
2072  return 0;
2073  }
2074  rank -= bits_counter;
2075  unsigned prev_gap = *pcurr++;
2076  for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
2077  {
2078  bits_counter = (*pcurr - prev_gap) & is_set;
2079  if (bits_counter >= rank) // found!
2080  {
2081  nbit_pos = prev_gap + unsigned(rank);
2082  return 0;
2083  }
2084  rank -= bits_counter;
2085  prev_gap = *pcurr++;
2086  } // for
2087 
2088  return rank;
2089 }
2090 
2091 
2092 
2093 /*!
2094  \brief Counts 1 bits in GAP buffer in the closed [0, right] range.
2095  \param buf - GAP buffer pointer.
2096  \param right- rightmost bit index
2097  \param is_corrected - if true the result will be rank corrected
2098  if right bit == true count=count-1
2099  \return Number of non-zero bits
2100  @ingroup gapfunc
2101 */
2102 template<typename T>
2103 unsigned gap_bit_count_to(const T* const buf, T right,
2104  bool is_corrected=false) BMNOEXCEPT
2105 {
2106  const T* pcurr = buf;
2107  const T* pend = pcurr + (*pcurr >> 3);
2108 
2109  unsigned bits_counter = 0;
2110  unsigned is_set = ~((unsigned(*buf) & 1u) - 1u); // 0xFFF.. if true (mask for branchless code)
2111  BM_ASSERT(is_set == 0u || is_set == ~0u);
2112  pcurr = buf + 1;
2113 
2114  if (right <= *pcurr) // we are in the target block right now
2115  {
2116  bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
2117  bits_counter -= (is_set & unsigned(is_corrected));
2118  return bits_counter;
2119  }
2120  bits_counter += (*pcurr + 1u) & is_set;
2121 
2122  unsigned prev_gap = *pcurr++;
2123  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
2124  {
2125  bits_counter += (*pcurr - prev_gap) & is_set;
2126  if (pcurr == pend)
2127  {
2128  bits_counter -= (is_set & unsigned(is_corrected));
2129  return bits_counter;
2130  }
2131  prev_gap = *pcurr++;
2132  }
2133  bits_counter += (right - prev_gap) & is_set;
2134  bits_counter -= (is_set & unsigned(is_corrected));
2135  return bits_counter;
2136 }
2137 
2138 
2139 /*!
2140  D-GAP block for_each algorithm
2141 
2142  D-Gap Functor is called for each element but last one.
2143 
2144  \param gap_buf - GAP buffer
2145  \param func - functor object
2146 
2147 */
2148 template<class T, class Func>
2149 void for_each_dgap(const T* gap_buf, Func& func)
2150 {
2151  const T* pcurr = gap_buf;
2152  const T* pend = pcurr + (*pcurr >> 3);
2153  ++pcurr;
2154 
2155  T prev = *pcurr;
2156  func((T)(prev + 1)); // first element incremented to avoid 0
2157  ++pcurr;
2158  do
2159  {
2160  func((T)(*pcurr - prev)); // all others are [N] - [N-1]
2161  prev = *pcurr;
2162  } while (++pcurr < pend);
2163 }
2164 
2165 /** d-Gap copy functor
2166  @internal
2167 */
2168 template<typename T> struct d_copy_func
2169 {
2170  d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {}
2171  void operator()(T dgap) { *dgap_buf_++ = dgap; }
2172 
2174 };
2175 
2176 /*!
2177  \brief Convert GAP buffer into D-GAP buffer
2178 
2179  Delta GAP representation is DGAP[N] = GAP[N] - GAP[N-1]
2180 
2181  \param gap_buf - GAP buffer
2182  \param dgap_buf - Delta-GAP buffer
2183  \param copy_head - flag to copy GAP header
2184 
2185  \internal
2186 
2187  @ingroup gapfunc
2188 */
2189 template<typename T>
2190 T* gap_2_dgap(const T* BMRESTRICT gap_buf,
2191  T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT
2192 {
2193  if (copy_head) // copy GAP header
2194  {
2195  *dgap_buf++ = *gap_buf;
2196  }
2197 
2198  d_copy_func<T> copy_func(dgap_buf);
2199  for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
2200  return copy_func.dgap_buf_;
2201 }
2202 
2203 /*!
2204  \brief Convert D-GAP buffer into GAP buffer
2205 
2206  GAP representation is GAP[N] = DGAP[N] + DGAP[N-1]
2207 
2208  \param dgap_buf - Delta-GAP buffer
2209  \param gap_header - GAP header word
2210  \param gap_buf - GAP buffer
2211 
2212  \internal
2213  @ingroup gapfunc
2214 */
2215 template<typename T>
2216 void dgap_2_gap(const T* BMRESTRICT dgap_buf,
2217  T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
2218 {
2219  const T* pcurr = dgap_buf;
2220  unsigned len;
2221  if (!gap_header) // GAP header is already part of the stream
2222  {
2223  len = *pcurr >> 3;
2224  *gap_buf++ = *pcurr++; // copy GAP header
2225  }
2226  else // GAP header passed as a parameter
2227  {
2228  len = gap_header >> 3;
2229  *gap_buf++ = gap_header; // assign GAP header
2230  }
2231  --len; // last element is actually not encoded
2232  const T* pend = pcurr + len;
2233 
2234  *gap_buf = *pcurr++; // copy first element
2235  if (*gap_buf == 0)
2236  *gap_buf = 65535; // fix +1 overflow
2237  else
2238  *gap_buf = T(*gap_buf - 1);
2239 
2240  for (++gap_buf; pcurr < pend; ++pcurr)
2241  {
2242  T prev = *(gap_buf-1); // don't remove temp(undef expression!)
2243  *gap_buf++ = T(*pcurr + prev);
2244  }
2245  *gap_buf = 65535; // add missing last element
2246 }
2247 
2248 
2249 /*!
2250  \brief Lexicographical comparison of GAP buffers.
2251  \param buf1 - First GAP buffer pointer.
2252  \param buf2 - Second GAP buffer pointer.
2253  \return <0 - less, =0 - equal, >0 - greater.
2254 
2255  @ingroup gapfunc
2256 */
2257 template<typename T>
2258 int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT
2259 {
2260  const T* pcurr1 = buf1;
2261  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2262  unsigned bitval1 = *buf1 & 1;
2263  ++pcurr1;
2264 
2265  const T* pcurr2 = buf2;
2266  unsigned bitval2 = *buf2 & 1;
2267  ++pcurr2;
2268 
2269  while (pcurr1 <= pend1)
2270  {
2271  if (*pcurr1 == *pcurr2)
2272  {
2273  if (bitval1 != bitval2)
2274  {
2275  return (bitval1) ? 1 : -1;
2276  }
2277  }
2278  else
2279  {
2280  if (bitval1 == bitval2)
2281  {
2282  if (bitval1)
2283  {
2284  return (*pcurr1 < *pcurr2) ? -1 : 1;
2285  }
2286  else
2287  {
2288  return (*pcurr1 < *pcurr2) ? 1 : -1;
2289  }
2290  }
2291  else
2292  {
2293  return (bitval1) ? 1 : -1;
2294  }
2295  }
2296  ++pcurr1; ++pcurr2;
2297  bitval1 ^= 1;
2298  bitval2 ^= 1;
2299  }
2300 
2301  return 0;
2302 }
2303 
2304 /*!
2305  \brief Find first bit which is different between two GAP-blocks
2306  \param buf1 - block 1
2307  \param buf2 - block 2
2308  \param pos - out - position of difference (undefined if blocks are equal)
2309  \return true if difference was found
2310 
2311  @ingroup gapfunc
2312 */
2313 template<typename T>
2314 bool gap_find_first_diff(const T* BMRESTRICT buf1,
2315  const T* BMRESTRICT buf2,
2316  unsigned* BMRESTRICT pos) BMNOEXCEPT
2317 {
2318  BM_ASSERT(buf1 && buf2 && pos);
2319 
2320  const T* pcurr1 = buf1;
2321  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2322  const T* pcurr2 = buf2;
2323  for (++pcurr1, ++pcurr2; pcurr1 <= pend1; ++pcurr1, ++pcurr2)
2324  {
2325  if (*pcurr1 != *pcurr2)
2326  {
2327  *pos = 1 + ((*pcurr1 < *pcurr2) ? *pcurr1 : *pcurr2);
2328  return true;
2329  }
2330  } // for
2331  return false;
2332 }
2333 
2334 // -------------------------------------------------------------------------
2335 //
2336 
2337 /*!
2338  \brief Abstract operation for GAP buffers.
2339  Receives functor F as a template argument
2340  \param dest - destination memory buffer.
2341  \param vect1 - operand 1 GAP encoded buffer.
2342  \param vect1_mask - XOR mask for starting bitflag for vector1
2343  can be 0 or 1 (1 inverts the vector)
2344  \param vect2 - operand 2 GAP encoded buffer.
2345  \param vect2_mask - same as vect1_mask
2346  \param dlen - destination length after the operation
2347 
2348  \note Internal function.
2349  @internal
2350 
2351  @ingroup gapfunc
2352 */
2353 template<typename T, class F>
2354 void gap_buff_op(T* BMRESTRICT dest,
2355  const T* BMRESTRICT vect1,
2356  unsigned vect1_mask,
2357  const T* BMRESTRICT vect2,
2358  unsigned vect2_mask,
2359  unsigned& dlen) BMNOEXCEPT2
2360 {
2361  const T* cur1 = vect1;
2362  const T* cur2 = vect2;
2363 
2364  T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
2365  T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
2366 
2367  T bitval = (T) F::op(bitval1, bitval2);
2368  T bitval_prev = bitval;
2369 
2370  T* res = dest;
2371  *res = bitval;
2372  ++res;
2373 
2374  T c1 = *cur1; T c2 = *cur2;
2375  while (1)
2376  {
2377  bitval = (T) F::op(bitval1, bitval2);
2378 
2379  // Check if GAP value changes and we need to
2380  // start the next one
2381  //
2382  res += (bitval != bitval_prev);
2383  bitval_prev = bitval;
2384  if (c1 < c2) // (*cur1 < *cur2)
2385  {
2386  *res = c1;
2387  ++cur1; c1 = *cur1;
2388  bitval1 ^= 1;
2389  }
2390  else // >=
2391  {
2392  *res = c2;
2393  if (c2 < c1) // (*cur2 < *cur1)
2394  {
2395  bitval2 ^= 1;
2396  }
2397  else // equal
2398  {
2399  if (c2 == (bm::gap_max_bits - 1))
2400  break;
2401 
2402  ++cur1; c1 = *cur1;
2403  bitval1 ^= 1; bitval2 ^= 1;
2404  }
2405  ++cur2; c2 = *cur2;
2406  }
2407  } // while
2408 
2409  dlen = (unsigned)(res - dest);
2410  *dest = (T)((*dest & 7) + (dlen << 3));
2411 }
2412 
2413 
2414 /*!
2415  \brief Abstract operation for GAP buffers (predicts legth)
2416  Receives functor F as a template argument
2417  \param vect1 - operand 1 GAP encoded buffer.
2418  \param vect2 - operand 2 GAP encoded buffer.
2419  \param dlen - destination length after the operation
2420  \param limit - maximum target length limit,
2421  returns false if limit is reached
2422  \return true if operation would be successfull or
2423  false if limit reached
2424 
2425  \note Internal function.
2426  @internal
2427 
2428  @ingroup gapfunc
2429 */
2430 template<typename T, class F>
2431 bool gap_buff_dry_op(const T* BMRESTRICT vect1,
2432  const T* BMRESTRICT vect2,
2433  unsigned& dlen,
2434  unsigned limit) BMNOEXCEPT2
2435 {
2436  const T* cur1 = vect1;
2437  const T* cur2 = vect2;
2438 
2439  T bitval1 = (T)((*cur1++ & 1));
2440  T bitval2 = (T)((*cur2++ & 1));
2441 
2442  T bitval = (T) F::op(bitval1, bitval2);
2443  T bitval_prev = bitval;
2444 
2445  unsigned len = 1;
2446 
2447  T c1 = *cur1; T c2 = *cur2;
2448  while (1)
2449  {
2450  bitval = (T) F::op(bitval1, bitval2);
2451 
2452  // Check if GAP value changes and we need to
2453  // start the next one
2454  //
2455  len += (bitval != bitval_prev);
2456  if (len > limit)
2457  return false;
2458  bitval_prev = bitval;
2459  if (c1 < c2)
2460  {
2461  ++cur1; c1 = *cur1;
2462  bitval1 ^= 1;
2463  }
2464  else // >=
2465  {
2466  if (c2 < c1) // (*cur2 < *cur1)
2467  {
2468  bitval2 ^= 1;
2469  }
2470  else // equal
2471  {
2472  if (c2 == (bm::gap_max_bits - 1))
2473  break;
2474 
2475  ++cur1; c1 = *cur1;
2476  bitval1 ^= 1; bitval2 ^= 1;
2477  }
2478  ++cur2; c2 = *cur2;
2479  }
2480 
2481  } // while
2482 
2483  dlen = len;
2484  return true;
2485 }
2486 
2487 
2488 /*!
2489  \brief Abstract distance test operation for GAP buffers.
2490  Receives functor F as a template argument
2491  \param vect1 - operand 1 GAP encoded buffer.
2492  \param vect1_mask - XOR mask for starting bitflag for vector1
2493  can be 0 or 1 (1 inverts the vector)
2494  \param vect2 - operand 2 GAP encoded buffer.
2495  \param vect2_mask - same as vect1_mask
2496  \note Internal function.
2497  \return non zero value if operation result returns any 1 bit
2498 
2499  @ingroup gapfunc
2500 */
2501 template<typename T, class F>
2502 unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
2503  unsigned vect1_mask,
2504  const T* BMRESTRICT vect2,
2505  unsigned vect2_mask) BMNOEXCEPT2
2506 {
2507  const T* cur1 = vect1;
2508  const T* cur2 = vect2;
2509 
2510  unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2511  unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2512 
2513  unsigned bitval = F::op(bitval1, bitval2);
2514  if (bitval)
2515  return bitval;
2516  unsigned bitval_prev = bitval;
2517 
2518  while (1)
2519  {
2520  bitval = F::op(bitval1, bitval2);
2521  if (bitval)
2522  return bitval;
2523 
2524  if (bitval != bitval_prev)
2525  bitval_prev = bitval;
2526 
2527  if (*cur1 < *cur2)
2528  {
2529  ++cur1;
2530  bitval1 ^= 1;
2531  }
2532  else // >=
2533  {
2534  if (*cur2 < *cur1)
2535  {
2536  bitval2 ^= 1;
2537  }
2538  else // equal
2539  {
2540  if (*cur2 == (bm::gap_max_bits - 1))
2541  {
2542  break;
2543  }
2544  ++cur1;
2545  bitval1 ^= 1; bitval2 ^= 1;
2546  }
2547  ++cur2;
2548  }
2549 
2550  } // while
2551 
2552  return 0;
2553 }
2554 
2555 
2556 
2557 /*!
2558  \brief Abstract distance(similarity) operation for GAP buffers.
2559  Receives functor F as a template argument
2560  \param vect1 - operand 1 GAP encoded buffer.
2561  \param vect2 - operand 2 GAP encoded buffer.
2562  \note Internal function.
2563 
2564  @ingroup gapfunc
2565 */
2566 template<typename T, class F>
2567 unsigned gap_buff_count_op(const T* vect1, const T* vect2) BMNOEXCEPT2
2568 {
2569  unsigned count;// = 0;
2570  const T* cur1 = vect1;
2571  const T* cur2 = vect2;
2572 
2573  unsigned bitval1 = (*cur1++ & 1);
2574  unsigned bitval2 = (*cur2++ & 1);
2575  unsigned bitval = count = F::op(bitval1, bitval2);
2576  unsigned bitval_prev = bitval;
2577 
2578  T res, res_prev;
2579  res = res_prev = 0;
2580 
2581  while (1)
2582  {
2583  bitval = F::op(bitval1, bitval2);
2584  // Check if GAP value changes and we need to
2585  // start the next one.
2586  if (bitval != bitval_prev)
2587  {
2588  bitval_prev = bitval;
2589  res_prev = res;
2590  }
2591 
2592  if (*cur1 < *cur2)
2593  {
2594  res = *cur1;
2595  if (bitval)
2596  {
2597  count += res - res_prev;
2598  res_prev = res;
2599  }
2600  ++cur1; bitval1 ^= 1;
2601  }
2602  else // >=
2603  {
2604  res = *cur2;
2605  if (bitval)
2606  {
2607  count += res - res_prev;
2608  res_prev = res;
2609  }
2610  if (*cur2 < *cur1)
2611  {
2612  bitval2 ^= 1;
2613  }
2614  else // equal
2615  {
2616  if (*cur2 == (bm::gap_max_bits - 1))
2617  break;
2618 
2619  ++cur1;
2620  bitval1 ^= 1; bitval2 ^= 1;
2621  }
2622  ++cur2;
2623  }
2624 
2625  } // while
2626 
2627  return count;
2628 }
2629 
2630 
2631 #ifdef __GNUG__
2632 #pragma GCC diagnostic push
2633 #pragma GCC diagnostic ignored "-Wconversion"
2634 #endif
2635 
2636 /*!
2637  \brief Sets or clears bit in the GAP buffer.
2638 
2639  \param val - new bit value
2640  \param buf - GAP buffer.
2641  \param pos - Index of bit to set.
2642  \param is_set - (OUT) flag if bit was actually set.
2643 
2644  \return New GAP buffer length.
2645 
2646  @ingroup gapfunc
2647 */
2648 template<typename T>
2649 unsigned gap_set_value(unsigned val,
2650  T* BMRESTRICT buf,
2651  unsigned pos,
2652  unsigned* BMRESTRICT is_set) BMNOEXCEPT
2653 {
2654  BM_ASSERT(pos < bm::gap_max_bits);
2655 
2656  unsigned curr = bm::gap_bfind(buf, pos, is_set);
2657  T end = (T)(*buf >> 3);
2658  if (*is_set == val)
2659  {
2660  *is_set = 0;
2661  return end;
2662  }
2663  *is_set = 1;
2664 
2665  T* pcurr = buf + curr;
2666  T* pprev = pcurr - 1;
2667  T* pend = buf + end;
2668 
2669  // Special case, first bit GAP operation. There is no platform beside it.
2670  // initial flag must be inverted.
2671  if (!pos)
2672  {
2673  *buf ^= 1;
2674  if (buf[1]) // We need to insert a 1 bit GAP here
2675  {
2676  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2677  buf[1] = 0;
2678  ++end;
2679  }
2680  else // Only 1 bit in the GAP. We need to delete the first GAP.
2681  {
2682  pprev = buf + 1; pcurr = pprev + 1;
2683  goto copy_gaps;
2684  }
2685  }
2686  else
2687  if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
2688  {
2689  ++(*pprev);
2690  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2691  {
2692  --end;
2693  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2694  {
2695  ++pcurr;
2696  copy_gaps:
2697  --end;
2698  do { *pprev++ = *pcurr++; } while (pcurr < pend);
2699  }
2700  }
2701  }
2702  else
2703  if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2704  {
2705  --(*pcurr);
2706  end += (pcurr == pend);
2707  }
2708  else // Worst case: split current GAP
2709  {
2710  if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
2711  ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
2712  end += 2;
2713  pcurr[0] = (T)(pos-1);
2714  pcurr[1] = (T)pos;
2715  }
2716 
2717  // Set correct length word and last border word
2718  *buf = (T)((*buf & 7) + (end << 3));
2719  buf[end] = bm::gap_max_bits-1;
2720  return end;
2721 }
2722 
2723 /*!
2724  \brief Sets or clears bit in the GAP buffer.
2725 
2726  \param val - new bit value
2727  \param buf - GAP buffer.
2728  \param pos - Index of bit to set.
2729 
2730  \return New GAP buffer length.
2731 
2732  @ingroup gapfunc
2733 */
2734 template<typename T>
2735 unsigned gap_set_value(unsigned val,
2736  T* BMRESTRICT buf,
2737  unsigned pos) BMNOEXCEPT
2738 {
2739  BM_ASSERT(pos < bm::gap_max_bits);
2740  unsigned is_set;
2741  unsigned curr = bm::gap_bfind(buf, pos, &is_set);
2742  T end = (T)(*buf >> 3);
2743  if (is_set == val)
2744  return end;
2745 
2746  T* pcurr = buf + curr;
2747  T* pprev = pcurr - 1;
2748  T* pend = buf + end;
2749 
2750  // Special case, first bit GAP operation. There is no platform beside it.
2751  // initial flag must be inverted.
2752  if (!pos)
2753  {
2754  *buf ^= 1;
2755  if (buf[1]) // We need to insert a 1 bit GAP here
2756  {
2757  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2758  buf[1] = 0;
2759  ++end;
2760  }
2761  else // Only 1 bit in the GAP. We need to delete the first GAP.
2762  {
2763  pprev = buf + 1; pcurr = pprev + 1;
2764  goto copy_gaps;
2765  }
2766  }
2767  else
2768  if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
2769  {
2770  ++(*pprev);
2771  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2772  {
2773  --end;
2774  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2775  {
2776  ++pcurr;
2777  copy_gaps:
2778  --end;
2779  do { *pprev++ = *pcurr++; } while (pcurr < pend);
2780  }
2781  }
2782  }
2783  else
2784  if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2785  {
2786  --(*pcurr);
2787  end += (pcurr == pend);
2788  }
2789  else // Worst case: split current GAP
2790  {
2791  if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
2792  ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
2793  end += 2;
2794  pcurr[0] = (T)(pos-1);
2795  pcurr[1] = (T)pos;
2796  }
2797 
2798  // Set correct length word and last border word
2799  *buf = (T)((*buf & 7) + (end << 3));
2800  buf[end] = bm::gap_max_bits-1;
2801  return end;
2802 }
2803 
2804 /*!
2805  \brief Add new value to the end of GAP buffer.
2806 
2807  \param buf - GAP buffer.
2808  \param pos - Index of bit to set.
2809 
2810  \return New GAP buffer length.
2811 
2812  @ingroup gapfunc
2813 */
2814 template<typename T>
2815 unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT
2816 {
2817  BM_ASSERT(pos < bm::gap_max_bits);
2818 
2819  T end = (T)(*buf >> 3);
2820  T curr = end;
2821  T* pcurr = buf + end;
2822  T* pend = pcurr;
2823  T* pprev = pcurr - 1;
2824 
2825  // Special case, first bit GAP operation. There is no platform beside it.
2826  // initial flag must be inverted.
2827  if (!pos)
2828  {
2829  *buf ^= 1;
2830  if ( buf[1] ) // We need to insert a 1 bit platform here.
2831  {
2832  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2833  buf[1] = 0;
2834  ++end;
2835  }
2836  else // Only 1 bit in the GAP. We need to delete the first GAP.
2837  {
2838  pprev = buf + 1; pcurr = pprev + 1;
2839  --end;
2840  do { *pprev++ = *pcurr++; } while (pcurr < pend);
2841  }
2842  }
2843  else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
2844  {
2845  ++(*pprev);
2846  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2847  {
2848  --end;
2849  BM_ASSERT(pcurr == pend);
2850  }
2851  }
2852  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2853  {
2854  --(*pcurr);
2855  end += (pcurr == pend);
2856  }
2857  else // Worst case we need to split current block.
2858  {
2859  pcurr[0] = (T)(pos-1);
2860  pcurr[1] = (T)pos;
2861  end = (T)(end+2);
2862  }
2863 
2864  // Set correct length word.
2865  *buf = (T)((*buf & 7) + (end << 3));
2866  buf[end] = bm::gap_max_bits - 1;
2867  return end;
2868 }
2869 
2870 #ifdef __GNUG__
2871 #pragma GCC diagnostic pop
2872 #endif
2873 
2874 
2875 /*!
2876  @brief Right shift GAP block by 1 bit
2877  @param buf - block pointer
2878  @param co_flag - carry over from the previous block
2879  @param new_len - output length of the GAP block after the operation
2880 
2881  @return carry over bit (1 or 0)
2882  @ingroup gapfunc
2883 */
2884 template<typename T>
2886  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
2887 {
2888  BM_ASSERT(new_len);
2889  bool co;
2890  // 1: increment all GAP values by 1
2891  {
2892  unsigned bitval = *buf & 1;
2893  if (buf[1] == bm::gap_max_bits-1) // full GAP block
2894  {
2895  co = bitval;
2896  }
2897  else
2898  {
2899  unsigned len = (*buf >> 3);
2900  unsigned i = 1;
2901  for (; i < len; ++i)
2902  {
2903  buf[i]++;
2904  bitval ^= 1;
2905  } // for i
2906  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
2907  if (buf[i-1] == bm::gap_max_bits-1) // last element shifts out
2908  {
2909  // Set correct length word
2910  --len;
2911  *buf = (T)((*buf & 7) + (len << 3));
2912  }
2913  co = bitval;
2914  }
2915  }
2916  // set bit position 0 with carry-in flag
2917  {
2918  unsigned is_set;
2919  *new_len = bm::gap_set_value(co_flag, buf, 0, &is_set);
2920  }
2921  return co;
2922 }
2923 
2924 /*!
2925  @brief Left shift GAP block by 1 bit
2926  @param buf - block pointer
2927  @param co_flag - carry over from the previous block
2928  @param new_len - new length of the block
2929 
2930  @return carry over bit (1 or 0)
2931  @ingroup gapfunc
2932 */
2933 template<typename T>
2935  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
2936 {
2937  BM_ASSERT(new_len);
2938  unsigned is_set;
2939 
2940  // 1: decrement all GAP values by 1
2941  //
2942  unsigned bitval = *buf & 1;
2943  bool co0 = bitval;
2944 
2945  if (!buf[1]) // cannot decrement (corner case)
2946  {
2947  bitval ^= 1;
2948  *new_len = bm::gap_set_value(bitval, buf, 0, &is_set);
2949 
2950  BM_ASSERT(is_set);
2951  BM_ASSERT(buf[1]);
2952  BM_ASSERT(bitval == unsigned(*buf & 1u));
2953 
2954  if (*new_len == 1)
2955  {
2956  *new_len = bm::gap_set_value(co_flag, buf,
2957  bm::gap_max_bits-1, &is_set);
2958  return co0;
2959  }
2960  }
2961  if (buf[1] != bm::gap_max_bits-1) // full GAP block
2962  {
2963  BM_ASSERT(buf[1]);
2964  unsigned len = (*buf >> 3);
2965  unsigned i = 1;
2966  for (; i < len; ++i)
2967  {
2968  buf[i]--;
2969  bitval ^= 1;
2970  } // for i
2971  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
2972  }
2973  // 2: set last bit position with carry-in flag
2974  //
2975  *new_len = bm::gap_set_value(co_flag, buf, bm::gap_max_bits-1, &is_set);
2976  return co0;
2977 }
2978 
2979 
2980 /*!
2981  \brief Convert array to GAP buffer.
2982 
2983  \param buf - GAP buffer.
2984  \param arr - array of values to set
2985  \param len - length of the array
2986 
2987  \return New GAP buffer length.
2988 
2989  @ingroup gapfunc
2990 */
2991 
2992 template<typename T>
2993 unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT
2994 {
2995  *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
2996 
2997  T* pcurr = buf + 1;
2998 
2999  unsigned i = 0;
3000  T curr = arr[i];
3001  if (curr != 0) // need to add the first gap: (0 to arr[0]-1)
3002  {
3003  *pcurr = (T)(curr - 1);
3004  ++pcurr;
3005  }
3006  else
3007  {
3008  ++(*buf); // GAP starts with 1
3009  }
3010  T prev = curr;
3011  T acc = prev;
3012 
3013  for (i = 1; i < len; ++i)
3014  {
3015  curr = arr[i];
3016  if (curr == prev + 1)
3017  {
3018  ++acc;
3019  prev = curr;
3020  }
3021  else
3022  {
3023  *pcurr++ = acc;
3024  acc = curr;
3025  *pcurr++ = (T)(curr-1);
3026  }
3027  prev = curr;
3028  }
3029  *pcurr = acc;
3030  if (acc != bm::gap_max_bits - 1)
3031  {
3032  ++pcurr;
3033  *pcurr = bm::gap_max_bits - 1;
3034  }
3035 
3036  unsigned gap_len = unsigned(pcurr - buf);
3037  BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
3038 
3039  *buf = (T)((*buf & 7) + (gap_len << 3));
3040  return gap_len+1;
3041 }
3042 
3043 
3044 //------------------------------------------------------------------------
3045 
3046 /**
3047  \brief Compute number of GAPs in bit-array
3048  \param arr - array of BITs
3049  \param len - array length
3050 
3051  @ingroup gapfunc
3052 */
3053 template<typename T>
3054 unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT
3055 {
3056  unsigned gap_count = 1;
3057  T prev = arr[0];
3058  if (prev > 0)
3059  ++gap_count;
3060  for (unsigned i = 1; i < len; ++i)
3061  {
3062  T curr = arr[i];
3063  if (curr != prev + 1)
3064  {
3065  gap_count += 2;
3066  }
3067  prev = curr;
3068  }
3069  return gap_count;
3070 }
3071 
3072 
3073 //------------------------------------------------------------------------
3074 
3075 /**
3076  \brief Searches for the next 1 bit in the GAP block
3077  \param buf - GAP buffer
3078  \param nbit - bit index to start checking from.
3079  \param prev - returns previously checked value
3080 
3081  \return 0 if not found
3082 
3083  @ingroup gapfunc
3084 */
3085 template<typename T>
3086 unsigned gap_block_find(const T* BMRESTRICT buf,
3087  unsigned nbit,
3089 {
3090  BM_ASSERT(nbit < bm::gap_max_bits);
3091 
3092  unsigned bitval;
3093  unsigned gap_idx = bm::gap_bfind(buf, nbit, &bitval);
3094 
3095  if (bitval) // positive block.
3096  {
3097  *prev = nbit;
3098  return 1u;
3099  }
3100  unsigned val = buf[gap_idx] + 1;
3101  *prev = val;
3102  return (val != bm::gap_max_bits); // no bug here.
3103 }
3104 
3105 //------------------------------------------------------------------------
3106 
3107 
3108 /*!
3109  \brief Set 1 bit in a block
3110  @ingroup bitfunc
3111 */
3113 void set_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
3114 {
3115  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3116  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3117  nbit &= bm::set_word_mask;
3118  dest[nword] |= unsigned(1u << nbit);
3119 }
3120 
3121 /*!
3122  \brief Set 1 bit in a block
3123  @ingroup bitfunc
3124 */
3126 void clear_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
3127 {
3128  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3129  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3130  nbit &= bm::set_word_mask;
3131  dest[nword] &= ~(unsigned(1u << nbit));
3132 }
3133 
3134 /*!
3135  \brief Test 1 bit in a block
3136 
3137  @ingroup bitfunc
3138 */
3140 unsigned test_bit(const unsigned* block, unsigned bitpos) BMNOEXCEPT
3141 {
3142  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3143  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3144  nbit &= bm::set_word_mask;
3145  return (block[nword] >> nbit) & 1u;
3146 }
3147 
3148 
3149 /*!
3150  \brief Sets bits to 1 in the bitblock.
3151  \param dest - Bitset buffer.
3152  \param bitpos - Offset of the start bit.
3153  \param bitcount - number of bits to set.
3154 
3155  @ingroup bitfunc
3156 */
3157 inline
3158 void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
3159 {
3160  const unsigned maskFF = ~0u;
3161 
3162  dest += unsigned(bitpos >> bm::set_word_shift); // nword
3163  bitpos &= bm::set_word_mask;
3164 
3165  if (bitcount == 1u) // special case (only 1 bit to set)
3166  {
3167  *dest |= (1u << bitpos);
3168  return;
3169  }
3170 
3171  if (bitpos) // starting pos is not aligned
3172  {
3173  unsigned mask_r = maskFF << bitpos;
3174  unsigned right_margin = bitpos + bitcount;
3175  if (right_margin < 32)
3176  {
3177  *dest |= (maskFF >> (32 - right_margin)) & mask_r;
3178  return;
3179  }
3180  *dest++ |= mask_r;
3181  bitcount -= 32 - bitpos;
3182  }
3183  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3184  dest[0] = dest[1] = maskFF;
3185  if (bitcount >= 32)
3186  {
3187  *dest++ = maskFF; bitcount -= 32;
3188  }
3189  if (bitcount)
3190  {
3191  *dest |= maskFF >> (32 - bitcount);
3192  }
3193 }
3194 
3195 
3196 /*!
3197  \brief SUB (AND NOT) bit interval to 1 in the bitblock.
3198  \param dest - Bitset buffer.
3199  \param bitpos - Offset of the start bit.
3200  \param bitcount - number of bits to set.
3201 
3202  @ingroup bitfunc
3203 */
3204 inline
3205 void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
3206 {
3207  const unsigned maskFF = ~0u;
3208 
3209  dest += unsigned(bitpos >> bm::set_word_shift); // nword
3210  bitpos &= bm::set_word_mask;
3211 
3212  if (bitcount == 1u) // special case (only 1 bit to set)
3213  {
3214  *dest &= ~(1u << bitpos);
3215  return;
3216  }
3217 
3218  if (bitpos) // starting pos is not aligned
3219  {
3220  unsigned mask_r = maskFF << bitpos;
3221  unsigned right_margin = bitpos + bitcount;
3222  if (right_margin < 32)
3223  {
3224  *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
3225  return;
3226  }
3227  *dest++ &= ~mask_r;
3228  bitcount -= 32 - bitpos;
3229  }
3230  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3231  dest[0] = dest[1] = 0u;
3232  if (bitcount >= 32)
3233  {
3234  *dest++ = 0u; bitcount -= 32;
3235  }
3236  if (bitcount)
3237  {
3238  *dest &= ~(maskFF >> (32 - bitcount));
3239  }
3240 }
3241 
3242 
3243 
3244 /*!
3245  \brief XOR bit interval to 1 in the bitblock.
3246  \param dest - Bitset buffer.
3247  \param bitpos - Offset of the start bit.
3248  \param bitcount - number of bits to set.
3249 
3250  @ingroup bitfunc
3251 */
3252 inline void xor_bit_block(unsigned* dest,
3253  unsigned bitpos,
3254  unsigned bitcount) BMNOEXCEPT
3255 {
3256  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3257  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3258  nbit &= bm::set_word_mask;
3259 
3260  bm::word_t* word = dest + nword;
3261 
3262  if (bitcount == 1) // special case (only 1 bit to set)
3263  {
3264  *word ^= unsigned(1 << nbit);
3265  return;
3266  }
3267 
3268  if (nbit) // starting position is not aligned
3269  {
3270  unsigned right_margin = nbit + bitcount;
3271 
3272  // here we checking if we setting bits only in the current
3273  // word. Example: 00111000000000000000000000000000 (32 bits word)
3274 
3275  if (right_margin < 32)
3276  {
3277  unsigned mask =
3279  bm::block_set_table<true>::_left[right_margin-1];
3280  *word ^= mask;
3281  return;
3282  }
3283  *word ^= block_set_table<true>::_right[nbit];
3284  bitcount -= 32 - nbit;
3285  ++word;
3286  }
3287  for ( ;bitcount >= 64; bitcount-=64, word+=2)
3288  {
3289  word[0] ^= ~0u; word[1] ^= ~0u;
3290  }
3291  if (bitcount >= 32)
3292  {
3293  *word++ ^= ~0u; bitcount -= 32;
3294  }
3295  if (bitcount)
3296  *word ^= block_set_table<true>::_left[bitcount-1];
3297 }
3298 
3299 
3300 /*!
3301  \brief SUB (AND NOT) GAP block to bitblock.
3302  \param dest - bitblock buffer pointer.
3303  \param pcurr - GAP buffer pointer.
3304 
3305  @ingroup gapfunc
3306 */
3307 template<typename T>
3308 void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
3309  const T* BMRESTRICT pcurr) BMNOEXCEPT
3310 {
3311  BM_ASSERT(dest && pcurr);
3312 
3313  const T* pend = pcurr + (*pcurr >> 3);
3314  if (*pcurr & 1) // Starts with 1
3315  {
3316  bm::sub_bit_block(dest, 0, 1 + pcurr[1]);
3317  ++pcurr;
3318  }
3319  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3320  {
3321  BM_ASSERT(*pcurr > pcurr[-1]);
3322  bm::sub_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3323  }
3324 }
3325 
3326 
3327 /*!
3328  \brief SUB (AND NOT) GAP block to bitblock with digest assist
3329 
3330  \param dest - bitblock buffer pointer.
3331  \param pcurr - GAP buffer pointer.
3332  \param digest0 - digest of 0 strides inside bit block
3333 
3334  @ingroup gapfunc
3335 */
3336 template<typename T>
3337 void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
3338  const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
3339 {
3340  BM_ASSERT(dest && pcurr);
3341 
3342  const T* pend = pcurr + (*pcurr >> 3);
3343  if (*pcurr & 1) // Starts with 1
3344  {
3345  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
3346  if (!all_zero)
3347  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3348  pcurr += 3;
3349  }
3350  else
3351  pcurr += 2;
3352 
3353  // wind forward to digest start
3354  {
3355  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3356  unsigned start_pos = tz << set_block_digest_pos_shift;
3357  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3358  {
3359  if (*pcurr >= start_pos)
3360  break;
3361  }
3362  }
3363 
3364  unsigned lz = bm::count_leading_zeros_u64(digest0);
3365  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3366 
3367  unsigned bc, pos;
3368  T prev;
3369  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "1" again
3370  {
3371  BM_ASSERT(*pcurr > *(pcurr-1));
3372  prev = pcurr[-1];
3373  bc = *pcurr - prev;
3374  pos = 1u + prev;
3375 
3376  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3377  if (!all_zero)
3378  bm::sub_bit_block(dest, pos, bc);
3379 
3380  if (pos > stop_pos)
3381  break; // early break is possible based on digest tail
3382 
3383  } // for
3384 }
3385 
3386 
3387 
3388 /*!
3389  \brief XOR GAP block to bitblock.
3390  \param dest - bitblock buffer pointer.
3391  \param pcurr - GAP buffer pointer.
3392 
3393  @ingroup gapfunc
3394 */
3395 template<typename T>
3396 void gap_xor_to_bitset(unsigned* BMRESTRICT dest,
3397  const T* BMRESTRICT pcurr) BMNOEXCEPT
3398 {
3399  BM_ASSERT(dest && pcurr);
3400 
3401  const T* pend = pcurr + (*pcurr >> 3);
3402  if (*pcurr & 1) // Starts with 1
3403  {
3404  bm::xor_bit_block(dest, 0, 1 + pcurr[1]);
3405  ++pcurr;
3406  }
3407  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3408  {
3409  BM_ASSERT(*pcurr > pcurr[-1]);
3410  bm::xor_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3411  }
3412 }
3413 
3414 
3415 /*!
3416  \brief Adds(OR) GAP block to bitblock.
3417  \param dest - bitblock buffer pointer.
3418  \param pcurr - GAP buffer pointer.
3419  \param len - gap length
3420 
3421  @ingroup gapfunc
3422 */
3423 template<typename T>
3424 void gap_add_to_bitset(unsigned* BMRESTRICT dest,
3425  const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
3426 {
3427  BM_ASSERT(dest && pcurr);
3428 
3429  const T* pend = pcurr + len;
3430  if (*pcurr & 1) // Starts with 1
3431  {
3432  bm::or_bit_block(dest, 0, 1 + pcurr[1]);
3433  pcurr += 3;
3434  }
3435  else
3436  pcurr += 2;
3437 
3438  unsigned bc, pos;
3439  for (; pcurr <= pend; )
3440  {
3441  BM_ASSERT(*pcurr > pcurr[-1]);
3442  pos = 1u + pcurr[-1];
3443  bc = *pcurr - pcurr[-1];
3444  pcurr += 2;
3445  bm::or_bit_block(dest, pos, bc);
3446  }
3447 }
3448 
3449 
3450 /*!
3451  \brief Adds(OR) GAP block to bitblock.
3452  \param dest - bitblock buffer pointer.
3453  \param pcurr - GAP buffer pointer.
3454 
3455  @ingroup gapfunc
3456 */
3457 template<typename T>
3458 void gap_add_to_bitset(unsigned* BMRESTRICT dest,
3459  const T* BMRESTRICT pcurr) BMNOEXCEPT
3460 {
3461  unsigned len = (*pcurr >> 3);
3462  gap_add_to_bitset(dest, pcurr, len);
3463 }
3464 
3465 
3466 /*!
3467  \brief ANDs GAP block to bitblock.
3468  \param dest - bitblock buffer pointer.
3469  \param pcurr - GAP buffer pointer.
3470 
3471  @ingroup gapfunc
3472 */
3473 template<typename T>
3474 void gap_and_to_bitset(unsigned* BMRESTRICT dest,
3475  const T* BMRESTRICT pcurr) BMNOEXCEPT
3476 {
3477  BM_ASSERT(dest && pcurr);
3478 
3479  const T* pend = pcurr + (*pcurr >> 3);
3480  if (!(*pcurr & 1) ) // Starts with 0
3481  {
3482  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3483  pcurr += 3;
3484  }
3485  else
3486  pcurr += 2;
3487 
3488  unsigned bc, pos;
3489  for (; pcurr <= pend; ) // now we are in GAP "0" again
3490  {
3491  BM_ASSERT(*pcurr > *(pcurr-1));
3492  pos = 1u + pcurr[-1];
3493  bc = *pcurr - pcurr[-1];
3494  pcurr += 2;
3495  bm::sub_bit_block(dest, pos, bc);
3496  }
3497 }
3498 
3499 
3500 /*!
3501  \brief ANDs GAP block to bitblock with digest assist
3502  \param dest - bitblock buffer pointer.
3503  \param pcurr - GAP buffer pointer.
3504  \param digest0 - digest of 0 strides for the destination
3505 
3506  @ingroup gapfunc
3507 */
3508 template<typename T>
3509 void gap_and_to_bitset(unsigned* BMRESTRICT dest,
3510  const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
3511 {
3512  BM_ASSERT(dest && pcurr);
3513  if (!digest0)
3514  return;
3515 
3516  const T* pend = pcurr + (*pcurr >> 3);
3517  if (!(*pcurr & 1) ) // Starts with 0
3518  {
3519  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
3520  if (!all_zero)
3521  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3522  pcurr += 3;
3523  }
3524  else
3525  pcurr += 2;
3526 
3527  // wind forward to digest start
3528  {
3529  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3530  unsigned start_pos = tz << set_block_digest_pos_shift;
3531  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3532  {
3533  if (*pcurr >= start_pos)
3534  break;
3535  }
3536  }
3537 
3538  unsigned lz = bm::count_leading_zeros_u64(digest0);
3539  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3540 
3541  unsigned bc, pos;
3542  T prev;
3543  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0" again
3544  {
3545  BM_ASSERT(*pcurr > *(pcurr-1));
3546 
3547  prev = pcurr[-1];
3548  bc = *pcurr - prev;
3549  pos = 1u + prev;
3550 
3551  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3552  if (!all_zero)
3553  bm::sub_bit_block(dest, pos, bc);
3554 
3555  if (pos > stop_pos) // early break is possible based on digest tail
3556  break;
3557 
3558  } // for
3559 }
3560 
3561 
3562 /*!
3563  \brief Compute bitcount of bit block AND masked by GAP block
3564  \param block - bitblock buffer pointer
3565  \param pcurr - GAP buffer pointer
3566  \return bitcount - cardinality of the AND product
3567 
3568  @ingroup gapfunc
3569 */
3570 template<typename T>
3572  const T* BMRESTRICT pcurr) BMNOEXCEPT
3573 {
3574  BM_ASSERT(block);
3575  const T* pend = pcurr + (*pcurr >> 3);
3576  bm::id_t count = 0;
3577  if (*pcurr & 1) // Starts with 1
3578  {
3579  count += bm::bit_block_calc_count_range(block, 0, pcurr[1]);
3580  ++pcurr;
3581  }
3582  for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
3583  {
3584  count += bm::bit_block_calc_count_range(block, pcurr[-1]+1, *pcurr);
3585  }
3586  return count;
3587 }
3588 
3589 
3590 /*!
3591  \brief Bitcount test of bit block AND masked by GAP block.
3592  \param block - bitblock buffer pointer
3593  \param pcurr - GAP buffer pointer
3594  \return non-zero value if AND produces any result
3595 
3596  @ingroup gapfunc
3597 */
3598 template<typename T>
3600  const T* BMRESTRICT pcurr) BMNOEXCEPT
3601 {
3602  BM_ASSERT(block);
3603 
3604  const T* pend = pcurr + (*pcurr >> 3);
3605  bm::id_t count = 0;
3606  if (*pcurr & 1) // Starts with 1
3607  {
3608  count = bm::bit_block_any_range(block, 0, pcurr[1]);
3609  ++pcurr;
3610  }
3611  for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
3612  {
3613  count = bm::bit_block_any_range(block, pcurr[-1]+1, *pcurr);
3614  }
3615  return count;
3616 }
3617 
3618 
3619 
3620 /*!
3621  \brief Compute bitcount of bit block SUB masked by GAP block.
3622  \param block - bitblock buffer pointer.
3623  \param buf - GAP buffer pointer.
3624  \return bit-count result of AND NOT operation
3625 
3626  @ingroup gapfunc
3627 */
3628 template<typename T>
3630  const T* BMRESTRICT buf) BMNOEXCEPT
3631 {
3632  BM_ASSERT(block);
3633 
3634  const T* pcurr = buf;
3635  const T* pend = pcurr + (*pcurr >> 3);
3636  ++pcurr;
3637 
3638  bm::id_t count = 0;
3639 
3640  if (!(*buf & 1)) // Starts with 0
3641  {
3642  count += bit_block_calc_count_range(block, 0, *pcurr);
3643  ++pcurr;
3644  }
3645  ++pcurr; // now we are in GAP "0" again
3646 
3647  for (;pcurr <= pend; pcurr+=2)
3648  {
3649  count += bm::bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr);
3650  }
3651  return count;
3652 }
3653 
3654 
3655 /*!
3656  \brief Compute bitcount test of bit block SUB masked by GAP block
3657  \param block - bitblock buffer pointer
3658  \param buf - GAP buffer pointer
3659  \return non-zero value if AND NOT produces any 1 bits
3660 
3661  @ingroup gapfunc
3662 */
3663 template<typename T>
3665  const T* BMRESTRICT buf) BMNOEXCEPT
3666 {
3667  BM_ASSERT(block);
3668 
3669  const T* pcurr = buf;
3670  const T* pend = pcurr + (*pcurr >> 3);
3671  ++pcurr;
3672 
3673  bm::id_t count = 0;
3674 
3675  if (!(*buf & 1)) // Starts with 0
3676  {
3677  count += bit_block_any_range(block, 0, *pcurr);
3678  if (count)
3679  return count;
3680  ++pcurr;
3681  }
3682  ++pcurr; // now we are in GAP "0" again
3683 
3684  for (; !count && pcurr <= pend; pcurr+=2)
3685  {
3686  count += bm::bit_block_any_range(block, *(pcurr-1)+1, *pcurr);
3687  }
3688  return count;
3689 }
3690 
3691 
3692 
3693 /*!
3694  \brief Compute bitcount of bit block XOR masked by GAP block
3695  \param block - bitblock buffer pointer
3696  \param buf - GAP buffer pointer
3697  \return bit count value of XOR operation
3698 
3699  @ingroup gapfunc
3700 */
3701 template<typename T>
3703  const T* BMRESTRICT buf) BMNOEXCEPT
3704 {
3705  BM_ASSERT(block);
3706 
3707  const T* pcurr = buf;
3708  const T* pend = pcurr + (*pcurr >> 3);
3709  ++pcurr;
3710 
3711  unsigned bitval = *buf & 1;
3712 
3713  bm::id_t count = bm::bit_block_calc_count_range(block, 0, *pcurr);
3714  if (bitval)
3715  {
3716  count = *pcurr + 1 - count;
3717  }
3718 
3719  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3720  {
3721  T prev = (T)(*(pcurr-1)+1);
3722  bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr);
3723 
3724  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3725  c = (*pcurr - prev + 1) - c;
3726  count += c;
3727  }
3728  return count;
3729 }
3730 
3731 /*!
3732  \brief Compute bitcount test of bit block XOR masked by GAP block.
3733  \param block - bitblock buffer pointer
3734  \param buf - GAP buffer pointer
3735  \return non-zero value if XOR returns anything
3736 
3737  @ingroup gapfunc
3738 */
3739 template<typename T>
3741  const T* BMRESTRICT buf) BMNOEXCEPT
3742 {
3743  BM_ASSERT(block);
3744 
3745  const T* pcurr = buf;
3746  const T* pend = pcurr + (*pcurr >> 3);
3747  ++pcurr;
3748 
3749  unsigned bitval = *buf & 1;
3750 
3751  bm::id_t count = bit_block_any_range(block, 0, *pcurr);
3752  if (bitval)
3753  count = *pcurr + 1 - count;
3754 
3755  for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
3756  {
3757  T prev = (T)(*(pcurr-1)+1);
3758  bm::id_t c = bit_block_any_range(block, prev, *pcurr);
3759 
3760  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3761  c = (*pcurr - prev + 1) - c;
3762  count += c;
3763  }
3764  return count;
3765 }
3766 
3767 
3768 
3769 /*!
3770  \brief Compute bitcount of bit block OR masked by GAP block.
3771  \param block - bitblock buffer pointer.
3772  \param buf - GAP buffer pointer.
3773  \return bit count of OR
3774 
3775  @ingroup gapfunc
3776 */
3777 template<typename T>
3779  const T* BMRESTRICT buf) BMNOEXCEPT
3780 {
3781  BM_ASSERT(block);
3782  const T* pcurr = buf;
3783  const T* pend = pcurr + (*pcurr >> 3);
3784  ++pcurr;
3785 
3786  unsigned bitval = *buf & 1;
3787 
3788  bm::id_t count = bitval ? *pcurr + 1
3789  : bm::bit_block_calc_count_range(block, 0, *pcurr);
3790  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3791  {
3792  T prev = (T)(*(pcurr-1)+1);
3793  bm::id_t c =
3794  bitval ? (*pcurr - prev + 1)
3795  : bm::bit_block_calc_count_range(block, prev, *pcurr);
3796  count += c;
3797  }
3798  return count;
3799 }
3800 
3801 /*!
3802  \brief Compute bitcount test of bit block OR masked by GAP block
3803  \param block - bitblock buffer pointer
3804  \param buf - GAP buffer pointer
3805  \return non zero value if union (OR) returns anything
3806 
3807  @ingroup gapfunc
3808 */
3809 template<typename T>
3810 bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block,
3811  const T* BMRESTRICT buf) BMNOEXCEPT
3812 {
3813  bool b = !bm::gap_is_all_zero(buf) ||
3814  !bm::bit_is_all_zero(block);
3815  return b;
3816 }
3817 
3818 
3819 
3820 /*!
3821  \brief Bitblock memset operation.
3822 
3823  \param dst - destination block.
3824  \param value - value to set.
3825 
3826  @ingroup bitfunc
3827 */
3828 inline
3830 {
3831 #ifdef BMVECTOPT
3832  VECT_SET_BLOCK(dst, value);
3833 #else
3834  ::memset(dst, int(value), bm::set_block_size * sizeof(bm::word_t));
3835 #endif
3836 }
3837 
3838 
3839 /*!
3840  \brief GAP block to bitblock conversion.
3841  \param dest - bitblock buffer pointer.
3842  \param buf - GAP buffer pointer.
3843 
3844  @ingroup gapfunc
3845 */
3846 template<typename T>
3847 void gap_convert_to_bitset(unsigned* BMRESTRICT dest,
3848  const T* BMRESTRICT buf) BMNOEXCEPT
3849 {
3850  bm::bit_block_set(dest, 0);
3851  bm::gap_add_to_bitset(dest, buf);
3852 }
3853 
3854 
3855 
3856 /*!
3857  \brief Smart GAP block to bitblock conversion.
3858 
3859  Checks if GAP block is ALL-ZERO or ALL-ON. In those cases returns
3860  pointer on special static bitblocks.
3861 
3862  \param dest - bitblock buffer pointer.
3863  \param buf - GAP buffer pointer.
3864  \param set_max - max possible bitset length
3865 
3866  @ingroup gapfunc
3867 */
3868 template<typename T>
3869 unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest,
3870  const T* BMRESTRICT buf,
3871  id_t set_max) BMNOEXCEPT
3872 {
3873  if (buf[1] == set_max - 1)
3874  return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
3875  bm::gap_convert_to_bitset(dest, buf);
3876  return dest;
3877 }
3878 
3879 
3880 /*!
3881  \brief Calculates sum of all words in GAP block. (For debugging purposes)
3882  \note For debugging and testing ONLY.
3883  \param buf - GAP buffer pointer.
3884  \return Sum of all words.
3885 
3886  @ingroup gapfunc
3887  @internal
3888 */
3889 template<typename T>
3890 unsigned gap_control_sum(const T* buf) BMNOEXCEPT
3891 {
3892  unsigned end = *buf >> 3;
3893 
3894  const T* pcurr = buf;
3895  const T* pend = pcurr + (*pcurr >> 3);
3896  ++pcurr;
3897 
3898  if (*buf & 1) // Starts with 1
3899  {
3900  ++pcurr;
3901  }
3902  ++pcurr; // now we are in GAP "1" again
3903  while (pcurr <= pend)
3904  {
3905  BM_ASSERT(*pcurr > *(pcurr-1));
3906  pcurr += 2;
3907  }
3908  return buf[end];
3909 }
3910 
3911 
3912 /*!
3913  \brief Sets all bits to 0 or 1 (GAP)
3914  \param buf - GAP buffer pointer.
3915  \param set_max - max possible bitset length
3916  \param value - value to set
3917 
3918  @ingroup gapfunc
3919 */
3920 template<class T>
3921 void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT
3922 {
3923  BM_ASSERT(value == 0 || value == 1);
3924  *buf = (T)((*buf & 6u) + (1u << 3) + value);
3925  *(++buf) = (T)(set_max - 1);
3926 }
3927 
3928 
3929 /*!
3930  \brief Init gap block so it has block in it (can be whole block)
3931  \param buf - GAP buffer pointer.
3932  \param from - one block start
3933  \param to - one block end
3934  \param value - (block value)1 or 0
3935 
3936  @ingroup gapfunc
3937 */
3938 template<class T>
3940  T from,
3941  T to,
3942  T value) BMNOEXCEPT
3943 {
3944  BM_ASSERT(value == 0 || value == 1);
3945  const unsigned set_max = bm::bits_in_block;
3946 
3947  unsigned gap_len;
3948  if (from == 0)
3949  {
3950  if (to == set_max - 1)
3951  {
3952  bm::gap_set_all(buf, set_max, value);
3953  }
3954  else
3955  {
3956  gap_len = 2;
3957  buf[1] = (T)to;
3958  buf[2] = (T)(set_max - 1);
3959  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3960  }
3961  return;
3962  }
3963  // from != 0
3964 
3965  value = !value;
3966  if (to == set_max - 1)
3967  {
3968  gap_len = 2;
3969  buf[1] = (T)(from - 1);
3970  buf[2] = (T)(set_max - 1);
3971  }
3972  else
3973  {
3974  gap_len = 3;
3975  buf[1] = (T) (from - 1);
3976  buf[2] = (T) to;
3977  buf[3] = (T)(set_max - 1);
3978  }
3979  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3980 }
3981 
3982 
3983 /*!
3984  \brief Inverts all bits in the GAP buffer.
3985  \param buf - GAP buffer pointer.
3986 
3987  @ingroup gapfunc
3988 */
3989 template<typename T> void gap_invert(T* buf) BMNOEXCEPT
3990 {
3991  *buf ^= 1;
3992 }
3993 
3994 
3995 #ifdef __GNUG__
3996 #pragma GCC diagnostic push
3997 #pragma GCC diagnostic ignored "-Wconversion"
3998 #endif
3999 
4000 /*!
4001  \brief Sets GAP block capacity level.
4002  \param buf - GAP buffer pointer.
4003  \param level new GAP block capacity level.
4004 
4005  @ingroup gapfunc
4006 */
4007 template<typename T>
4008 void set_gap_level(T* buf, int level) BMNOEXCEPT
4009 {
4010  BM_ASSERT(level >= 0);
4011  BM_ASSERT(unsigned(level) < bm::gap_levels);
4012 
4013  *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
4014 }
4015 #ifdef __GNUG__
4016 #pragma GCC diagnostic pop
4017 #endif
4018 
4019 
4020 
4021 /*!
4022  \brief Calculates GAP block capacity level.
4023  \param len - GAP buffer length.
4024  \param glevel_len - GAP lengths table
4025  \return GAP block capacity level.
4026  -1 if block does not fit any level.
4027  @ingroup gapfunc
4028 */
4029 template<typename T>
4030 int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT
4031 {
4032  if (len <= unsigned(glevel_len[0]-4)) return 0;
4033  if (len <= unsigned(glevel_len[1]-4)) return 1;
4034  if (len <= unsigned(glevel_len[2]-4)) return 2;
4035  if (len <= unsigned(glevel_len[3]-4)) return 3;
4036 
4037  BM_ASSERT(bm::gap_levels == 4);
4038  return -1;
4039 }
4040 
4041 /*! @brief Returns number of free elements in GAP block array.
4042  Difference between GAP block capacity on this level and actual GAP length.
4043 
4044  @param buf - GAP buffer pointer
4045  @param glevel_len - GAP lengths table
4046 
4047  @return Number of free GAP elements
4048  @ingroup gapfunc
4049 */
4050 template<typename T>
4051 inline unsigned gap_free_elements(const T* BMRESTRICT buf,
4052  const T* BMRESTRICT glevel_len) BMNOEXCEPT
4053 {
4054  unsigned len = bm::gap_length(buf);
4055  unsigned capacity = bm::gap_capacity(buf, glevel_len);
4056  return capacity - len;
4057 }
4058 
4059 /*!
4060  \brief Lexicographical comparison of BIT buffers.
4061  \param buf1 - First buffer pointer.
4062  \param buf2 - Second buffer pointer.
4063  \param len - Buffer length in elements (T).
4064  \return <0 - less, =0 - equal, >0 - greater.
4065 
4066  @ingroup bitfunc
4067 */
4068 template<typename T>
4069 int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT
4070 {
4071  BM_ASSERT(len);
4072  const T* pend1 = buf1 + len;
4073  do
4074  {
4075  T w1 = *buf1++;
4076  T w2 = *buf2++;
4077  T diff = w1 ^ w2;
4078  if (diff)
4079  return (w1 & diff & -diff) ? 1 : -1;
4080  } while (buf1 < pend1);
4081  return 0;
4082 }
4083 
4084 /*!
4085  \brief Find first bit which is different between two bit-blocks
4086  \param blk1 - block 1
4087  \param blk2 - block 2
4088  \param pos - out - position of difference (undefined if blocks are equal)
4089  \return true if difference was found
4090 
4091  @ingroup bitfunc
4092 */
4093 inline
4095  const bm::word_t* BMRESTRICT blk2,
4096  unsigned* BMRESTRICT pos) BMNOEXCEPT
4097 {
4098  BM_ASSERT(blk1 && blk2 && pos);
4099 #ifdef VECT_BIT_FIND_DIFF
4100  bool f = VECT_BIT_FIND_DIFF(blk1, blk2, pos);
4101  return f;
4102 #else
4103 #ifdef BM64OPT
4104  BM_ASSERT(sizeof(bm::wordop_t) == 8);
4105 
4106  const bm::wordop_t* b1 = (const bm::wordop_t*) blk1;
4107  const bm::wordop_t* b2 = (const bm::wordop_t*) blk2;
4108 
4109  for (unsigned i = 0; i < bm::set_block_size/2; ++i)
4110  {
4111  bm::wordop_t w1 = b1[i]; bm::wordop_t w2 = b2[i];
4112  bm::wordop_t diff = w1 ^ w2;
4113  if (diff)
4114  {
4115  unsigned idx = bm::count_trailing_zeros_u64(diff);
4116  *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t))));
4117  return true;
4118  }
4119  } // for
4120 #else
4121  for (unsigned i = 0; i < bm::set_block_size; ++i)
4122  {
4123  bm::word_t w1 = blk1[i]; bm::word_t w2 = blk2[i];
4124  bm::word_t diff = w1 ^ w2;
4125  if (diff)
4126  {
4127  unsigned idx = bm::bit_scan_forward32(diff); // trailing zeros
4128  *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
4129  return true;
4130  }
4131  } // for
4132 #endif
4133 #endif
4134  return false;
4135 }
4136 
4137 
4138 #ifndef BMAVX2OPT
4139 
4140 /*!
4141  \brief Converts bit block to GAP.
4142  \param dest - Destinatio GAP buffer.
4143  \param block - Source bitblock buffer.
4144  \param dest_len - length of the destination buffer.
4145  \return New length of GAP block or 0 if conversion failed
4146  (insufficicent space).
4147 
4148  @ingroup gapfunc
4149 */
4150 inline
4152  const unsigned* BMRESTRICT block,
4153  unsigned dest_len) BMNOEXCEPT
4154 {
4155  const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
4156  gap_word_t* BMRESTRICT pcurr = dest;
4157  gap_word_t* BMRESTRICT end = dest + dest_len; (void)end;
4158 
4159  unsigned bitval = (*block) & 1u;
4160  *pcurr++ = bm::gap_word_t(bitval);
4161  *pcurr = 0;
4162  unsigned bit_idx = 0;
4163 
4164  do
4165  {
4166  unsigned val = *block;
4167  while (!val || val == ~0u)
4168  {
4169  if (bitval != unsigned(bool(val)))
4170  {
4171  *pcurr++ = (gap_word_t)(bit_idx-1);
4172  bitval ^= 1u;
4173  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4174  BM_ASSERT(pcurr != end);
4175  }
4176  bit_idx += unsigned(sizeof(*block) * 8);
4177  if (++block >= block_end)
4178  goto complete;
4179  val = *block;
4180  } // while
4181 
4182  // process "0100011" word
4183  //
4184  unsigned bits_consumed = 0;
4185  do
4186  {
4187  unsigned tz = 1u;
4188  if (bitval != (val & 1u))
4189  {
4190  *pcurr++ = (gap_word_t)(bit_idx-1);
4191  bitval ^= 1u;
4192  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4193  BM_ASSERT(pcurr != end);
4194  }
4195  else // match, find the next idx
4196  {
4197  tz = bm::bit_scan_forward32(bitval ? ~val : val);
4198  // possible alternative:
4199  // tz = bm::count_trailing_zeros(bitval ? ~val : val);
4200  }
4201 
4202  bits_consumed += tz;
4203  bit_idx += tz;
4204  val >>= tz;
4205 
4206  if (!val)
4207  {
4208  if (bits_consumed < 32u)
4209  {
4210  *pcurr++ = (gap_word_t)(bit_idx-1);
4211  bitval ^= 1u;
4212  bit_idx += 32u - bits_consumed;
4213  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4214  BM_ASSERT(pcurr != end);
4215  }
4216  break;
4217  }
4218  } while (1);
4219 
4220  } while(++block < block_end);
4221 
4222 complete:
4223  *pcurr = (gap_word_t)(bit_idx-1);
4224  unsigned len = (unsigned)(pcurr - dest);
4225  *dest = (gap_word_t)((*dest & 7) + (len << 3));
4226  return len;
4227 }
4228 #endif
4229 
4230 /**
4231  Convert bit block to GAP representation
4232  @internal
4233  @ingroup bitfunc
4234 */
4235 inline
4237  const unsigned* BMRESTRICT block,
4238  unsigned dest_len) BMNOEXCEPT
4239 {
4240 #if defined(VECT_BIT_TO_GAP)
4241  return VECT_BIT_TO_GAP(dest, block, dest_len);
4242 #else
4243  return bm::bit_block_to_gap(dest, block, dest_len);
4244 #endif
4245 }
4246 
4247 
4248 /*!
4249  \brief Iterate gap block as delta-bits with a functor
4250  @ingroup gapfunc
4251 */
4252 template<class T, class F>
4253 void for_each_gap_dbit(const T* buf, F& func)
4254 {
4255  const T* pcurr = buf;
4256  const T* pend = pcurr + (*pcurr >> 3);
4257 
4258  ++pcurr;
4259 
4260  unsigned prev = 0;
4261  unsigned first_inc;
4262 
4263  if (*buf & 1)
4264  {
4265  first_inc = 0;
4266  unsigned to = *pcurr;
4267  for (unsigned i = 0; i <= to; ++i)
4268  {
4269  func(1);
4270  }
4271  prev = to;
4272  ++pcurr;
4273  }
4274  else
4275  {
4276  first_inc = 1;
4277  }
4278  ++pcurr; // set GAP to 1
4279 
4280  while (pcurr <= pend)
4281  {
4282  unsigned from = *(pcurr-1)+1;
4283  unsigned to = *pcurr;
4284  if (first_inc)
4285  {
4286  func(from - prev + first_inc);
4287  first_inc = 0;
4288  }
4289  else
4290  {
4291  func(from - prev);
4292  }
4293 
4294  for (unsigned i = from+1; i <= to; ++i)
4295  {
4296  func(1);
4297  }
4298  prev = to;
4299  pcurr += 2; // jump to the next positive GAP
4300  }
4301 }
4302 
4303 /*!
4304  \brief Convert gap block into array of ints corresponding to 1 bits
4305  @ingroup gapfunc
4306 */
4307 template<typename D, typename T>
4309  const T* BMRESTRICT buf,
4310  unsigned dest_len,
4311  bool invert = false) BMNOEXCEPT
4312 {
4313  const T* BMRESTRICT pcurr = buf;
4314  const T* pend = pcurr + (*pcurr >> 3);
4315 
4316  D* BMRESTRICT dest_curr = dest;
4317  ++pcurr;
4318 
4319  int bitval = (*buf) & 1;
4320  if (invert)
4321  bitval = !bitval; // invert the GAP buffer
4322 
4323  if (bitval)
4324  {
4325  if (unsigned(*pcurr + 1) >= dest_len)
4326  return 0; // insufficient space
4327  dest_len -= *pcurr;
4328  T to = *pcurr;
4329  for (T i = 0; ;++i)
4330  {
4331  *dest_curr++ = i;
4332  if (i == to) break;
4333  }
4334  ++pcurr;
4335  }
4336  ++pcurr; // set GAP to 1
4337 
4338  while (pcurr <= pend)
4339  {
4340  unsigned pending = *pcurr - *(pcurr-1);
4341  if (pending >= dest_len)
4342  return 0;
4343  dest_len -= pending;
4344  T from = (T)(*(pcurr-1)+1);
4345  T to = *pcurr;
4346  for (T i = from; ;++i)
4347  {
4348  *dest_curr++ = i;
4349  if (i == to) break;
4350  }
4351  pcurr += 2; // jump to the next positive GAP
4352  }
4353  return (D) (dest_curr - dest);
4354 }
4355 
4356 
4357 
4358 /*!
4359  @brief Bitcount for bit block
4360 
4361  Function calculates number of 1 bits in the given array of words.
4362  Make sure the addresses are aligned.
4363 
4364  @ingroup bitfunc
4365 */
4366 inline
4368 {
4369  const bm::word_t* block_end = block + bm::set_block_size;
4370  bm::id_t count = 0;
4371 
4372 #ifdef BMVECTOPT
4373  count = VECT_BITCOUNT(block, block_end);
4374 #else
4375 #ifdef BM64OPT
4376  // 64-bit optimized algorithm. No sparse vect opt.
4377  // instead it uses 4-way parallel pipelined version
4378 
4379  const bm::id64_t* b1 = (bm::id64_t*) block;
4380  const bm::id64_t* b2 = (bm::id64_t*) block_end;
4381  do
4382  {
4383  bm::id64_t x = b1[0];
4384  bm::id64_t y = b1[1];
4385  bm::id64_t u = b1[2];
4386  bm::id64_t v = b1[3];
4387 
4388  if (x | y | u | v)
4389  {
4390  unsigned c = bitcount64_4way(x, y, u, v);
4391  BM_ASSERT(c);
4392  count += c;
4393  }
4394  b1 += 4;
4395  } while (b1 < b2);
4396 #else
4397  // For 32 bit code the fastest method is
4398  // to use bitcount table for each byte in the block.
4399  // As optimization for sparse bitsets used bits accumulator
4400  // to collect ON bits using bitwise OR.
4401  bm::word_t acc = *block++;
4402  do
4403  {
4404  bm::word_t in = *block++;
4405  bm::word_t acc_prev = acc;
4406  acc |= in;
4407  if (acc_prev &= in) // accumulator miss: counting bits
4408  {
4409  BM_INCWORD_BITCOUNT(count, acc);
4410  acc = acc_prev;
4411  }
4412  } while (block < block_end);
4413 
4414  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
4415 
4416 #endif
4417 #endif
4418  return count;
4419 }
4420 
4421 /*!
4422  @brief Bitcount for bit block
4423 
4424  Function calculates number of 1 bits in the given array of words.
4425  uses digest to understand zero areas
4426 
4427  @ingroup bitfunc
4428 */
4429 inline
4431  bm::id64_t digest) BMNOEXCEPT
4432 {
4433 #ifdef VECT_BIT_COUNT_DIGEST
4434  return VECT_BIT_COUNT_DIGEST(block, digest);
4435 #else
4436  bm::id_t count = 0;
4437  bm::id64_t d = digest;
4438  while (d)
4439  {
4440  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4441 
4442  unsigned wave = bm::word_bitcount64(t - 1);
4443  unsigned off = wave * bm::set_block_digest_wave_size;
4444 
4445  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
4446  (const bm::bit_block_t::bunion_t*)(&block[off]);
4447  unsigned j = 0;
4448  do
4449  {
4450  count += bm::word_bitcount64(src_u->w64[j+0]) +
4451  bm::word_bitcount64(src_u->w64[j+1]) +
4452  bm::word_bitcount64(src_u->w64[j+2]) +
4453  bm::word_bitcount64(src_u->w64[j+3]);
4454  j += 4;
4455  } while (j < bm::set_block_digest_wave_size/2);
4456 
4457  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4458  } // while
4459  return count;
4460 #endif
4461 }
4462 
4463 
4464 
4465 /*!
4466  @brief Bitcount for bit string
4467 
4468  Added for back-compatibility purposes, not block aligned,
4469  not SIMD accelerated
4470 
4471  @ingroup bitfunc
4472 */
4473 inline
4475  const bm::word_t* block_end) BMNOEXCEPT
4476 {
4477  bm::id_t count = 0;
4478  bm::word_t acc = *block++;
4479  do
4480  {
4481  bm::word_t in = *block++;
4482  bm::word_t acc_prev = acc;
4483  acc |= in;
4484  if (acc_prev &= in) // accumulator miss: counting bits
4485  {
4486  BM_INCWORD_BITCOUNT(count, acc);
4487  acc = acc_prev;
4488  }
4489  } while (block < block_end);
4490 
4491  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
4492  return count;
4493 }
4494 
4495 
4496 
4497 /*!
4498  Function calculates number of times when bit value changed
4499  (1-0 or 0-1).
4500 
4501  For 001 result is 2
4502  010 - 3
4503  011 - 2
4504  111 - 1
4505 
4506  @ingroup bitfunc
4507 */
4508 inline
4510 {
4511  unsigned count = 1;
4512  w ^= (w >> 1);
4513 
4514  count += bm::word_bitcount(w);
4515  count -= (w >> ((sizeof(w) * 8) - 1));
4516  return count;
4517 }
4518 
4519 
4520 /*!
4521  Function calculates number of times when bit value changed
4522  @internal
4523 */
4524 inline
4525 unsigned bit_block_change32(const bm::word_t* block, unsigned size) BMNOEXCEPT
4526 {
4527  unsigned gap_count = 1;
4528 
4529  bm::word_t w, w0, w_prev, w_l;
4530  w = w0 = *block;
4531 
4532  const int w_shift = int(sizeof(w) * 8 - 1);
4533  w ^= (w >> 1);
4534  BM_INCWORD_BITCOUNT(gap_count, w);
4535  gap_count -= (w_prev = (w0 >> w_shift)); // negative value correction
4536 
4537  const bm::word_t* block_end = block + size; // bm::set_block_size;
4538  for (++block; block < block_end; ++block)
4539  {
4540  w = w0 = *block;
4541  ++gap_count;
4542  if (!w)
4543  {
4544  gap_count -= !w_prev;
4545  w_prev = 0;
4546  }
4547  else
4548  {
4549  w ^= (w >> 1);
4550  BM_INCWORD_BITCOUNT(gap_count, w);
4551 
4552  w_l = w0 & 1;
4553  gap_count -= (w0 >> w_shift); // negative value correction
4554  gap_count -= !(w_prev ^ w_l); // word border correction
4555 
4556  w_prev = (w0 >> w_shift);
4557  }
4558  } // for
4559  return gap_count;
4560 }
4561 
4562 
4563 /*!
4564  Function calculates basic bit-block statistics
4565  number of times when bit value changed (GAPS)
4566  and population count
4567  @param block - bit-block pointer
4568  @param gc - [output] gap_count
4569  @param bc - [output] bit count
4570  @internal
4571 */
4572 inline
4574  unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT
4575 {
4576  BM_ASSERT(gc);
4577  BM_ASSERT(bc);
4578 
4579  #ifdef VECT_BLOCK_CHANGE_BC
4580  VECT_BLOCK_CHANGE_BC(block, gc, bc);
4581  #else
4583  *bc = bm::bit_block_count(block);
4584  #endif
4585 }
4586 
4587 
4588 
4589 /*!
4590  Function calculates number of times when bit value changed
4591  (1-0 or 0-1) in the bit block.
4592 
4593  @param block - bit-block start pointer
4594  @return number of 1-0, 0-1 transitions
4595 
4596  @ingroup bitfunc
4597 */
4598 inline
4600 {
4601 #if defined(VECT_BLOCK_CHANGE)
4602  return VECT_BLOCK_CHANGE(block, bm::set_block_size);
4603 #else
4605 #endif
4606 }
4607 
4608 /*!
4609  Check if all bits are 1 in [left, right] range
4610  @ingroup bitfunc
4611 */
4612 inline
4614  bm::word_t left,
4615  bm::word_t right) BMNOEXCEPT
4616 {
4617  BM_ASSERT(left <= right);
4618  BM_ASSERT(right <= bm::gap_max_bits-1);
4619 
4620  unsigned nword, nbit, bitcount, temp;
4621  nbit = left & bm::set_word_mask;
4622  const bm::word_t* word =
4623  block + (nword = unsigned(left >> bm::set_word_shift));
4624  if (left == right) // special case (only 1 bit to check)
4625  return (*word >> nbit) & 1u;
4626 
4627  if (nbit) // starting position is not aligned
4628  {
4629  unsigned right_margin = nbit + right - left;
4630  if (right_margin < 32)
4631  {
4632  unsigned mask =
4634  block_set_table<true>::_left[right_margin];
4635  return mask == (*word & mask);
4636  }
4637  temp = *word & block_set_table<true>::_right[nbit];
4638  if (temp != block_set_table<true>::_right[nbit])
4639  return false;
4640  bitcount = (right - left + 1u) - (32 - nbit);
4641  ++word;
4642  }
4643  else
4644  {
4645  bitcount = right - left + 1u;
4646  }
4647 
4648  // now when we are word aligned, we can scan the bit-stream
4649  const bm::id64_t maskFF64 = ~0ull;
4650  const bm::word_t maskFF = ~0u;
4651  // loop unrolled to evaluate 4 words at a time
4652  // SIMD showed no advantage, unless evaluate sub-wave intervals
4653  //
4654  for ( ;bitcount >= 128; bitcount-=128, word+=4)
4655  {
4656  bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32);
4657  bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32);
4658  if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64))
4659  return false;
4660  } // for
4661 
4662  for ( ;bitcount >= 32; bitcount-=32, ++word)
4663  {
4664  if (*word != maskFF)
4665  return false;
4666  } // for
4667  BM_ASSERT(bitcount < 32);
4668 
4669  if (bitcount) // we have a tail to count
4670  {
4671  temp = *word & block_set_table<true>::_left[bitcount-1];
4672  if (temp != block_set_table<true>::_left[bitcount-1])
4673  return false;
4674  }
4675 
4676  return true;
4677 }
4678 
4679 
4680 
4681 
4682 /*!
4683  Function calculates number of 1 bits in the given array of words in
4684  the range between left anf right bits (borders included)
4685  Make sure the addr is aligned.
4686 
4687  @ingroup bitfunc
4688 */
4689 inline
4691  bm::word_t left,
4692  bm::word_t right) BMNOEXCEPT
4693 {
4694  BM_ASSERT(left <= right);
4695  BM_ASSERT(right <= bm::gap_max_bits-1);
4696 
4697  unsigned nword, nbit, bitcount, count;
4698  nbit = left & bm::set_word_mask;
4699  const bm::word_t* word =
4700  block + (nword = unsigned(left >> bm::set_word_shift));
4701  if (left == right) // special case (only 1 bit to check)
4702  {
4703  return (*word >> nbit) & 1u;
4704  }
4705 
4706  count = 0;
4707  bitcount = right - left + 1u;
4708  if (nbit) // starting position is not aligned
4709  {
4710  unsigned right_margin = nbit + right - left;
4711  if (right_margin < 32)
4712  {
4713  unsigned mask =
4715  block_set_table<true>::_left[right_margin];
4716  return bm::word_bitcount(*word & mask);
4717  }
4718  count = bm::word_bitcount(*word & block_set_table<true>::_right[nbit]);
4719  bitcount -= 32 - nbit;
4720  ++word;
4721  }
4722 
4723  // now when we are word aligned, we can count bits the usual way
4724  //
4725  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
4726  for ( ;bitcount >= 64; bitcount-=64)
4727  {
4728  bm::id64_t w64 = *((bm::id64_t*)word); // x86 unaligned(!) read
4729  count += unsigned(_mm_popcnt_u64(w64));
4730  word += 2;
4731  }
4732  if (bitcount >= 32)
4733  {
4734  count += bm::word_bitcount(*word++);
4735  bitcount-=32;
4736  }
4737  #else
4738  for ( ;bitcount >= 32; bitcount-=32, ++word)
4739  count += bm::word_bitcount(*word);
4740  #endif
4741  BM_ASSERT(bitcount < 32);
4742 
4743  if (bitcount) // we have a tail to count
4744  {
4745  count += bm::word_bitcount(
4746  *word & block_set_table<true>::_left[bitcount-1]);
4747  }
4748  return count;
4749 }
4750 
4751 /*!
4752  Function calculates number of 1 bits in the given array of words in
4753  the range between 0 anf right bits (borders included)
4754  Make sure the addr is aligned.
4755 
4756  @ingroup bitfunc
4757 */
4758 inline
4760  bm::word_t right) BMNOEXCEPT
4761 {
4762  BM_ASSERT(block);
4763  if (!right) // special case, first bit check
4764  return *block & 1u;
4765  bm::id_t count = 0;
4766 
4767  unsigned bitcount = right + 1;
4768 
4769  // AVX2 or 64-bit loop unroll
4770  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
4771  BM_AVX2_POPCNT_PROLOG
4772 
4773  __m256i cnt = _mm256_setzero_si256();
4774  bm::id64_t* cnt64;
4775 
4776  for ( ;bitcount >= 256; bitcount -= 256)
4777  {
4778  const __m256i* src = (__m256i*)block;
4779  __m256i xmm256 = _mm256_load_si256(src);
4780  BM_AVX2_BIT_COUNT(bc, xmm256);
4781  cnt = _mm256_add_epi64(cnt, bc);
4782 
4783  block += 8;
4784  }
4785  cnt64 = (bm::id64_t*)&cnt;
4786  count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
4787  #endif
4788 
4789  for ( ;bitcount >= 64; bitcount -= 64)
4790  {
4791  bm::id64_t* p = (bm::id64_t*)block;
4792  count += bm::word_bitcount64(*p);
4793  block += 2;
4794  }
4795  if (bitcount >= 32)
4796  {
4797  count += bm::word_bitcount(*block++);
4798  bitcount-=32;
4799  }
4800 
4801  if (bitcount) // we have a tail to count
4802  {
4803  count +=
4804  bm::word_bitcount(*block & block_set_table<true>::_left[bitcount-1]);
4805  }
4806  return count;
4807 }
4808 
4809 
4810 
4811 /*!
4812  Cyclic rotation of bit-block left by 1 bit
4813  @ingroup bitfunc
4814 */
4815 inline
4817 {
4818  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4819  for (unsigned i = 0; i < bm::set_block_size-1; ++i)
4820  {
4821  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4822  }
4823  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4824 }
4825 
4826 /*!
4827  @brief Unrolled cyclic rotation of bit-block left by 1 bit
4828  @param block - bit-block pointer
4829  @ingroup bitfunc
4830 */
4831 inline
4833 {
4834  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4835  const unsigned unroll_factor = 4;
4836  bm::word_t w0, w1, w2, w3;
4837 
4838  unsigned i;
4839  for (i = 0; i < bm::set_block_size - unroll_factor; i += unroll_factor)
4840  {
4841  w0 = block[i + 1] >> 31;
4842  w1 = block[i + 2] >> 31;
4843  w2 = block[i + 3] >> 31;
4844  w3 = block[i + 4] >> 31;
4845 
4846  block[0 + i] = (block[0 + i] << 1) | w0;
4847  block[1 + i] = (block[1 + i] << 1) | w1;
4848  block[2 + i] = (block[2 + i] << 1) | w2;
4849  block[3 + i] = (block[3 + i] << 1) | w3;
4850  }
4851  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4852  block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
4853  block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
4854  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4855 }
4856 
4857 /*!
4858  @brief insert bit into position and shift the rest right with carryover
4859 
4860  @param block - bit-block pointer
4861  @param bitpos - bit position to insert
4862  @param value - bit value (0|1) to insert
4863 
4864  @return carry over value
4865  @ingroup bitfunc
4866 */
4867 inline
4869  unsigned bitpos, bool value) BMNOEXCEPT
4870 {
4871  BM_ASSERT(block);
4872  BM_ASSERT(bitpos < 65536);
4873 
4874  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
4875  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4876  nbit &= bm::set_word_mask;
4877 
4878  bm::word_t co_flag = 0;
4879  if (nbit)
4880  {
4881  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
4882  bm::word_t w = block[nword] & r_mask;
4883  bm::word_t wl = block[nword] & ~r_mask;
4884  co_flag = w >> 31;
4885  w <<= 1u;
4886  block[nword] = w | (unsigned(value) << nbit) | wl;
4887  ++nword;
4888  }
4889  else
4890  {
4891  co_flag = value;
4892  }
4893 
4894  for (unsigned i = nword; i < bm::set_block_size; ++i)
4895  {
4896  bm::word_t w = block[i];
4897  bm::word_t co_flag1 = w >> 31;
4898  w = (w << 1u) | co_flag;
4899  block[i] = w;
4900  co_flag = co_flag1;
4901  } // for i
4902  return co_flag;
4903 }
4904 
4905 
4906 
4907 /*!
4908  @brief Right bit-shift bitblock by 1 bit (reference)
4909  @param block - bit-block pointer
4910  @param empty_acc - [out] contains 0 if block is empty
4911  @param co_flag - carry over from the previous block
4912 
4913  @return carry over bit (1 or 0)
4914  @ingroup bitfunc
4915 */
4916 inline
4918  bm::word_t* BMRESTRICT empty_acc,
4919  bm::word_t co_flag) BMNOEXCEPT
4920 {
4921  BM_ASSERT(block);
4922  BM_ASSERT(empty_acc);
4923 
4924  bm::word_t acc = 0;
4925  for (unsigned i = 0; i < bm::set_block_size; ++i)
4926  {
4927  bm::word_t w = block[i];
4928  bm::word_t co_flag1 = w >> 31;
4929  acc |= w = (w << 1u) | co_flag;
4930  block[i] = w;
4931  co_flag = co_flag1;
4932  }
4933  *empty_acc = acc;
4934  return co_flag;
4935 }
4936 
4937 /*!
4938  @brief Right bit-shift of bit-block by 1 bit (loop unrolled)
4939  @param block - bit-block pointer
4940  @param empty_acc - [out] contains 0 if block is empty
4941  @param co_flag - carry over from the previous block
4942 
4943  @return carry over bit (1 or 0)
4944  @ingroup bitfunc
4945 */
4946 inline
4948  bm::word_t* BMRESTRICT empty_acc,
4949  bm::word_t co_flag) BMNOEXCEPT
4950 {
4951  BM_ASSERT(block);
4952  BM_ASSERT(empty_acc);
4953  #if defined(VECT_SHIFT_R1)
4954  return VECT_SHIFT_R1(block, empty_acc, co_flag);
4955  #else
4956  return bm::bit_block_shift_r1(block, empty_acc, co_flag);
4957  #endif
4958 }
4959 
4960 
4961 /*!
4962  @brief Left bit-shift bitblock by 1 bit (reference)
4963  @param block - bit-block pointer
4964  @param empty_acc - [out] contains 0 if block is empty
4965  @param co_flag - carry over from the prev/next block
4966 
4967  @return carry over bit (1 or 0)
4968 
4969  @ingroup bitfunc
4970 */
4971 inline
4973  bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT
4974 {
4975  BM_ASSERT(block);
4976  BM_ASSERT(empty_acc);
4977 
4978  bm::word_t acc = 0;
4979  for (int i = bm::set_block_size-1; i >= 0; --i)
4980  {
4981  bm::word_t w = block[i];
4982  bm::word_t co_flag1 = w & 1u;
4983  acc |= w = (w >> 1u) | (co_flag << 31u);
4984  block[i] = w;
4985  co_flag = co_flag1;
4986  }
4987 
4988  *empty_acc = acc;
4989  return co_flag;
4990 }
4991 
4992 /*!
4993  @brief Left bit-shift of bit-block by 1 bit (loop unrolled)
4994  @param block - bit-block pointer
4995  @param empty_acc - [out] contains 0 if block is empty
4996  @param co_flag - carry over from the prev/next block
4997 
4998  @return carry over bit (1 or 0)
4999  @ingroup bitfunc
5000 */
5001 inline
5003  bm::word_t* empty_acc,
5004  bm::word_t co_flag) BMNOEXCEPT
5005 {
5006  BM_ASSERT(block);
5007  BM_ASSERT(empty_acc);
5008  #if defined(VECT_SHIFT_L1)
5009  return VECT_SHIFT_L1(block, empty_acc, co_flag);
5010  #else
5011  return bm::bit_block_shift_l1(block, empty_acc, co_flag);
5012  #endif
5013 }
5014 
5015 /*!
5016  @brief erase bit from position and shift the rest right with carryover
5017 
5018  @param block - bit-block pointer
5019  @param bitpos - bit position to insert
5020  @param carry_over - bit value to add to the end (0|1)
5021 
5022  @ingroup bitfunc
5023 */
5024 inline
5026  unsigned bitpos,
5027  bool carry_over) BMNOEXCEPT
5028 {
5029  BM_ASSERT(block);
5030  BM_ASSERT(bitpos < 65536);
5031 
5032  if (!bitpos)
5033  {
5034  bm::word_t acc;
5035  bm::bit_block_shift_l1_unr(block, &acc, carry_over);
5036  return;
5037  }
5038 
5039  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
5040  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5041  nbit &= bm::set_word_mask;
5042 
5043  bm::word_t co_flag = carry_over;
5044  for (unsigned i = bm::set_block_size-1; i > nword; --i)
5045  {
5046  bm::word_t w = block[i];
5047  bm::word_t co_flag1 = w & 1u;
5048  w = (w >> 1u) | (co_flag << 31u);
5049  block[i] = w;
5050  co_flag = co_flag1;
5051  }
5052 
5053  if (nbit)
5054  {
5055  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
5056  bm::word_t w = block[nword] & r_mask;
5057  bm::word_t wl = block[nword] & ~r_mask;
5058  w &= ~(1 << nbit); // clear the removed bit
5059  w >>= 1u;
5060  w |= wl | (co_flag << 31u);
5061  block[nword] = w;
5062  }
5063  else
5064  {
5065  block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
5066  }
5067 }
5068 
5069 
5070 /*!
5071  @brief Right bit-shift of bit-block by 1 bit (reference) + AND
5072  @param block - bit-block pointer
5073  @param co_flag - carry over from the previous block
5074  @param mask_block - mask bit-block pointer
5075  @param digest - block digest
5076 
5077  @return carry over bit (1 or 0)
5078  @ingroup bitfunc
5079 */
5080 inline
5082  bm::word_t co_flag,
5083  const bm::word_t* BMRESTRICT mask_block,
5085 {
5086  BM_ASSERT(block);
5087  BM_ASSERT(mask_block);
5088  BM_ASSERT(digest && *digest);
5089 
5090 
5091  bm::id64_t d = *digest;
5092 
5093  unsigned di = 0;
5094  if (!co_flag)
5095  {
5096  bm::id64_t t = d & -d;
5097  di = bm::word_bitcount64(t - 1); // find start bit-index
5098  }
5099 
5100  for (; di < 64; ++di)
5101  {
5102  const unsigned d_base = di * bm::set_block_digest_wave_size;
5103  bm::id64_t dmask = (1ull << di);
5104  if (d & dmask) // digest stride not empty
5105  {
5106  bm::word_t acc = 0;
5107  for (unsigned i = d_base; i < d_base + bm::set_block_digest_wave_size; ++i)
5108  {
5110 
5111  bm::word_t w = block[i];
5112  bm::word_t co_flag1 = w >> 31;
5113  w = (w << 1u) | co_flag;
5114  acc |= block[i] = w & mask_block[i];
5115  co_flag = co_flag1;
5116  }
5117  if (!acc)
5118  d &= ~dmask; // update digest: clear stride bit
5119  }
5120  else // stride is empty
5121  {
5122  BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
5123  BM_ASSERT(block[d_base]==0);
5124 
5125  if (co_flag) // there is carry-over
5126  {
5127  BM_ASSERT(co_flag == 1);
5128  BM_ASSERT(block[d_base] == 0);
5129 
5130  block[d_base] = co_flag & mask_block[d_base];
5131  if (block[d_base])
5132  d |= dmask; // update digest
5133  co_flag = 0;
5134  }
5135  }
5136  } // for di
5137 
5138  *digest = d;
5139  return co_flag;
5140 }
5141 
5142 /*!
5143  @brief Right bit-shift bitblock by 1 bit (reference) + AND
5144  @param block - bit-block pointer
5145  @param co_flag - carry over from the previous block
5146  @param mask_block - mask bit-block pointer
5147  @param digest - block digest
5148 
5149  @return carry over bit (1 or 0)
5150  @ingroup bitfunc
5151 */
5152 inline
5154  bm::word_t co_flag,
5155  const bm::word_t* BMRESTRICT mask_block,
5157 {
5158  BM_ASSERT(block);
5159  BM_ASSERT(mask_block);
5160  BM_ASSERT(digest);
5161 
5162  #if defined(VECT_SHIFT_R1_AND)
5163  return VECT_SHIFT_R1_AND(block, co_flag, mask_block, digest);
5164  #else
5165  return bm::bit_block_shift_r1_and(block, co_flag, mask_block, digest);
5166  #endif
5167 }
5168 
5169 
5170 /*!
5171  Function calculates if there is any number of 1 bits
5172  in the given array of words in the range between left anf right bits
5173  (borders included). Make sure the addresses are aligned.
5174 
5175  @ingroup bitfunc
5176 */
5177 inline
5179  bm::word_t left,
5180  bm::word_t right) BMNOEXCEPT
5181 {
5182  BM_ASSERT(left <= right);
5183 
5184  unsigned nbit = left; // unsigned(left & bm::set_block_mask);
5185  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5186  nbit &= bm::set_word_mask;
5187 
5188  const bm::word_t* word = block + nword;
5189 
5190  if (left == right) // special case (only 1 bit to check)
5191  {
5192  return (*word >> nbit) & 1;
5193  }
5194  unsigned acc;
5195  unsigned bitcount = right - left + 1;
5196 
5197  if (nbit) // starting position is not aligned
5198  {
5199  unsigned right_margin = nbit + (right - left);
5200  if (right_margin < 32)
5201  {
5202  unsigned mask =
5204  block_set_table<true>::_left[right_margin];
5205  return *word & mask;
5206  }
5207  else
5208  {
5209  acc = *word & block_set_table<true>::_right[nbit];
5210  if (acc)
5211  return acc;
5212  bitcount -= 32 - nbit;
5213  }
5214  ++word;
5215  }
5216 
5217  // loop unrolled to evaluate 4 words at a time
5218  // SIMD showed no advantage, unless evaluate sub-wave intervals
5219  //
5220  for ( ;bitcount >= 128; bitcount-=128, word+=4)
5221  {
5222  acc = word[0] | word[1] | word[2] | word[3];
5223  if (acc)
5224  return acc;
5225  } // for
5226 
5227  acc = 0;
5228  for ( ;bitcount >= 32; bitcount -= 32)
5229  {
5230  acc |= *word++;
5231  } // for
5232 
5233  if (bitcount) // we have a tail to count
5234  acc |= (*word) & block_set_table<true>::_left[bitcount-1];
5235 
5236  return acc;
5237 }
5238 
5239 // ----------------------------------------------------------------------
5240 
5241 /*! Function inverts block of bits
5242  @ingroup bitfunc
5243 */
5244 template<typename T>
5245 void bit_invert(T* start) BMNOEXCEPT
5246 {
5248 #ifdef BMVECTOPT
5249  VECT_INVERT_BLOCK(start);
5250 #else
5251  T* end = (T*)((unsigned*)(start) + bm::set_block_size);
5252  do
5253  {
5254  start[0] = ~start[0];
5255  start[1] = ~start[1];
5256  start[2] = ~start[2];
5257  start[3] = ~start[3];
5258  start+=4;
5259  } while (start < end);
5260 #endif
5261 }
5262 
5263 // ----------------------------------------------------------------------
5264 
5265 /*! @brief Returns "true" if all bits in the block are 1
5266  @ingroup bitfunc
5267 */
5268 inline
5270 {
5271 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
5272  return VECT_IS_ONE_BLOCK(start);
5273 #else
5274  const bm::word_t* BMRESTRICT src_end = (bm::word_t*)start + bm::set_block_size;
5275  const bm::wordop_t* end = (const bm::wordop_t*)src_end;
5276  do
5277  {
5278  bm::wordop_t tmp =
5279  start[0] & start[1] & start[2] & start[3];
5280  if (tmp != bm::all_bits_mask)
5281  return false;
5282  start += 4;
5283  } while (start < end);
5284  return true;
5285 #endif
5286 }
5287 
5288 // ----------------------------------------------------------------------
5289 
5290 /*! @brief Returns "true" if all bits are 1 in the block [left, right]
5291  Function check for block varieties
5292  @internal
5293 */
5294 inline
5296  unsigned left, unsigned right) BMNOEXCEPT
5297 {
5298  BM_ASSERT(left <= right);
5299  BM_ASSERT(right < bm::gap_max_bits);
5300  if (block)
5301  {
5302  if (BM_IS_GAP(block))
5303  return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right);
5304  if (block == FULL_BLOCK_FAKE_ADDR)
5305  return true;
5306  return bm::bit_block_is_all_one_range(block, left, right);
5307  }
5308  return false;
5309 }
5310 
5311 /*! @brief Returns "true" if all bits are 1 in the block [left, right]
5312  and border bits are 0
5313  @internal
5314 */
5315 inline
5316 bool block_is_interval(const bm::word_t* const BMRESTRICT block,
5317  unsigned left, unsigned right) BMNOEXCEPT
5318 {
5319  BM_ASSERT(left <= right);
5320  BM_ASSERT(right < bm::gap_max_bits-1);
5321 
5322  if (block)
5323  {
5324  bool is_left, is_right, all_one;
5325  if (BM_IS_GAP(block))
5326  {
5327  const bm::gap_word_t* gap = BMGAP_PTR(block);
5328  all_one = bm::gap_is_interval(gap, left, right);
5329  return all_one;
5330  }
5331  else // bit-block
5332  {
5333  if (block == FULL_BLOCK_FAKE_ADDR)
5334  return false;
5335  unsigned nword = ((left-1) >> bm::set_word_shift);
5336  is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask));
5337  if (is_left == false)
5338  {
5339  nword = ((right + 1) >> bm::set_word_shift);
5340  is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask));
5341  if (is_right == false)
5342  {
5343  all_one = bm::bit_block_is_all_one_range(block, left, right);
5344  return all_one;
5345  }
5346  }
5347  }
5348  }
5349 
5350  return false;
5351 }
5352 
5353 // ----------------------------------------------------------------------
5354 
5355 /**
5356  \brief Searches for the last 1 bit in the 111 interval of a BIT block
5357  \param block - BIT buffer
5358  \param nbit - bit index to start checking from
5359  \param pos - [out] found value
5360 
5361  \return false if not found
5362  @ingroup bitfunc
5363 */
5364 inline
5366  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5367 {
5368  BM_ASSERT(block);
5369  BM_ASSERT(pos);
5370 
5371  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5372  unsigned bit_pos = (nbit & bm::set_word_mask);
5373  bm::word_t w = block[nword];
5374  w &= (1u << bit_pos);
5375  if (!w)
5376  return false;
5377 
5378  if (nbit == bm::gap_max_bits-1)
5379  {
5380  *pos = bm::gap_max_bits-1;
5381  return true;
5382  }
5383  *pos = nbit;
5384 
5385  ++nbit;
5386  nword = unsigned(nbit >> bm::set_word_shift);
5387  bit_pos = (nbit & bm::set_word_mask);
5388 
5389  w = (~block[nword]) >> bit_pos;
5390  w <<= bit_pos; // clear the trailing bits
5391  if (w)
5392  {
5393  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
5394  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
5395  return true;
5396  }
5397 
5398  for (++nword; nword < bm::set_block_size; ++nword)
5399  {
5400  w = ~block[nword];
5401  if (w)
5402  {
5403  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
5404  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
5405  return true;
5406  }
5407  } // for nword
5408 
5409  // 0 not found, all block is 1s...
5410  *pos = bm::gap_max_bits-1;
5411  return true;
5412 }
5413 
5414 
5415 /*! @brief Find end of the current 111 interval
5416  @return search result code 0 - not found, 1 found, 2 - found at the end
5417  @internal
5418 */
5419 inline
5421  unsigned nbit_from,
5422  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
5423 {
5424  BM_ASSERT(block && found_nbit);
5425  BM_ASSERT(nbit_from < bm::gap_max_bits);
5426 
5427  bool b;
5428  if (BM_IS_GAP(block))
5429  {
5430  const bm::gap_word_t* gap = BMGAP_PTR(block);
5431  b = bm::gap_find_interval_end(gap, nbit_from, found_nbit);
5432  if (b && *found_nbit == bm::gap_max_bits-1)
5433  return 2; // end of block, keep searching
5434  }
5435  else // bit-block
5436  {
5437  if (IS_FULL_BLOCK(block))
5438  {
5439  *found_nbit = bm::gap_max_bits-1;
5440  return 2;
5441  }
5442  b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit);
5443  if (b && *found_nbit == bm::gap_max_bits-1)
5444  return 2; // end of block, keep searching
5445  }
5446  return b;
5447 }
5448 
5449 // ----------------------------------------------------------------------
5450 
5451 /**
5452  \brief Searches for the first 1 bit in the 111 interval of a BIT block
5453  \param block - BIT buffer
5454  \param nbit - bit index to start checking from
5455  \param pos - [out] found value
5456 
5457  \return false if not found
5458  @ingroup bitfunc
5459 */
5460 inline
5462  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5463 {
5464  BM_ASSERT(block);
5465  BM_ASSERT(pos);
5466 
5467  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5468  unsigned bit_pos = (nbit & bm::set_word_mask);
5469  bm::word_t w = block[nword];
5470  w &= (1u << bit_pos);
5471  if (!w)
5472  return false;
5473 
5474  if (nbit == 0)
5475  {
5476  *pos = 0;
5477  return true;
5478  }
5479  *pos = nbit;
5480 
5481  --nbit;
5482  nword = unsigned(nbit >> bm::set_word_shift);
5483  bit_pos = (nbit & bm::set_word_mask);
5484 
5485  w = (~block[nword]) & block_set_table<true>::_left[bit_pos];
5486  if (w)
5487  {
5488  bit_pos = bm::bit_scan_reverse32(w);
5489  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
5490  return true;
5491  }
5492 
5493  if (nword)
5494  {
5495  for (--nword; true; --nword)
5496  {
5497  w = ~block[nword];
5498  if (w)
5499  {
5500  bit_pos = bm::bit_scan_reverse32(w); // trailing zeros
5501  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
5502  return true;
5503  }
5504  if (!nword)
5505  break;
5506  } // for nword
5507  }
5508 
5509  // 0 not found, all block is 1s...
5510  *pos = 0;
5511  return true;
5512 }
5513 
5514 
5515 /*! @brief Find start of the current 111 interval
5516  @return search result code 0 - not found, 1 found, 2 - found at the start
5517  @internal
5518 */
5519 inline
5521  unsigned nbit_from,
5522  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
5523 {
5524  BM_ASSERT(block && found_nbit);
5525  BM_ASSERT(nbit_from < bm::gap_max_bits);
5526  bool b;
5527  if (BM_IS_GAP(block))
5528  {
5529  const bm::gap_word_t* gap = BMGAP_PTR(block);
5530  b = bm::gap_find_interval_start(gap, nbit_from, found_nbit);
5531  if (b && *found_nbit == 0)
5532  return 2; // start of block, keep searching
5533  }
5534  else // bit-block
5535  {
5536  if (IS_FULL_BLOCK(block))
5537  {
5538  *found_nbit = 0;
5539  return 2;
5540  }
5541  b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit);
5542  if (b && *found_nbit == 0)
5543  return 2; // start of block, keep searching
5544  }
5545  return b;
5546 }
5547 
5548 // ----------------------------------------------------------------------
5549 
5550 /*! @brief Returns "true" if one bit is set in the block [left, right]
5551  Function check for block varieties
5552  @internal
5553 */
5554 inline
5555 bool block_any_range(const bm::word_t* const BMRESTRICT block,
5556  unsigned left, unsigned right) BMNOEXCEPT
5557 {
5558  BM_ASSERT(left <= right);
5559  BM_ASSERT(right < bm::gap_max_bits);
5560  if (!block)
5561  return false;
5562  if (BM_IS_GAP(block))
5563  return bm::gap_any_range(BMGAP_PTR(block), left, right);
5564  if (IS_FULL_BLOCK(block))
5565  return true;
5566  return bm::bit_block_any_range(block, left, right);
5567 }
5568 
5569 // ----------------------------------------------------------------------
5570 
5571 /*! @brief Returns "true" if one bit is set in the block
5572  Function check for block varieties
5573  @internal
5574 */
5575 inline
5576 bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT
5577 {
5578  if (!block)
5579  return false;
5580  if (IS_FULL_BLOCK(block))
5581  return true;
5582  bool all_zero = (BM_IS_GAP(block)) ?
5584  : bm::bit_is_all_zero(block);
5585  return !all_zero;
5586 }
5587 
5588 
5589 
5590 // ----------------------------------------------------------------------
5591 
5592 // GAP blocks manipulation functions:
5593 
5594 
5595 /*!
5596  \brief GAP AND operation.
5597 
5598  Function performs AND logical operation on gap vectors.
5599  If possible function put the result into vect1 and returns this
5600  pointer. Otherwise result is put into tmp_buf, which should be
5601  twice of the vector size.
5602 
5603  \param vect1 - operand 1
5604  \param vect2 - operand 2
5605  \param tmp_buf - pointer on temporary buffer
5606  \param dsize - out size of the destination
5607  \return Result pointer (tmp_buf OR vect1)
5608 
5609  @ingroup gapfunc
5610 */
5611 inline
5613  const gap_word_t* BMRESTRICT vect2,
5614  gap_word_t* BMRESTRICT tmp_buf,
5615  unsigned& dsize) BMNOEXCEPT
5616 {
5617  bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
5618  tmp_buf, vect1, 0, vect2, 0, dsize);
5619  return tmp_buf;
5620 }
5621 
5622 /*!
5623  \brief GAP AND operation test.
5624 
5625  Function performs AND logical operation on gap vectors.
5626  If possible function put the result into vect1 and returns this
5627  pointer. Otherwise result is put into tmp_buf, which should be
5628  twice of the vector size.
5629 
5630  \param vect1 - operand 1
5631  \param vect2 - operand 2
5632  \return non zero value if operation returns any 1 bit
5633 
5634  @ingroup gapfunc
5635 */
5636 inline
5638  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5639 {
5640  return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0);
5641 }
5642 
5643 
5644 /*!
5645  \brief GAP bitcount AND operation test.
5646 
5647  \param vect1 - operand 1
5648  \param vect2 - operand 2
5649  \return bitcount of vect1 AND vect2
5650 
5651  @ingroup gapfunc
5652 */
5653 inline
5654 unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
5655  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5656 {
5657  return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2);
5658 }
5659 
5660 
5661 
5662 /*!
5663  \brief GAP XOR operation.
5664 
5665  Function performs XOR logical operation on gap vectors.
5666  If possible function put the result into vect1 and returns this
5667  pointer. Otherwise result is put into tmp_buf, which should be
5668  twice of the vector size.
5669 
5670  \param vect1 - operand 1
5671  \param vect2 - operand 2
5672  \param tmp_buf - pointer on temporary buffer
5673  \param dsize - out destination size
5674  \return Result pointer (tmp_buf)
5675 
5676  @ingroup gapfunc
5677 */
5678 inline
5680  const gap_word_t* BMRESTRICT vect2,
5681  gap_word_t* BMRESTRICT tmp_buf,
5682  unsigned& dsize) BMNOEXCEPT
5683 {
5684  bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
5685  tmp_buf, vect1, 0, vect2, 0, dsize);
5686  return tmp_buf;
5687 }
5688 
5689 /*! Light weight gap_operation_xor for len prediction
5690  @ingroup gapfunc
5691 */
5692 inline
5694  const gap_word_t* BMRESTRICT vect2,
5695  unsigned& dsize,
5696  unsigned limit) BMNOEXCEPT
5697 {
5698  return
5699  bm::gap_buff_dry_op<bm::gap_word_t, bm::xor_func>(vect1, vect2, dsize, limit);
5700 }
5701 
5702 
5703 /*!
5704  \brief GAP XOR operation test.
5705 
5706  Function performs AND logical operation on gap vectors.
5707  If possible function put the result into vect1 and returns this
5708  pointer. Otherwise result is put into tmp_buf, which should be
5709  twice of the vector size.
5710 
5711  \param vect1 - operand 1
5712  \param vect2 - operand 2
5713  \return non zero value if operation returns any 1 bit
5714 
5715  @ingroup gapfunc
5716 */
5719  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5720 {
5721  return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0);
5722 }
5723 
5724 /*!
5725  \brief GAP bitcount XOR operation test.
5726 
5727  \param vect1 - operand 1
5728  \param vect2 - operand 2
5729  \return bitcount of vect1 XOR vect2
5730 
5731  @ingroup gapfunc
5732 */
5734 unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
5735  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5736 {
5737  return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2);
5738 }
5739 
5740 
5741 /*!
5742  \brief GAP OR operation.
5743 
5744  Function performs OR logical oparation on gap vectors.
5745  If possible function put the result into vect1 and returns this
5746  pointer. Otherwise result is put into tmp_buf, which should be
5747  twice of the vector size.
5748 
5749  \param vect1 - operand 1
5750  \param vect2 - operand 2
5751  \param tmp_buf - pointer on temporary buffer
5752  \param dsize - out destination size
5753 
5754  \return Result pointer (tmp_buf)
5755 
5756  @ingroup gapfunc
5757 */
5758 inline
5760  const gap_word_t* BMRESTRICT vect2,
5761  gap_word_t* BMRESTRICT tmp_buf,
5762  unsigned& dsize) BMNOEXCEPT
5763 {
5764  bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
5765  bm::gap_invert(tmp_buf);
5766  return tmp_buf;
5767 }
5768 
5769 /*!
5770  \brief GAP bitcount OR operation test.
5771 
5772  \param vect1 - operand 1
5773  \param vect2 - operand 2
5774  \return bitcount of vect1 OR vect2
5775 
5776  @ingroup gapfunc
5777 */
5779 unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
5780  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5781 {
5782  return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2);
5783 }
5784 
5785 
5786 
5787 /*!
5788  \brief GAP SUB (AND NOT) operation.
5789 
5790  Function performs SUB logical operation on gap vectors.
5791  If possible function put the result into vect1 and returns this
5792  pointer. Otherwise result is put into tmp_buf, which should be
5793  twice of the vector size.
5794 
5795  \param vect1 - operand 1
5796  \param vect2 - operand 2
5797  \param tmp_buf - pointer on temporary buffer
5798  \param dsize - out destination size
5799 
5800  \return Result pointer (tmp_buf)
5801 
5802  @ingroup gapfunc
5803 */
5804 inline
5806  const gap_word_t* BMRESTRICT vect2,
5807  gap_word_t* BMRESTRICT tmp_buf,
5808  unsigned& dsize) BMNOEXCEPT
5809 {
5810  bm::gap_buff_op<bm::gap_word_t, bm::and_func>( // no bug here
5811  tmp_buf, vect1, 0, vect2, 1, dsize);
5812  return tmp_buf;
5813 }
5814 
5815 
5816 /*!
5817  \brief GAP SUB operation test.
5818 
5819  Function performs AND logical operation on gap vectors.
5820  If possible function put the result into vect1 and returns this
5821  pointer. Otherwise result is put into tmp_buf, which should be
5822  twice of the vector size.
5823 
5824  \param vect1 - operand 1
5825  \param vect2 - operand 2
5826  \return non zero value if operation returns any 1 bit
5827 
5828  @ingroup gapfunc
5829 */
5830 inline
5832  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5833 {
5834  return
5835  bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>( // no bug here
5836  vect1, 0, vect2, 1);
5837 }
5838 
5839 
5840 /*!
5841 \brief GAP bitcount SUB (AND NOT) operation test.
5842 
5843 \param vect1 - operand 1
5844 \param vect2 - operand 2
5845 \return bitcount of vect1 SUB (AND NOT) vect2
5846 
5847 @ingroup gapfunc
5848 */
5850 unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
5851  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
5852 {
5853  return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2);
5854 }
5855 
5856 
5857 // ----------------------------------------------------------------------
5858 
5859 // BIT blocks manipulation functions:
5860 
5861 
5862 /*!
5863  \brief Bitblock copy operation.
5864 
5865  \param dst - destination block.
5866  \param src - source block.
5867 
5868  @ingroup bitfunc
5869 */
5870 inline
5872  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
5873 {
5874 #ifdef BMVECTOPT
5875  VECT_COPY_BLOCK(dst, src);
5876 #else
5877  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
5878 #endif
5879 }
5880 
5881 /*!
5882  \brief Bitblock copy/stream operation.
5883 
5884  \param dst - destination block.
5885  \param src - source block.
5886 
5887  @ingroup bitfunc
5888 */
5889 inline
5891  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
5892 {
5893 #ifdef VECT_STREAM_BLOCK
5894  VECT_STREAM_BLOCK(dst, src);
5895 #else
5896  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
5897 #endif
5898 }
5899 
5900 
5901 /*!
5902  \brief Plain bitblock AND operation.
5903  Function does not analyse availability of source and destination blocks.
5904 
5905  \param dst - destination block.
5906  \param src - source block.
5907 
5908  \return 0 if AND operation did not produce anything (no 1s in the output)
5909 
5910  @ingroup bitfunc
5911 */
5912 inline
5914  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
5915 {
5916  BM_ASSERT(dst);
5917  BM_ASSERT(src);
5918  BM_ASSERT(dst != src);
5919 
5920 #ifdef BMVECTOPT
5921  bm::id64_t acc = VECT_AND_BLOCK(dst, src);
5922 #else
5923  unsigned arr_sz = bm::set_block_size / 2;
5924 
5927 
5928  bm::id64_t acc = 0;
5929  for (unsigned i = 0; i < arr_sz; i+=4)
5930  {
5931  acc |= dst_u->w64[i] &= src_u->w64[i];
5932  acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
5933  acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
5934  acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
5935  }
5936 #endif
5937  return acc;
5938 }
5939 
5940 /*!
5941  \brief digest based bit-block AND
5942 
5943  \param dst - destination block.
5944  \param src - source block.
5945  \param digest - known digest of dst block
5946 
5947  \return new digest
5948 
5949  @ingroup bitfunc
5950 */
5951 inline
5953  const bm::word_t* BMRESTRICT src,
5954  bm::id64_t digest) BMNOEXCEPT
5955 {
5956  BM_ASSERT(dst);
5957  BM_ASSERT(src);
5958  BM_ASSERT(dst != src);
5959 
5960  const bm::id64_t mask(1ull);
5961  bm::id64_t d = digest;
5962  while (d)
5963  {
5964  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
5965 
5966  unsigned wave = bm::word_bitcount64(t - 1);
5967  unsigned off = wave * bm::set_block_digest_wave_size;
5968 
5969  #if defined(VECT_AND_DIGEST)
5970  bool all_zero = VECT_AND_DIGEST(&dst[off], &src[off]);
5971  if (all_zero)
5972  digest &= ~(mask << wave);
5973  #else
5974  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
5976 
5977  bm::id64_t acc = 0;
5978  unsigned j = 0;
5979  do
5980  {
5981  acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
5982  acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
5983  acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
5984  acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
5985  j+=4;
5986  } while (j < bm::set_block_digest_wave_size/2);
5987 
5988  if (!acc) // all zero
5989  digest &= ~(mask << wave);
5990  #endif
5991 
5992  d = bm::bmi_bslr_u64(d); // d &= d - 1;
5993  } // while
5994 
5995  return digest;
5996 }
5997 
5998 
5999 /*!
6000  \brief digest based bit-block AND 5-way
6001 
6002  \return new digest
6003 
6004  @ingroup bitfunc
6005 */
6006 inline
6008  const bm::word_t* BMRESTRICT src0,
6009  const bm::word_t* BMRESTRICT src1,
6010  const bm::word_t* BMRESTRICT src2,
6011  const bm::word_t* BMRESTRICT src3,
6012  bm::id64_t digest) BMNOEXCEPT
6013 {
6014  BM_ASSERT(dst);
6015  BM_ASSERT(src0 && src1 && src2 && src3);
6016 
6017  const bm::id64_t mask(1ull);
6018  bm::id64_t d = digest;
6019  while (d)
6020  {
6021  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6022 
6023  unsigned wave = bm::word_bitcount64(t - 1);
6024  unsigned off = wave * bm::set_block_digest_wave_size;
6025 
6026 #if defined(VECT_AND_DIGEST_5WAY)
6027  bool all_zero = VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
6028  if (all_zero)
6029  digest &= ~(mask << wave);
6030 #else
6031  const bm::bit_block_t::bunion_t* BMRESTRICT src_u0 = (const bm::bit_block_t::bunion_t*)(&src0[off]);
6032  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 = (const bm::bit_block_t::bunion_t*)(&src1[off]);
6033  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 = (const bm::bit_block_t::bunion_t*)(&src2[off]);
6034  const bm::bit_block_t::bunion_t* BMRESTRICT src_u3 = (const bm::bit_block_t::bunion_t*)(&src3[off]);
6036 
6037  bm::id64_t acc = 0;
6038  unsigned j = 0;
6039  do
6040  {
6041  acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
6042  acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
6043  acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
6044  acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
6045  j += 4;
6046  } while (j < bm::set_block_digest_wave_size / 2);
6047 
6048  if (!acc) // all zero
6049  digest &= ~(mask << wave);
6050 #endif
6051 
6052  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6053  } // while
6054 
6055  return digest;
6056 }
6057 
6058 
6059 /*!
6060  \brief digest based bit-block AND
6061 
6062  dst = src1 AND src2
6063 
6064  \param dst - destination block.
6065  \param src1 - source block.
6066  \param src2 - source block.
6067  \param digest - known initial digest
6068 
6069  \return new digest
6070 
6071  @ingroup bitfunc
6072 */
6073 inline
6075  const bm::word_t* BMRESTRICT src1,
6076  const bm::word_t* BMRESTRICT src2,
6077  bm::id64_t digest) BMNOEXCEPT
6078 {
6079  BM_ASSERT(dst);
6080  BM_ASSERT(src1 && src2);
6081  BM_ASSERT(dst != src1 && dst != src2);
6082 
6083  const bm::id64_t mask(1ull);
6084  bm::id64_t d = digest;
6085  while (d)
6086  {
6087  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6088 
6089  unsigned wave = bm::word_bitcount64(t - 1);
6090  unsigned off = wave * bm::set_block_digest_wave_size;
6091 
6092  #if defined(VECT_AND_DIGEST_2WAY)
6093  bool all_zero = VECT_AND_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
6094  if (all_zero)
6095  digest &= ~(mask << wave);
6096  #else
6097  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
6098  (const bm::bit_block_t::bunion_t*)(&src1[off]);
6099  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
6100  (const bm::bit_block_t::bunion_t*)(&src2[off]);
6102  (bm::bit_block_t::bunion_t*)(&dst[off]);
6103 
6104  bm::id64_t acc = 0;
6105  unsigned j = 0;
6106  do
6107  {
6108  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
6109  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
6110  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
6111  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
6112  j+=4;
6113  } while (j < bm::set_block_digest_wave_size/2);
6114 
6115  if (!acc) // all zero
6116  digest &= ~(mask << wave);
6117  #endif
6118 
6119  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6120  } // while
6121 
6122  return digest;
6123 }
6124 
6125 
6126 
6127 /*!
6128  \brief Function ANDs two bitblocks and computes the bitcount.
6129  Function does not analyse availability of source blocks.
6130 
6131  \param src1 - first bit block
6132  \param src2 - second bit block
6133 
6134  @ingroup bitfunc
6135 */
6136 inline
6138  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6139 {
6140  unsigned count;
6141  const bm::word_t* src1_end = src1 + bm::set_block_size;
6142 #ifdef BMVECTOPT
6143  count = VECT_BITCOUNT_AND(src1, src1_end, src2);
6144 #else
6145  count = 0;
6146 # ifdef BM64OPT
6147  const bm::id64_t* b1 = (bm::id64_t*) src1;
6148  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6149  const bm::id64_t* b2 = (bm::id64_t*) src2;
6150  do
6151  {
6152  count += bitcount64_4way(b1[0] & b2[0],
6153  b1[1] & b2[1],
6154  b1[2] & b2[2],
6155  b1[3] & b2[3]);
6156  b1 += 4;
6157  b2 += 4;
6158  } while (b1 < b1_end);
6159 # else
6160  do
6161  {
6162  BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]);
6163  BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]);
6164  BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]);
6165  BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]);
6166 
6167  src1+=4;
6168  src2+=4;
6169  } while (src1 < src1_end);
6170 # endif
6171 #endif
6172  return count;
6173 }
6174 
6175 
6176 /*!
6177  \brief Function ANDs two bitblocks and tests for any bit.
6178  Function does not analyse availability of source blocks.
6179 
6180  \param src1 - first bit block
6181  \param src2 - second bit block
6182 
6183  @ingroup bitfunc
6184 */
6185 inline
6186 unsigned bit_block_and_any(const bm::word_t* src1,
6187  const bm::word_t* src2) BMNOEXCEPT
6188 {
6189  unsigned count = 0;
6190  const bm::word_t* src1_end = src1 + bm::set_block_size;
6191  do
6192  {
6193  count = (src1[0] & src2[0]) |
6194  (src1[1] & src2[1]) |
6195  (src1[2] & src2[2]) |
6196  (src1[3] & src2[3]);
6197 
6198  src1+=4; src2+=4;
6199  } while ((src1 < src1_end) && !count);
6200  return count;
6201 }
6202 
6203 
6204 
6205 
6206 /*!
6207  \brief Function XORs two bitblocks and computes the bitcount.
6208  Function does not analyse availability of source blocks.
6209 
6210  \param src1 - first bit block
6211  \param src2 - second bit block
6212 
6213  @ingroup bitfunc
6214 */
6215 inline
6217  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6218 {
6219  unsigned count;
6220  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6221 #ifdef BMVECTOPT
6222  count = VECT_BITCOUNT_XOR(src1, src1_end, src2);
6223 #else
6224  count = 0;
6225 # ifdef BM64OPT
6226  const bm::id64_t* b1 = (bm::id64_t*) src1;
6227  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6228  const bm::id64_t* b2 = (bm::id64_t*) src2;
6229  do
6230  {
6231  count += bitcount64_4way(b1[0] ^ b2[0],
6232  b1[1] ^ b2[1],
6233  b1[2] ^ b2[2],
6234  b1[3] ^ b2[3]);
6235  b1 += 4;
6236  b2 += 4;
6237  } while (b1 < b1_end);
6238 # else
6239  do
6240  {
6241  BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]);
6242  BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]);
6243  BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]);
6244  BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]);
6245 
6246  src1+=4;
6247  src2+=4;
6248  } while (src1 < src1_end);
6249 # endif
6250 #endif
6251  return count;
6252 }
6253 
6254 
6255 /*!
6256  \brief Function XORs two bitblocks and and tests for any bit.
6257  Function does not analyse availability of source blocks.
6258 
6259  \param src1 - first bit block.
6260  \param src2 - second bit block.
6261 
6262  @ingroup bitfunc
6263 */
6264 inline
6266  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6267 {
6268  unsigned count = 0;
6269  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6270  do
6271  {
6272  count = (src1[0] ^ src2[0]) |
6273  (src1[1] ^ src2[1]) |
6274  (src1[2] ^ src2[2]) |
6275  (src1[3] ^ src2[3]);
6276 
6277  src1+=4; src2+=4;
6278  } while (!count && (src1 < src1_end));
6279  return count;
6280 }
6281 
6282 /*!
6283  \brief Function SUBs two bitblocks and computes the bitcount.
6284  Function does not analyse availability of source blocks.
6285 
6286  \param src1 - first bit block.
6287  \param src2 - second bit block.
6288 
6289  @ingroup bitfunc
6290 */
6291 inline
6293  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6294 {
6295  unsigned count;
6296  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6297 #ifdef BMVECTOPT
6298  count = VECT_BITCOUNT_SUB(src1, src1_end, src2);
6299 #else
6300  count = 0;
6301 # ifdef BM64OPT
6302  const bm::id64_t* b1 = (bm::id64_t*) src1;
6303  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6304  const bm::id64_t* b2 = (bm::id64_t*) src2;
6305  do
6306  {
6307  count += bitcount64_4way(b1[0] & ~b2[0],
6308  b1[1] & ~b2[1],
6309  b1[2] & ~b2[2],
6310  b1[3] & ~b2[3]);
6311  b1 += 4;
6312  b2 += 4;
6313  } while (b1 < b1_end);
6314 # else
6315  do
6316  {
6317  BM_INCWORD_BITCOUNT(count, src1[0] & ~src2[0]);
6318  BM_INCWORD_BITCOUNT(count, src1[1] & ~src2[1]);
6319  BM_INCWORD_BITCOUNT(count, src1[2] & ~src2[2]);
6320  BM_INCWORD_BITCOUNT(count, src1[3] & ~src2[3]);
6321 
6322  src1+=4;
6323  src2+=4;
6324  } while (src1 < src1_end);
6325 # endif
6326 #endif
6327  return count;
6328 }
6329 
6330 /*!
6331  \brief Function SUBs two bitblocks and and tests for any bit.
6332  Function does not analyse availability of source blocks.
6333 
6334  \param src1 - first bit block.
6335  \param src2 - second bit block.
6336 
6337  @ingroup bitfunc
6338 */
6339 inline
6341  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6342 {
6343  unsigned count = 0;
6344  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6345 
6346  do
6347  {
6348  count = (src1[0] & ~src2[0]) |
6349  (src1[1] & ~src2[1]) |
6350  (src1[2] & ~src2[2]) |
6351  (src1[3] & ~src2[3]);
6352 
6353  src1+=4; src2+=4;
6354  } while ((src1 < src1_end) && (count == 0));
6355  return count;
6356 }
6357 
6358 
6359 /*!
6360  \brief Function ORs two bitblocks and computes the bitcount.
6361  Function does not analyse availability of source blocks.
6362 
6363  \param src1 - first bit block
6364  \param src2 - second bit block.
6365 
6366  @ingroup bitfunc
6367 */
6368 inline
6369 unsigned bit_block_or_count(const bm::word_t* src1,
6370  const bm::word_t* src2) BMNOEXCEPT
6371 {
6372  unsigned count;
6373  const bm::word_t* src1_end = src1 + bm::set_block_size;
6374 #ifdef BMVECTOPT
6375  count = VECT_BITCOUNT_OR(src1, src1_end, src2);
6376 #else
6377  count = 0;
6378 # ifdef BM64OPT
6379  const bm::id64_t* b1 = (bm::id64_t*) src1;
6380  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6381  const bm::id64_t* b2 = (bm::id64_t*) src2;
6382  do
6383  {
6384  count += bitcount64_4way(b1[0] | b2[0],
6385  b1[1] | b2[1],
6386  b1[2] | b2[2],
6387  b1[3] | b2[3]);
6388  b1 += 4;
6389  b2 += 4;
6390  } while (b1 < b1_end);
6391 # else
6392  do
6393  {
6394  BM_INCWORD_BITCOUNT(count, src1[0] | src2[0]);
6395  BM_INCWORD_BITCOUNT(count, src1[1] | src2[1]);
6396  BM_INCWORD_BITCOUNT(count, src1[2] | src2[2]);
6397  BM_INCWORD_BITCOUNT(count, src1[3] | src2[3]);
6398 
6399  src1+=4;
6400  src2+=4;
6401  } while (src1 < src1_end);
6402 # endif
6403 #endif
6404  return count;
6405 }
6406 
6407 /*!
6408  \brief Function ORs two bitblocks and and tests for any bit.
6409  Function does not analyse availability of source blocks.
6410 
6411  \param src1 - first bit block.
6412  \param src2 - second bit block.
6413 
6414  @ingroup bitfunc
6415 */
6416 inline
6418  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6419 {
6420  unsigned count = 0;
6421  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6422  do
6423  {
6424  count = (src1[0] | src2[0]) |
6425  (src1[1] | src2[1]) |
6426  (src1[2] | src2[2]) |
6427  (src1[3] | src2[3]);
6428 
6429  src1+=4; src2+=4;
6430  } while (!count && (src1 < src1_end));
6431  return count;
6432 }
6433 
6434 
6435 
6436 
6437 /*!
6438  \brief bitblock AND operation.
6439 
6440  \param dst - destination block.
6441  \param src - source block.
6442 
6443  \returns pointer on destination block.
6444  If returned value equal to src means that block mutation requested.
6445  NULL is valid return value.
6446 
6447  @ingroup bitfunc
6448 */
6450  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6451 {
6452  BM_ASSERT(dst || src);
6453 
6454  bm::word_t* ret = dst;
6455 
6456  if (IS_VALID_ADDR(dst)) // The destination block already exists
6457  {
6458  if (!IS_VALID_ADDR(src))
6459  {
6460  if (IS_EMPTY_BLOCK(src))
6461  {
6462  //If the source block is zero
6463  //just clean the destination block
6464  return 0;
6465  }
6466  }
6467  else
6468  {
6469  // Regular operation AND on the whole block.
6470  //
6471  auto any = bm::bit_block_and(dst, src);
6472  if (!any)
6473  ret = 0;
6474  }
6475  }
6476  else // The destination block does not exist yet
6477  {
6478  if(!IS_VALID_ADDR(src))
6479  {
6480  if(IS_EMPTY_BLOCK(src))
6481  {
6482  // The source block is empty.
6483  // One argument empty - all result is empty.
6484  return 0;
6485  }
6486  // Here we have nothing to do.
6487  // Src block is all ON, dst block remains as it is
6488  }
6489  else // destination block does not exists, src - valid block
6490  {
6491  if (IS_FULL_BLOCK(dst))
6492  return const_cast<bm::word_t*>(src);
6493  // Nothng to do.
6494  // Dst block is all ZERO no combination required.
6495  }
6496  }
6497 
6498  return ret;
6499 }
6500 
6501 
6502 /*!
6503  \brief Performs bitblock AND operation and calculates bitcount of the result.
6504 
6505  \param src1 - first bit block.
6506  \param src2 - second bit block.
6507 
6508  \returns bitcount value
6509 
6510  @ingroup bitfunc
6511 */
6512 inline
6514  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6515 {
6516  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6517  return 0;
6518  if (src1 == FULL_BLOCK_FAKE_ADDR)
6519  src1 = FULL_BLOCK_REAL_ADDR;
6520  if (src2 == FULL_BLOCK_FAKE_ADDR)
6521  src2 = FULL_BLOCK_REAL_ADDR;
6522 
6523  return bit_block_and_count(src1, src2);
6524 }
6525 
6526 /*!
6527  \brief Performs bitblock AND operation test.
6528 
6529  \param src1 - first bit block.
6530  \param src2 - second bit block.
6531 
6532  \returns non zero if there is any value
6533 
6534  @ingroup bitfunc
6535 */
6536 inline
6538  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6539 {
6540  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6541  return 0;
6542  if (src1 == FULL_BLOCK_FAKE_ADDR)
6543  src1 = FULL_BLOCK_REAL_ADDR;
6544  if (src2 == FULL_BLOCK_FAKE_ADDR)
6545  src2 = FULL_BLOCK_REAL_ADDR;
6546  return bit_block_and_any(src1, src2);
6547 }
6548 
6549 
6550 
6551 /*!
6552  \brief Performs bitblock SUB operation and calculates bitcount of the result.
6553 
6554  \param src1 - first bit block.
6555  \param src2 - second bit block
6556 
6557  \returns bitcount value
6558 
6559  @ingroup bitfunc
6560 */
6561 inline
6563  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6564 {
6565  if (src1 == src2)
6566  return 0;
6567  if (IS_EMPTY_BLOCK(src1))
6568  return 0;
6569  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
6570  {
6571  if (IS_FULL_BLOCK(src1))
6572  return bm::gap_max_bits;
6573  return bm::bit_block_count(src1);
6574  }
6575  if (IS_FULL_BLOCK(src2))
6576  return 0;
6577 
6578  if (src1 == FULL_BLOCK_FAKE_ADDR)
6579  src1 = FULL_BLOCK_REAL_ADDR;
6580  if (src2 == FULL_BLOCK_FAKE_ADDR)
6581  src2 = FULL_BLOCK_REAL_ADDR;
6582 
6583  return bm::bit_block_sub_count(src1, src2);
6584 }
6585 
6586 
6587 /*!
6588  \brief Performs inverted bitblock SUB operation and calculates
6589  bitcount of the result.
6590 
6591  \param src1 - first bit block.
6592  \param src2 - second bit block
6593 
6594  \returns bitcount value
6595 
6596  @ingroup bitfunc
6597 */
6598 inline
6600  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6601 {
6602  return bit_operation_sub_count(src2, src1);
6603 }
6604 
6605 
6606 /*!
6607  \brief Performs bitblock test of SUB operation.
6608 
6609  \param src1 - first bit block.
6610  \param src2 - second bit block
6611 
6612  \returns non zero value if there are any bits
6613 
6614  @ingroup bitfunc
6615 */
6616 inline
6618  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6619 {
6620  if (IS_EMPTY_BLOCK(src1))
6621  return 0;
6622  if (src1 == src2)
6623  return 0;
6624 
6625  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
6626  return !bit_is_all_zero(src1);
6627 
6628  if (IS_FULL_BLOCK(src2))
6629  return 0;
6630 
6631  if (src1 == FULL_BLOCK_FAKE_ADDR)
6632  src1 = FULL_BLOCK_REAL_ADDR;
6633  if (src2 == FULL_BLOCK_FAKE_ADDR)
6634  src2 = FULL_BLOCK_REAL_ADDR;
6635 
6636  return bm::bit_block_sub_any(src1, src2);
6637 }
6638 
6639 
6640 
6641 /*!
6642  \brief Performs bitblock OR operation and calculates bitcount of the result.
6643 
6644  \param src1 - first bit block.
6645  \param src2 - second bit block.
6646 
6647  \returns bitcount value
6648 
6649  @ingroup bitfunc
6650 */
6651 inline
6653  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6654 {
6655  if (IS_FULL_BLOCK(src1) || IS_FULL_BLOCK(src2))
6656  return bm::gap_max_bits;
6657 
6658  if (IS_EMPTY_BLOCK(src1))
6659  {
6660  if (!IS_EMPTY_BLOCK(src2))
6661  return bm::bit_block_count(src2);
6662  else
6663  return 0; // both blocks are empty
6664  }
6665  else
6666  {
6667  if (IS_EMPTY_BLOCK(src2))
6668  return bm::bit_block_count(src1);
6669  }
6670  if (src1 == FULL_BLOCK_FAKE_ADDR)
6671  src1 = FULL_BLOCK_REAL_ADDR;
6672  if (src2 == FULL_BLOCK_FAKE_ADDR)
6673  src2 = FULL_BLOCK_REAL_ADDR;
6674 
6675  return bm::bit_block_or_count(src1, src2);
6676 }
6677 
6678 /*!
6679  \brief Performs bitblock OR operation test.
6680 
6681  \param src1 - first bit block.
6682  \param src2 - second bit block.
6683 
6684  \returns non zero value if there are any bits
6685 
6686  @ingroup bitfunc
6687 */
6688 inline
6690  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6691 {
6692  if (IS_EMPTY_BLOCK(src1))
6693  {
6694  if (!IS_EMPTY_BLOCK(src2))
6695  return !bit_is_all_zero(src2);
6696  else
6697  return 0; // both blocks are empty
6698  }
6699  else
6700  {
6701  if (IS_EMPTY_BLOCK(src2))
6702  return !bit_is_all_zero(src1);
6703  }
6704  if (src1 == FULL_BLOCK_FAKE_ADDR)
6705  src1 = FULL_BLOCK_REAL_ADDR;
6706  if (src2 == FULL_BLOCK_FAKE_ADDR)
6707  src2 = FULL_BLOCK_REAL_ADDR;
6708