BitMagic-C++
bmfunc.h
Go to the documentation of this file.
1 #ifndef BMFUNC__H__INCLUDED__
2 #define BMFUNC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmfunc.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include <memory.h>
26 
27 #include "bmdef.h"
28 #include "bmutil.h"
29 
30 
31 #ifdef _MSC_VER
32 # pragma warning( disable: 4146 )
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 inline
41  bm::word_t left,
42  bm::word_t right) BMNOEXCEPT;
43 
44 inline
46  bm::word_t left,
47  bm::word_t right) BMNOEXCEPT;
48 
49 /*!
50  @brief Structure with statistical information about memory
51  allocation footprint, serialization projection, number of vectors
52  @ingroup bvector
53 */
55 {
56  size_t bit_blocks; ///< Number of bit blocks
57  size_t gap_blocks; ///< Number of GAP blocks
58  size_t ptr_sub_blocks; ///< Number of sub-blocks
59  size_t bv_count; ///< Number of bit-vectors
60  size_t max_serialize_mem; ///< estimated maximum memory for serialization
61  size_t memory_used; ///< memory usage for all blocks and service tables
62  size_t gap_cap_overhead; ///< gap memory overhead between length and capacity
63  gap_word_t gap_levels[bm::gap_levels]; ///< GAP block lengths in the bvect
64  unsigned long long gaps_by_level[bm::gap_levels]; ///< number of GAP blocks at each level
65 
66  /// cound bit block
67  void add_bit_block() BMNOEXCEPT
68  {
69  ++bit_blocks;
70  size_t mem_used = sizeof(bm::word_t) * bm::set_block_size;
71  memory_used += mem_used;
72  max_serialize_mem += mem_used;
73  }
74 
75  /// count gap block
76  void add_gap_block(unsigned capacity, unsigned length) BMNOEXCEPT
77  {
78  ++gap_blocks;
79  size_t mem_used = (capacity * sizeof(gap_word_t));
80  memory_used += mem_used;
81  max_serialize_mem += (unsigned)(length * sizeof(gap_word_t));
82  BM_ASSERT(length <= capacity);
83  gap_cap_overhead += (capacity - length) * sizeof(gap_word_t);
84  for (unsigned i = 0; i < bm::gap_levels; ++i)
85  {
86  if (capacity == gap_levels[i])
87  {
88  gaps_by_level[i]++;
89  return;
90  }
91  }
92  BM_ASSERT(0);
93  }
94 
95  /// Reset statisctics
96  void reset() BMNOEXCEPT
97  {
98  bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
99  max_serialize_mem = memory_used = gap_cap_overhead = 0;
100  for (unsigned i = 0; i < bm::gap_levels; ++i)
101  gaps_by_level[i] = 0ull;
102  }
103 
104  /// Sum data from another sttructure
105  void add(const bv_statistics& st) BMNOEXCEPT
106  {
107  bit_blocks += st.bit_blocks;
108  gap_blocks += st.gap_blocks;
109  ptr_sub_blocks += st.ptr_sub_blocks;
110  bv_count += st.bv_count;
111  max_serialize_mem += st.max_serialize_mem + 8;
112  memory_used += st.memory_used;
113  gap_cap_overhead += st.gap_cap_overhead;
114  }
115 };
116 
117 /**
118  @brief Pair type
119 */
120 template<typename First, typename Second>
121 struct pair
122 {
123  First first;
124  Second second;
125 
126  pair(First f, Second s) : first(f), second(s) {}
127 };
128 
129 /**
130  \brief bit-decode cache structure
131 */
133 {
134  unsigned short bits[65]; //< decoded bits
135  unsigned bcnt; //< length of bits array
136  bm::id64_t cvalue; //< cache decoded value
137 
138  bit_decode_cache() : bcnt(0), cvalue(0) {}
139 };
140 
141 
142 /**
143  \brief Recalc linear bvector block index into 2D matrix coordinates
144  \internal
145 */
146 template<typename BI_TYPE>
148 void get_block_coord(BI_TYPE nb, unsigned& i, unsigned& j) BMNOEXCEPT
149 {
150  i = unsigned(nb >> bm::set_array_shift); // top block address
151  j = unsigned(nb & bm::set_array_mask); // address in sub-block
152 }
153 
154 /**
155  Compute bit address of the first bit in a superblock
156  \internal
157 */
158 template<typename RTYPE>
159 BMFORCEINLINE RTYPE get_super_block_start(unsigned i) BMNOEXCEPT
160 {
161  return RTYPE(i) * bm::set_sub_total_bits;
162 }
163 
164 /**
165  Compute bit address of the first bit in a block
166  \internal
167 */
168 template<typename RTYPE>
169 BMFORCEINLINE RTYPE get_block_start(unsigned i, unsigned j) BMNOEXCEPT
170 {
171  RTYPE idx = bm::get_super_block_start<RTYPE>(i);
172  idx += (j) * bm::gap_max_bits;
173  return idx;
174 }
175 
176 
177 /*!
178  @defgroup gapfunc GAP functions
179  GAP functions implement different opereations on GAP compressed blocks (internals)
180  and serve as a minimal building blocks.
181  \internal
182  @ingroup bvector
183  */
184 
185 /*!
186  @defgroup bitfunc BIT functions
187  Bit functions implement different opereations on bit blocks (internals)
188  and serve as a minimal building blocks.
189  \internal
190  @ingroup bvector
191  */
192 
193 
194 /*!
195  Returns bit count
196  @ingroup bitfunc
197 */
200 {
201 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
202  return bm::id_t(_mm_popcnt_u32(w));
203 #else
204  #if defined(BM_USE_GCC_BUILD)
205  return (bm::id_t)__builtin_popcount(w);
206  #else
207  return
208  bm::bit_count_table<true>::_count[(unsigned char)(w)] +
209  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
210  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
211  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
212  #endif
213 #endif
214 }
215 
216 inline
217 int parallel_popcnt_32(unsigned int n) BMNOEXCEPT
218 {
219  unsigned int tmp;
220 
221  tmp = n - ((n >> 1) & 033333333333)
222  - ((n >> 2) & 011111111111);
223  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
224 }
225 
226 
227 /*!
228  Function calculates number of 1 bits in 64-bit word.
229  @ingroup bitfunc
230 */
232 unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
233 {
234 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
235  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
236  return unsigned(_mm_popcnt_u64(x));
237  #else // 32-bit
238  return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
239  #endif
240 #else
241  #if defined(BM_USE_GCC_BUILD)
242  return (unsigned)__builtin_popcountll(x);
243  #else
244  #if (defined(__arm__)) // 32-bit
245  return bm::word_bitcount(x >> 32) + bm::word_bitcount((unsigned)x);
246  #else
247  x = x - ((x >> 1) & 0x5555555555555555);
248  x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
249  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
250  x = x + (x >> 8);
251  x = x + (x >> 16);
252  x = x + (x >> 32);
253  return x & 0xFF;
254  #endif
255  #endif
256 #endif
257 }
258 
259 /*!
260  Parallel popcount on 4x 64-bit words
261  @ingroup bitfunc
262 */
263 inline
265  bm::id64_t u, bm::id64_t v) BMNOEXCEPT
266 {
267  const bm::id64_t m1 = 0x5555555555555555U;
268  const bm::id64_t m2 = 0x3333333333333333U;
269  const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0FU;
270  const bm::id64_t m4 = 0x000000FF000000FFU;
271 
272  x = x - ((x >> 1) & m1);
273  y = y - ((y >> 1) & m1);
274  u = u - ((u >> 1) & m1);
275  v = v - ((v >> 1) & m1);
276  x = (x & m2) + ((x >> 2) & m2);
277  y = (y & m2) + ((y >> 2) & m2);
278  u = (u & m2) + ((u >> 2) & m2);
279  v = (v & m2) + ((v >> 2) & m2);
280  x = x + y;
281  u = u + v;
282  x = (x & m3) + ((x >> 4) & m3);
283  u = (u & m3) + ((u >> 4) & m3);
284  x = x + u;
285  x = x + (x >> 8);
286  x = x + (x >> 16);
287  x = x & m4;
288  x = x + (x >> 32);
289  return x & 0x000001FFU;
290 }
291 
292 
293 // --------------------------------------------------------------
294 // Functions for bit-scanning
295 // --------------------------------------------------------------
296 
297 /*!
298  \brief Templated algorithm to unpacks octet based word into list of ON bit indexes
299  \param w - value
300  \param func - bit functor
301 
302  @ingroup bitfunc
303 */
304 template<typename T, typename F>
305 void bit_for_each_4(T w, F& func)
306 {
307  for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
308  {
309  switch (w & 15) // 1111
310  {
311  case 0: // 0000
312  break;
313  case 1: // 0001
314  func(sub_octet);
315  break;
316  case 2: // 0010
317  func(sub_octet + 1);
318  break;
319  case 3: // 0011
320  func(sub_octet, sub_octet + 1);
321  break;
322  case 4: // 0100
323  func(sub_octet + 2);
324  break;
325  case 5: // 0101
326  func(sub_octet, sub_octet + 2);
327  break;
328  case 6: // 0110
329  func(sub_octet + 1, sub_octet + 2);
330  break;
331  case 7: // 0111
332  func(sub_octet, sub_octet + 1, sub_octet + 2);
333  break;
334  case 8: // 1000
335  func(sub_octet + 3);
336  break;
337  case 9: // 1001
338  func(sub_octet, sub_octet + 3);
339  break;
340  case 10: // 1010
341  func(sub_octet + 1, sub_octet + 3);
342  break;
343  case 11: // 1011
344  func(sub_octet, sub_octet + 1, sub_octet + 3);
345  break;
346  case 12: // 1100
347  func(sub_octet + 2, sub_octet + 3);
348  break;
349  case 13: // 1101
350  func(sub_octet, sub_octet + 2, sub_octet + 3);
351  break;
352  case 14: // 1110
353  func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
354  break;
355  case 15: // 1111
356  func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
357  break;
358  default:
359  BM_ASSERT(0);
360  break;
361  }
362 
363  } // for
364 }
365 
366 
367 /*!
368  \brief Templated algorithm to unpacks word into list of ON bit indexes
369  \param w - value
370  \param func - bit functor
371 
372  @ingroup bitfunc
373 */
374 template<typename T, typename F>
375 void bit_for_each(T w, F& func)
376 {
377  // Note: 4-bit table method works slower than plain check approach
378  for (unsigned octet = 0; w != 0; w >>= 8, octet += 8)
379  {
380  if (w & 1) func(octet + 0);
381  if (w & 2) func(octet + 1);
382  if (w & 4) func(octet + 2);
383  if (w & 8) func(octet + 3);
384  if (w & 16) func(octet + 4);
385  if (w & 32) func(octet + 5);
386  if (w & 64) func(octet + 6);
387  if (w & 128) func(octet + 7);
388 
389  } // for
390 }
391 
392 /**
393  portable, switch based bitscan
394  @internal
395  @ingroup bitfunc
396  */
397 inline
398 unsigned bitscan_nibble(unsigned w, unsigned* bits) BMNOEXCEPT
399 {
400  unsigned cnt = 0;
401  for (unsigned sub_octet = 0; w; w >>= 4, sub_octet+=4)
402  {
403  switch (w & 15) // 1111
404  {
405  //case 0: // 0000
406  // break;
407  case 1: // 0001
408  bits[cnt++] = 0 + sub_octet;
409  break;
410  case 2: // 0010
411  bits[cnt++] = 1 + sub_octet;
412  break;
413  case 3: // 0011
414  bits[cnt] = 0 + sub_octet;
415  bits[cnt+1] = 1 + sub_octet;
416  cnt += 2;
417  break;
418  case 4: // 0100
419  bits[cnt++] = 2 + sub_octet;
420  break;
421  case 5: // 0101
422  bits[cnt+0] = 0 + sub_octet;
423  bits[cnt+1] = 2 + sub_octet;
424  cnt += 2;
425  break;
426  case 6: // 0110
427  bits[cnt+0] = 1 + sub_octet;
428  bits[cnt+1] = 2 + sub_octet;
429  cnt += 2;
430  break;
431  case 7: // 0111
432  bits[cnt+0] = 0 + sub_octet;
433  bits[cnt+1] = 1 + sub_octet;
434  bits[cnt+2] = 2 + sub_octet;
435  cnt += 3;
436  break;
437  case 8: // 1000
438  bits[cnt++] = 3 + sub_octet;
439  break;
440  case 9: // 1001
441  bits[cnt+0] = 0 + sub_octet;
442  bits[cnt+1] = 3 + sub_octet;
443  cnt += 2;
444  break;
445  case 10: // 1010
446  bits[cnt+0] = 1 + sub_octet;
447  bits[cnt+1] = 3 + sub_octet;
448  cnt += 2;
449  break;
450  case 11: // 1011
451  bits[cnt+0] = 0 + sub_octet;
452  bits[cnt+1] = 1 + sub_octet;
453  bits[cnt+2] = 3 + sub_octet;
454  cnt += 3;
455  break;
456  case 12: // 1100
457  bits[cnt+0] = 2 + sub_octet;
458  bits[cnt+1] = 3 + sub_octet;
459  cnt += 2;
460  break;
461  case 13: // 1101
462  bits[cnt+0] = 0 + sub_octet;
463  bits[cnt+1] = 2 + sub_octet;
464  bits[cnt+2] = 3 + sub_octet;
465  cnt += 3;
466  break;
467  case 14: // 1110
468  bits[cnt+0] = 1 + sub_octet;
469  bits[cnt+1] = 2 + sub_octet;
470  bits[cnt+2] = 3 + sub_octet;
471  cnt += 3;
472  break;
473  case 15: // 1111
474  bits[cnt+0] = 0 + sub_octet;
475  bits[cnt+1] = 1 + sub_octet;
476  bits[cnt+2] = 2 + sub_octet;
477  bits[cnt+3] = 3 + sub_octet;
478  cnt += 4;
479  break;
480  default:
481  break;
482  }
483  } // for
484  return cnt;
485 }
486 
487 #ifdef BM_NONSTANDARD_EXTENTIONS
488 #ifdef __GNUC__
489 /**
490  bitscan based on computed goto (GCC/clang)
491  @internal
492  @ingroup bitfunc
493  */
494 inline
495 unsigned bitscan_nibble_gcc(unsigned w, unsigned* bits) BMNOEXCEPT
496 {
497  static void* d_table[] = { &&l0,
498  &&l1, &&l3_1, &&l3, &&l7_1, &&l5, &&l7_0, &&l7, &&l15_1,
499  &&l9, &&l11_0, &&l11, &&l15_0, &&l13, &&l14, &&l15 };
500  unsigned cnt = 0;
501 
502  for (unsigned sub_octet = 0; w; w >>= 4, sub_octet+=4)
503  {
504  goto *d_table[w & 15];
505  l1: // 0001
506  bits[cnt++] = sub_octet;
507  continue;
508  l3: // 0011
509  bits[cnt++] = sub_octet;
510  l3_1:
511  bits[cnt++] = 1 + sub_octet;
512  continue;
513  l5: // 0101
514  bits[cnt++] = sub_octet;
515  goto l7_1;
516  l7: // 0111
517  bits[cnt++] = sub_octet;
518  l7_0:
519  bits[cnt++] = 1 + sub_octet;
520  l7_1:
521  bits[cnt++] = 2 + sub_octet;
522  continue;
523  l9: // 1001
524  bits[cnt++] = sub_octet;
525  goto l15_1;
526  continue;
527  l11: // 1011
528  bits[cnt++] = sub_octet;
529  l11_0:
530  bits[cnt++] = 1 + sub_octet;
531  bits[cnt++] = 3 + sub_octet;
532  continue;
533  l13: // 1101
534  bits[cnt++] = sub_octet;
535  goto l15_0;
536  l14: // 1110
537  bits[cnt++] = 1 + sub_octet;
538  goto l15_0;
539  l15: // 1111
540  bits[cnt++] = 0 + sub_octet;
541  bits[cnt++] = 1 + sub_octet;
542  l15_0:
543  bits[cnt++] = 2 + sub_octet;
544  l15_1:
545  bits[cnt++] = 3 + sub_octet;
546  l0:
547  continue;
548  } // for
549  return cnt;
550 }
551 
552 #endif
553 #endif
554 
555 /*! @brief Adaptor to copy 1 bits to array
556  @internal
557 */
558 template<typename B>
560 {
561 public:
562  copy_to_array_functor(B* bits): bp_(bits)
563  {}
564 
565  B* ptr() { return bp_; }
566 
567  void operator()(unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
568 
569  void operator()(unsigned bit_idx0,
570  unsigned bit_idx1) BMNOEXCEPT
571  {
572  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
573  bp_+=2;
574  }
575 
576  void operator()(unsigned bit_idx0,
577  unsigned bit_idx1,
578  unsigned bit_idx2) BMNOEXCEPT
579  {
580  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
581  bp_+=3;
582  }
583 
584  void operator()(unsigned bit_idx0,
585  unsigned bit_idx1,
586  unsigned bit_idx2,
587  unsigned bit_idx3) BMNOEXCEPT
588  {
589  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
590  bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
591  bp_+=4;
592  }
593 
594 private:
596  copy_to_array_functor& operator=(const copy_to_array_functor&);
597 private:
598  B* bp_;
599 };
600 
601 
602 /*!
603  \brief Unpacks word into list of ON bit indexes
604  \param w - value
605  \param bits - pointer on the result array
606  \return number of bits in the list
607 
608  @ingroup bitfunc
609 */
610 template<typename T,typename B>
611 unsigned bit_list(T w, B* bits) BMNOEXCEPT
612 {
613  copy_to_array_functor<B> func(bits);
614  bit_for_each(w, func);
615  return (unsigned)(func.ptr() - bits);
616 }
617 
618 
619 
620 /*!
621  \brief Unpacks word into list of ON bit indexes (quad-bit based)
622  \param w - value
623  \param bits - pointer on the result array
624  \return number of bits in the list
625 
626  @ingroup bitfunc
627 */
628 template<typename T,typename B>
629 unsigned bit_list_4(T w, B* bits) BMNOEXCEPT
630 {
631  copy_to_array_functor<B> func(bits);
632  bit_for_each_4(w, func);
633  return (unsigned)(func.ptr() - bits);
634 }
635 
636 /*!
637  \brief Unpacks word into list of ON bit indexes using popcnt method
638  \param w - value
639  \param bits - pointer on the result array
640  \param offs - value to add to bit position (programmed shift)
641  \return number of bits in the list
642 
643  @ingroup bitfunc
644  @internal
645 */
646 template<typename B>
647 unsigned short
648 bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs) BMNOEXCEPT
649 {
650  unsigned pos = 0;
651  while (w)
652  {
653  bm::id_t t = w & -w;
654  bits[pos++] = (B)(bm::word_bitcount(t - 1) + offs);
655  w &= w - 1;
656  }
657  return (unsigned short)pos;
658 }
659 
660 /*!
661  \brief Unpacks word into list of ON bit indexes using popcnt method
662  \param w - value
663  \param bits - pointer on the result array
664  \return number of bits in the list
665 
666  @ingroup bitfunc
667  @internal
668 */
669 template<typename B>
670 unsigned short bitscan_popcnt(bm::id_t w, B* bits) BMNOEXCEPT
671 {
672  unsigned pos = 0;
673  while (w)
674  {
675  bm::id_t t = w & -w;
676  bits[pos++] = (B)(bm::word_bitcount(t - 1));
677  w &= w - 1;
678  }
679  return (unsigned short)pos;
680 }
681 
682 
683 /*!
684  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
685  \param w - value
686  \param bits - pointer on the result array
687  \return number of bits in the list
688  @ingroup bitfunc
689 */
690 template<typename B>
691 unsigned short bitscan_popcnt64(bm::id64_t w, B* bits) BMNOEXCEPT
692 {
693  unsigned short pos = 0;
694  while (w)
695  {
696  bm::id64_t t = bmi_blsi_u64(w); // w & -w;
697  bits[pos++] = (B) bm::word_bitcount64(t - 1);
698  w = bmi_bslr_u64(w); // w &= w - 1;
699  }
700  return pos;
701 }
702 
703 /*!
704  \brief Unpacks word into list of ON bits (BSF/__builtin_ctz)
705  \param w - value
706  \param bits - pointer on the result array
707  \return number of bits in the list
708 
709  @ingroup bitfunc
710  @internal
711 */
712 template<typename B>
713 unsigned short bitscan_bsf(unsigned w, B* bits) BMNOEXCEPT
714 {
715  unsigned short pos = 0;
716  while (w)
717  {
718  bits[pos++] = count_trailing_zeros_u32(w);
719  w &= w - 1;
720  }
721  return pos;
722 }
723 
724 template<typename B, typename OT>
725 unsigned short bitscan_bsf(unsigned w, B* bits, OT offs) BMNOEXCEPT
726 {
727  unsigned short pos = 0;
728  while (w)
729  {
730  bits[pos++] = (B) (bm::count_trailing_zeros_u32(w) + offs);
731  w &= w - 1;
732  }
733  return pos;
734 }
735 
736 /*!
737  \brief Unpacks word into list of ON bits (BSF/__builtin_ctz)
738  \param w - value
739  \param bits - pointer on the result array
740  \return number of bits in the list
741 
742  @ingroup bitfunc
743  @internal
744 */
745 template<typename B>
746 unsigned short bitscan_bsf64(bm::id64_t w, B* bits) BMNOEXCEPT
747 {
748  unsigned short pos = 0;
749  while (w)
750  {
751  bits[pos++] = bm::count_trailing_zeros_u64(w);
752  w &= w - 1;
753  }
754  return pos;
755 }
756 
757 
758 /*!
759  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
760  \param w - value
761  \param bits - pointer on the result array
762  \param offs - value to add to bit position (programmed shift)
763  \return number of bits in the list
764  @ingroup bitfunc
765 */
766 template<typename B>
767 unsigned short
768 bitscan_popcnt64(bm::id64_t w, B* bits, unsigned short offs) BMNOEXCEPT
769 {
770  unsigned short pos = 0;
771  while (w)
772  {
773  bm::id64_t t = bmi_blsi_u64(w); // w & -w;
774  bits[pos++] = B(bm::word_bitcount64(t - 1) + offs);
775  w = bmi_bslr_u64(w); // w &= w - 1;
776  }
777  return pos;
778 }
779 
780 /**
781  Templated Bitscan with dynamic dispatch for best type
782  @ingroup bitfunc
783  */
784 template<typename V, typename B>
785 unsigned short bitscan(V w, B* bits) BMNOEXCEPT
786 {
787 #if (defined(__arm__) || defined(__aarch64__))
788  if (bm::conditional<sizeof(V) == 8>::test())
789  return bm::bitscan_bsf64(w, bits);
790  else
791  return bm::bitscan_bsf((bm::word_t)w, bits);
792 #else
793  if (bm::conditional<sizeof(V) == 8>::test())
794  return bm::bitscan_popcnt64(w, bits);
795  else
796  return bm::bitscan_popcnt((bm::word_t)w, bits);
797 #endif
798 }
799 
800 // --------------------------------------------------------------
801 // Functions for select
802 // --------------------------------------------------------------
803 
804 /**
805  \brief word find index of the rank-th bit set by bit-testing
806  \param w - 64-bit work to search
807  \param rank - rank to select (should be > 0)
808 
809  \return selected value (inxed of bit set)
810  @ingroup bitfunc
811  @internal
812 */
813 inline
814 unsigned word_select64_linear(bm::id64_t w, unsigned rank) BMNOEXCEPT
815 {
816  BM_ASSERT(w);
817  BM_ASSERT(rank);
818 
819  for (unsigned count = 0; w; w >>=1ull, ++count)
820  {
821  rank -= unsigned(w & 1ull);
822  if (!rank)
823  return count;
824  }
825  BM_ASSERT(0); // shoud not be here if rank is achievable
826  return ~0u;
827 }
828 
829 /**
830  \brief word find index of the rank-th bit set by bit-testing
831  \param w - 64-bit work to search
832  \param rank - rank to select (should be > 0)
833 
834  \return selected value (inxed of bit set)
835  @ingroup bitfunc
836  @internal
837 
838 */
839 inline
840 unsigned word_select64_bitscan_popcnt(bm::id64_t w, unsigned rank) BMNOEXCEPT
841 {
842  BM_ASSERT(w);
843  BM_ASSERT(rank);
844  BM_ASSERT(rank <= bm::word_bitcount64(w));
845 
846  do
847  {
848  --rank;
849  if (!rank)
850  break;
851  w &= w - 1;
852  } while (1);
853  bm::id64_t t = w & -w;
854  unsigned count = bm::word_bitcount64(t - 1);
855  return count;
856 }
857 
858 /**
859  \brief word find index of the rank-th bit set by bit-testing
860  \param w - 64-bit work to search
861  \param rank - rank to select (should be > 0)
862 
863  \return selected value (inxed of bit set)
864  @ingroup bitfunc
865  @internal
866 
867 */
868 inline
869 unsigned word_select64_bitscan_tz(bm::id64_t w, unsigned rank) BMNOEXCEPT
870 {
871  BM_ASSERT(w);
872  BM_ASSERT(rank);
873  BM_ASSERT(rank <= bm::word_bitcount64(w));
874 
875  do
876  {
877  if (!(--rank))
878  break;
879  w &= w - 1;
880  } while (1);
881  bm::id64_t t = w & -w;
882  unsigned count = bm::count_trailing_zeros_u64(t);
883  return count;
884 }
885 
886 
887 
888 /**
889  \brief word find index of the rank-th bit set by bit-testing
890  \param w - 32-bit work to search
891  \param rank - rank to select (should be > 0)
892 
893  \return selected value (inxed of bit set)
894  @ingroup bitfunc
895  @internal
896 */
897 inline
898 unsigned word_select32_bitscan_popcnt(unsigned w, unsigned rank) BMNOEXCEPT
899 {
900  BM_ASSERT(w);
901  BM_ASSERT(rank);
902  BM_ASSERT(rank <= bm::word_bitcount(w));
903 
904  do
905  {
906  --rank;
907  if (!rank)
908  break;
909  w &= w - 1;
910  } while (1);
911  unsigned t = w & -w;
912  unsigned count = bm::word_bitcount(t - 1);
913  return count;
914 }
915 
916 /**
917  \brief word find index of the rank-th bit set by bit-testing
918  \param w - 32-bit work to search
919  \param rank - rank to select (should be > 0)
920 
921  \return selected value (inxed of bit set)
922  @ingroup bitfunc
923  @internal
924 
925 */
926 inline
927 unsigned word_select32_bitscan_tz(unsigned w, unsigned rank) BMNOEXCEPT
928 {
929  BM_ASSERT(w);
930  BM_ASSERT(rank);
931  BM_ASSERT(rank <= bm::word_bitcount(w));
932 
933  do
934  {
935  if (!(--rank))
936  break;
937  w &= w - 1;
938  } while (1);
939  return bm::count_trailing_zeros_u32(w & -w);
940 }
941 
942 
943 /**
944  \brief word find index of the rank-th bit set by bit-testing
945  \param w - 64-bit work to search
946  \param rank - rank to select (should be > 0)
947 
948  \return selected value (inxed of bit set)
949  @ingroup bitfunc
950  @internal
951 */
952 inline
953 unsigned word_select64(bm::id64_t w, unsigned rank) BMNOEXCEPT
954 {
955 #if defined(BMI2_SELECT64)
956  return BMI2_SELECT64(w, rank);
957 #else
958  #if defined(BMI1_SELECT64)
959  return BMI2_SELECT64(w, rank);
960  #else
961  #if (defined(__arm__) || defined(__aarch64__))
962  return bm::word_select64_bitscan_tz(w, rank);
963  #else
964  return bm::word_select64_bitscan_popcnt(w, rank);
965  #endif
966  #endif
967 #endif
968 }
969 
970 /**
971  \brief word find index of the rank-th bit set by bit-testing
972  \param w - 32-bit work to search
973  \param rank - rank to select (should be > 0)
974 
975  \return selected value (inxed of bit set)
976  @ingroup bitfunc
977  @internal
978 */
979 inline
980 unsigned word_select32(unsigned w, unsigned rank) BMNOEXCEPT
981 {
982 #if defined(BMI2_SELECT64)
983  return BMI2_SELECT64(w, rank);
984 #else
985  #if defined(BMI1_SELECT64)
986  return BMI2_SELECT64(w, rank);
987  #else
988  #if (defined(__arm__) || defined(__aarch64__))
989  return bm::word_select32_bitscan_tz(w, rank);
990  #else
991  return bm::word_select32_bitscan_popcnt(w, rank);
992  #endif
993  #endif
994 #endif
995 }
996 
997 
998 
999 // --------------------------------------------------------------
1000 // Functions for bit-block digest calculation
1001 // --------------------------------------------------------------
1002 
1003 /*!
1004  \brief Compute digest mask for word address in block
1005 
1006  @return digest bit-mask
1007 
1008  @ingroup bitfunc
1009  @internal
1010 */
1012 bm::id64_t widx_to_digest_mask(unsigned w_idx) BMNOEXCEPT
1013 {
1014  bm::id64_t mask(1ull);
1015  return mask << (w_idx / bm::set_block_digest_wave_size);
1016 }
1017 
1018 /** digest mask control generation (for debug and test only)
1019  @internal
1020  */
1021 inline
1022 bm::id64_t dm_control(unsigned from, unsigned to) BMNOEXCEPT
1023 {
1024  bm::id64_t m = 0;
1025  for (unsigned i = from; i <= to; ++i)
1026  m |= (1ull << (i / 1024));
1027  return m;
1028 }
1029 
1030 
1031 /**
1032  \brief Compute digest mask for [from..to] positions
1033  \param from - range from (in bit-block coordinates)
1034  \param to - range to (in bit-block coordinates)
1035 
1036  @ingroup bitfunc
1037  @internal
1038 */
1040 bm::id64_t digest_mask(unsigned from, unsigned to) BMNOEXCEPT
1041 {
1042  BM_ASSERT(from <= to);
1044 
1045  bm::id64_t digest_from = from >> bm::set_block_digest_pos_shift;
1046  bm::id64_t digest_to = to >> bm::set_block_digest_pos_shift;;
1047  bm::id64_t mask(~0ull);
1048  mask = (mask >> (63 - (digest_to - digest_from))) << digest_from;
1049 
1050  //BM_ASSERT(mask == bm::dm_control(from, to));
1051 
1052  return mask;
1053 }
1054 
1055 
1056 /*!
1057  \brief check if all digest bits for the range [from..to] are 0
1058 
1059  \param digest - current digest
1060  \param bitpos_from - range from (in bit-block coordinates)
1061  \param bitpos_to - range to (in bit-block coordinates)
1062 
1063  @return true if all range is zero
1064 
1065  @ingroup bitfunc
1066  @internal
1067 */
1068 inline
1070  unsigned bitpos_from, unsigned bitpos_to) BMNOEXCEPT
1071 {
1072  bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
1073  return !(digest & mask);
1074 }
1075 
1076 /*!
1077  \brief Init block with 000111000 pattren based on digest
1078  \param block - Bit block [out]
1079  \param digest - digest (used for block initialization)
1080 
1081  @ingroup bitfunc
1082  @internal
1083 */
1084 inline
1085 void block_init_digest0(bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
1086 {
1087  unsigned off;
1088  for (unsigned i = 0; i < 64; ++i)
1089  {
1091  bm::word_t mask = (digest & 1) ? ~0u : 0u;
1092 #if defined(VECT_BLOCK_SET_DIGEST)
1093  VECT_BLOCK_SET_DIGEST(&block[off], mask);
1094 #else
1095  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
1096  {
1097  block[off+j+0] = block[off+j+1] =
1098  block[off+j+2] = block[off+j+3] = mask;
1099  } // for j
1100 #endif
1101  digest >>= 1ull;
1102  } // for
1103 }
1104 
1105 /*!
1106  \brief Compute digest for 64 non-zero areas
1107  \param block - Bit block
1108 
1109  @return digest bit-mask (0 means all block is empty)
1110 
1111  @ingroup bitfunc
1112  @internal
1113 */
1114 inline
1115 bm::id64_t calc_block_digest0(const bm::word_t* const block) BMNOEXCEPT
1116 {
1117  bm::id64_t digest0 = 0;
1118  unsigned off;
1119 
1120  for (unsigned i = 0; i < 64; ++i)
1121  {
1123  #if defined(VECT_IS_DIGEST_ZERO)
1124  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
1125  digest0 |= bm::id64_t(!all_zero) << i;
1126  #else
1127  const bm::id64_t mask(1ull);
1128  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
1129  {
1130  bm::word_t w = block[off+j+0] | block[off+j+1] |
1131  block[off+j+2] | block[off+j+3];
1132  if (w)
1133  {
1134  digest0 |= (mask << i);
1135  break;
1136  }
1137  } // for j
1138  #endif
1139 
1140  } // for i
1141  return digest0;
1142 }
1143 
1144 /*!
1145  \brief Compute digest for 64 non-zero areas based on existing digest
1146  (function revalidates zero areas)
1147  \param block - bit block
1148  \param digest - start digest
1149 
1150  @return digest bit-mask (0 means all block is empty)
1151 
1152  @ingroup bitfunc
1153  @internal
1154 */
1155 inline
1156 bm::id64_t
1157 update_block_digest0(const bm::word_t* const block, bm::id64_t digest) BMNOEXCEPT
1158 {
1159  const bm::id64_t mask(1ull);
1160  bm::id64_t d = digest;
1161 
1162  while (d)
1163  {
1164  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
1165 
1166  unsigned wave = bm::word_bitcount64(t - 1);
1167  unsigned off = wave * bm::set_block_digest_wave_size;
1168 
1169  #if defined(VECT_IS_DIGEST_ZERO)
1170  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
1171  digest &= all_zero ? ~(mask << wave) : digest;
1172  #else
1173  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
1174  (const bm::bit_block_t::bunion_t*)(&block[off]);
1175  bm::id64_t w64 = 0;
1176  unsigned j = 0;
1177  do
1178  {
1179  w64 |=
1180  src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
1181  j += 4;
1182  } while ((j < bm::set_block_digest_wave_size/2) & !w64);
1183  digest &= w64 ? digest : ~(mask << wave);
1184  #endif
1185 
1186  d = bm::bmi_bslr_u64(d); // d &= d - 1;
1187  } // while
1188 
1189  BM_ASSERT(bm::calc_block_digest0(block) == digest);
1190  return digest;
1191 }
1192 
1193 // --------------------------------------------------------------
1194 
1195 
1196 /// Returns true if set operation is constant (bitcount)
1197 inline
1199 {
1200  return (int(op) >= int(set_COUNT));
1201 }
1202 
1203 /**
1204  Convert set operation to operation
1205 */
1206 inline
1208 {
1209  BM_ASSERT(op == set_AND ||
1210  op == set_OR ||
1211  op == set_SUB ||
1212  op == set_XOR);
1213  return (bm::operation) op;
1214 }
1215 
1216 //---------------------------------------------------------------------
1217 
1218 /**
1219  Structure carries pointer on bit block with all bits 1
1220  @ingroup bitfunc
1221  @internal
1222 */
1223 template<bool T> struct all_set
1224 {
1226  {
1230 
1232  {
1233  ::memset(_p, 0xFF, sizeof(_p)); // set FULL BLOCK content (all 1s)
1234  if (bm::conditional<sizeof(void*) == 8>::test())
1235  {
1236  const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
1237  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
1238  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
1239  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
1240  }
1241  else
1242  {
1243  const unsigned magic_mask = 0xFFFFfffe;
1244  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
1245  for (unsigned i = 0; i < bm::set_sub_array_size; ++i)
1246  _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
1247  }
1248  }
1249  };
1250 
1251  // version with minimal branching, super-scalar friendly
1252  //
1253  inline
1254  static bm::id64_t block_type(const bm::word_t* bp) BMNOEXCEPT
1255  {
1256  bm::id64_t type;
1257  if (bm::conditional<sizeof(void*) == 8>::test())
1258  {
1259  bm::id64_t w = reinterpret_cast<unsigned long long>(bp);
1260  type = (w & 3) | // FULL BLOCK or GAP
1261  ((bp == _block._p) << 1);
1262  type = type ? type : w;
1263  }
1264  else
1265  {
1266  unsigned w = reinterpret_cast<unsigned long>(bp);
1267  type = (w & 3) | // FULL BLOCK or GAP
1268  ((bp == _block._p) << 1);
1269  type = type ? type : w;
1270  }
1271  return type;
1272  }
1273 
1274  BMFORCEINLINE
1275  static bool is_full_block(const bm::word_t* bp) BMNOEXCEPT
1276  { return (bp == _block._p || bp == _block._p_fullp); }
1277 
1278  BMFORCEINLINE
1279  static bool is_valid_block_addr(const bm::word_t* bp) BMNOEXCEPT
1280  { return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
1281 
1283 };
1284 
1285 
1286 template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
1287 
1288 /// XOR swap two scalar variables
1289 template<typename W>
1290 void xor_swap(W& x, W& y) BMNOEXCEPT
1291 {
1292  BM_ASSERT(&x != &y);
1293  x ^= y;
1294  y ^= x;
1295  x ^= y;
1296 }
1297 
1298 /*!
1299  Fini not NULL position
1300  @return index of not NULL pointer
1301  @internal
1302 */
1303 template<typename N>
1304 bool find_not_null_ptr(bm::word_t*** arr, N start, N size, N* pos) BMNOEXCEPT
1305 {
1306  BM_ASSERT(pos);
1307 // BM_ASSERT(start < size);
1308 //#if defined(BM64_AVX2) || defined(BM64_AVX512)
1309 // TODO: optimization for SIMD based next ptr scan
1310 #if 0
1311  const unsigned unroll_factor = 4;
1312  const unsigned len = (size - start);
1313  const unsigned len_unr = len - (len % unroll_factor);
1314  unsigned k;
1315 
1316  arr += start;
1317  for (k = 0; k < len_unr; k+=unroll_factor)
1318  {
1319  if (!avx2_test_all_zero_wave(arr+k))
1320  {
1321  if (arr[k])
1322  {
1323  *pos = k + start;
1324  return true;
1325  }
1326  if (arr[k+1])
1327  {
1328  *pos = k + start + 1;
1329  return true;
1330  }
1331  if (arr[k+2])
1332  {
1333  *pos = k + start + 2;
1334  return true;
1335  }
1336  if (arr[k+3])
1337  {
1338  *pos = k + start + 3;
1339  return true;
1340  }
1341  }
1342  } // for k
1343 
1344  for (; k < len; ++k)
1345  {
1346  if (arr[k])
1347  {
1348  *pos = k + start;
1349  return true;
1350  }
1351  } // for k
1352 #else
1353  for (; start < size; ++start)
1354  {
1355  if (arr[start])
1356  {
1357  *pos = start;
1358  return true;
1359  }
1360  } // for i
1361 #endif
1362  return false;
1363 }
1364 
1365 
1366 
1367 
1368 //---------------------------------------------------------------------
1369 
1370 /*!
1371  \brief Lexicographical comparison of two words as bit strings (reference)
1372  Auxiliary implementation for testing and reference purposes.
1373  \param w1 - First word.
1374  \param w2 - Second word.
1375  \return <0 - less, =0 - equal, >0 - greater.
1376 
1377  @ingroup bitfunc
1378 */
1379 template<typename T> int wordcmp0(T w1, T w2)
1380 {
1381  while (w1 != w2)
1382  {
1383  int res = (w1 & 1) - (w2 & 1);
1384  if (res != 0) return res;
1385  w1 >>= 1;
1386  w2 >>= 1;
1387  }
1388  return 0;
1389 }
1390 
1391 
1392 /*
1393 template<typename T> int wordcmp(T w1, T w2)
1394 {
1395  T diff = w1 ^ w2;
1396  return diff ? ((w1 & diff & (diff ^ (diff - 1)))? 1 : -1) : 0;
1397 }
1398 */
1399 /*!
1400  \brief Lexicographical comparison of two words as bit strings.
1401  Auxiliary implementation for testing and reference purposes.
1402  \param a - First word.
1403  \param b - Second word.
1404  \return <0 - less, =0 - equal, >0 - greater.
1405 
1406  @ingroup bitfunc
1407 */
1408 template<typename T> int wordcmp(T a, T b)
1409 {
1410  T diff = a ^ b;
1411  return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
1412 }
1413 
1414 
1415 // Low bit extraction
1416 // x & (x ^ (x-1))
1417 
1418 
1419 // ----------------------------------------------------------------------
1420 
1421 
1422 /*! @brief Returns "true" if all bits in the block are 0
1423  @ingroup bitfunc
1424 */
1425 inline
1426 bool bit_is_all_zero(const bm::word_t* BMRESTRICT start) BMNOEXCEPT
1427 {
1428 #if defined(VECT_IS_ZERO_BLOCK)
1429  return VECT_IS_ZERO_BLOCK(start);
1430 #else
1431  const bm::wordop_t* BMRESTRICT blk = (bm::wordop_t*) (start);
1432  const bm::wordop_t* BMRESTRICT blk_end = (bm::wordop_t*)(start + bm::set_block_size);
1433  do
1434  {
1435  if (blk[0] | blk[1] | blk[2] | blk[3])
1436  return false;
1437  blk += 4;
1438  } while (blk < blk_end);
1439  return true;
1440 #endif
1441 }
1442 
1443 // ----------------------------------------------------------------------
1444 
1445 /*!
1446  \brief Checks if GAP block is all-zero.
1447  \param buf - GAP buffer pointer.
1448  \returns true if all-zero.
1449 
1450  @ingroup gapfunc
1451 */
1453 bool gap_is_all_zero(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
1454 {
1455  // (almost) branchless variant:
1456  return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
1457 }
1458 
1459 /*!
1460  \brief Checks if GAP block is all-one.
1461  \param buf - GAP buffer pointer.
1462  \returns true if all-one.
1463  @ingroup gapfunc
1464 */
1466 bool gap_is_all_one(const bm::gap_word_t* BMRESTRICT buf) BMNOEXCEPT
1467 {
1468  return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
1469 }
1470 
1471 /*!
1472  \brief Returs GAP block length.
1473  \param buf - GAP buffer pointer.
1474  \returns GAP block length.
1475 
1476  @ingroup gapfunc
1477 */
1480 {
1481  return (bm::gap_word_t)((*buf >> 3) + 1);
1482 }
1483 
1484 
1485 /*!
1486  \brief Returs GAP block capacity
1487  \param buf - GAP buffer pointer
1488  \param glevel_len - pointer on GAP header word
1489  \returns GAP block capacity.
1490 
1491  @ingroup gapfunc
1492 */
1493 template<typename T>
1494 unsigned
1495 gap_capacity(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
1496 {
1497  return glevel_len[(*buf >> 1) & 3];
1498 }
1499 
1500 
1501 /*!
1502  \brief Returs GAP block capacity limit.
1503  \param buf - GAP buffer pointer.
1504  \param glevel_len - GAP lengths table (gap_len_table)
1505  \returns GAP block limit.
1506 
1507  @ingroup gapfunc
1508 */
1509 template<typename T>
1510 unsigned
1511 gap_limit(const T* BMRESTRICT buf, const T* BMRESTRICT glevel_len) BMNOEXCEPT
1512 {
1513  return glevel_len[(*buf >> 1) & 3]-4;
1514 }
1515 
1516 
1517 /*!
1518  \brief Returs GAP blocks capacity level.
1519  \param buf - GAP buffer pointer.
1520  \returns GAP block capacity level.
1521 
1522  @ingroup gapfunc
1523 */
1524 template<typename T>
1525 T gap_level(const T* BMRESTRICT buf) BMNOEXCEPT
1526 {
1527  return T((*buf >> 1) & 3u);
1528 }
1529 
1530 
1531 /*!
1532  \brief GAP block find the last set bit
1533 
1534  \param buf - GAP buffer pointer.
1535  \param last - index of the last 1 bit
1536 
1537  \return 0 if 1 bit was NOT found
1538 
1539  @ingroup gapfunc
1540 */
1541 template<typename T>
1542 unsigned
1543 gap_find_last(const T* BMRESTRICT buf, unsigned* BMRESTRICT last) BMNOEXCEPT
1544 {
1545  BM_ASSERT(last);
1546 
1547  T is_set = (*buf) & 1u;
1548  T end = T((*buf) >> 3u);
1549 
1550  BM_ASSERT(buf[end] == bm::gap_max_bits - 1);
1551 
1552  is_set ^= T((end-1) & 1u);
1553  if (is_set)
1554  {
1555  *last = buf[end];
1556  return is_set;
1557  }
1558  *last = buf[--end];
1559  return end;
1560 }
1561 
1562 /*!
1563  \brief GAP block find the first set bit
1564 
1565  \param buf - GAP buffer pointer.
1566  \param first - index of the first 1 bit
1567 
1568  \return 0 if 1 bit was NOT found
1569 
1570  @ingroup gapfunc
1571 */
1572 template<typename T>
1573 unsigned
1574 gap_find_first(const T* BMRESTRICT buf, unsigned* BMRESTRICT first) BMNOEXCEPT
1575 {
1576  BM_ASSERT(first);
1577 
1578  T is_set = (*buf) & 1u;
1579  if (is_set)
1580  {
1581  *first = 0;
1582  return is_set;
1583  }
1584  if (buf[1] == bm::gap_max_bits - 1)
1585  return 0;
1586  *first = buf[1] + 1;
1587  return 1;
1588 }
1589 
1590 
1591 
1592 /*
1593  \brief Binary search for the block where bit = pos located.
1594  \param buf - GAP buffer pointer.
1595  \param pos - index of the element.
1596  \param is_set - output. GAP value (0 or 1).
1597  \return GAP index.
1598  @ingroup gapfunc
1599 */
1600 template<typename T>
1601 unsigned gap_bfind(const T* BMRESTRICT buf,
1602  unsigned pos, unsigned* BMRESTRICT is_set) BMNOEXCEPT
1603 {
1604  BM_ASSERT(pos < bm::gap_max_bits);
1605  #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation
1606  #ifdef VECT_GAP_BFIND
1607  return VECT_GAP_BFIND(buf, pos, is_set);
1608  #else
1609  *is_set = (*buf) & 1;
1610 
1611  unsigned start = 1;
1612  unsigned end = 1 + ((*buf) >> 3);
1613 
1614  while ( start != end )
1615  {
1616  unsigned curr = (start + end) >> 1;
1617  if ( buf[curr] < pos )
1618  start = curr + 1;
1619  else
1620  end = curr;
1621  }
1622  *is_set ^= ((start-1) & 1);
1623  return start;
1624  #endif
1625 }
1626 
1627 
1628 /*!
1629  \brief Tests if bit = pos is true.
1630  \param buf - GAP buffer pointer.
1631  \param pos - index of the element.
1632  \return true if position is in "1" gap
1633  @ingroup gapfunc
1634 */
1635 template<typename T>
1636 unsigned gap_test(const T* BMRESTRICT buf, unsigned pos) BMNOEXCEPT
1637 {
1638  BM_ASSERT(pos < bm::gap_max_bits);
1639 
1640  unsigned start = 1;
1641  unsigned end = 1 + ((*buf) >> 3);
1642  if (end - start < 10)
1643  {
1644  unsigned sv = *buf & 1;
1645  unsigned sv1= sv ^ 1;
1646  if (buf[1] >= pos) return sv;
1647  if (buf[2] >= pos) return sv1;
1648  if (buf[3] >= pos) return sv;
1649  if (buf[4] >= pos) return sv1;
1650  if (buf[5] >= pos) return sv;
1651  if (buf[6] >= pos) return sv1;
1652  if (buf[7] >= pos) return sv;
1653  if (buf[8] >= pos) return sv1;
1654  if (buf[9] >= pos) return sv;
1655  BM_ASSERT(0);
1656  }
1657  else
1658  {
1659  while (start != end)
1660  {
1661  unsigned curr = (start + end) >> 1;
1662  if (buf[curr] < pos)
1663  start = curr + 1;
1664  else
1665  end = curr;
1666  }
1667  }
1668  return ((*buf) & 1) ^ ((--start) & 1);
1669 }
1670 
1671 /*!
1672  \brief Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
1673  \param buf - GAP buffer pointer.
1674  \param pos - index of the element.
1675  \return true if position is in "1" gap
1676  @ingroup gapfunc
1677 */
1678 template<typename T>
1679 unsigned gap_test_unr(const T* BMRESTRICT buf, const unsigned pos) BMNOEXCEPT
1680 {
1681  BM_ASSERT(pos < bm::gap_max_bits);
1682 
1683  if (pos == 0) // quick answer possible
1684  {
1685  return (*buf) & 1;
1686  }
1687 #if defined(BMSSE2OPT)
1688  unsigned res = bm::sse2_gap_test(buf, pos);
1689  BM_ASSERT(res == bm::gap_test(buf, pos));
1690 #elif defined(BMSSE42OPT)
1691  unsigned res = bm::sse42_gap_test(buf, pos);
1692  BM_ASSERT(res == bm::gap_test(buf, pos));
1693 #elif defined(BMAVX2OPT)
1694  unsigned res = bm::avx2_gap_test(buf, pos);
1695  BM_ASSERT(res == bm::gap_test(buf, pos));
1696 #else
1697  unsigned res = bm::gap_test(buf, pos);
1698 #endif
1699  return res;
1700 }
1701 
1702 /*! For each non-zero block in [from, to] executes supplied functor
1703  \internal
1704 */
1705 template<typename T, typename N, typename F>
1706 void for_each_nzblock_range(T*** root,
1707  N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
1708 {
1709  BM_ASSERT(top_size);
1710  if (nb_from > nb_to)
1711  return;
1712  unsigned i_from = unsigned(nb_from >> bm::set_array_shift);
1713  unsigned j_from = unsigned(nb_from & bm::set_array_mask);
1714  unsigned i_to = unsigned(nb_to >> bm::set_array_shift);
1715  unsigned j_to = unsigned(nb_to & bm::set_array_mask);
1716 
1717  if (i_from >= top_size)
1718  return;
1719  if (i_to >= top_size)
1720  {
1721  i_to = unsigned(top_size-1);
1722  j_to = bm::set_sub_array_size-1;
1723  }
1724 
1725  for (unsigned i = i_from; i <= i_to; ++i)
1726  {
1727  T** blk_blk = root[i];
1728  if (!blk_blk)
1729  continue;
1730  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1731  {
1732  unsigned j = (i == i_from) ? j_from : 0;
1733  if (!j && (i != i_to)) // full sub-block
1734  f.add_full(bm::set_sub_total_bits);
1735  else
1736  {
1737  do
1738  {
1739  f.add_full(bm::gap_max_bits);
1740  if ((i == i_to) && (j == j_to))
1741  return;
1742  } while (++j < bm::set_sub_array_size);
1743  }
1744  }
1745  else
1746  {
1747  unsigned j = (i == i_from) ? j_from : 0;
1748  do
1749  {
1750  if (blk_blk[j])
1751  f(blk_blk[j]);
1752  if ((i == i_to) && (j == j_to))
1753  return;
1754  } while (++j < bm::set_sub_array_size);
1755  }
1756  } // for i
1757 }
1758 
1759 /*! For each non-zero block executes supplied function.
1760  \internal
1761 */
1762 template<class T, class F>
1763 void for_each_nzblock(T*** root, unsigned size1, F& f)
1764 {
1765  typedef typename F::size_type size_type;
1766  for (unsigned i = 0; i < size1; ++i)
1767  {
1768  T** blk_blk = root[i];
1769  if (!blk_blk)
1770  {
1771  f.on_empty_top(i);
1772  continue;
1773  }
1774  f.on_non_empty_top(i);
1775  if ((bm::word_t*)blk_blk == FULL_BLOCK_FAKE_ADDR)
1776  {
1777  size_type r = i * bm::set_sub_array_size;
1778  unsigned j = 0;
1779  do
1780  {
1781  f(FULL_BLOCK_FAKE_ADDR, r + j);
1782  } while (++j < bm::set_sub_array_size);
1783  continue;
1784  }
1785 
1786  unsigned non_empty_top = 0;
1787  size_type r = i * bm::set_sub_array_size;
1788  unsigned j = 0;
1789  do
1790  {
1791 #if defined(BM64_AVX2) || defined(BM64_AVX512)
1792  if (!avx2_test_all_zero_wave(blk_blk + j))
1793  {
1794  non_empty_top = 1;
1795  T* blk0 = blk_blk[j + 0];
1796  T* blk1 = blk_blk[j + 1];
1797  T* blk2 = blk_blk[j + 2];
1798  T* blk3 = blk_blk[j + 3];
1799 
1800  size_type block_idx = r + j + 0;
1801  if (blk0)
1802  f(blk0, block_idx);
1803  else
1804  f.on_empty_block(block_idx);
1805 
1806  if (blk1)
1807  f(blk1, block_idx + 1);
1808  else
1809  f.on_empty_block(block_idx + 1);
1810 
1811  if (blk2)
1812  f(blk2, block_idx + 2);
1813  else
1814  f.on_empty_block(block_idx + 2);
1815 
1816  if (blk3)
1817  f(blk3, block_idx + 3);
1818  else
1819  f.on_empty_block(block_idx + 3);
1820  }
1821  else
1822  {
1823  f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1824  f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1825  }
1826  j += 4;
1827 #elif defined(BM64_SSE4)
1828  if (!sse42_test_all_zero_wave((blk_blk + j)))
1829  {
1830  non_empty_top = 1;
1831  T* blk0 = blk_blk[j + 0];
1832  T* blk1 = blk_blk[j + 1];
1833 
1834  size_type block_idx = r + j + 0;
1835  if (blk0)
1836  f(blk0, block_idx);
1837  else
1838  f.on_empty_block(block_idx);
1839 
1840  ++block_idx;
1841  if (blk1)
1842  f(blk1, block_idx);
1843  else
1844  f.on_empty_block(block_idx);
1845  }
1846  else
1847  {
1848  f.on_empty_block(r + j + 0);
1849  f.on_empty_block(r + j + 1);
1850  }
1851  j += 2;
1852 #else
1853  if (blk_blk[j])
1854  {
1855  f(blk_blk[j], r + j);
1856  non_empty_top = 1;
1857  }
1858  else
1859  f.on_empty_block(r + j);
1860  ++j;
1861 #endif
1862  } while (j < bm::set_sub_array_size);
1863 
1864  if (non_empty_top == 0)
1865  f.on_empty_top(i);
1866  } // for i
1867 }
1868 
1869 /*! For each non-zero block executes supplied function.
1870 */
1871 template<class T, class F>
1872 void for_each_nzblock2(T*** root, unsigned size1, F& f)
1873 {
1874 #ifdef BM64_SSE4
1875  for (unsigned i = 0; i < size1; ++i)
1876  {
1877  T** blk_blk;
1878  if ((blk_blk = root[i])!=0)
1879  {
1880  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1881  {
1882  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1883  {
1884  f(FULL_BLOCK_FAKE_ADDR);
1885  }
1886  continue;
1887  }
1888  {
1889  __m128i w0;
1890  for (unsigned j = 0; j < bm::set_sub_array_size; j+=4)
1891  {
1892  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1893  if (!_mm_testz_si128(w0, w0))
1894  {
1895  T* blk0 = blk_blk[j + 0];
1896  T* blk1 = blk_blk[j + 1];
1897 
1898  if (blk0)
1899  f(blk0);
1900  if (blk1)
1901  f(blk1);
1902  }
1903  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1904  if (!_mm_testz_si128(w0, w0))
1905  {
1906  T* blk0 = blk_blk[j + 2];
1907  T* blk1 = blk_blk[j + 3];
1908  if (blk0)
1909  f(blk0);
1910  if (blk1)
1911  f(blk1);
1912  }
1913  } // for j
1914  }
1915  }
1916  } // for i
1917 #elif defined(BM64_AVX2) || defined(BM64_AVX512)
1918  for (unsigned i = 0; i < size1; ++i)
1919  {
1920  T** blk_blk;
1921  if ((blk_blk = root[i]) != 0)
1922  {
1923  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1924  {
1925  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1926  {
1927  f(FULL_BLOCK_FAKE_ADDR);
1928  }
1929  continue;
1930  }
1931  {
1932  for (unsigned j = 0; j < bm::set_sub_array_size; j += 4)
1933  {
1934  __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1935  if (!_mm256_testz_si256(w0, w0))
1936  {
1937  // as a variant could use: blk0 = (T*)_mm256_extract_epi64(w0, 0);
1938  // but it measures marginally slower
1939  T* blk0 = blk_blk[j + 0];
1940  T* blk1 = blk_blk[j + 1];
1941  T* blk2 = blk_blk[j + 2];
1942  T* blk3 = blk_blk[j + 3];
1943  if (blk0)
1944  f(blk0);
1945  if (blk1)
1946  f(blk1);
1947  if (blk2)
1948  f(blk2);
1949  if (blk3)
1950  f(blk3);
1951  }
1952  } // for j
1953  }
1954  }
1955  } // for i
1956 
1957 #else // non-SIMD mode
1958  for (unsigned i = 0; i < size1; ++i)
1959  {
1960  T** blk_blk;
1961  if ((blk_blk = root[i])!=0)
1962  {
1963  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
1964  {
1965  for (unsigned j = 0; j < bm::set_sub_array_size; ++j)
1966  {
1967  f(FULL_BLOCK_FAKE_ADDR);
1968  }
1969  continue;
1970  }
1971  unsigned j = 0;
1972  do
1973  {
1974  if (blk_blk[j])
1975  f(blk_blk[j]);
1976  if (blk_blk[j+1])
1977  f(blk_blk[j+1]);
1978  if (blk_blk[j+2])
1979  f(blk_blk[j+2]);
1980  if (blk_blk[j+3])
1981  f(blk_blk[j+3]);
1982  j += 4;
1983  } while (j < bm::set_sub_array_size);
1984  }
1985  } // for i
1986 #endif
1987 }
1988 
1989 
1990 /*! For each non-zero block executes supplied function-predicate.
1991  Function returns if function-predicate returns true
1992 */
1993 template<typename T, typename BI, typename F>
1994 bool for_each_nzblock_if(T*** root, BI size1, F& f) BMNOEXCEPT
1995 {
1996  BI block_idx = 0;
1997  for (BI i = 0; i < size1; ++i)
1998  {
1999  T** blk_blk = root[i];
2000  if (!blk_blk)
2001  {
2002  block_idx += bm::set_sub_array_size;
2003  continue;
2004  }
2005  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
2006  {
2007  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
2008  {
2009  if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
2010  return true;
2011  } // for j
2012  continue;
2013  }
2014 
2015  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
2016  {
2017  if (blk_blk[j])
2018  if (f(blk_blk[j], block_idx))
2019  return true;
2020  } // for j
2021  } // for i
2022  return false;
2023 }
2024 
2025 /*! For each block executes supplied function.
2026 */
2027 template<class T, class F, typename BLOCK_IDX>
2028 void for_each_block(T*** root, unsigned size1, F& f, BLOCK_IDX start)
2029 {
2030  BLOCK_IDX block_idx = start;
2031  for (unsigned i = 0; i < size1; ++i)
2032  {
2033  T** blk_blk = root[i];
2034  if (blk_blk)
2035  {
2036  if (blk_blk == (T**)FULL_BLOCK_FAKE_ADDR)
2037  {
2038  for (unsigned j = 0; j < bm::set_sub_array_size; ++j, ++block_idx)
2039  {
2040  f(FULL_BLOCK_FAKE_ADDR, block_idx);
2041  }
2042  continue;
2043  }
2044  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
2045  {
2046  f(blk_blk[j], block_idx);
2047  }
2048  }
2049  else
2050  {
2051  for (unsigned j = 0;j < bm::set_sub_array_size; ++j, ++block_idx)
2052  {
2053  f(0, block_idx);
2054  }
2055  }
2056  }
2057 }
2058 
2059 
2060 
2061 /*! Special BM optimized analog of STL for_each
2062 */
2063 template<class T, class F> F bmfor_each(T first, T last, F f)
2064 {
2065  do
2066  {
2067  f(*first);
2068  ++first;
2069  } while (first < last);
2070  return f;
2071 }
2072 
2073 /*! Computes SUM of all elements of the sequence
2074 */
2075 template<typename T>
2076 bm::id64_t sum_arr(const T* first, const T* last) BMNOEXCEPT
2077 {
2078  bm::id64_t sum = 0;
2079  for (;first < last; ++first)
2080  sum += *first;
2081  return sum;
2082 }
2083 
2084 /*!
2085  Extract short (len=1) exceptions from the GAP block
2086  \param buf - GAP buffer to split
2087  \param arr0 - [OUT] list of isolates 0 positions (clear list)
2088  \param arr1 - [OUT] list of isolated 1 positions (set list)
2089  \param arr0_cnt - [OUT] arr0 size
2090  \param arr1_cnt -
2091  @ingroup gapfunc
2092 */
2093 template<typename T>
2094 void gap_split(const T* buf,
2095  T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
2096 {
2097  const T* pcurr = buf;
2098  unsigned len = (*pcurr >> 3);
2099  const T* pend = pcurr + len;
2100 
2101  T cnt0, cnt1;
2102  cnt0 = cnt1 = 0;
2103  unsigned is_set = (*buf & 1);
2104 
2105  if (*pcurr == 0)
2106  {
2107  if (is_set)
2108  {
2109  arr1[cnt1] = *pcurr;
2110  ++cnt1;
2111  }
2112  else
2113  {
2114  arr0[cnt0] = *pcurr;
2115  ++cnt0;
2116  }
2117  }
2118  T prev = *pcurr;
2119  ++pcurr;
2120 
2121  while (pcurr <= pend)
2122  {
2123  is_set ^= 1;
2124  T delta = *pcurr - prev;
2125  if (delta == 1)
2126  {
2127  if (is_set)
2128  {
2129  arr1[cnt1] = prev;
2130  ++cnt1;
2131  }
2132  else
2133  {
2134  arr0[cnt0] = prev;
2135  ++cnt0;
2136  }
2137  }
2138  prev = *pcurr++;
2139  } // while
2140 
2141  arr0_cnt = cnt0;
2142  arr1_cnt = cnt1;
2143 }
2144 
2145 
2146 /*!
2147  \brief Calculates number of bits ON in GAP buffer.
2148  \param buf - GAP buffer pointer.
2149  \param dsize - buffer size
2150  \return Number of non-zero bits.
2151  @ingroup gapfunc
2152 */
2153 template<typename T>
2154 unsigned gap_bit_count(const T* buf, unsigned dsize=0) BMNOEXCEPT
2155 {
2156  const T* pcurr = buf;
2157  if (dsize == 0)
2158  dsize = (*pcurr >> 3);
2159 
2160  const T* pend = pcurr + dsize;
2161 
2162  unsigned bits_counter = 0;
2163  ++pcurr;
2164 
2165  if (*buf & 1)
2166  {
2167  bits_counter += *pcurr + 1;
2168  ++pcurr;
2169  }
2170  for (++pcurr; pcurr <= pend; pcurr += 2)
2171  bits_counter += *pcurr - *(pcurr-1);
2172  return bits_counter;
2173 }
2174 
2175 /*!
2176  \brief Calculates number of bits ON in GAP buffer. Loop unrolled version.
2177  \param buf - GAP buffer pointer.
2178  \return Number of non-zero bits.
2179  @ingroup gapfunc
2180 */
2181 template<typename T>
2182 unsigned gap_bit_count_unr(const T* buf) BMNOEXCEPT
2183 {
2184  const T* pcurr = buf;
2185  unsigned dsize = (*pcurr >> 3);
2186 
2187  unsigned cnt = 0;
2188  pcurr = buf + 1; // set up start position
2189  T first_one = *buf & 1;
2190  if (first_one)
2191  {
2192  cnt += *pcurr + 1;
2193  ++pcurr;
2194  }
2195  ++pcurr; // set GAP to 1
2196 
2197  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
2198  if (dsize > 34)
2199  {
2200  const unsigned unr_factor = 32;
2201  unsigned waves = (dsize-2) / unr_factor;
2202  pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
2203  }
2204  #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
2205  if (dsize > 18)
2206  {
2207  const unsigned unr_factor = 16;
2208  unsigned waves = (dsize - 2) / unr_factor;
2209  pcurr = sse2_gap_sum_arr(pcurr, waves, &cnt);
2210  }
2211  #else
2212  if (dsize > 10)
2213  {
2214  const unsigned unr_factor = 8;
2215  unsigned waves = (dsize - 2) / unr_factor;
2216  for (unsigned i = 0; i < waves; i += unr_factor)
2217  {
2218  cnt += pcurr[0] - pcurr[0 - 1];
2219  cnt += pcurr[2] - pcurr[2 - 1];
2220  cnt += pcurr[4] - pcurr[4 - 1];
2221  cnt += pcurr[6] - pcurr[6 - 1];
2222 
2223  pcurr += unr_factor;
2224  } // for
2225  }
2226  #endif
2227 
2228  const T* pend = buf + dsize;
2229  for ( ; pcurr <= pend ; pcurr+=2)
2230  {
2231  cnt += *pcurr - *(pcurr - 1);
2232  }
2233  BM_ASSERT(cnt == bm::gap_bit_count(buf));
2234  return cnt;
2235 }
2236 
2237 
2238 
2239 /*!
2240  \brief Counts 1 bits in GAP buffer in the closed [left, right] range.
2241  \param buf - GAP buffer pointer.
2242  \param left - leftmost bit index to start from
2243  \param right- rightmost bit index
2244  \return Number of non-zero bits.
2245  @ingroup gapfunc
2246 */
2247 template<typename T>
2248 unsigned gap_bit_count_range(const T* const buf,
2249  unsigned left, unsigned right) BMNOEXCEPT
2250 {
2251  BM_ASSERT(left <= right);
2252  BM_ASSERT(right < bm::gap_max_bits);
2253 
2254  const T* pcurr = buf;
2255  const T* pend = pcurr + (*pcurr >> 3);
2256 
2257  unsigned bits_counter = 0;
2258  unsigned is_set;
2259  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
2260  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
2261 
2262  pcurr = buf + start_pos;
2263  if (right <= *pcurr) // we are in the target gap right now
2264  {
2265  bits_counter = unsigned(right - left + 1u) & is_set; // & is_set == if(is_set)
2266  return bits_counter;
2267  }
2268  bits_counter += unsigned(*pcurr - left + 1u) & is_set;
2269 
2270  unsigned prev_gap = *pcurr++;
2271  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
2272  {
2273  bits_counter += (*pcurr - prev_gap) & is_set;
2274  if (pcurr == pend)
2275  return bits_counter;
2276  prev_gap = *pcurr++;
2277  }
2278  bits_counter += unsigned(right - prev_gap) & is_set;
2279  return bits_counter;
2280 }
2281 
2282 /*!
2283  \brief Test if all bits are 1 in GAP buffer in the [left, right] range.
2284  \param buf - GAP buffer pointer.
2285  \param left - leftmost bit index to start from
2286  \param right- rightmost bit index
2287  \return true if all bits are "11111"
2288  @ingroup gapfunc
2289 */
2290 template<typename T>
2291 bool gap_is_all_one_range(const T* const BMRESTRICT buf,
2292  unsigned left, unsigned right) BMNOEXCEPT
2293 {
2294  BM_ASSERT(left <= right);
2295  BM_ASSERT(right < bm::gap_max_bits);
2296 
2297  unsigned is_set;
2298  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
2299  if (!is_set) // GAP is 0
2300  return false;
2301  const T* const pcurr = buf + start_pos;
2302  return (right <= *pcurr);
2303 }
2304 
2305 /*!
2306  \brief Test if any bits are 1 in GAP buffer in the [left, right] range.
2307  \param buf - GAP buffer pointer.
2308  \param left - leftmost bit index to start from
2309  \param right- rightmost bit index
2310  \return true if at least 1 "00010"
2311  @ingroup gapfunc
2312 */
2313 template<typename T>
2314 bool gap_any_range(const T* const BMRESTRICT buf,
2315  unsigned left, unsigned right) BMNOEXCEPT
2316 {
2317  BM_ASSERT(left <= right);
2318  BM_ASSERT(right < bm::gap_max_bits);
2319 
2320  unsigned is_set;
2321  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
2322  const T* const pcurr = buf + start_pos;
2323 
2324  if (!is_set) // start GAP is 0 ...
2325  {
2326  if (right <= *pcurr) // ...bit if the interval goes into at least 1 blk
2327  return false; // .. nope
2328  return true;
2329  }
2330  return true;
2331 }
2332 
2333 /*!
2334  \brief Test if any bits are 1 in GAP buffer in the [left, right] range
2335  and flanked with 0s
2336  \param buf - GAP buffer pointer.
2337  \param left - leftmost bit index to start from
2338  \param right- rightmost bit index
2339  \return true if "011110"
2340  @ingroup gapfunc
2341 */
2342 template<typename T>
2343 bool gap_is_interval(const T* const BMRESTRICT buf,
2344  unsigned left, unsigned right) BMNOEXCEPT
2345 {
2346  BM_ASSERT(left <= right);
2347  BM_ASSERT(left > 0); // cannot check left-1 otherwise
2348  BM_ASSERT(right < bm::gap_max_bits-1); // cannot check right+1 otherwise
2349 
2350  unsigned is_set;
2351  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
2352 
2353  const T* pcurr = buf + start_pos;
2354  if (!is_set || (right != *pcurr) || (start_pos <= 1))
2355  return false;
2356  --pcurr;
2357  if (*pcurr != left-1)
2358  return false;
2359  return true;
2360 }
2361 
2362 /**
2363  \brief Searches for the last 1 bit in the 111 interval of a GAP block
2364  \param buf - BIT block buffer
2365  \param nbit - bit index to start checking from
2366  \param pos - [out] found value
2367 
2368  \return false if not found
2369  @ingroup gapfunc
2370 */
2371 template<typename T>
2372 bool gap_find_interval_end(const T* const BMRESTRICT buf,
2373  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
2374 {
2375  BM_ASSERT(pos);
2376  BM_ASSERT(nbit < bm::gap_max_bits);
2377 
2378  unsigned is_set;
2379  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2380  if (!is_set)
2381  return false;
2382  *pos = buf[start_pos];
2383  return true;
2384 }
2385 
2386 
2387 /**
2388  \brief Searches for the first 1 bit in the 111 interval of a GAP block
2389  \param buf - GAP block buffer
2390  \param nbit - bit index to start checking from
2391  \param pos - [out] found value
2392 
2393  \return false if not found
2394  @ingroup gapfunc
2395 */
2396 template<typename T>
2397 bool gap_find_interval_start(const T* const BMRESTRICT buf,
2398  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
2399 {
2400  BM_ASSERT(pos);
2401  BM_ASSERT(nbit < bm::gap_max_bits);
2402 
2403  unsigned is_set;
2404  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2405  if (!is_set)
2406  return false;
2407  --start_pos;
2408  if (!start_pos)
2409  *pos = 0;
2410  else
2411  *pos = buf[start_pos]+1;
2412  return true;
2413 }
2414 
2415 /**
2416  \brief reverse search for the first 1 bit of a GAP block
2417  \param buf - GAP block buffer
2418  \param nbit - bit index to start checking from
2419  \param pos - [out] found value
2420 
2421  \return false if not found
2422  @ingroup gapfunc
2423 */
2424 template<typename T>
2425 bool gap_find_prev(const T* const BMRESTRICT buf,
2426  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
2427 {
2428  BM_ASSERT(pos);
2429  BM_ASSERT(nbit < bm::gap_max_bits);
2430 
2431  unsigned is_set;
2432  unsigned start_pos = bm::gap_bfind(buf, nbit, &is_set);
2433  if (is_set)
2434  {
2435  *pos = nbit;
2436  return true;
2437  }
2438  --start_pos;
2439  if (!start_pos)
2440  return false;
2441  else
2442  *pos = buf[start_pos];
2443  return true;
2444 }
2445 
2446 
2447 
2448 /*!
2449  \brief GAP block find position for the rank
2450 
2451  \param block - bit block buffer pointer
2452  \param rank - rank to find (must be > 0)
2453  \param nbit_from - start bit position in block
2454  \param nbit_pos - found position
2455 
2456  \return 0 if position with rank was found, or
2457  the remaining rank (rank - population count)
2458 
2459  @ingroup gapfunc
2460 */
2461 template<typename T, typename SIZE_TYPE>
2462 SIZE_TYPE gap_find_rank(const T* const block,
2463  SIZE_TYPE rank,
2464  unsigned nbit_from,
2465  unsigned& nbit_pos) BMNOEXCEPT
2466 {
2467  BM_ASSERT(block);
2468  BM_ASSERT(rank);
2469 
2470  const T* pcurr = block;
2471  const T* pend = pcurr + (*pcurr >> 3);
2472 
2473  unsigned bits_counter = 0;
2474  unsigned is_set;
2475  unsigned start_pos = bm::gap_bfind(block, nbit_from, &is_set);
2476  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
2477 
2478  pcurr = block + start_pos;
2479  bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
2480  if (bits_counter >= rank) // found!
2481  {
2482  nbit_pos = nbit_from + unsigned(rank) - 1u;
2483  return 0;
2484  }
2485  rank -= bits_counter;
2486  unsigned prev_gap = *pcurr++;
2487  for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
2488  {
2489  bits_counter = (*pcurr - prev_gap) & is_set;
2490  if (bits_counter >= rank) // found!
2491  {
2492  nbit_pos = prev_gap + unsigned(rank);
2493  return 0;
2494  }
2495  rank -= bits_counter;
2496  prev_gap = *pcurr++;
2497  } // for
2498 
2499  return rank;
2500 }
2501 
2502 
2503 
2504 /*!
2505  \brief Counts 1 bits in GAP buffer in the closed [0, right] range.
2506  \param buf - GAP buffer pointer.
2507  \param right- rightmost bit index
2508  \param is_corrected - if true the result will be rank corrected
2509  if right bit == true count=count-1
2510  \return Number of non-zero bits
2511  @ingroup gapfunc
2512 */
2513 template<typename T>
2514 unsigned gap_bit_count_to(const T* const buf, T right,
2515  bool is_corrected=false) BMNOEXCEPT
2516 {
2517  const T* pcurr = buf;
2518  const T* pend = pcurr + (*pcurr >> 3);
2519 
2520  unsigned bits_counter = 0;
2521  unsigned is_set = ~((unsigned(*buf) & 1u) - 1u); // 0xFFF.. if true (mask for branchless code)
2522  BM_ASSERT(is_set == 0u || is_set == ~0u);
2523  pcurr = buf + 1;
2524 
2525  if (right <= *pcurr) // we are in the target block right now
2526  {
2527  bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
2528  bits_counter -= (is_set & unsigned(is_corrected));
2529  return bits_counter;
2530  }
2531  bits_counter += (*pcurr + 1u) & is_set;
2532 
2533  unsigned prev_gap = *pcurr++;
2534  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
2535  {
2536  bits_counter += (*pcurr - prev_gap) & is_set;
2537  if (pcurr == pend)
2538  {
2539  bits_counter -= (is_set & unsigned(is_corrected));
2540  return bits_counter;
2541  }
2542  prev_gap = *pcurr++;
2543  }
2544  bits_counter += (right - prev_gap) & is_set;
2545  bits_counter -= (is_set & unsigned(is_corrected));
2546  return bits_counter;
2547 }
2548 
2549 
2550 /*!
2551  D-GAP block for_each algorithm
2552 
2553  D-Gap Functor is called for each element but last one.
2554 
2555  \param gap_buf - GAP buffer
2556  \param func - functor object
2557 
2558 */
2559 template<class T, class Func>
2560 void for_each_dgap(const T* gap_buf, Func& func)
2561 {
2562  const T* pcurr = gap_buf;
2563  const T* pend = pcurr + (*pcurr >> 3);
2564  ++pcurr;
2565 
2566  T prev = *pcurr;
2567  func((T)(prev + 1)); // first element incremented to avoid 0
2568  ++pcurr;
2569  do
2570  {
2571  func((T)(*pcurr - prev)); // all others are [N] - [N-1]
2572  prev = *pcurr;
2573  } while (++pcurr < pend);
2574 }
2575 
2576 /** d-Gap copy functor
2577  @internal
2578 */
2579 template<typename T> struct d_copy_func
2580 {
2581  d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {}
2582  void operator()(T dgap) { *dgap_buf_++ = dgap; }
2583 
2585 };
2586 
2587 /*!
2588  \brief Convert GAP buffer into D-GAP buffer
2589 
2590  Delta GAP representation is DGAP[N] = GAP[N] - GAP[N-1]
2591 
2592  \param gap_buf - GAP buffer
2593  \param dgap_buf - Delta-GAP buffer
2594  \param copy_head - flag to copy GAP header
2595 
2596  \internal
2597 
2598  @ingroup gapfunc
2599 */
2600 template<typename T>
2601 T* gap_2_dgap(const T* BMRESTRICT gap_buf,
2602  T* BMRESTRICT dgap_buf, bool copy_head=true) BMNOEXCEPT
2603 {
2604  if (copy_head) // copy GAP header
2605  {
2606  *dgap_buf++ = *gap_buf;
2607  }
2608 
2609  d_copy_func<T> copy_func(dgap_buf);
2610  for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
2611  return copy_func.dgap_buf_;
2612 }
2613 
2614 /*!
2615  \brief Convert D-GAP buffer into GAP buffer
2616 
2617  GAP representation is GAP[N] = DGAP[N] + DGAP[N-1]
2618 
2619  \param dgap_buf - Delta-GAP buffer
2620  \param gap_header - GAP header word
2621  \param gap_buf - GAP buffer
2622 
2623  \internal
2624  @ingroup gapfunc
2625 */
2626 template<typename T>
2627 void dgap_2_gap(const T* BMRESTRICT dgap_buf,
2628  T* BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
2629 {
2630  const T* pcurr = dgap_buf;
2631  unsigned len;
2632  if (!gap_header) // GAP header is already part of the stream
2633  {
2634  len = *pcurr >> 3;
2635  *gap_buf++ = *pcurr++; // copy GAP header
2636  }
2637  else // GAP header passed as a parameter
2638  {
2639  len = gap_header >> 3;
2640  *gap_buf++ = gap_header; // assign GAP header
2641  }
2642  --len; // last element is actually not encoded
2643  const T* pend = pcurr + len;
2644 
2645  *gap_buf = *pcurr++; // copy first element
2646  if (*gap_buf == 0)
2647  *gap_buf = 65535; // fix +1 overflow
2648  else
2649  *gap_buf = T(*gap_buf - 1);
2650 
2651  for (++gap_buf; pcurr < pend; ++pcurr)
2652  {
2653  T prev = *(gap_buf-1); // don't remove temp(undef expression!)
2654  *gap_buf++ = T(*pcurr + prev);
2655  }
2656  *gap_buf = 65535; // add missing last element
2657 }
2658 
2659 
2660 /*!
2661  \brief Lexicographical comparison of GAP buffers.
2662  \param buf1 - First GAP buffer pointer.
2663  \param buf2 - Second GAP buffer pointer.
2664  \return <0 - less, =0 - equal, >0 - greater.
2665 
2666  @ingroup gapfunc
2667 */
2668 template<typename T>
2669 int gapcmp(const T* buf1, const T* buf2) BMNOEXCEPT
2670 {
2671  const T* pcurr1 = buf1;
2672  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2673  unsigned bitval1 = *buf1 & 1;
2674  ++pcurr1;
2675 
2676  const T* pcurr2 = buf2;
2677  unsigned bitval2 = *buf2 & 1;
2678  ++pcurr2;
2679 
2680  while (pcurr1 <= pend1)
2681  {
2682  if (*pcurr1 == *pcurr2)
2683  {
2684  if (bitval1 != bitval2)
2685  {
2686  return (bitval1) ? 1 : -1;
2687  }
2688  }
2689  else
2690  {
2691  if (bitval1 == bitval2)
2692  {
2693  if (bitval1)
2694  {
2695  return (*pcurr1 < *pcurr2) ? -1 : 1;
2696  }
2697  else
2698  {
2699  return (*pcurr1 < *pcurr2) ? 1 : -1;
2700  }
2701  }
2702  else
2703  {
2704  return (bitval1) ? 1 : -1;
2705  }
2706  }
2707  ++pcurr1; ++pcurr2;
2708  bitval1 ^= 1;
2709  bitval2 ^= 1;
2710  }
2711 
2712  return 0;
2713 }
2714 
2715 /*!
2716  \brief Find first bit which is different between two GAP-blocks
2717  \param buf1 - block 1
2718  \param buf2 - block 2
2719  \param pos - out - position of difference (undefined if blocks are equal)
2720  \return true if difference was found
2721 
2722  @ingroup gapfunc
2723 */
2724 template<typename T>
2725 bool gap_find_first_diff(const T* BMRESTRICT buf1,
2726  const T* BMRESTRICT buf2,
2727  unsigned* BMRESTRICT pos) BMNOEXCEPT
2728 {
2729  BM_ASSERT(buf1 && buf2 && pos);
2730 
2731  const T* pcurr1 = buf1;
2732  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2733  const T* pcurr2 = buf2;
2734  for (++pcurr1, ++pcurr2; pcurr1 <= pend1; ++pcurr1, ++pcurr2)
2735  {
2736  if (*pcurr1 != *pcurr2)
2737  {
2738  *pos = 1 + ((*pcurr1 < *pcurr2) ? *pcurr1 : *pcurr2);
2739  return true;
2740  }
2741  } // for
2742  return false;
2743 }
2744 
2745 // -------------------------------------------------------------------------
2746 //
2747 
2748 /*!
2749  \brief Abstract operation for GAP buffers.
2750  Receives functor F as a template argument
2751  \param dest - destination memory buffer.
2752  \param vect1 - operand 1 GAP encoded buffer.
2753  \param vect1_mask - XOR mask for starting bitflag for vector1
2754  can be 0 or 1 (1 inverts the vector)
2755  \param vect2 - operand 2 GAP encoded buffer.
2756  \param vect2_mask - same as vect1_mask
2757  \param dlen - destination length after the operation
2758 
2759  \note Internal function.
2760  @internal
2761 
2762  @ingroup gapfunc
2763 */
2764 template<typename T, class F>
2765 void gap_buff_op(T* BMRESTRICT dest,
2766  const T* BMRESTRICT vect1,
2767  unsigned vect1_mask,
2768  const T* BMRESTRICT vect2,
2769  unsigned vect2_mask,
2770  unsigned& dlen) BMNOEXCEPT2
2771 {
2772  const T* cur1 = vect1;
2773  const T* cur2 = vect2;
2774 
2775  T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
2776  T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
2777 
2778  T bitval = (T) F::op(bitval1, bitval2);
2779  T bitval_prev = bitval;
2780 
2781  T* res = dest;
2782  *res = bitval;
2783  ++res;
2784 
2785  T c1 = *cur1; T c2 = *cur2;
2786  while (1)
2787  {
2788  bitval = (T) F::op(bitval1, bitval2);
2789 
2790  // Check if GAP value changes and we need to
2791  // start the next one
2792  //
2793  res += (bitval != bitval_prev);
2794  bitval_prev = bitval;
2795  if (c1 < c2) // (*cur1 < *cur2)
2796  {
2797  *res = c1;
2798  ++cur1; c1 = *cur1;
2799  bitval1 ^= 1;
2800  }
2801  else // >=
2802  {
2803  *res = c2;
2804  if (c2 < c1) // (*cur2 < *cur1)
2805  {
2806  bitval2 ^= 1;
2807  }
2808  else // equal
2809  {
2810  if (c2 == (bm::gap_max_bits - 1))
2811  break;
2812 
2813  ++cur1; c1 = *cur1;
2814  bitval1 ^= 1; bitval2 ^= 1;
2815  }
2816  ++cur2; c2 = *cur2;
2817  }
2818  } // while
2819 
2820  dlen = (unsigned)(res - dest);
2821  *dest = (T)((*dest & 7) + (dlen << 3));
2822 }
2823 
2824 
2825 /*!
2826  \brief Abstract operation for GAP buffers (predicts legth)
2827  Receives functor F as a template argument
2828  \param vect1 - operand 1 GAP encoded buffer.
2829  \param vect2 - operand 2 GAP encoded buffer.
2830  \param dlen - destination length after the operation
2831  \param limit - maximum target length limit,
2832  returns false if limit is reached
2833  \return true if operation would be successfull or
2834  false if limit reached
2835 
2836  \note Internal function.
2837  @internal
2838 
2839  @ingroup gapfunc
2840 */
2841 /*
2842 template<typename T, class F>
2843 bool gap_buff_dry_op(const T* BMRESTRICT vect1,
2844  const T* BMRESTRICT vect2,
2845  unsigned& dlen,
2846  unsigned limit) BMNOEXCEPT2
2847 {
2848  const T* cur1 = vect1;
2849  const T* cur2 = vect2;
2850 
2851  T bitval1 = (T)((*cur1++ & 1));
2852  T bitval2 = (T)((*cur2++ & 1));
2853 
2854  T bitval = (T)F::op(bitval1, bitval2);
2855  T bitval_prev = bitval;
2856 
2857  unsigned len = 1;
2858 
2859  T c1 = *cur1; T c2 = *cur2;
2860  while (1)
2861  {
2862  bitval = (T) F::op(bitval1, bitval2);
2863 
2864  // Check if GAP value changes and we need to
2865  // start the next one
2866  //
2867  len += (bitval != bitval_prev);
2868  if (len > limit)
2869  return false;
2870  bitval_prev = bitval;
2871  if (c1 < c2)
2872  {
2873  ++cur1; c1 = *cur1;
2874  bitval1 ^= 1;
2875  }
2876  else // >=
2877  {
2878  if (c2 < c1) // (*cur2 < *cur1)
2879  {
2880  bitval2 ^= 1;
2881  }
2882  else // equal
2883  {
2884  if (c2 == (bm::gap_max_bits - 1))
2885  break;
2886 
2887  ++cur1; c1 = *cur1;
2888  bitval1 ^= 1; bitval2 ^= 1;
2889  }
2890  ++cur2; c2 = *cur2;
2891  }
2892 
2893  } // while
2894 
2895  dlen = len;
2896  return true;
2897 }
2898 */
2899 
2900 /*!
2901  \brief Abstract distance test operation for GAP buffers.
2902  Receives functor F as a template argument
2903  \param vect1 - operand 1 GAP encoded buffer.
2904  \param vect1_mask - XOR mask for starting bitflag for vector1
2905  can be 0 or 1 (1 inverts the vector)
2906  \param vect2 - operand 2 GAP encoded buffer.
2907  \param vect2_mask - same as vect1_mask
2908  \note Internal function.
2909  \return non zero value if operation result returns any 1 bit
2910 
2911  @ingroup gapfunc
2912 */
2913 template<typename T, class F>
2914 unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
2915  unsigned vect1_mask,
2916  const T* BMRESTRICT vect2,
2917  unsigned vect2_mask) BMNOEXCEPT2
2918 {
2919  const T* cur1 = vect1;
2920  const T* cur2 = vect2;
2921 
2922  unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2923  unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2924 
2925  unsigned bitval = F::op(bitval1, bitval2);
2926  if (bitval)
2927  return bitval;
2928  unsigned bitval_prev = bitval;
2929 
2930  while (1)
2931  {
2932  bitval = F::op(bitval1, bitval2);
2933  if (bitval)
2934  return bitval;
2935 
2936  if (bitval != bitval_prev)
2937  bitval_prev = bitval;
2938 
2939  if (*cur1 < *cur2)
2940  {
2941  ++cur1;
2942  bitval1 ^= 1;
2943  }
2944  else // >=
2945  {
2946  if (*cur2 < *cur1)
2947  {
2948  bitval2 ^= 1;
2949  }
2950  else // equal
2951  {
2952  if (*cur2 == (bm::gap_max_bits - 1))
2953  {
2954  break;
2955  }
2956  ++cur1;
2957  bitval1 ^= 1; bitval2 ^= 1;
2958  }
2959  ++cur2;
2960  }
2961 
2962  } // while
2963 
2964  return 0;
2965 }
2966 
2967 
2968 
2969 /*!
2970  \brief Abstract distance(similarity) operation for GAP buffers.
2971  Receives functor F as a template argument
2972  \param vect1 - operand 1 GAP encoded buffer.
2973  \param vect2 - operand 2 GAP encoded buffer.
2974  \note Internal function.
2975 
2976  @ingroup gapfunc
2977 */
2978 template<typename T, class F>
2979 unsigned gap_buff_count_op(const T* vect1, const T* vect2) BMNOEXCEPT2
2980 {
2981  unsigned count;// = 0;
2982  const T* cur1 = vect1;
2983  const T* cur2 = vect2;
2984 
2985  unsigned bitval1 = (*cur1++ & 1);
2986  unsigned bitval2 = (*cur2++ & 1);
2987  unsigned bitval = count = F::op(bitval1, bitval2);
2988  unsigned bitval_prev = bitval;
2989 
2990  T res, res_prev;
2991  res = res_prev = 0;
2992 
2993  while (1)
2994  {
2995  bitval = F::op(bitval1, bitval2);
2996  // Check if GAP value changes and we need to
2997  // start the next one.
2998  if (bitval != bitval_prev)
2999  {
3000  bitval_prev = bitval;
3001  res_prev = res;
3002  }
3003 
3004  if (*cur1 < *cur2)
3005  {
3006  res = *cur1;
3007  if (bitval)
3008  {
3009  count += res - res_prev;
3010  res_prev = res;
3011  }
3012  ++cur1; bitval1 ^= 1;
3013  }
3014  else // >=
3015  {
3016  res = *cur2;
3017  if (bitval)
3018  {
3019  count += res - res_prev;
3020  res_prev = res;
3021  }
3022  if (*cur2 < *cur1)
3023  {
3024  bitval2 ^= 1;
3025  }
3026  else // equal
3027  {
3028  if (*cur2 == (bm::gap_max_bits - 1))
3029  break;
3030 
3031  ++cur1;
3032  bitval1 ^= 1; bitval2 ^= 1;
3033  }
3034  ++cur2;
3035  }
3036 
3037  } // while
3038 
3039  return count;
3040 }
3041 
3042 
3043 #ifdef __GNUG__
3044 #pragma GCC diagnostic push
3045 #pragma GCC diagnostic ignored "-Wconversion"
3046 #endif
3047 
3048 /*!
3049  \brief Sets or clears bit in the GAP buffer.
3050 
3051  \param val - new bit value
3052  \param buf - GAP buffer.
3053  \param pos - Index of bit to set.
3054  \param is_set - (OUT) flag if bit was actually set.
3055 
3056  \return New GAP buffer length.
3057 
3058  @ingroup gapfunc
3059 */
3060 template<typename T>
3061 unsigned gap_set_value(unsigned val,
3062  T* BMRESTRICT buf,
3063  unsigned pos,
3064  unsigned* BMRESTRICT is_set) BMNOEXCEPT
3065 {
3066  BM_ASSERT(pos < bm::gap_max_bits);
3067 
3068  unsigned curr = bm::gap_bfind(buf, pos, is_set);
3069  T end = (T)(*buf >> 3);
3070  if (*is_set == val)
3071  {
3072  *is_set = 0;
3073  return end;
3074  }
3075  *is_set = 1;
3076 
3077  T* pcurr = buf + curr;
3078  T* pprev = pcurr - 1;
3079  T* pend = buf + end;
3080 
3081  // Special case, first bit GAP operation. There is no platform beside it.
3082  // initial flag must be inverted.
3083  if (!pos)
3084  {
3085  *buf ^= 1;
3086  if (buf[1]) // We need to insert a 1 bit GAP here
3087  {
3088  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
3089  buf[1] = 0;
3090  ++end;
3091  }
3092  else // Only 1 bit in the GAP. We need to delete the first GAP.
3093  {
3094  pprev = buf + 1; pcurr = pprev + 1;
3095  goto copy_gaps;
3096  }
3097  }
3098  else
3099  if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
3100  {
3101  ++(*pprev);
3102  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
3103  {
3104  --end;
3105  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
3106  {
3107  ++pcurr;
3108  copy_gaps:
3109  --end;
3110  do { *pprev++ = *pcurr++; } while (pcurr < pend);
3111  }
3112  }
3113  }
3114  else
3115  if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
3116  {
3117  --(*pcurr);
3118  end += (pcurr == pend);
3119  }
3120  else // Worst case: split current GAP
3121  {
3122  if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
3123  ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
3124  end += 2;
3125  pcurr[0] = (T)(pos-1);
3126  pcurr[1] = (T)pos;
3127  }
3128 
3129  // Set correct length word and last border word
3130  *buf = (T)((*buf & 7) + (end << 3));
3131  buf[end] = bm::gap_max_bits-1;
3132  return end;
3133 }
3134 
3135 /*!
3136  \brief Sets or clears bit in the GAP buffer.
3137 
3138  \param val - new bit value
3139  \param buf - GAP buffer.
3140  \param pos - Index of bit to set.
3141 
3142  \return New GAP buffer length.
3143 
3144  @ingroup gapfunc
3145 */
3146 template<typename T>
3147 unsigned gap_set_value(unsigned val,
3148  T* BMRESTRICT buf,
3149  unsigned pos) BMNOEXCEPT
3150 {
3151  BM_ASSERT(pos < bm::gap_max_bits);
3152  unsigned is_set;
3153  unsigned curr = bm::gap_bfind(buf, pos, &is_set);
3154  T end = (T)(*buf >> 3);
3155  if (is_set == val)
3156  return end;
3157 
3158  T* pcurr = buf + curr;
3159  T* pprev = pcurr - 1;
3160  T* pend = buf + end;
3161 
3162  // Special case, first bit GAP operation. There is no platform beside it.
3163  // initial flag must be inverted.
3164  if (!pos)
3165  {
3166  *buf ^= 1;
3167  if (buf[1]) // We need to insert a 1 bit GAP here
3168  {
3169  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
3170  buf[1] = 0;
3171  ++end;
3172  }
3173  else // Only 1 bit in the GAP. We need to delete the first GAP.
3174  {
3175  pprev = buf + 1; pcurr = pprev + 1;
3176  goto copy_gaps;
3177  }
3178  }
3179  else
3180  if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
3181  {
3182  ++(*pprev);
3183  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
3184  {
3185  --end;
3186  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
3187  {
3188  ++pcurr;
3189  copy_gaps:
3190  --end;
3191  do { *pprev++ = *pcurr++; } while (pcurr < pend);
3192  }
3193  }
3194  }
3195  else
3196  if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
3197  {
3198  --(*pcurr);
3199  end += (pcurr == pend);
3200  }
3201  else // Worst case: split current GAP
3202  {
3203  if (*pcurr != bm::gap_max_bits-1) // last gap does not need memmove
3204  ::memmove(pcurr+2, pcurr, (end - curr + 1)*(sizeof(T)));
3205  end += 2;
3206  pcurr[0] = (T)(pos-1);
3207  pcurr[1] = (T)pos;
3208  }
3209 
3210  // Set correct length word and last border word
3211  *buf = (T)((*buf & 7) + (end << 3));
3212  buf[end] = bm::gap_max_bits-1;
3213  return end;
3214 }
3215 
3216 /*!
3217  \brief Add new value to the end of GAP buffer.
3218 
3219  \param buf - GAP buffer.
3220  \param pos - Index of bit to set.
3221 
3222  \return New GAP buffer length.
3223 
3224  @ingroup gapfunc
3225 */
3226 template<typename T>
3227 unsigned gap_add_value(T* buf, unsigned pos) BMNOEXCEPT
3228 {
3229  BM_ASSERT(pos < bm::gap_max_bits);
3230 
3231  T end = (T)(*buf >> 3);
3232  T curr = end;
3233  T* pcurr = buf + end;
3234  T* pend = pcurr;
3235  T* pprev = pcurr - 1;
3236 
3237  // Special case, first bit GAP operation. There is no platform beside it.
3238  // initial flag must be inverted.
3239  if (!pos)
3240  {
3241  *buf ^= 1;
3242  if ( buf[1] ) // We need to insert a 1 bit platform here.
3243  {
3244  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
3245  buf[1] = 0;
3246  ++end;
3247  }
3248  else // Only 1 bit in the GAP. We need to delete the first GAP.
3249  {
3250  pprev = buf + 1; pcurr = pprev + 1;
3251  --end;
3252  do { *pprev++ = *pcurr++; } while (pcurr < pend);
3253  }
3254  }
3255  else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
3256  {
3257  ++(*pprev);
3258  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
3259  {
3260  --end;
3261  BM_ASSERT(pcurr == pend);
3262  }
3263  }
3264  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
3265  {
3266  --(*pcurr);
3267  end += (pcurr == pend);
3268  }
3269  else // Worst case we need to split current block.
3270  {
3271  pcurr[0] = (T)(pos-1);
3272  pcurr[1] = (T)pos;
3273  end = (T)(end+2);
3274  }
3275 
3276  // Set correct length word.
3277  *buf = (T)((*buf & 7) + (end << 3));
3278  buf[end] = bm::gap_max_bits - 1;
3279  return end;
3280 }
3281 
3282 #ifdef __GNUG__
3283 #pragma GCC diagnostic pop
3284 #endif
3285 
3286 
3287 /*!
3288  @brief Right shift GAP block by 1 bit
3289  @param buf - block pointer
3290  @param co_flag - carry over from the previous block
3291  @param new_len - output length of the GAP block after the operation
3292 
3293  @return carry over bit (1 or 0)
3294  @ingroup gapfunc
3295 */
3296 template<typename T>
3298  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
3299 {
3300  BM_ASSERT(new_len);
3301  bool co;
3302  // 1: increment all GAP values by 1
3303  {
3304  unsigned bitval = *buf & 1;
3305  if (buf[1] == bm::gap_max_bits-1) // full GAP block
3306  {
3307  co = bitval;
3308  }
3309  else
3310  {
3311  unsigned len = (*buf >> 3);
3312  unsigned i = 1;
3313  for (; i < len; ++i)
3314  {
3315  buf[i]++;
3316  bitval ^= 1;
3317  } // for i
3318  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
3319  if (buf[i-1] == bm::gap_max_bits-1) // last element shifts out
3320  {
3321  // Set correct length word
3322  --len;
3323  *buf = (T)((*buf & 7) + (len << 3));
3324  }
3325  co = bitval;
3326  }
3327  }
3328  // set bit position 0 with carry-in flag
3329  {
3330  unsigned is_set;
3331  *new_len = bm::gap_set_value(co_flag, buf, 0, &is_set);
3332  }
3333  return co;
3334 }
3335 
3336 /*!
3337  @brief Left shift GAP block by 1 bit
3338  @param buf - block pointer
3339  @param co_flag - carry over from the previous block
3340  @param new_len - new length of the block
3341 
3342  @return carry over bit (1 or 0)
3343  @ingroup gapfunc
3344 */
3345 template<typename T>
3347  unsigned co_flag, unsigned* BMRESTRICT new_len) BMNOEXCEPT
3348 {
3349  BM_ASSERT(new_len);
3350  unsigned is_set;
3351 
3352  // 1: decrement all GAP values by 1
3353  //
3354  unsigned bitval = *buf & 1;
3355  bool co0 = bitval;
3356 
3357  if (!buf[1]) // cannot decrement (corner case)
3358  {
3359  bitval ^= 1;
3360  *new_len = bm::gap_set_value(bitval, buf, 0, &is_set);
3361 
3362  BM_ASSERT(is_set);
3363  BM_ASSERT(buf[1]);
3364  BM_ASSERT(bitval == unsigned(*buf & 1u));
3365 
3366  if (*new_len == 1)
3367  {
3368  *new_len = bm::gap_set_value(co_flag, buf,
3369  bm::gap_max_bits-1, &is_set);
3370  return co0;
3371  }
3372  }
3373  if (buf[1] != bm::gap_max_bits-1) // full GAP block
3374  {
3375  BM_ASSERT(buf[1]);
3376  unsigned len = (*buf >> 3);
3377  unsigned i = 1;
3378  for (; i < len; ++i)
3379  {
3380  buf[i]--;
3381  bitval ^= 1;
3382  } // for i
3383  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
3384  }
3385  // 2: set last bit position with carry-in flag
3386  //
3387  *new_len = bm::gap_set_value(co_flag, buf, bm::gap_max_bits-1, &is_set);
3388  return co0;
3389 }
3390 
3391 
3392 /*!
3393  \brief Convert array to GAP buffer.
3394 
3395  \param buf - GAP buffer.
3396  \param arr - array of values to set
3397  \param len - length of the array
3398 
3399  \return New GAP buffer length.
3400 
3401  @ingroup gapfunc
3402 */
3403 
3404 template<typename T>
3405 unsigned gap_set_array(T* buf, const T* arr, unsigned len) BMNOEXCEPT
3406 {
3407  *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
3408 
3409  T* pcurr = buf + 1;
3410 
3411  unsigned i = 0;
3412  T curr = arr[i];
3413  if (curr != 0) // need to add the first gap: (0 to arr[0]-1)
3414  {
3415  *pcurr = (T)(curr - 1);
3416  ++pcurr;
3417  }
3418  else
3419  {
3420  ++(*buf); // GAP starts with 1
3421  }
3422  T prev = curr;
3423  T acc = prev;
3424 
3425  for (i = 1; i < len; ++i)
3426  {
3427  curr = arr[i];
3428  if (curr == prev + 1)
3429  {
3430  ++acc;
3431  prev = curr;
3432  }
3433  else
3434  {
3435  *pcurr++ = acc;
3436  acc = curr;
3437  *pcurr++ = (T)(curr-1);
3438  }
3439  prev = curr;
3440  }
3441  *pcurr = acc;
3442  if (acc != bm::gap_max_bits - 1)
3443  {
3444  ++pcurr;
3445  *pcurr = bm::gap_max_bits - 1;
3446  }
3447 
3448  unsigned gap_len = unsigned(pcurr - buf);
3449  BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
3450 
3451  *buf = (T)((*buf & 7) + (gap_len << 3));
3452  return gap_len+1;
3453 }
3454 
3455 
3456 //------------------------------------------------------------------------
3457 
3458 /**
3459  \brief Compute number of GAPs in bit-array
3460  \param arr - array of BITs
3461  \param len - array length
3462 
3463  @ingroup gapfunc
3464 */
3465 template<typename T>
3466 unsigned bit_array_compute_gaps(const T* arr, unsigned len) BMNOEXCEPT
3467 {
3468  unsigned gap_count = 1;
3469  T prev = arr[0];
3470  if (prev > 0)
3471  ++gap_count;
3472  for (unsigned i = 1; i < len; ++i)
3473  {
3474  T curr = arr[i];
3475  if (curr != prev + 1)
3476  {
3477  gap_count += 2;
3478  }
3479  prev = curr;
3480  }
3481  return gap_count;
3482 }
3483 
3484 
3485 //------------------------------------------------------------------------
3486 
3487 /**
3488  \brief Searches for the next 1 bit in the GAP block
3489  \param buf - GAP buffer
3490  \param nbit - bit index to start checking from.
3491  \param prev - returns previously checked value
3492 
3493  \return 0 if not found
3494 
3495  @ingroup gapfunc
3496 */
3497 template<typename T>
3498 unsigned gap_block_find(const T* BMRESTRICT buf,
3499  unsigned nbit,
3500  bm::id_t* BMRESTRICT prev) BMNOEXCEPT
3501 {
3502  BM_ASSERT(nbit < bm::gap_max_bits);
3503 
3504  unsigned bitval;
3505  unsigned gap_idx = bm::gap_bfind(buf, nbit, &bitval);
3506 
3507  if (bitval) // positive block.
3508  {
3509  *prev = nbit;
3510  return 1u;
3511  }
3512  unsigned val = buf[gap_idx] + 1;
3513  *prev = val;
3514  return (val != bm::gap_max_bits); // no bug here.
3515 }
3516 
3517 //------------------------------------------------------------------------
3518 
3519 
3520 /*!
3521  \brief Set 1 bit in a block
3522  @ingroup bitfunc
3523 */
3525 void set_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
3526 {
3527  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3528  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3529  nbit &= bm::set_word_mask;
3530  dest[nword] |= unsigned(1u << nbit);
3531 }
3532 
3533 /*!
3534  \brief Set 1 bit in a block
3535  @ingroup bitfunc
3536 */
3538 void clear_bit(unsigned* dest, unsigned bitpos) BMNOEXCEPT
3539 {
3540  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3541  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3542  nbit &= bm::set_word_mask;
3543  dest[nword] &= ~(unsigned(1u << nbit));
3544 }
3545 
3546 /*!
3547  \brief Test 1 bit in a block
3548 
3549  @ingroup bitfunc
3550 */
3552 unsigned test_bit(const unsigned* block, unsigned bitpos) BMNOEXCEPT
3553 {
3554  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3555  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3556  nbit &= bm::set_word_mask;
3557  return (block[nword] >> nbit) & 1u;
3558 }
3559 
3560 
3561 /*!
3562  \brief Sets bits to 1 in the bitblock.
3563  \param dest - Bitset buffer.
3564  \param bitpos - Offset of the start bit.
3565  \param bitcount - number of bits to set.
3566 
3567  @ingroup bitfunc
3568 */
3569 inline
3570 void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
3571 {
3572  const unsigned maskFF = ~0u;
3573 
3574  dest += unsigned(bitpos >> bm::set_word_shift); // nword
3575  bitpos &= bm::set_word_mask;
3576 
3577  if (bitcount == 1u) // special case (only 1 bit to set)
3578  {
3579  *dest |= (1u << bitpos);
3580  return;
3581  }
3582 
3583  if (bitpos) // starting pos is not aligned
3584  {
3585  unsigned mask_r = maskFF << bitpos;
3586  unsigned right_margin = bitpos + bitcount;
3587  if (right_margin < 32)
3588  {
3589  *dest |= (maskFF >> (32 - right_margin)) & mask_r;
3590  return;
3591  }
3592  *dest++ |= mask_r;
3593  bitcount -= 32 - bitpos;
3594  }
3595  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3596  dest[0] = dest[1] = maskFF;
3597  if (bitcount >= 32)
3598  {
3599  *dest++ = maskFF; bitcount -= 32;
3600  }
3601  if (bitcount)
3602  {
3603  *dest |= maskFF >> (32 - bitcount);
3604  }
3605 }
3606 
3607 
3608 /*!
3609  \brief SUB (AND NOT) bit interval to 1 in the bitblock.
3610  \param dest - Bitset buffer.
3611  \param bitpos - Offset of the start bit.
3612  \param bitcount - number of bits to set.
3613 
3614  @ingroup bitfunc
3615 */
3616 inline
3617 void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount) BMNOEXCEPT
3618 {
3619  const unsigned maskFF = ~0u;
3620 
3621  dest += unsigned(bitpos >> bm::set_word_shift); // nword
3622  bitpos &= bm::set_word_mask;
3623 
3624  if (bitcount == 1u) // special case (only 1 bit to set)
3625  {
3626  *dest &= ~(1u << bitpos);
3627  return;
3628  }
3629 
3630  if (bitpos) // starting pos is not aligned
3631  {
3632  unsigned mask_r = maskFF << bitpos;
3633  unsigned right_margin = bitpos + bitcount;
3634  if (right_margin < 32)
3635  {
3636  *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
3637  return;
3638  }
3639  *dest++ &= ~mask_r;
3640  bitcount -= 32 - bitpos;
3641  }
3642  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3643  dest[0] = dest[1] = 0u;
3644  if (bitcount >= 32)
3645  {
3646  *dest++ = 0u; bitcount -= 32;
3647  }
3648  if (bitcount)
3649  {
3650  *dest &= ~(maskFF >> (32 - bitcount));
3651  }
3652 }
3653 
3654 
3655 
3656 /*!
3657  \brief XOR bit interval to 1 in the bitblock.
3658  \param dest - Bitset buffer.
3659  \param bitpos - Offset of the start bit.
3660  \param bitcount - number of bits to set.
3661 
3662  @ingroup bitfunc
3663 */
3664 inline void xor_bit_block(unsigned* dest,
3665  unsigned bitpos,
3666  unsigned bitcount) BMNOEXCEPT
3667 {
3668  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
3669  unsigned nword = unsigned(nbit >> bm::set_word_shift);
3670  nbit &= bm::set_word_mask;
3671 
3672  bm::word_t* word = dest + nword;
3673 
3674  if (bitcount == 1) // special case (only 1 bit to set)
3675  {
3676  *word ^= unsigned(1 << nbit);
3677  return;
3678  }
3679 
3680  if (nbit) // starting position is not aligned
3681  {
3682  unsigned right_margin = nbit + bitcount;
3683 
3684  // here we checking if we setting bits only in the current
3685  // word. Example: 00111000000000000000000000000000 (32 bits word)
3686 
3687  if (right_margin < 32)
3688  {
3689  unsigned mask_r = bm::mask_r_u32(nbit);
3690  unsigned mask_l = bm::mask_l_u32(right_margin-1);
3691  unsigned mask = mask_r & mask_l;
3692  *word ^= mask;
3693  return;
3694  }
3695  *word ^= bm::mask_r_u32(nbit);
3696  bitcount -= 32 - nbit;
3697  ++word;
3698  }
3699  for ( ;bitcount >= 64; bitcount-=64, word+=2)
3700  {
3701  word[0] ^= ~0u; word[1] ^= ~0u;
3702  }
3703  if (bitcount >= 32)
3704  {
3705  *word++ ^= ~0u; bitcount -= 32;
3706  }
3707  if (bitcount)
3708  {
3709  *word ^= bm::mask_l_u32(bitcount-1);
3710  }
3711 }
3712 
3713 
3714 /*!
3715  \brief SUB (AND NOT) GAP block to bitblock.
3716  \param dest - bitblock buffer pointer.
3717  \param pcurr - GAP buffer pointer.
3718 
3719  @ingroup gapfunc
3720 */
3721 template<typename T>
3722 void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
3723  const T* BMRESTRICT pcurr) BMNOEXCEPT
3724 {
3725  BM_ASSERT(dest && pcurr);
3726 
3727  const T* pend = pcurr + (*pcurr >> 3);
3728  if (*pcurr & 1) // Starts with 1
3729  {
3730  bm::sub_bit_block(dest, 0, 1 + pcurr[1]);
3731  ++pcurr;
3732  }
3733  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3734  {
3735  BM_ASSERT(*pcurr > pcurr[-1]);
3736  bm::sub_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3737  }
3738 }
3739 
3740 
3741 /*!
3742  \brief SUB (AND NOT) GAP block to bitblock with digest assist
3743 
3744  \param dest - bitblock buffer pointer.
3745  \param pcurr - GAP buffer pointer.
3746  \param digest0 - digest of 0 strides inside bit block
3747 
3748  @ingroup gapfunc
3749 */
3750 template<typename T>
3751 void gap_sub_to_bitset(unsigned* BMRESTRICT dest,
3752  const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
3753 {
3754  BM_ASSERT(dest && pcurr);
3755 
3756  const T* pend = pcurr + (*pcurr >> 3);
3757  if (*pcurr & 1) // Starts with 1
3758  {
3759  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]);
3760  if (!all_zero)
3761  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3762  pcurr += 3;
3763  }
3764  else
3765  pcurr += 2;
3766 
3767  // wind forward to digest start
3768  {
3769  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3770  unsigned start_pos = tz << set_block_digest_pos_shift;
3771  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3772  {
3773  if (*pcurr >= start_pos)
3774  break;
3775  }
3776  }
3777 
3778  unsigned lz = bm::count_leading_zeros_u64(digest0);
3779  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3780 
3781  unsigned bc, pos;
3782  T prev;
3783  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "1" again
3784  {
3785  BM_ASSERT(*pcurr > *(pcurr-1));
3786  prev = pcurr[-1];
3787  bc = *pcurr - prev;
3788  pos = 1u + prev;
3789 
3790  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3791  if (!all_zero)
3792  bm::sub_bit_block(dest, pos, bc);
3793 
3794  if (pos > stop_pos)
3795  break; // early break is possible based on digest tail
3796 
3797  } // for
3798 }
3799 
3800 
3801 
3802 /*!
3803  \brief XOR GAP block to bitblock.
3804  \param dest - bitblock buffer pointer.
3805  \param pcurr - GAP buffer pointer.
3806 
3807  @ingroup gapfunc
3808 */
3809 template<typename T>
3810 void gap_xor_to_bitset(unsigned* BMRESTRICT dest,
3811  const T* BMRESTRICT pcurr) BMNOEXCEPT
3812 {
3813  BM_ASSERT(dest && pcurr);
3814 
3815  const T* pend = pcurr + (*pcurr >> 3);
3816  if (*pcurr & 1) // Starts with 1
3817  {
3818  bm::xor_bit_block(dest, 0, 1 + pcurr[1]);
3819  ++pcurr;
3820  }
3821  for (pcurr += 2; pcurr <= pend; pcurr += 2)
3822  {
3823  BM_ASSERT(*pcurr > pcurr[-1]);
3824  bm::xor_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
3825  }
3826 }
3827 
3828 
3829 /*!
3830  \brief Adds(OR) GAP block to bitblock.
3831  \param dest - bitblock buffer pointer.
3832  \param pcurr - GAP buffer pointer.
3833  \param len - gap length
3834 
3835  @ingroup gapfunc
3836 */
3837 template<typename T>
3838 void gap_add_to_bitset(unsigned* BMRESTRICT dest,
3839  const T* BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
3840 {
3841  BM_ASSERT(dest && pcurr);
3842 
3843  const T* pend = pcurr + len;
3844  if (*pcurr & 1) // Starts with 1
3845  {
3846  bm::or_bit_block(dest, 0, 1 + pcurr[1]);
3847  pcurr += 3;
3848  }
3849  else
3850  pcurr += 2;
3851 
3852  unsigned bc, pos;
3853  for (; pcurr <= pend; )
3854  {
3855  BM_ASSERT(*pcurr > pcurr[-1]);
3856  pos = 1u + pcurr[-1];
3857  bc = *pcurr - pcurr[-1];
3858  pcurr += 2;
3859  bm::or_bit_block(dest, pos, bc);
3860  }
3861 }
3862 
3863 
3864 /*!
3865  \brief Adds(OR) GAP block to bitblock.
3866  \param dest - bitblock buffer pointer.
3867  \param pcurr - GAP buffer pointer.
3868 
3869  @ingroup gapfunc
3870 */
3871 template<typename T>
3872 void gap_add_to_bitset(unsigned* BMRESTRICT dest,
3873  const T* BMRESTRICT pcurr) BMNOEXCEPT
3874 {
3875  unsigned len = (*pcurr >> 3);
3876  gap_add_to_bitset(dest, pcurr, len);
3877 }
3878 
3879 
3880 /*!
3881  \brief ANDs GAP block to bitblock.
3882  \param dest - bitblock buffer pointer.
3883  \param pcurr - GAP buffer pointer.
3884 
3885  @ingroup gapfunc
3886 */
3887 template<typename T>
3888 void gap_and_to_bitset(unsigned* BMRESTRICT dest,
3889  const T* BMRESTRICT pcurr) BMNOEXCEPT
3890 {
3891  BM_ASSERT(dest && pcurr);
3892 
3893  const T* pend = pcurr + (*pcurr >> 3);
3894  if (!(*pcurr & 1) ) // Starts with 0
3895  {
3896  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3897  pcurr += 3;
3898  }
3899  else
3900  pcurr += 2;
3901 
3902  unsigned bc, pos;
3903  for (; pcurr <= pend; ) // now we are in GAP "0" again
3904  {
3905  BM_ASSERT(*pcurr > *(pcurr-1));
3906  pos = 1u + pcurr[-1];
3907  bc = *pcurr - pcurr[-1];
3908  pcurr += 2;
3909  bm::sub_bit_block(dest, pos, bc);
3910  }
3911 }
3912 
3913 
3914 /*!
3915  \brief ANDs GAP block to bitblock with digest assist
3916  \param dest - bitblock buffer pointer.
3917  \param pcurr - GAP buffer pointer.
3918  \param digest0 - digest of 0 strides for the destination
3919 
3920  @ingroup gapfunc
3921 */
3922 template<typename T>
3923 void gap_and_to_bitset(unsigned* BMRESTRICT dest,
3924  const T* BMRESTRICT pcurr, bm::id64_t digest0) BMNOEXCEPT
3925 {
3926  BM_ASSERT(dest && pcurr);
3927  if (!digest0)
3928  return;
3929 
3930  const T* pend = pcurr + (*pcurr >> 3);
3931  if (!(*pcurr & 1) ) // Starts with 0
3932  {
3933  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]);
3934  if (!all_zero)
3935  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
3936  pcurr += 3;
3937  }
3938  else
3939  pcurr += 2;
3940 
3941  // wind forward to digest start
3942  {
3943  unsigned tz = bm::count_trailing_zeros_u64(digest0);
3944  unsigned start_pos = tz << set_block_digest_pos_shift;
3945  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
3946  {
3947  if (*pcurr >= start_pos)
3948  break;
3949  }
3950  }
3951 
3952  unsigned lz = bm::count_leading_zeros_u64(digest0);
3953  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
3954 
3955  unsigned bc, pos;
3956  T prev;
3957  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0" again
3958  {
3959  BM_ASSERT(*pcurr > *(pcurr-1));
3960 
3961  prev = pcurr[-1];
3962  bc = *pcurr - prev;
3963  pos = 1u + prev;
3964 
3965  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
3966  if (!all_zero)
3967  bm::sub_bit_block(dest, pos, bc);
3968 
3969  if (pos > stop_pos) // early break is possible based on digest tail
3970  break;
3971 
3972  } // for
3973 }
3974 
3975 
3976 /*!
3977  \brief Compute bitcount of bit block AND masked by GAP block
3978  \param block - bitblock buffer pointer
3979  \param pcurr - GAP buffer pointer
3980  \return bitcount - cardinality of the AND product
3981 
3982  @ingroup gapfunc
3983 */
3984 template<typename T>
3986  const T* BMRESTRICT pcurr) BMNOEXCEPT
3987 {
3988  BM_ASSERT(block);
3989  const T* pend = pcurr + (*pcurr >> 3);
3990  bm::id_t count = 0;
3991  if (*pcurr & 1) // Starts with 1
3992  {
3993  count += bm::bit_block_calc_count_range(block, 0, pcurr[1]);
3994  ++pcurr;
3995  }
3996  for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
3997  {
3998  count += bm::bit_block_calc_count_range(block, pcurr[-1]+1, *pcurr);
3999  }
4000  return count;
4001 }
4002 
4003 
4004 /*!
4005  \brief Bitcount test of bit block AND masked by GAP block.
4006  \param block - bitblock buffer pointer
4007  \param pcurr - GAP buffer pointer
4008  \return non-zero value if AND produces any result
4009 
4010  @ingroup gapfunc
4011 */
4012 template<typename T>
4014  const T* BMRESTRICT pcurr) BMNOEXCEPT
4015 {
4016  BM_ASSERT(block);
4017 
4018  const T* pend = pcurr + (*pcurr >> 3);
4019  bm::id_t count = 0;
4020  if (*pcurr & 1) // Starts with 1
4021  {
4022  count = bm::bit_block_any_range(block, 0, pcurr[1]);
4023  ++pcurr;
4024  }
4025  for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
4026  {
4027  count = bm::bit_block_any_range(block, pcurr[-1]+1, *pcurr);
4028  }
4029  return count;
4030 }
4031 
4032 
4033 
4034 /*!
4035  \brief Compute bitcount of bit block SUB masked by GAP block.
4036  \param block - bitblock buffer pointer.
4037  \param buf - GAP buffer pointer.
4038  \return bit-count result of AND NOT operation
4039 
4040  @ingroup gapfunc
4041 */
4042 template<typename T>
4044  const T* BMRESTRICT buf) BMNOEXCEPT
4045 {
4046  BM_ASSERT(block);
4047 
4048  const T* pcurr = buf;
4049  const T* pend = pcurr + (*pcurr >> 3);
4050  ++pcurr;
4051 
4052  bm::id_t count = 0;
4053 
4054  if (!(*buf & 1)) // Starts with 0
4055  {
4056  count += bit_block_calc_count_range(block, 0, *pcurr);
4057  ++pcurr;
4058  }
4059  ++pcurr; // now we are in GAP "0" again
4060 
4061  for (;pcurr <= pend; pcurr+=2)
4062  {
4063  count += bm::bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr);
4064  }
4065  return count;
4066 }
4067 
4068 
4069 /*!
4070  \brief Compute bitcount test of bit block SUB masked by GAP block
4071  \param block - bitblock buffer pointer
4072  \param buf - GAP buffer pointer
4073  \return non-zero value if AND NOT produces any 1 bits
4074 
4075  @ingroup gapfunc
4076 */
4077 template<typename T>
4079  const T* BMRESTRICT buf) BMNOEXCEPT
4080 {
4081  BM_ASSERT(block);
4082 
4083  const T* pcurr = buf;
4084  const T* pend = pcurr + (*pcurr >> 3);
4085  ++pcurr;
4086 
4087  bm::id_t count = 0;
4088 
4089  if (!(*buf & 1)) // Starts with 0
4090  {
4091  count += bit_block_any_range(block, 0, *pcurr);
4092  if (count)
4093  return count;
4094  ++pcurr;
4095  }
4096  ++pcurr; // now we are in GAP "0" again
4097 
4098  for (; !count && pcurr <= pend; pcurr+=2)
4099  {
4100  count += bm::bit_block_any_range(block, *(pcurr-1)+1, *pcurr);
4101  }
4102  return count;
4103 }
4104 
4105 
4106 
4107 /*!
4108  \brief Compute bitcount of bit block XOR masked by GAP block
4109  \param block - bitblock buffer pointer
4110  \param buf - GAP buffer pointer
4111  \return bit count value of XOR operation
4112 
4113  @ingroup gapfunc
4114 */
4115 template<typename T>
4117  const T* BMRESTRICT buf) BMNOEXCEPT
4118 {
4119  BM_ASSERT(block);
4120 
4121  const T* pcurr = buf;
4122  const T* pend = pcurr + (*pcurr >> 3);
4123  ++pcurr;
4124 
4125  unsigned bitval = *buf & 1;
4126 
4127  bm::id_t count = bm::bit_block_calc_count_range(block, 0, *pcurr);
4128  if (bitval)
4129  {
4130  count = *pcurr + 1 - count;
4131  }
4132 
4133  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
4134  {
4135  T prev = (T)(*(pcurr-1)+1);
4136  bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr);
4137 
4138  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
4139  c = (*pcurr - prev + 1) - c;
4140  count += c;
4141  }
4142  return count;
4143 }
4144 
4145 /*!
4146  \brief Compute bitcount test of bit block XOR masked by GAP block.
4147  \param block - bitblock buffer pointer
4148  \param buf - GAP buffer pointer
4149  \return non-zero value if XOR returns anything
4150 
4151  @ingroup gapfunc
4152 */
4153 template<typename T>
4155  const T* BMRESTRICT buf) BMNOEXCEPT
4156 {
4157  BM_ASSERT(block);
4158 
4159  const T* pcurr = buf;
4160  const T* pend = pcurr + (*pcurr >> 3);
4161  ++pcurr;
4162 
4163  unsigned bitval = *buf & 1;
4164 
4165  bm::id_t count = bit_block_any_range(block, 0, *pcurr);
4166  if (bitval)
4167  count = *pcurr + 1 - count;
4168 
4169  for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
4170  {
4171  T prev = (T)(*(pcurr-1)+1);
4172  bm::id_t c = bit_block_any_range(block, prev, *pcurr);
4173 
4174  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
4175  c = (*pcurr - prev + 1) - c;
4176  count += c;
4177  }
4178  return count;
4179 }
4180 
4181 
4182 
4183 /*!
4184  \brief Compute bitcount of bit block OR masked by GAP block.
4185  \param block - bitblock buffer pointer.
4186  \param buf - GAP buffer pointer.
4187  \return bit count of OR
4188 
4189  @ingroup gapfunc
4190 */
4191 template<typename T>
4193  const T* BMRESTRICT buf) BMNOEXCEPT
4194 {
4195  BM_ASSERT(block);
4196  const T* pcurr = buf;
4197  const T* pend = pcurr + (*pcurr >> 3);
4198  ++pcurr;
4199 
4200  unsigned bitval = *buf & 1;
4201 
4202  bm::id_t count = bitval ? *pcurr + 1
4203  : bm::bit_block_calc_count_range(block, 0, *pcurr);
4204  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
4205  {
4206  T prev = (T)(*(pcurr-1)+1);
4207  bm::id_t c =
4208  bitval ? (*pcurr - prev + 1)
4209  : bm::bit_block_calc_count_range(block, prev, *pcurr);
4210  count += c;
4211  }
4212  return count;
4213 }
4214 
4215 /*!
4216  \brief Compute bitcount test of bit block OR masked by GAP block
4217  \param block - bitblock buffer pointer
4218  \param buf - GAP buffer pointer
4219  \return non zero value if union (OR) returns anything
4220 
4221  @ingroup gapfunc
4222 */
4223 template<typename T>
4224 bm::id_t gap_bitset_or_any(const unsigned* BMRESTRICT block,
4225  const T* BMRESTRICT buf) BMNOEXCEPT
4226 {
4227  bool b = !bm::gap_is_all_zero(buf) ||
4228  !bm::bit_is_all_zero(block);
4229  return b;
4230 }
4231 
4232 
4233 
4234 /*!
4235  \brief Bitblock memset operation.
4236 
4237  \param dst - destination block.
4238  \param value - value to set.
4239 
4240  @ingroup bitfunc
4241 */
4242 inline
4243 void bit_block_set(bm::word_t* BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
4244 {
4245 #ifdef BMVECTOPT
4246  VECT_SET_BLOCK(dst, value);
4247 #else
4248  ::memset(dst, int(value), bm::set_block_size * sizeof(bm::word_t));
4249 #endif
4250 }
4251 
4252 
4253 /*!
4254  \brief GAP block to bitblock conversion.
4255  \param dest - bitblock buffer pointer.
4256  \param buf - GAP buffer pointer.
4257 
4258  @ingroup gapfunc
4259 */
4260 template<typename T>
4261 void gap_convert_to_bitset(unsigned* BMRESTRICT dest,
4262  const T* BMRESTRICT buf) BMNOEXCEPT
4263 {
4264  bm::bit_block_set(dest, 0);
4265  bm::gap_add_to_bitset(dest, buf);
4266 }
4267 
4268 
4269 
4270 /*!
4271  \brief Smart GAP block to bitblock conversion.
4272 
4273  Checks if GAP block is ALL-ZERO or ALL-ON. In those cases returns
4274  pointer on special static bitblocks.
4275 
4276  \param dest - bitblock buffer pointer.
4277  \param buf - GAP buffer pointer.
4278  \param set_max - max possible bitset length
4279 
4280  @ingroup gapfunc
4281 */
4282 template<typename T>
4283 unsigned* gap_convert_to_bitset_smart(unsigned* BMRESTRICT dest,
4284  const T* BMRESTRICT buf,
4285  id_t set_max) BMNOEXCEPT
4286 {
4287  if (buf[1] == set_max - 1)
4288  return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
4289  bm::gap_convert_to_bitset(dest, buf);
4290  return dest;
4291 }
4292 
4293 
4294 /*!
4295  \brief Calculates sum of all words in GAP block. (For debugging purposes)
4296  \note For debugging and testing ONLY.
4297  \param buf - GAP buffer pointer.
4298  \return Sum of all words.
4299 
4300  @ingroup gapfunc
4301  @internal
4302 */
4303 template<typename T>
4304 unsigned gap_control_sum(const T* buf) BMNOEXCEPT
4305 {
4306  unsigned end = *buf >> 3;
4307 
4308  const T* pcurr = buf;
4309  const T* pend = pcurr + (*pcurr >> 3);
4310  ++pcurr;
4311 
4312  if (*buf & 1) // Starts with 1
4313  {
4314  ++pcurr;
4315  }
4316  ++pcurr; // now we are in GAP "1" again
4317  while (pcurr <= pend)
4318  {
4319  BM_ASSERT(*pcurr > *(pcurr-1));
4320  pcurr += 2;
4321  }
4322  return buf[end];
4323 }
4324 
4325 
4326 /*!
4327  \brief Sets all bits to 0 or 1 (GAP)
4328  \param buf - GAP buffer pointer.
4329  \param set_max - max possible bitset length
4330  \param value - value to set
4331 
4332  @ingroup gapfunc
4333 */
4334 template<class T>
4335 void gap_set_all(T* buf, unsigned set_max, unsigned value) BMNOEXCEPT
4336 {
4337  BM_ASSERT(value == 0 || value == 1);
4338  *buf = (T)((*buf & 6u) + (1u << 3) + value);
4339  *(++buf) = (T)(set_max - 1);
4340 }
4341 
4342 
4343 /*!
4344  \brief Init gap block so it has block in it (can be whole block)
4345  \param buf - GAP buffer pointer.
4346  \param from - one block start
4347  \param to - one block end
4348  \param value - (block value)1 or 0
4349 
4350  @ingroup gapfunc
4351 */
4352 template<class T>
4354  T from,
4355  T to,
4356  T value) BMNOEXCEPT
4357 {
4358  BM_ASSERT(value == 0 || value == 1);
4359  const unsigned set_max = bm::bits_in_block;
4360 
4361  unsigned gap_len;
4362  if (from == 0)
4363  {
4364  if (to == set_max - 1)
4365  {
4366  bm::gap_set_all(buf, set_max, value);
4367  }
4368  else
4369  {
4370  gap_len = 2;
4371  buf[1] = (T)to;
4372  buf[2] = (T)(set_max - 1);
4373  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
4374  }
4375  return;
4376  }
4377  // from != 0
4378 
4379  value = !value;
4380  if (to == set_max - 1)
4381  {
4382  gap_len = 2;
4383  buf[1] = (T)(from - 1);
4384  buf[2] = (T)(set_max - 1);
4385  }
4386  else
4387  {
4388  gap_len = 3;
4389  buf[1] = (T) (from - 1);
4390  buf[2] = (T) to;
4391  buf[3] = (T)(set_max - 1);
4392  }
4393  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
4394 }
4395 
4396 
4397 /*!
4398  \brief Inverts all bits in the GAP buffer.
4399  \param buf - GAP buffer pointer.
4400 
4401  @ingroup gapfunc
4402 */
4403 template<typename T> void gap_invert(T* buf) BMNOEXCEPT
4404 {
4405  *buf ^= 1;
4406 }
4407 
4408 
4409 #ifdef __GNUG__
4410 #pragma GCC diagnostic push
4411 #pragma GCC diagnostic ignored "-Wconversion"
4412 #endif
4413 
4414 /*!
4415  \brief Sets GAP block capacity level.
4416  \param buf - GAP buffer pointer.
4417  \param level new GAP block capacity level.
4418 
4419  @ingroup gapfunc
4420 */
4421 template<typename T>
4422 void set_gap_level(T* buf, int level) BMNOEXCEPT
4423 {
4424  BM_ASSERT(level >= 0);
4425  BM_ASSERT(unsigned(level) < bm::gap_levels);
4426 
4427  *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
4428 }
4429 #ifdef __GNUG__
4430 #pragma GCC diagnostic pop
4431 #endif
4432 
4433 
4434 
4435 /*!
4436  \brief Calculates GAP block capacity level.
4437  \param len - GAP buffer length.
4438  \param glevel_len - GAP lengths table
4439  \return GAP block capacity level.
4440  -1 if block does not fit any level.
4441  @ingroup gapfunc
4442 */
4443 template<typename T>
4444 int gap_calc_level(unsigned len, const T* glevel_len) BMNOEXCEPT
4445 {
4446  if (len <= unsigned(glevel_len[0]-4)) return 0;
4447  if (len <= unsigned(glevel_len[1]-4)) return 1;
4448  if (len <= unsigned(glevel_len[2]-4)) return 2;
4449  if (len <= unsigned(glevel_len[3]-4)) return 3;
4450 
4451  BM_ASSERT(bm::gap_levels == 4);
4452  return -1;
4453 }
4454 
4455 /*! @brief Returns number of free elements in GAP block array.
4456  Difference between GAP block capacity on this level and actual GAP length.
4457 
4458  @param buf - GAP buffer pointer
4459  @param glevel_len - GAP lengths table
4460 
4461  @return Number of free GAP elements
4462  @ingroup gapfunc
4463 */
4464 template<typename T>
4465 inline unsigned gap_free_elements(const T* BMRESTRICT buf,
4466  const T* BMRESTRICT glevel_len) BMNOEXCEPT
4467 {
4468  unsigned len = bm::gap_length(buf);
4469  unsigned capacity = bm::gap_capacity(buf, glevel_len);
4470  return capacity - len;
4471 }
4472 
4473 /*!
4474  \brief Lexicographical comparison of BIT buffers.
4475  \param buf1 - First buffer pointer.
4476  \param buf2 - Second buffer pointer.
4477  \param len - Buffer length in elements (T).
4478  \return <0 - less, =0 - equal, >0 - greater.
4479 
4480  @ingroup bitfunc
4481 */
4482 template<typename T>
4483 int bitcmp(const T* buf1, const T* buf2, unsigned len) BMNOEXCEPT
4484 {
4485  BM_ASSERT(len);
4486  const T* pend1 = buf1 + len;
4487  do
4488  {
4489  T w1 = *buf1++;
4490  T w2 = *buf2++;
4491  T diff = w1 ^ w2;
4492  if (diff)
4493  return (w1 & diff & -diff) ? 1 : -1;
4494  } while (buf1 < pend1);
4495  return 0;
4496 }
4497 
4498 /*!
4499  \brief Find first bit which is different between two bit-blocks
4500  \param blk1 - block 1
4501  \param blk2 - block 2
4502  \param pos - out - position of difference (undefined if blocks are equal)
4503  \return true if difference was found
4504 
4505  @ingroup bitfunc
4506 */
4507 inline
4509  const bm::word_t* BMRESTRICT blk2,
4510  unsigned* BMRESTRICT pos) BMNOEXCEPT
4511 {
4512  BM_ASSERT(blk1 && blk2 && pos);
4513 #ifdef VECT_BIT_FIND_DIFF
4514  bool f = VECT_BIT_FIND_DIFF(blk1, blk2, pos);
4515  return f;
4516 #else
4517  #ifdef BM64OPT
4518  BM_ASSERT(sizeof(bm::wordop_t) == 8);
4519 
4521  (const bm::bit_block_t::bunion_t*)(blk1);
4523  (const bm::bit_block_t::bunion_t*)(blk2);
4524 
4525  for (unsigned i = 0; i < bm::set_block_size/2; ++i)
4526  {
4527  bm::wordop_t w1 = b1_u->w64[i];
4528  bm::wordop_t w2 = b2_u->w64[i];
4529  bm::wordop_t diff = w1 ^ w2;
4530  if (diff)
4531  {
4532  unsigned idx = bm::count_trailing_zeros_u64(diff);
4533  *pos = unsigned(idx + (i * 8u * unsigned(sizeof(bm::wordop_t))));
4534  return true;
4535  }
4536  } // for
4537  #else
4538  for (unsigned i = 0; i < bm::set_block_size; ++i)
4539  {
4540  bm::word_t w1 = blk1[i]; bm::word_t w2 = blk2[i];
4541  bm::word_t diff = w1 ^ w2;
4542  if (diff)
4543  {
4544  unsigned idx = bm::bit_scan_forward32(diff); // trailing zeros
4545  *pos = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
4546  return true;
4547  }
4548  } // for
4549  #endif
4550 #endif
4551  return false;
4552 }
4553 
4554 
4555 //#ifndef BMAVX2OPT
4556 
4557 /*!
4558  \brief Converts bit block to GAP.
4559  \param dest - Destinatio GAP buffer.
4560  \param block - Source bitblock buffer.
4561  \param dest_len - length of the destination buffer.
4562  \return New length of GAP block or 0 if conversion failed
4563  (insufficicent space).
4564 
4565  @ingroup gapfunc
4566 */
4567 inline
4569  const unsigned* BMRESTRICT block,
4570  unsigned dest_len) BMNOEXCEPT
4571 {
4572  const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
4573  gap_word_t* BMRESTRICT pcurr = dest;
4574  gap_word_t* BMRESTRICT end = dest + dest_len; (void)end;
4575 
4576  unsigned bitval = (*block) & 1u;
4577  *pcurr++ = bm::gap_word_t(bitval);
4578  *pcurr = 0;
4579  unsigned bit_idx = 0;
4580 
4581  do
4582  {
4583  unsigned val = *block;
4584  while (!val || val == ~0u)
4585  {
4586  if (bitval != unsigned(bool(val)))
4587  {
4588  *pcurr++ = (gap_word_t)(bit_idx-1);
4589  bitval ^= 1u;
4590  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4591  BM_ASSERT(pcurr != end);
4592  }
4593  bit_idx += unsigned(sizeof(*block) * 8);
4594  if (++block >= block_end)
4595  goto complete;
4596  val = *block;
4597  } // while
4598 
4599  // process "0100011" word
4600  //
4601  unsigned bits_consumed = 0;
4602  do
4603  {
4604  unsigned tz = 1u;
4605  if (bitval != (val & 1u))
4606  {
4607  *pcurr++ = (gap_word_t)(bit_idx-1);
4608  bitval ^= 1u;
4609  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4610  BM_ASSERT(pcurr != end);
4611  }
4612  else // match, find the next idx
4613  {
4614  tz = bm::bit_scan_forward32(bitval ? ~val : val);
4615  // possible alternative:
4616  // tz = bm::count_trailing_zeros(bitval ? ~val : val);
4617  }
4618 
4619  bits_consumed += tz;
4620  bit_idx += tz;
4621  val >>= tz;
4622 
4623  if (!val)
4624  {
4625  if (bits_consumed < 32u)
4626  {
4627  *pcurr++ = (gap_word_t)(bit_idx-1);
4628  bitval ^= 1u;
4629  bit_idx += 32u - bits_consumed;
4630  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4631  BM_ASSERT(pcurr != end);
4632  }
4633  break;
4634  }
4635  } while (1);
4636 
4637  } while(++block < block_end);
4638 
4639 complete:
4640  *pcurr = (gap_word_t)(bit_idx-1);
4641  unsigned len = (unsigned)(pcurr - dest);
4642  *dest = (gap_word_t)((*dest & 7) + (len << 3));
4643  return len;
4644 }
4645 //#endif
4646 
4647 /**
4648  Convert bit block to GAP representation
4649  @internal
4650  @ingroup bitfunc
4651 */
4652 inline
4654  const unsigned* BMRESTRICT block,
4655  unsigned dest_len) BMNOEXCEPT
4656 {
4657 #if defined(VECT_BIT_TO_GAP)
4658  return VECT_BIT_TO_GAP(dest, block, dest_len);
4659 #else
4660  return bm::bit_block_to_gap(dest, block, dest_len);
4661 #endif
4662 }
4663 
4664 
4665 /*!
4666  \brief Iterate gap block as delta-bits with a functor
4667  @ingroup gapfunc
4668 */
4669 template<class T, class F>
4670 void for_each_gap_dbit(const T* buf, F& func)
4671 {
4672  const T* pcurr = buf;
4673  const T* pend = pcurr + (*pcurr >> 3);
4674 
4675  ++pcurr;
4676 
4677  unsigned prev = 0;
4678  unsigned first_inc;
4679 
4680  if (*buf & 1)
4681  {
4682  first_inc = 0;
4683  unsigned to = *pcurr;
4684  for (unsigned i = 0; i <= to; ++i)
4685  {
4686  func(1);
4687  }
4688  prev = to;
4689  ++pcurr;
4690  }
4691  else
4692  {
4693  first_inc = 1;
4694  }
4695  ++pcurr; // set GAP to 1
4696 
4697  while (pcurr <= pend)
4698  {
4699  unsigned from = *(pcurr-1)+1;
4700  unsigned to = *pcurr;
4701  if (first_inc)
4702  {
4703  func(from - prev + first_inc);
4704  first_inc = 0;
4705  }
4706  else
4707  {
4708  func(from - prev);
4709  }
4710 
4711  for (unsigned i = from+1; i <= to; ++i)
4712  {
4713  func(1);
4714  }
4715  prev = to;
4716  pcurr += 2; // jump to the next positive GAP
4717  }
4718 }
4719 
4720 /*!
4721  \brief Convert gap block into array of ints corresponding to 1 bits
4722  @ingroup gapfunc
4723 */
4724 template<typename D, typename T>
4726  const T* BMRESTRICT buf,
4727  unsigned dest_len,
4728  bool invert = false) BMNOEXCEPT
4729 {
4730  const T* BMRESTRICT pcurr = buf;
4731  const T* pend = pcurr + (*pcurr >> 3);
4732 
4733  D* BMRESTRICT dest_curr = dest;
4734  ++pcurr;
4735 
4736  int bitval = (*buf) & 1;
4737  if (invert)
4738  bitval = !bitval; // invert the GAP buffer
4739 
4740  if (bitval)
4741  {
4742  if (unsigned(*pcurr + 1) >= dest_len)
4743  return 0; // insufficient space
4744  dest_len -= *pcurr;
4745  T to = *pcurr;
4746  for (T i = 0; ;++i)
4747  {
4748  *dest_curr++ = i;
4749  if (i == to) break;
4750  }
4751  ++pcurr;
4752  }
4753  ++pcurr; // set GAP to 1
4754 
4755  while (pcurr <= pend)
4756  {
4757  unsigned pending = *pcurr - *(pcurr-1);
4758  if (pending >= dest_len)
4759  return 0;
4760  dest_len -= pending;
4761  T from = (T)(*(pcurr-1)+1);
4762  T to = *pcurr;
4763  for (T i = from; ;++i)
4764  {
4765  *dest_curr++ = i;
4766  if (i == to) break;
4767  }
4768  pcurr += 2; // jump to the next positive GAP
4769  }
4770  return (D) (dest_curr - dest);
4771 }
4772 
4773 
4774 /*!
4775  @brief Bitcount for bit block without agressive unrolling
4776  @ingroup bitfunc
4777 */
4780  const bm::word_t* BMRESTRICT block_end) BMNOEXCEPT
4781 {
4782  unsigned count = 0;
4783 #ifdef BM64OPT__
4784  do
4785  {
4786  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
4787  (const bm::bit_block_t::bunion_t*)(block);
4788 
4789  bm::id64_t x = src_u->w64[0]; bm::id64_t y = src_u->w64[1];
4790  bm::id64_t u = src_u->w64[2]; bm::id64_t v = src_u->w64[3];
4791 
4792  #if defined(BM_USE_GCC_BUILD)
4793  count += unsigned(__builtin_popcountll(x) + __builtin_popcountll(y)
4794  + __builtin_popcountll(u) + __builtin_popcountll(v));
4795  #else
4796  // 64-bit optimized algorithm. No sparse vect opt
4797  // instead it uses 4-way parallel pipelined version
4798  if (x | y | u | v)
4799  {
4800  unsigned c = bitcount64_4way(x, y, u, v);
4801  BM_ASSERT(c);
4802  count += c;
4803  }
4804  #endif
4805  block += 8;
4806  } while (block < block_end);
4807 #else
4808  do
4809  {
4810  unsigned c1= bm::word_bitcount(*block);
4811  unsigned c2 = bm::word_bitcount(block[1]);
4812  count += c1 + c2;
4813  c1= bm::word_bitcount(block[2]);
4814  c2 = bm::word_bitcount(block[3]);
4815  count += c1 + c2;
4816  block+=4;
4817  } while (block < block_end);
4818 
4819 #endif
4820  return count;
4821 }
4822 
4823 
4824 
4825 /*!
4826  @brief Bitcount for bit block
4827 
4828  Function calculates number of 1 bits in the given array of words.
4829  Make sure the addresses are aligned.
4830 
4831  @ingroup bitfunc
4832 */
4833 inline
4834 bm::id_t bit_block_count(const bm::word_t* block) BMNOEXCEPT
4835 {
4836  const bm::word_t* block_end = block + bm::set_block_size;
4837 #ifdef BMVECTOPT
4838  return VECT_BITCOUNT(block, block_end);
4839 #else
4840  return bm::bit_count_min_unroll(block, block_end);
4841 #endif
4842 }
4843 
4844 /*!
4845  @brief Bitcount for bit block
4846 
4847  Function calculates number of 1 bits in the given array of words.
4848  uses digest to understand zero areas
4849 
4850  @ingroup bitfunc
4851 */
4852 inline
4854  bm::id64_t digest) BMNOEXCEPT
4855 {
4856 #ifdef VECT_BIT_COUNT_DIGEST
4857  return VECT_BIT_COUNT_DIGEST(block, digest);
4858 #else
4859  bm::id_t count = 0;
4860  bm::id64_t d = digest;
4861 
4862  // TODO: use platform neutral bitscan to decode digest here
4863 
4864  while (d)
4865  {
4866  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4867 
4868  unsigned wave = bm::word_bitcount64(t - 1);
4869  unsigned off = wave * bm::set_block_digest_wave_size;
4870 
4871 #ifdef BM64OPT
4872  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
4873  (const bm::bit_block_t::bunion_t*)(&block[off]);
4874  unsigned j = 0;
4875  do
4876  {
4877  bm::id64_t x = src_u->w64[0+j]; bm::id64_t y = src_u->w64[1+j];
4878  bm::id64_t u = src_u->w64[2+j]; bm::id64_t v = src_u->w64[3+j];
4879  #if defined(BM_USE_GCC_BUILD)
4880  count += unsigned(__builtin_popcountll(x) + __builtin_popcountll(y)
4881  + __builtin_popcountll(u) + __builtin_popcountll(v));
4882  #else
4883  // 64-bit optimized algorithm. No sparse vect opt
4884  // instead it uses 4-way parallel pipelined version
4885  if (x | y | u | v)
4886  {
4887  unsigned c = bm::bitcount64_4way(x, y, u, v);
4888  BM_ASSERT(c);
4889  count += c;
4890  }
4891  #endif
4892  j += 4;
4893  } while (j < bm::set_block_digest_wave_size/2);
4894 #else // 32-bit version
4895  const bm::word_t* BMRESTRICT blk = &block[off];
4896  const bm::word_t* BMRESTRICT blk_end = &block[off+bm::set_block_digest_wave_size];
4897  do
4898  {
4899  unsigned c1= bm::word_bitcount(*blk);
4900  unsigned c2 = bm::word_bitcount(blk[1]);
4901  count += c1 + c2;
4902  c1= bm::word_bitcount(blk[2]);
4903  c2 = bm::word_bitcount(blk[3]);
4904  count += c1 + c2;
4905  blk+=4;
4906  } while (blk < blk_end);
4907 #endif
4908 
4909  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4910  } // while
4911  return count;
4912 #endif
4913 }
4914 
4915 
4916 /*!
4917  Function calculates number of times when bit value changed
4918  (1-0 or 0-1).
4919 
4920  For 001 result is 2
4921  010 - 3
4922  011 - 2
4923  111 - 1
4924 
4925  @ingroup bitfunc
4926 */
4927 inline
4929 {
4930  unsigned count = 1;
4931  w ^= (w >> 1);
4932 
4933  count += bm::word_bitcount(w);
4934  count -= (w >> ((sizeof(w) * 8) - 1));
4935  return count;
4936 }
4937 
4938 
4939 /*!
4940  Function calculates number of times when bit value changed
4941  @internal
4942 */
4943 inline
4944 unsigned bit_block_change32(const bm::word_t* block, unsigned size) BMNOEXCEPT
4945 {
4946  unsigned gap_count = 1;
4947 
4948  bm::word_t w, w0, w_prev, w_l;
4949  w = w0 = *block;
4950 
4951  const int w_shift = int(sizeof(w) * 8 - 1);
4952  w ^= (w >> 1);
4953  BM_INCWORD_BITCOUNT(gap_count, w);
4954  gap_count -= (w_prev = (w0 >> w_shift)); // negative value correction
4955 
4956  const bm::word_t* block_end = block + size;
4957  for (++block; block < block_end; ++block)
4958  {
4959  w = w0 = *block;
4960  ++gap_count;
4961  if (!w)
4962  {
4963  gap_count -= !w_prev;
4964  w_prev = 0;
4965  }
4966  else
4967  {
4968  w ^= (w >> 1);
4969  BM_INCWORD_BITCOUNT(gap_count, w);
4970 
4971  w_l = w0 & 1;
4972  gap_count -= (w0 >> w_shift); // negative value correction
4973  gap_count -= !(w_prev ^ w_l); // word border correction
4974 
4975  w_prev = (w0 >> w_shift);
4976  }
4977  } // for
4978  return gap_count;
4979 }
4980 
4981 
4982 /*!
4983  Function calculates basic bit-block statistics
4984  number of times when bit value changed (GAPS)
4985  and population count
4986  @param block - bit-block pointer
4987  @param gc - [output] gap_count
4988  @param bc - [output] bit count
4989  @internal
4990 */
4991 inline
4993  unsigned* BMRESTRICT gc, unsigned* BMRESTRICT bc) BMNOEXCEPT
4994 {
4995  BM_ASSERT(gc);
4996  BM_ASSERT(bc);
4997 
4998  #ifdef VECT_BLOCK_CHANGE_BC
4999  VECT_BLOCK_CHANGE_BC(block, gc, bc);
5000  #else
5001  // TODO: one pass algo
5003  *bc = bm::bit_block_count(block);
5004  #endif
5005 }
5006 
5007 
5008 
5009 /*!
5010  Function calculates number of times when bit value changed
5011  (1-0 or 0-1) in the bit block.
5012 
5013  @param block - bit-block start pointer
5014  @return number of 1-0, 0-1 transitions
5015 
5016  @ingroup bitfunc
5017 */
5018 inline
5019 unsigned bit_block_calc_change(const bm::word_t* block) BMNOEXCEPT
5020 {
5021 #if defined(VECT_BLOCK_CHANGE)
5022  return VECT_BLOCK_CHANGE(block, bm::set_block_size);
5023 #else
5025 #endif
5026 }
5027 
5028 /*!
5029  Check if all bits are 1 in [left, right] range
5030  @ingroup bitfunc
5031 */
5032 inline
5034  bm::word_t left,
5035  bm::word_t right) BMNOEXCEPT
5036 {
5037  BM_ASSERT(left <= right);
5038  BM_ASSERT(right <= bm::gap_max_bits-1);
5039 
5040  unsigned nword, nbit, bitcount, temp;
5041  nbit = left & bm::set_word_mask;
5042  const bm::word_t* word =
5043  block + (nword = unsigned(left >> bm::set_word_shift));
5044  if (left == right) // special case (only 1 bit to check)
5045  return (*word >> nbit) & 1u;
5046 
5047  if (nbit) // starting position is not aligned
5048  {
5049  unsigned right_margin = nbit + right - left;
5050  if (right_margin < 32)
5051  {
5052  unsigned mask_r = bm::mask_r_u32(nbit);
5053  unsigned mask_l = bm::mask_l_u32(right_margin);
5054  unsigned mask = mask_r & mask_l;
5055  return mask == (*word & mask);
5056  }
5057 
5058  unsigned mask_r = bm::mask_r_u32(nbit);
5059  temp = *word & mask_r;
5060  if (temp != mask_r)
5061  return false;
5062  bitcount = (right - left + 1u) - (32 - nbit);
5063  ++word;
5064  }
5065  else
5066  {
5067  bitcount = right - left + 1u;
5068  }
5069 
5070  const bm::word_t maskFF = ~0u;
5071  // loop unrolled to evaluate 4 words at a time
5072  // SIMD showed no advantage, unless evaluate sub-wave intervals
5073  //
5074  #if defined(BM64OPT) || defined(BM64_SSE4) || defined(BMAVX2OPT) || defined(BMAVX512OPT)
5075  const bm::id64_t maskFF64 = ~0ull;
5076  for ( ;bitcount >= 128; bitcount-=128, word+=4)
5077  {
5078  bm::id64_t w64_0 = bm::id64_t(word[0]) + (bm::id64_t(word[1]) << 32);
5079  bm::id64_t w64_1 = bm::id64_t(word[2]) + (bm::id64_t(word[3]) << 32);
5080  if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64))
5081  return false;
5082  } // for
5083  #else
5084  for ( ;bitcount >= 128; bitcount-=128, word+=4)
5085  {
5086  bm::word_t m = (word[0] != maskFF) || (word[1] != maskFF) |
5087  (word[2] != maskFF) || (word[3] != maskFF);
5088  if (m)
5089  return false;
5090  } // for
5091  #endif
5092 
5093  for ( ;bitcount >= 32; bitcount-=32, ++word)
5094  {
5095  if (*word != maskFF)
5096  return false;
5097  } // for
5098  BM_ASSERT(bitcount < 32);
5099 
5100  if (bitcount) // we have a tail to count
5101  {
5102  unsigned mask_l = bm::mask_l_u32(bitcount-1);
5103  temp = *word & mask_l;
5104  if (temp != mask_l)
5105  return false;
5106  }
5107 
5108  return true;
5109 }
5110 
5111 
5112 
5113 
5114 /*!
5115  Function calculates number of 1 bits in the given array of words in
5116  the range between left anf right bits (borders included)
5117  Make sure the addr is aligned.
5118 
5119  @ingroup bitfunc
5120 */
5121 inline
5123  bm::word_t left,
5124  bm::word_t right) BMNOEXCEPT
5125 {
5126  BM_ASSERT(left <= right);
5127  BM_ASSERT(right <= bm::gap_max_bits-1);
5128 
5129  unsigned nword, nbit, bitcount, count;
5130  nbit = left & bm::set_word_mask;
5131  const bm::word_t* word =
5132  block + (nword = unsigned(left >> bm::set_word_shift));
5133  if (left == right) // special case (only 1 bit to check)
5134  {
5135  return (*word >> nbit) & 1u;
5136  }
5137 
5138  count = 0;
5139  bitcount = right - left + 1u;
5140  if (nbit) // starting position is not aligned
5141  {
5142  unsigned right_margin = nbit + right - left;
5143  if (right_margin < 32)
5144  {
5145  unsigned mask_r = bm::mask_r_u32(nbit);
5146  unsigned mask_l = bm::mask_l_u32(right_margin);
5147  unsigned mask = mask_r & mask_l;
5148  return bm::word_bitcount(*word & mask);
5149  }
5150  unsigned mask_r = bm::mask_r_u32(nbit);
5151  count = bm::word_bitcount(*word & mask_r);
5152  bitcount -= 32 - nbit;
5153  ++word;
5154  }
5155 
5156  // now when we are word aligned, we can count bits the usual way
5157  //
5158  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
5159  for ( ;bitcount >= 64; bitcount-=64)
5160  {
5161  bm::id64_t w64 = *((bm::id64_t*)word); // x86 unaligned(!) read
5162  count += unsigned(_mm_popcnt_u64(w64));
5163  word += 2;
5164  }
5165  if (bitcount >= 32)
5166  {
5167  count += bm::word_bitcount(*word++);
5168  bitcount-=32;
5169  }
5170  #else
5171  for ( ;bitcount >= 32; bitcount-=32, ++word)
5172  count += bm::word_bitcount(*word);
5173  #endif
5174  BM_ASSERT(bitcount < 32);
5175 
5176  if (bitcount) // we have a tail to count
5177  {
5178  unsigned mask_l = bm::mask_l_u32(bitcount-1);
5179  count += bm::word_bitcount(*word & mask_l);
5180  }
5181  return count;
5182 }
5183 
5184 /*!
5185  Function calculates number of 1 bits in the given array of words in
5186  the range between 0 anf right bits (borders included)
5187  Make sure the addr is aligned.
5188 
5189  @ingroup bitfunc
5190 */
5191 inline
5193  bm::word_t right) BMNOEXCEPT
5194 {
5195  BM_ASSERT(block);
5196  if (!right) // special case, first bit check
5197  return *block & 1u;
5198  bm::id_t count = 0;
5199 
5200  unsigned bitcount = right + 1;
5201 
5202  // AVX2 or 64-bit loop unroll
5203  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
5205 
5206  __m256i cnt = _mm256_setzero_si256();
5207  bm::id64_t* cnt64;
5208 
5209  for ( ;bitcount >= 256; bitcount -= 256)
5210  {
5211  const __m256i* src = (__m256i*)block;
5212  __m256i xmm256 = _mm256_load_si256(src);
5213  BM_AVX2_BIT_COUNT(bc, xmm256);
5214  cnt = _mm256_add_epi64(cnt, bc);
5215 
5216  block += 8;
5217  } // for
5218  cnt64 = (bm::id64_t*)&cnt;
5219  count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
5220  #endif
5221 
5222  for ( ;bitcount >= 64; bitcount -= 64)
5223  {
5224  bm::id64_t* p = (bm::id64_t*)block;
5225  count += bm::word_bitcount64(*p);
5226  block += 2;
5227  }
5228  if (bitcount >= 32)
5229  {
5230  count += bm::word_bitcount(*block++);
5231  bitcount-=32;
5232  }
5233 
5234  if (bitcount) // we have a tail to count
5235  {
5236  unsigned mask_l = bm::mask_l_u32(bitcount-1);
5237  count += bm::word_bitcount(*block & mask_l);
5238  }
5239  return count;
5240 }
5241 
5242 
5243 
5244 /*!
5245  Cyclic rotation of bit-block left by 1 bit
5246  @ingroup bitfunc
5247 */
5248 inline
5249 void bit_block_rotate_left_1(bm::word_t* block) BMNOEXCEPT
5250 {
5251  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
5252  for (unsigned i = 0; i < bm::set_block_size-1; ++i)
5253  {
5254  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
5255  }
5256  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
5257 }
5258 
5259 /*!
5260  @brief Unrolled cyclic rotation of bit-block left by 1 bit
5261  @param block - bit-block pointer
5262  @ingroup bitfunc
5263 */
5264 inline
5266 {
5267  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
5268  const unsigned unroll_factor = 4;
5269  bm::word_t w0, w1, w2, w3;
5270 
5271  unsigned i;
5272  for (i = 0; i < bm::set_block_size - unroll_factor; i += unroll_factor)
5273  {
5274  w0 = block[i + 1] >> 31;
5275  w1 = block[i + 2] >> 31;
5276  w2 = block[i + 3] >> 31;
5277  w3 = block[i + 4] >> 31;
5278 
5279  block[0 + i] = (block[0 + i] << 1) | w0;
5280  block[1 + i] = (block[1 + i] << 1) | w1;
5281  block[2 + i] = (block[2 + i] << 1) | w2;
5282  block[3 + i] = (block[3 + i] << 1) | w3;
5283  }
5284  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
5285  block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
5286  block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
5287  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
5288 }
5289 
5290 /*!
5291  @brief insert bit into position and shift the rest right with carryover
5292 
5293  @param block - bit-block pointer
5294  @param bitpos - bit position to insert
5295  @param value - bit value (0|1) to insert
5296 
5297  @return carry over value
5298  @ingroup bitfunc
5299 */
5300 inline
5302  unsigned bitpos, bool value) BMNOEXCEPT
5303 {
5304  BM_ASSERT(block);
5305  BM_ASSERT(bitpos < 65536);
5306 
5307  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
5308  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5309  nbit &= bm::set_word_mask;
5310 
5311  bm::word_t co_flag = 0;
5312  if (nbit)
5313  {
5314  unsigned mask_r = bm::mask_r_u32(nbit);
5315  bm::word_t w = block[nword] & mask_r;
5316  bm::word_t wl = block[nword] & ~mask_r;
5317  co_flag = w >> 31;
5318  w <<= 1u;
5319  block[nword] = w | (unsigned(value) << nbit) | wl;
5320  ++nword;
5321  }
5322  else
5323  {
5324  co_flag = value;
5325  }
5326 
5327  for (unsigned i = nword; i < bm::set_block_size; ++i)
5328  {
5329  bm::word_t w = block[i];
5330  bm::word_t co_flag1 = w >> 31;
5331  w = (w << 1u) | co_flag;
5332  block[i] = w;
5333  co_flag = co_flag1;
5334  } // for i
5335  return co_flag;
5336 }
5337 
5338 
5339 
5340 /*!
5341  @brief Right bit-shift bitblock by 1 bit (reference)
5342  @param block - bit-block pointer
5343  @param empty_acc - [out] contains 0 if block is empty
5344  @param co_flag - carry over from the previous block
5345 
5346  @return carry over bit (1 or 0)
5347  @ingroup bitfunc
5348 */
5349 inline
5351  bm::word_t* BMRESTRICT empty_acc,
5352  bm::word_t co_flag) BMNOEXCEPT
5353 {
5354  BM_ASSERT(block);
5355  BM_ASSERT(empty_acc);
5356 
5357  bm::word_t acc(0);
5358  for (unsigned i = 0; i < bm::set_block_size; ++i)
5359  {
5360  bm::word_t w = block[i];
5361  bm::word_t co_flag1 = w >> 31;
5362  acc |= w = (w << 1u) | co_flag;
5363 
5364  block[i] = w;
5365  co_flag = co_flag1;
5366  } // for i
5367  *empty_acc = acc;
5368  return co_flag;
5369 }
5370 
5371 /*!
5372  @brief Right bit-shift bitblock by 1 bit (minimum unroll)
5373  @param block - bit-block pointer
5374  @param empty_acc - [out] contains 0 if block is empty
5375  @param co_flag - carry over from the previous block
5376 
5377  @return carry over bit (1 or 0)
5378  @ingroup bitfunc
5379 */
5380 inline
5382  bm::word_t* BMRESTRICT empty_acc,
5383  bm::id64_t co_flag) BMNOEXCEPT
5384 {
5385 
5386 #if defined(BM64OPT)
5387  // warning: all this code maybe not safe on big-endian
5389  (bm::bit_block_t::bunion_t*)(block);
5390 
5391  bm::id64_t acc0(0), acc1(0);
5392  unsigned i = 0;
5393  do
5394  {
5395  bm::id64_t w, co_flag1;
5396  w = b_u->w64[i];
5397  co_flag1 = w >> 63;
5398  acc0 |= w = (w << 1u) | co_flag;
5399  b_u->w64[i++] = w;
5400  co_flag = co_flag1;
5401 
5402  w = b_u->w64[i];
5403  co_flag1 = w >> 63;
5404  acc1 |= w = (w << 1u) | co_flag;
5405  b_u->w64[i] = w;
5406  co_flag = co_flag1;
5407 
5408  } while(++i < bm::set_block_size/2);
5409  *empty_acc = bool(acc0 | acc1);
5410  return co_flag;
5411 #else
5412  return bm::bit_block_shift_r1(block, empty_acc, unsigned(co_flag));
5413 #endif
5414 }
5415 
5416 
5417 /*!
5418  @brief Right bit-shift of bit-block by 1 bit (loop unrolled)
5419  @param block - bit-block pointer
5420  @param empty_acc - [out] contains 0 if block is empty
5421  @param co_flag - carry over from the previous block
5422 
5423  @return carry over bit (1 or 0)
5424  @ingroup bitfunc
5425 */
5426 inline
5428  bm::word_t* BMRESTRICT empty_acc,
5429  bm::word_t co_flag) BMNOEXCEPT
5430 {
5431  BM_ASSERT(block);
5432  BM_ASSERT(empty_acc);
5433  #if defined(VECT_SHIFT_R1)
5434  return VECT_SHIFT_R1(block, empty_acc, co_flag);
5435  #else
5436  return bm::bit_block_shift_r1_unr_min(block, empty_acc, co_flag);
5437  #endif
5438 }
5439 
5440 
5441 /*!
5442  @brief Left bit-shift bitblock by 1 bit (reference)
5443  @param block - bit-block pointer
5444  @param empty_acc - [out] contains 0 if block is empty
5445  @param co_flag - carry over from the prev/next block
5446 
5447  @return carry over bit (1 or 0)
5448 
5449  @ingroup bitfunc
5450 */
5451 inline
5453  bm::word_t* empty_acc, bm::word_t co_flag) BMNOEXCEPT
5454 {
5455  BM_ASSERT(block);
5456  BM_ASSERT(empty_acc);
5457 
5458  bm::word_t acc = 0;
5459  for (int i = bm::set_block_size-1; i >= 0; --i)
5460  {
5461  bm::word_t w = block[i];
5462  bm::word_t co_flag1 = w & 1u;
5463  acc |= w = (w >> 1u) | (co_flag << 31u);
5464  block[i] = w;
5465  co_flag = co_flag1;
5466  }
5467 
5468  *empty_acc = acc;
5469  return co_flag;
5470 }
5471 
5472 /*!
5473  @brief Left bit-shift bitblock by 1 bit (minimum unroll)
5474  @param block - bit-block pointer
5475  @param empty_acc - [out] contains 0 if block is empty
5476  @param co_flag - carry over from the prev/next block
5477 
5478  @return carry over bit (1 or 0)
5479 
5480  @ingroup bitfunc
5481 */
5482 inline
5484  bm::word_t* BMRESTRICT empty_acc,
5485  unsigned co_flag) BMNOEXCEPT
5486 {
5487  bm::word_t acc = 0;
5488  for (int i = bm::set_block_size-1; i >= 0; i-=2)
5489  {
5490  bm::word_t w0, w1, co_flag1;
5491  w0 = block[i]; w1 = block[i-1];
5492  co_flag1 = w0 & 1u;
5493  acc |= w0 = (w0 >> 1u) | (co_flag << 31u);
5494  block[i] = w0;
5495  co_flag = co_flag1;
5496  co_flag1 = w1 & 1u;
5497  acc |= w1 = (w1 >> 1u) | (co_flag << 31u);
5498  block[i-1] = w1;
5499  co_flag = co_flag1;
5500 
5501  i-=2;
5502  w0 = block[i]; w1 = block[i-1];
5503  co_flag1 = w0 & 1u;
5504  acc |= w0 = (w0 >> 1u) | (co_flag << 31u);
5505  block[i] = w0;
5506  co_flag = co_flag1;
5507  co_flag1 = w1 & 1u;
5508  acc |= w1 = (w1 >> 1u) | (co_flag << 31u);
5509  block[i-1] = w1;
5510  co_flag = co_flag1;
5511  } // for i
5512  *empty_acc = acc;
5513  return co_flag;
5514 }
5515 
5516 
5517 /*!
5518  @brief Left bit-shift of bit-block by 1 bit (loop unrolled)
5519  @param block - bit-block pointer
5520  @param empty_acc - [out] contains 0 if block is empty
5521  @param co_flag - carry over from the prev/next block
5522 
5523  @return carry over bit (1 or 0)
5524  @ingroup bitfunc
5525 */
5526 inline
5528  bm::word_t* empty_acc,
5529  bm::word_t co_flag) BMNOEXCEPT
5530 {
5531  BM_ASSERT(block);
5532  BM_ASSERT(empty_acc);
5533  #if defined(VECT_SHIFT_L1)
5534  return VECT_SHIFT_L1(block, empty_acc, co_flag);
5535  #else
5536  return bm::bit_block_shift_l1_unr_min(block, empty_acc, co_flag);
5537  #endif
5538 }
5539 
5540 /*!
5541  @brief erase bit from position and shift the rest right with carryover
5542 
5543  @param block - bit-block pointer
5544  @param bitpos - bit position to insert
5545  @param carry_over - bit value to add to the end (0|1)
5546 
5547  @ingroup bitfunc
5548 */
5549 inline
5551  unsigned bitpos,
5552  bool carry_over) BMNOEXCEPT
5553 {
5554  BM_ASSERT(block);
5555  BM_ASSERT(bitpos < 65536);
5556 
5557  if (!bitpos)
5558  {
5559  bm::word_t acc;
5560  bm::bit_block_shift_l1_unr(block, &acc, carry_over);
5561  return;
5562  }
5563 
5564  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
5565  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5566  nbit &= bm::set_word_mask;
5567 
5568  bm::word_t co_flag = carry_over;
5569  for (unsigned i = bm::set_block_size-1; i > nword; --i)
5570  {
5571  bm::word_t w = block[i];
5572  bm::word_t co_flag1 = w & 1u;
5573  w = (w >> 1u) | (co_flag << 31u);
5574  block[i] = w;
5575  co_flag = co_flag1;
5576  }
5577 
5578  if (nbit)
5579  {
5580  unsigned mask_r = bm::mask_r_u32(nbit);
5581  bm::word_t w = block[nword] & mask_r;
5582  bm::word_t wl = block[nword] & ~mask_r;
5583  w &= ~(1 << nbit); // clear the removed bit
5584  w >>= 1u;
5585  w |= wl | (co_flag << 31u);
5586  block[nword] = w;
5587  }
5588  else
5589  {
5590  block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
5591  }
5592 }
5593 
5594 
5595 /*!
5596  @brief Right bit-shift of bit-block by 1 bit (reference) + AND
5597  @param block - bit-block pointer
5598  @param co_flag - carry over from the previous block
5599  @param mask_block - mask bit-block pointer
5600  @param digest - block digest
5601 
5602  @return carry over bit (1 or 0)
5603  @ingroup bitfunc
5604 */
5605 inline
5607  bm::word_t co_flag,
5608  const bm::word_t* BMRESTRICT mask_block,
5609  bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
5610 {
5611  BM_ASSERT(block);
5612  BM_ASSERT(mask_block);
5613  BM_ASSERT(digest && *digest);
5614 
5615 
5616  bm::id64_t d = *digest;
5617 
5618  unsigned di = 0;
5619  if (!co_flag)
5620  {
5621  bm::id64_t t = d & -d;
5622  di = bm::word_bitcount64(t - 1); // find start bit-index
5623  }
5624 
5625  for (; di < 64; ++di)
5626  {
5627  const unsigned d_base = di * bm::set_block_digest_wave_size;
5628  bm::id64_t dmask = (1ull << di);
5629  if (d & dmask) // digest stride not empty
5630  {
5631  bm::word_t acc = 0;
5632  for (unsigned i = d_base; i < d_base + bm::set_block_digest_wave_size; ++i)
5633  {
5635 
5636  bm::word_t w = block[i];
5637  bm::word_t co_flag1 = w >> 31;
5638  w = (w << 1u) | co_flag;
5639  acc |= block[i] = w & mask_block[i];
5640  co_flag = co_flag1;
5641  }
5642  if (!acc)
5643  d &= ~dmask; // update digest: clear stride bit
5644  }
5645  else // stride is empty
5646  {
5647  BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
5648  BM_ASSERT(block[d_base]==0);
5649 
5650  if (co_flag) // there is carry-over
5651  {
5652  BM_ASSERT(co_flag == 1);
5653  BM_ASSERT(block[d_base] == 0);
5654 
5655  block[d_base] = co_flag & mask_block[d_base];
5656  if (block[d_base])
5657  d |= dmask; // update digest
5658  co_flag = 0;
5659  }
5660  }
5661  } // for di
5662 
5663  *digest = d;
5664  return co_flag;
5665 }
5666 
5667 /*!
5668  @brief Right bit-shift bitblock by 1 bit (reference) + AND
5669  @param block - bit-block pointer
5670  @param co_flag - carry over from the previous block
5671  @param mask_block - mask bit-block pointer
5672  @param digest - block digest
5673 
5674  @return carry over bit (1 or 0)
5675  @ingroup bitfunc
5676 */
5677 inline
5679  bm::word_t co_flag,
5680  const bm::word_t* BMRESTRICT mask_block,
5681  bm::id64_t* BMRESTRICT digest) BMNOEXCEPT
5682 {
5683  BM_ASSERT(block);
5684  BM_ASSERT(mask_block);
5685  BM_ASSERT(digest);
5686 
5687  #if defined(VECT_SHIFT_R1_AND)
5688  return VECT_SHIFT_R1_AND(block, co_flag, mask_block, digest);
5689  #else
5690  return bm::bit_block_shift_r1_and(block, co_flag, mask_block, digest);
5691  #endif
5692 }
5693 
5694 
5695 /*!
5696  Function calculates if there is any number of 1 bits
5697  in the given array of words in the range between left anf right bits
5698  (borders included). Make sure the addresses are aligned.
5699 
5700  @ingroup bitfunc
5701 */
5702 inline
5704  bm::word_t left,
5705  bm::word_t right) BMNOEXCEPT
5706 {
5707  BM_ASSERT(left <= right);
5708 
5709  unsigned nbit = left; // unsigned(left & bm::set_block_mask);
5710  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5711  nbit &= bm::set_word_mask;
5712 
5713  const bm::word_t* word = block + nword;
5714 
5715  if (left == right) // special case (only 1 bit to check)
5716  {
5717  return (*word >> nbit) & 1;
5718  }
5719  unsigned acc;
5720  unsigned bitcount = right - left + 1;
5721 
5722  if (nbit) // starting position is not aligned
5723  {
5724  unsigned right_margin = nbit + (right - left);
5725  if (right_margin < 32)
5726  {
5727  unsigned mask_r = bm::mask_r_u32(nbit);
5728  unsigned mask_l = bm::mask_l_u32(right_margin);
5729  unsigned mask = mask_r & mask_l;
5730  return *word & mask;
5731  }
5732  else
5733  {
5734  unsigned mask_r = bm::mask_r_u32(nbit);
5735  acc = *word & mask_r;
5736  if (acc)
5737  return acc;
5738  bitcount -= 32 - nbit;
5739  }
5740  ++word;
5741  }
5742 
5743  // loop unrolled to evaluate 4 words at a time
5744  // SIMD showed no advantage, unless evaluate sub-wave intervals
5745  //
5746  for ( ;bitcount >= 128; bitcount-=128, word+=4)
5747  {
5748  acc = word[0] | word[1] | word[2] | word[3];
5749  if (acc)
5750  return acc;
5751  } // for
5752 
5753  acc = 0;
5754  for ( ;bitcount >= 32; bitcount -= 32)
5755  {
5756  acc |= *word++;
5757  } // for
5758 
5759  if (bitcount) // we have a tail to count
5760  {
5761  unsigned mask_l = bm::mask_l_u32(bitcount-1);
5762  acc |= (*word) & mask_l;
5763  }
5764  return acc;
5765 }
5766 
5767 // ----------------------------------------------------------------------
5768 
5769 /*! Function inverts block of bits
5770  @ingroup bitfunc
5771 */
5772 template<typename T>
5773 void bit_invert(T* start) BMNOEXCEPT
5774 {
5776 #ifdef BMVECTOPT
5777  VECT_INVERT_BLOCK(start);
5778 #else
5779  T* end = (T*)((unsigned*)(start) + bm::set_block_size);
5780  do
5781  {
5782  start[0] = ~start[0];
5783  start[1] = ~start[1];
5784  start[2] = ~start[2];
5785  start[3] = ~start[3];
5786  start+=4;
5787  } while (start < end);
5788 #endif
5789 }
5790 
5791 // ----------------------------------------------------------------------
5792 
5793 /*! @brief Returns "true" if all bits in the block are 1
5794  @ingroup bitfunc
5795 */
5796 inline
5797 bool is_bits_one(const bm::wordop_t* start) BMNOEXCEPT
5798 {
5799 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
5800  return VECT_IS_ONE_BLOCK(start);
5801 #else
5802  const bm::word_t* BMRESTRICT src_end = (bm::word_t*)start + bm::set_block_size;
5803  const bm::wordop_t* end = (const bm::wordop_t*)src_end;
5804  do
5805  {
5806  bm::wordop_t tmp =
5807  start[0] & start[1] & start[2] & start[3];
5808  if (tmp != bm::all_bits_mask)
5809  return false;
5810  start += 4;
5811  } while (start < end);
5812  return true;
5813 #endif
5814 }
5815 
5816 // ----------------------------------------------------------------------
5817 
5818 /*! @brief Returns "true" if all bits are 1 in the block [left, right]
5819  Function check for block varieties
5820  @internal
5821 */
5822 inline
5824  unsigned left, unsigned right) BMNOEXCEPT
5825 {
5826  BM_ASSERT(left <= right);
5827  BM_ASSERT(right < bm::gap_max_bits);
5828  if (block)
5829  {
5830  if (BM_IS_GAP(block))
5831  return bm::gap_is_all_one_range(BMGAP_PTR(block), left, right);
5832  if (block == FULL_BLOCK_FAKE_ADDR)
5833  return true;
5834  return bm::bit_block_is_all_one_range(block, left, right);
5835  }
5836  return false;
5837 }
5838 
5839 /*! @brief Returns "true" if all bits are 1 in the block [left, right]
5840  and border bits are 0
5841  @internal
5842 */
5843 inline
5844 bool block_is_interval(const bm::word_t* const BMRESTRICT block,
5845  unsigned left, unsigned right) BMNOEXCEPT
5846 {
5847  BM_ASSERT(left <= right);
5848  BM_ASSERT(right < bm::gap_max_bits-1);
5849 
5850  if (block)
5851  {
5852  bool is_left, is_right, all_one;
5853  if (BM_IS_GAP(block))
5854  {
5855  const bm::gap_word_t* gap = BMGAP_PTR(block);
5856  all_one = bm::gap_is_interval(gap, left, right);
5857  return all_one;
5858  }
5859  else // bit-block
5860  {
5861  if (block == FULL_BLOCK_FAKE_ADDR)
5862  return false;
5863  unsigned nword = ((left-1) >> bm::set_word_shift);
5864  is_left = block[nword] & (1u << ((left-1) & bm::set_word_mask));
5865  if (is_left == false)
5866  {
5867  nword = ((right + 1) >> bm::set_word_shift);
5868  is_right = block[nword] & (1u << ((right + 1) & bm::set_word_mask));
5869  if (is_right == false)
5870  {
5871  all_one = bm::bit_block_is_all_one_range(block, left, right);
5872  return all_one;
5873  }
5874  }
5875  }
5876  }
5877 
5878  return false;
5879 }
5880 
5881 // ----------------------------------------------------------------------
5882 
5883 /**
5884  \brief Searches for the last 1 bit in the 111 interval of a BIT block
5885  \param block - BIT buffer
5886  \param nbit - bit index to start checking from
5887  \param pos - [out] found value
5888 
5889  \return false if not found
5890  @ingroup bitfunc
5891 */
5892 inline
5894  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5895 {
5896  BM_ASSERT(block);
5897  BM_ASSERT(pos);
5898 
5899  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5900  unsigned bit_pos = (nbit & bm::set_word_mask);
5901  bm::word_t w = block[nword];
5902  w &= (1u << bit_pos);
5903  if (!w)
5904  return false;
5905 
5906  if (nbit == bm::gap_max_bits-1)
5907  {
5908  *pos = bm::gap_max_bits-1;
5909  return true;
5910  }
5911  *pos = nbit;
5912 
5913  ++nbit;
5914  nword = unsigned(nbit >> bm::set_word_shift);
5915  bit_pos = (nbit & bm::set_word_mask);
5916 
5917  w = (~block[nword]) >> bit_pos;
5918  w <<= bit_pos; // clear the trailing bits
5919  if (w)
5920  {
5921  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
5922  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
5923  return true;
5924  }
5925 
5926  for (++nword; nword < bm::set_block_size; ++nword)
5927  {
5928  w = ~block[nword];
5929  if (w)
5930  {
5931  bit_pos = bm::bit_scan_forward32(w); // trailing zeros
5932  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))-1);
5933  return true;
5934  }
5935  } // for nword
5936 
5937  // 0 not found, all block is 1s...
5938  *pos = bm::gap_max_bits-1;
5939  return true;
5940 }
5941 
5942 
5943 /*! @brief Find end of the current 111 interval
5944  @return search result code 0 - not found, 1 found, 2 - found at the end
5945  @internal
5946 */
5947 inline
5949  unsigned nbit_from,
5950  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
5951 {
5952  BM_ASSERT(block && found_nbit);
5953  BM_ASSERT(nbit_from < bm::gap_max_bits);
5954 
5955  bool b;
5956  if (BM_IS_GAP(block))
5957  {
5958  const bm::gap_word_t* gap = BMGAP_PTR(block);
5959  b = bm::gap_find_interval_end(gap, nbit_from, found_nbit);
5960  if (b && *found_nbit == bm::gap_max_bits-1)
5961  return 2; // end of block, keep searching
5962  }
5963  else // bit-block
5964  {
5965  if (IS_FULL_BLOCK(block))
5966  {
5967  *found_nbit = bm::gap_max_bits-1;
5968  return 2;
5969  }
5970  b = bm::bit_block_find_interval_end(block, nbit_from, found_nbit);
5971  if (b && *found_nbit == bm::gap_max_bits-1)
5972  return 2; // end of block, keep searching
5973  }
5974  return b;
5975 }
5976 
5977 // ----------------------------------------------------------------------
5978 
5979 /**
5980  \brief Searches for the first 1 bit in the 111 interval of a BIT block
5981  \param block - BIT buffer
5982  \param nbit - bit index to start checking from
5983  \param pos - [out] found value
5984 
5985  \return false if not found
5986  @ingroup bitfunc
5987 */
5988 inline
5990  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
5991 {
5992  BM_ASSERT(block);
5993  BM_ASSERT(pos);
5994 
5995  unsigned nword = unsigned(nbit >> bm::set_word_shift);
5996  unsigned bit_pos = (nbit & bm::set_word_mask);
5997  bm::word_t w = block[nword];
5998  w &= (1u << bit_pos);
5999  if (!w)
6000  return false;
6001 
6002  if (nbit == 0)
6003  {
6004  *pos = 0;
6005  return true;
6006  }
6007  *pos = nbit;
6008 
6009  --nbit;
6010  nword = unsigned(nbit >> bm::set_word_shift);
6011  bit_pos = (nbit & bm::set_word_mask);
6012 
6013  unsigned mask_l = bm::mask_l_u32(bit_pos);
6014  w = (~block[nword]) & mask_l;
6015  if (w)
6016  {
6017  bit_pos = bm::bit_scan_reverse32(w);
6018  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
6019  return true;
6020  }
6021 
6022  if (nword)
6023  {
6024  for (--nword; true; --nword)
6025  {
6026  w = ~block[nword];
6027  if (w)
6028  {
6029  bit_pos = bm::bit_scan_reverse32(w); // trailing zeros
6030  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t)))+1);
6031  return true;
6032  }
6033  if (!nword)
6034  break;
6035  } // for nword
6036  }
6037 
6038  // 0 not found, all block is 1s...
6039  *pos = 0;
6040  return true;
6041 }
6042 
6043 /**
6044  \brief Reverse search for the previous 1 bit
6045  \param block - BIT buffer
6046  \param nbit - bit index to start checking from
6047  \param pos - [out] found value
6048 
6049  \return false if not found
6050  @ingroup bitfunc
6051 */
6052 inline
6054  unsigned nbit, unsigned* BMRESTRICT pos) BMNOEXCEPT
6055 {
6056  BM_ASSERT(block);
6057  BM_ASSERT(pos);
6058 
6059  unsigned nword = unsigned(nbit >> bm::set_word_shift);
6060  unsigned bit_pos = (nbit & bm::set_word_mask);
6061  bm::word_t w = block[nword];
6062  w &= (1u << bit_pos);
6063  if (w)
6064  {
6065  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
6066  return true;
6067  }
6068 
6069  if (!nbit)
6070  return false;
6071 
6072  --nbit;
6073  nword = unsigned(nbit >> bm::set_word_shift);
6074  bit_pos = (nbit & bm::set_word_mask);
6075 
6076  unsigned mask_l = bm::mask_l_u32(bit_pos);
6077  w = block[nword] & mask_l;
6078  if (w)
6079  {
6080  bit_pos = bm::bit_scan_reverse32(w);
6081  *pos = unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
6082  return true;
6083  }
6084 
6085  if (nword)
6086  {
6087  for (--nword; true; --nword)
6088  {
6089  w = block[nword];
6090  if (w)
6091  {
6092  bit_pos = bm::bit_scan_reverse32(w); // trailing zeros
6093  *pos =
6094  unsigned(bit_pos + (nword * 8u * unsigned(sizeof(bm::word_t))));
6095  return true;
6096  }
6097  if (!nword)
6098  break;
6099  } // for nword
6100  }
6101  return false;
6102 }
6103 
6104 
6105 // ----------------------------------------------------------------------
6106 
6107 /*! @brief Find start of the current 111 interval
6108  @return search result code 0 - not found, 1 found, 2 - found at the start
6109  @internal
6110 */
6111 inline
6113  unsigned nbit_from,
6114  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
6115 {
6116  BM_ASSERT(block && found_nbit);
6117  BM_ASSERT(nbit_from < bm::gap_max_bits);
6118  bool b;
6119  if (BM_IS_GAP(block))
6120  {
6121  const bm::gap_word_t* gap = BMGAP_PTR(block);
6122  b = bm::gap_find_interval_start(gap, nbit_from, found_nbit);
6123  if (b && *found_nbit == 0)
6124  return 2; // start of block, keep searching
6125  }
6126  else // bit-block
6127  {
6128  if (IS_FULL_BLOCK(block))
6129  {
6130  *found_nbit = 0;
6131  return 2;
6132  }
6133  b = bm::bit_block_find_interval_start(block, nbit_from, found_nbit);
6134  if (b && *found_nbit == 0)
6135  return 2; // start of block, keep searching
6136  }
6137  return b;
6138 }
6139 
6140 // ----------------------------------------------------------------------
6141 
6142 /*! @brief Reverse find 1
6143  @return search result code 0 - not found, 1 found
6144  @internal
6145 */
6146 inline
6148  unsigned nbit_from,
6149  unsigned* BMRESTRICT found_nbit) BMNOEXCEPT
6150 {
6151  BM_ASSERT(block && found_nbit);
6152  BM_ASSERT(nbit_from < bm::gap_max_bits);
6153  bool b;
6154  if (BM_IS_GAP(block))
6155  {
6156  b = bm::gap_find_prev(BMGAP_PTR(block), nbit_from, found_nbit);
6157  }
6158  else // bit-block
6159  {
6160  if (IS_FULL_BLOCK(block))
6161  {
6162  *found_nbit = nbit_from;
6163  return true;
6164  }
6165  b = bm::bit_block_find_prev(block, nbit_from, found_nbit);
6166  }
6167  return b;
6168 }
6169 
6170 // ----------------------------------------------------------------------
6171 
6172 /*! @brief Returns "true" if one bit is set in the block [left, right]
6173  Function check for block varieties
6174  @internal
6175 */
6176 inline
6177 bool block_any_range(const bm::word_t* const BMRESTRICT block,
6178  unsigned left, unsigned right) BMNOEXCEPT
6179 {
6180  BM_ASSERT(left <= right);
6181  BM_ASSERT(right < bm::gap_max_bits);
6182  if (!block)
6183  return false;
6184  if (BM_IS_GAP(block))
6185  return bm::gap_any_range(BMGAP_PTR(block), left, right);
6186  if (IS_FULL_BLOCK(block))
6187  return true;
6188  return bm::bit_block_any_range(block, left, right);
6189 }
6190 
6191 // ----------------------------------------------------------------------
6192 
6193 /*! @brief Returns "true" if one bit is set in the block
6194  Function check for block varieties
6195  @internal
6196 */
6197 inline
6198 bool block_any(const bm::word_t* const BMRESTRICT block) BMNOEXCEPT
6199 {
6200  if (!block)
6201  return false;
6202  if (IS_FULL_BLOCK(block))
6203  return true;
6204  bool all_zero = (BM_IS_GAP(block)) ?
6206  : bm::bit_is_all_zero(block);
6207  return !all_zero;
6208 }
6209 
6210 
6211 
6212 // ----------------------------------------------------------------------
6213 
6214 // GAP blocks manipulation functions:
6215 
6216 
6217 /*!
6218  \brief GAP AND operation.
6219 
6220  Function performs AND logical operation on gap vectors.
6221  If possible function put the result into vect1 and returns this
6222  pointer. Otherwise result is put into tmp_buf, which should be
6223  twice of the vector size.
6224 
6225  \param vect1 - operand 1
6226  \param vect2 - operand 2
6227  \param tmp_buf - pointer on temporary buffer
6228  \param dsize - out size of the destination
6229  \return Result pointer (tmp_buf OR vect1)
6230 
6231  @ingroup gapfunc
6232 */
6233 inline
6235  const gap_word_t* BMRESTRICT vect2,
6236  gap_word_t* BMRESTRICT tmp_buf,
6237  unsigned& dsize) BMNOEXCEPT
6238 {
6239  bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
6240  tmp_buf, vect1, 0, vect2, 0, dsize);
6241  return tmp_buf;
6242 }
6243 
6244 /*!
6245  \brief GAP AND operation test.
6246 
6247  Function performs AND logical operation on gap vectors.
6248  If possible function put the result into vect1 and returns this
6249  pointer. Otherwise result is put into tmp_buf, which should be
6250  twice of the vector size.
6251 
6252  \param vect1 - operand 1
6253  \param vect2 - operand 2
6254  \return non zero value if operation returns any 1 bit
6255 
6256  @ingroup gapfunc
6257 */
6258 inline
6260  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6261 {
6262  return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0);
6263 }
6264 
6265 
6266 /*!
6267  \brief GAP bitcount AND operation test.
6268 
6269  \param vect1 - operand 1
6270  \param vect2 - operand 2
6271  \return bitcount of vect1 AND vect2
6272 
6273  @ingroup gapfunc
6274 */
6275 inline
6276 unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
6277  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6278 {
6279  return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2);
6280 }
6281 
6282 
6283 
6284 /*!
6285  \brief GAP XOR operation.
6286 
6287  Function performs XOR logical operation on gap vectors.
6288  If possible function put the result into vect1 and returns this
6289  pointer. Otherwise result is put into tmp_buf, which should be
6290  twice of the vector size.
6291 
6292  \param vect1 - operand 1
6293  \param vect2 - operand 2
6294  \param tmp_buf - pointer on temporary buffer
6295  \param dsize - out destination size
6296  \return Result pointer (tmp_buf)
6297 
6298  @ingroup gapfunc
6299 */
6300 inline
6302  const gap_word_t* BMRESTRICT vect2,
6303  gap_word_t* BMRESTRICT tmp_buf,
6304  unsigned& dsize) BMNOEXCEPT
6305 {
6306  bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
6307  tmp_buf, vect1, 0, vect2, 0, dsize);
6308  return tmp_buf;
6309 }
6310 
6311 /*! Light weight gap_operation_xor for len prediction
6312  @ingroup gapfunc
6313 */
6314 /*
6315 inline
6316 bool gap_operation_dry_xor(const gap_word_t* BMRESTRICT vect1,
6317  const gap_word_t* BMRESTRICT vect2,
6318  unsigned& dsize,
6319  unsigned limit) BMNOEXCEPT
6320 {
6321  return
6322  bm::gap_buff_dry_op<bm::gap_word_t, bm::xor_func>(vect1, vect2, dsize, limit);
6323 }
6324 */
6325 
6326 /*!
6327  \brief GAP XOR operation test.
6328 
6329  Function performs AND logical operation on gap vectors.
6330  If possible function put the result into vect1 and returns this
6331  pointer. Otherwise result is put into tmp_buf, which should be
6332  twice of the vector size.
6333 
6334  \param vect1 - operand 1
6335  \param vect2 - operand 2
6336  \return non zero value if operation returns any 1 bit
6337 
6338  @ingroup gapfunc
6339 */
6342  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6343 {
6344  return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0);
6345 }
6346 
6347 /*!
6348  \brief GAP bitcount XOR operation test.
6349 
6350  \param vect1 - operand 1
6351  \param vect2 - operand 2
6352  \return bitcount of vect1 XOR vect2
6353 
6354  @ingroup gapfunc
6355 */
6357 unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
6358  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6359 {
6360  return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2);
6361 }
6362 
6363 
6364 /*!
6365  \brief GAP OR operation.
6366 
6367  Function performs OR logical oparation on gap vectors.
6368  If possible function put the result into vect1 and returns this
6369  pointer. Otherwise result is put into tmp_buf, which should be
6370  twice of the vector size.
6371 
6372  \param vect1 - operand 1
6373  \param vect2 - operand 2
6374  \param tmp_buf - pointer on temporary buffer
6375  \param dsize - out destination size
6376 
6377  \return Result pointer (tmp_buf)
6378 
6379  @ingroup gapfunc
6380 */
6381 inline
6383  const gap_word_t* BMRESTRICT vect2,
6384  gap_word_t* BMRESTRICT tmp_buf,
6385  unsigned& dsize) BMNOEXCEPT
6386 {
6387  bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
6388  bm::gap_invert(tmp_buf);
6389  return tmp_buf;
6390 }
6391 
6392 /*!
6393  \brief GAP bitcount OR operation test.
6394 
6395  \param vect1 - operand 1
6396  \param vect2 - operand 2
6397  \return bitcount of vect1 OR vect2
6398 
6399  @ingroup gapfunc
6400 */
6402 unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
6403  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6404 {
6405  return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2);
6406 }
6407 
6408 
6409 
6410 /*!
6411  \brief GAP SUB (AND NOT) operation.
6412 
6413  Function performs SUB logical operation on gap vectors.
6414  If possible function put the result into vect1 and returns this
6415  pointer. Otherwise result is put into tmp_buf, which should be
6416  twice of the vector size.
6417 
6418  \param vect1 - operand 1
6419  \param vect2 - operand 2
6420  \param tmp_buf - pointer on temporary buffer
6421  \param dsize - out destination size
6422 
6423  \return Result pointer (tmp_buf)
6424 
6425  @ingroup gapfunc
6426 */
6427 inline
6429  const gap_word_t* BMRESTRICT vect2,
6430  gap_word_t* BMRESTRICT tmp_buf,
6431  unsigned& dsize) BMNOEXCEPT
6432 {
6433  bm::gap_buff_op<bm::gap_word_t, bm::and_func>( // no bug here
6434  tmp_buf, vect1, 0, vect2, 1, dsize);
6435  return tmp_buf;
6436 }
6437 
6438 
6439 /*!
6440  \brief GAP SUB operation test.
6441 
6442  Function performs AND logical operation on gap vectors.
6443  If possible function put the result into vect1 and returns this
6444  pointer. Otherwise result is put into tmp_buf, which should be
6445  twice of the vector size.
6446 
6447  \param vect1 - operand 1
6448  \param vect2 - operand 2
6449  \return non zero value if operation returns any 1 bit
6450 
6451  @ingroup gapfunc
6452 */
6453 inline
6455  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6456 {
6457  return
6458  bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>( // no bug here
6459  vect1, 0, vect2, 1);
6460 }
6461 
6462 
6463 /*!
6464 \brief GAP bitcount SUB (AND NOT) operation test.
6465 
6466 \param vect1 - operand 1
6467 \param vect2 - operand 2
6468 \return bitcount of vect1 SUB (AND NOT) vect2
6469 
6470 @ingroup gapfunc
6471 */
6473 unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
6474  const gap_word_t* BMRESTRICT vect2) BMNOEXCEPT
6475 {
6476  return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2);
6477 }
6478 
6479 
6480 // ----------------------------------------------------------------------
6481 
6482 // BIT blocks manipulation functions:
6483 
6484 
6485 /*!
6486  \brief Bitblock copy operation.
6487 
6488  \param dst - destination block.
6489  \param src - source block.
6490 
6491  @ingroup bitfunc
6492 */
6493 inline
6495  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6496 {
6497 #ifdef BMVECTOPT
6498  VECT_COPY_BLOCK(dst, src);
6499 #else
6500  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
6501 #endif
6502 }
6503 
6504 /*!
6505  \brief Bitblock copy/stream operation.
6506 
6507  \param dst - destination block.
6508  \param src - source block.
6509 
6510  @ingroup bitfunc
6511 */
6512 inline
6514  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6515 {
6516 #ifdef VECT_STREAM_BLOCK
6517  VECT_STREAM_BLOCK(dst, src);
6518 #else
6519  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
6520 #endif
6521 }
6522 
6523 
6524 /*!
6525  \brief Plain bitblock AND operation.
6526  Function does not analyse availability of source and destination blocks.
6527 
6528  \param dst - destination block.
6529  \param src - source block.
6530 
6531  \return 0 if AND operation did not produce anything (no 1s in the output)
6532 
6533  @ingroup bitfunc
6534 */
6535 inline
6537  const bm::word_t* BMRESTRICT src) BMNOEXCEPT
6538 {
6539  BM_ASSERT(dst);
6540  BM_ASSERT(src);
6541  BM_ASSERT(dst != src);
6542 
6543 #ifdef BMVECTOPT
6544  bm::id64_t acc = VECT_AND_BLOCK(dst, src);
6545 #else
6546  unsigned arr_sz = bm::set_block_size / 2;
6547 
6550 
6551  bm::id64_t acc = 0;
6552  for (unsigned i = 0; i < arr_sz; i+=4)
6553  {
6554  acc |= dst_u->w64[i] &= src_u->w64[i];
6555  acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
6556  acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
6557  acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
6558  }
6559 #endif
6560  return acc;
6561 }
6562 
6563 /*!
6564  \brief digest based bit-block AND
6565 
6566  \param dst - destination block.
6567  \param src - source block.
6568  \param digest - known digest of dst block
6569 
6570  \return new digest
6571 
6572  @ingroup bitfunc
6573 */
6574 inline
6576  const bm::word_t* BMRESTRICT src,
6577  bm::id64_t digest) BMNOEXCEPT
6578 {
6579  BM_ASSERT(dst);
6580  BM_ASSERT(src);
6581  BM_ASSERT(dst != src);
6582 
6583  const bm::id64_t mask(1ull);
6584  bm::id64_t d = digest;
6585  while (d)
6586  {
6587  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6588 
6589  unsigned wave = bm::word_bitcount64(t - 1);
6590  unsigned off = wave * bm::set_block_digest_wave_size;
6591 
6592  #if defined(VECT_AND_DIGEST)
6593  bool all_zero = VECT_AND_DIGEST(&dst[off], &src[off]);
6594  if (all_zero)
6595  digest &= ~(mask << wave);
6596  #else
6597  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
6599 
6600  bm::id64_t acc = 0;
6601  unsigned j = 0;
6602  do
6603  {
6604  acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
6605  acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
6606  acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
6607  acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
6608  j+=4;
6609  } while (j < bm::set_block_digest_wave_size/2);
6610 
6611  if (!acc) // all zero
6612  digest &= ~(mask << wave);
6613  #endif
6614 
6615  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6616  } // while
6617 
6618  return digest;
6619 }
6620 
6621 
6622 /*!
6623  \brief digest based bit-block AND 5-way
6624 
6625  \return new digest
6626 
6627  @ingroup bitfunc
6628 */
6629 inline
6631  const bm::word_t* BMRESTRICT src0,
6632  const bm::word_t* BMRESTRICT src1,
6633  const bm::word_t* BMRESTRICT src2,
6634  const bm::word_t* BMRESTRICT src3,
6635  bm::id64_t digest) BMNOEXCEPT
6636 {
6637  BM_ASSERT(dst);
6638  BM_ASSERT(src0 && src1 && src2 && src3);
6639 
6640  const bm::id64_t mask(1ull);
6641  bm::id64_t d = digest;
6642  while (d)
6643  {
6644  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6645 
6646  unsigned wave = bm::word_bitcount64(t - 1);
6647  unsigned off = wave * bm::set_block_digest_wave_size;
6648 
6649 #if defined(VECT_AND_DIGEST_5WAY)
6650  bool all_zero = VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
6651  if (all_zero)
6652  digest &= ~(mask << wave);
6653 #else
6654  const bm::bit_block_t::bunion_t* BMRESTRICT src_u0 = (const bm::bit_block_t::bunion_t*)(&src0[off]);
6655  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 = (const bm::bit_block_t::bunion_t*)(&src1[off]);
6656  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 = (const bm::bit_block_t::bunion_t*)(&src2[off]);
6657  const bm::bit_block_t::bunion_t* BMRESTRICT src_u3 = (const bm::bit_block_t::bunion_t*)(&src3[off]);
6659 
6660  bm::id64_t acc = 0;
6661  unsigned j = 0;
6662  do
6663  {
6664  acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
6665  acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
6666  acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
6667  acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
6668  j += 4;
6669  } while (j < bm::set_block_digest_wave_size / 2);
6670 
6671  if (!acc) // all zero
6672  digest &= ~(mask << wave);
6673 #endif
6674 
6675  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6676  } // while
6677 
6678  return digest;
6679 }
6680 
6681 
6682 /*!
6683  \brief digest based bit-block AND
6684 
6685  dst = src1 AND src2
6686 
6687  \param dst - destination block.
6688  \param src1 - source block.
6689  \param src2 - source block.
6690  \param digest - known initial digest
6691 
6692  \return new digest
6693 
6694  @ingroup bitfunc
6695 */
6696 inline
6698  const bm::word_t* BMRESTRICT src1,
6699  const bm::word_t* BMRESTRICT src2,
6700  bm::id64_t digest) BMNOEXCEPT
6701 {
6702  BM_ASSERT(dst);
6703  BM_ASSERT(src1 && src2);
6704  BM_ASSERT(dst != src1 && dst != src2);
6705 
6706  const bm::id64_t mask(1ull);
6707  bm::id64_t d = digest;
6708  while (d)
6709  {
6710  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
6711 
6712  unsigned wave = bm::word_bitcount64(t - 1);
6713  unsigned off = wave * bm::set_block_digest_wave_size;
6714 
6715  #if defined(VECT_AND_DIGEST_2WAY)
6716  bool all_zero = VECT_AND_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
6717  if (all_zero)
6718  digest &= ~(mask << wave);
6719  #else
6720  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
6721  (const bm::bit_block_t::bunion_t*)(&src1[off]);
6722  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
6723  (const bm::bit_block_t::bunion_t*)(&src2[off]);
6725  (bm::bit_block_t::bunion_t*)(&dst[off]);
6726 
6727  bm::id64_t acc = 0;
6728  unsigned j = 0;
6729  do
6730  {
6731  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
6732  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
6733  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
6734  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
6735  j+=4;
6736  } while (j < bm::set_block_digest_wave_size/2);
6737 
6738  if (!acc) // all zero
6739  digest &= ~(mask << wave);
6740  #endif
6741 
6742  d = bm::bmi_bslr_u64(d); // d &= d - 1;
6743  } // while
6744 
6745  return digest;
6746 }
6747 
6748 
6749 
6750 /*!
6751  \brief Function ANDs two bitblocks and computes the bitcount.
6752  Function does not analyse availability of source blocks.
6753 
6754  \param src1 - first bit block
6755  \param src2 - second bit block
6756 
6757  @ingroup bitfunc
6758 */
6759 inline
6761  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6762 {
6763  unsigned count;
6764  const bm::word_t* src1_end = src1 + bm::set_block_size;
6765 #ifdef BMVECTOPT
6766  count = VECT_BITCOUNT_AND(src1, src1_end, src2);
6767 #else
6768  count = 0;
6769 # ifdef BM64OPT
6770  const bm::id64_t* b1 = (bm::id64_t*) src1;
6771  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6772  const bm::id64_t* b2 = (bm::id64_t*) src2;
6773  do
6774  {
6775  count += bitcount64_4way(b1[0] & b2[0],
6776  b1[1] & b2[1],
6777  b1[2] & b2[2],
6778  b1[3] & b2[3]);
6779  b1 += 4;
6780  b2 += 4;
6781  } while (b1 < b1_end);
6782 # else
6783  do
6784  {
6785  BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]);
6786  BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]);
6787  BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]);
6788  BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]);
6789 
6790  src1+=4;
6791  src2+=4;
6792  } while (src1 < src1_end);
6793 # endif
6794 #endif
6795  return count;
6796 }
6797 
6798 
6799 /*!
6800  \brief Function ANDs two bitblocks and tests for any bit.
6801  Function does not analyse availability of source blocks.
6802 
6803  \param src1 - first bit block
6804  \param src2 - second bit block
6805 
6806  @ingroup bitfunc
6807 */
6808 inline
6809 unsigned bit_block_and_any(const bm::word_t* src1,
6810  const bm::word_t* src2) BMNOEXCEPT
6811 {
6812  unsigned count = 0;
6813  const bm::word_t* src1_end = src1 + bm::set_block_size;
6814  do
6815  {
6816  count = (src1[0] & src2[0]) |
6817  (src1[1] & src2[1]) |
6818  (src1[2] & src2[2]) |
6819  (src1[3] & src2[3]);
6820 
6821  src1+=4; src2+=4;
6822  } while ((src1 < src1_end) && !count);
6823  return count;
6824 }
6825 
6826 
6827 
6828 
6829 /*!
6830  \brief Function XORs two bitblocks and computes the bitcount.
6831  Function does not analyse availability of source blocks.
6832 
6833  \param src1 - first bit block
6834  \param src2 - second bit block
6835 
6836  @ingroup bitfunc
6837 */
6838 inline
6840  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6841 {
6842  unsigned count;
6843  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6844 #ifdef BMVECTOPT
6845  count = VECT_BITCOUNT_XOR(src1, src1_end, src2);
6846 #else
6847  count = 0;
6848 # ifdef BM64OPT
6849  const bm::id64_t* b1 = (bm::id64_t*) src1;
6850  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
6851  const bm::id64_t* b2 = (bm::id64_t*) src2;
6852  do
6853  {
6854  count += bitcount64_4way(b1[0] ^ b2[0],
6855  b1[1] ^ b2[1],
6856  b1[2] ^ b2[2],
6857  b1[3] ^ b2[3]);
6858  b1 += 4;
6859  b2 += 4;
6860  } while (b1 < b1_end);
6861 # else
6862  do
6863  {
6864  BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]);
6865  BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]);
6866  BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]);
6867  BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]);
6868 
6869  src1+=4;
6870  src2+=4;
6871  } while (src1 < src1_end);
6872 # endif
6873 #endif
6874  return count;
6875 }
6876 
6877 
6878 /*!
6879  \brief Function XORs two bitblocks and and tests for any bit.
6880  Function does not analyse availability of source blocks.
6881 
6882  \param src1 - first bit block.
6883  \param src2 - second bit block.
6884 
6885  @ingroup bitfunc
6886 */
6887 inline
6889  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6890 {
6891  unsigned count = 0;
6892  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
6893  do
6894  {
6895  count = (src1[0] ^ src2[0]) |
6896  (src1[1] ^ src2[1]) |
6897  (src1[2] ^ src2[2]) |
6898  (src1[3] ^ src2[3]);
6899 
6900  src1+=4; src2+=4;
6901  } while (!count && (src1 < src1_end));
6902  return count;
6903 }
6904 
6905 /*!
6906  \brief Function SUBs two bitblocks and computes the bitcount.
6907  Function does not analyse availability of source blocks.
6908 
6909  \param src1 - first bit block.
6910  \param src2 - second bit block.
6911 
6912  @ingroup bitfunc
6913 */
6914 inline
6916  const bm::word_t* BMRESTRICT src2) BMNOEXCEPT
6917 {
6918  unsigned count;
6919  const bm::word_t* BMRESTRICT