BitMagic-C++
bmfunc.h
Go to the documentation of this file.
1 #ifndef BMFUNC__H__INCLUDED__
2 #define BMFUNC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmfunc.h
22  \brief Bit manipulation primitives (internal)
23 */
24 
25 #include <memory.h>
26 
27 #include "bmdef.h"
28 #include "bmutil.h"
29 
30 
31 #ifdef _MSC_VER
32 # pragma warning( disable: 4146 )
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 inline
41  bm::word_t left,
42  bm::word_t right);
43 
44 inline
46  bm::word_t left,
47  bm::word_t right);
48 
49 /*!
50  @brief Structure with statistical information about bitset's memory
51  allocation details.
52  @ingroup bvector
53 */
55 {
56  unsigned bit_blocks; ///< Number of bit blocks
57  unsigned gap_blocks; ///< Number of GAP blocks
58  unsigned ptr_sub_blocks; ///< Number of sub-blocks
59  /// Estimated maximum of memory required for serialization.
61  /// Memory used by bitvector including temp and service blocks
62  size_t memory_used;
63  /// Array of all GAP block lengths in the bvector.
65  /// GAP lengths used by bvector
67 
68 
69  /// cound bit block
71  {
72  ++bit_blocks;
73  unsigned mem_used = (unsigned)(sizeof(bm::word_t) * bm::set_block_size);
74  memory_used += mem_used;
75  max_serialize_mem += mem_used;
76  }
77 
78  /// count gap block
79  void add_gap_block(unsigned capacity, unsigned length)
80  {
81  (gap_blocks < bm::set_total_blocks) ? gap_length[gap_blocks] = (gap_word_t)length : 0;
82  ++gap_blocks;
83  unsigned mem_used = (unsigned)(capacity * sizeof(gap_word_t));
84  memory_used += mem_used;
85  max_serialize_mem += (unsigned)(length * sizeof(gap_word_t));
86  }
87 
88  /// Reset statisctics
89  void reset()
90  {
91  bit_blocks = gap_blocks = ptr_sub_blocks = 0;
92  max_serialize_mem = memory_used = 0;
93  }
94 };
95 
96 /**
97  @brief Pair type
98 */
99 template<typename First, typename Second>
100 struct pair
101 {
102  First first;
103  Second second;
104 };
105 
106 /**
107  \brief bit-decode cache structure
108 */
110 {
111  unsigned short bits[65]; //< decoded bits
112  unsigned bcnt; //< length of bits array
113  bm::id64_t cvalue; //< cache decoded value
114 
115  bit_decode_cache() : bcnt(0), cvalue(0) {}
116 };
117 
118 
119 /**
120  \brief ad-hoc conditional expressions
121  \internal
122 */
123 template <bool b> struct conditional
124 {
125  static bool test() { return true; }
126 };
127 template <> struct conditional<false>
128 {
129  static bool test() { return false; }
130 };
131 
132 /*!
133  @defgroup gapfunc GAP functions
134  GAP functions implement different opereations on GAP compressed blocks (internals)
135  and serve as a minimal building blocks.
136  \internal
137  @ingroup bvector
138  */
139 
140 /*!
141  @defgroup bitfunc BIT functions
142  Bit functions implement different opereations on bit blocks (internals)
143  and serve as a minimal building blocks.
144  \internal
145  @ingroup bvector
146  */
147 
148 
149 
150 
151 /*!
152  Returns BSR value
153  @ingroup bitfunc
154 */
155 template <class T>
156 unsigned bit_scan_reverse(T value)
157 {
158  BM_ASSERT(value);
159 
160  if (bm::conditional<sizeof(T)==8>::test())
161  {
162  bm::id64_t v8 = value;
163  v8 >>= 32;
164  unsigned v = (unsigned)v8;
165  if (v)
166  {
167  return bit_scan_reverse32(v) + 32;
168  }
169  }
170  return bit_scan_reverse32((unsigned)value);
171 }
172 
173 
174 /*!
175  Returns bit count
176  @ingroup bitfunc
177 */
180 {
181 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
182  return bm::id_t(_mm_popcnt_u32(w));
183 #else
184  return
185  bm::bit_count_table<true>::_count[(unsigned char)(w)] +
186  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 8)] +
187  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 16)] +
188  bm::bit_count_table<true>::_count[(unsigned char)((w) >> 24)];
189 #endif
190 }
191 
192 inline
193 int parallel_popcnt_32(unsigned int n)
194 {
195  unsigned int tmp;
196 
197  tmp = n - ((n >> 1) & 033333333333)
198  - ((n >> 2) & 011111111111);
199  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
200 }
201 
202 
203 /*!
204  Function calculates number of 1 bits in 64-bit word.
205  @ingroup bitfunc
206 */
209 {
210 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
211 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
212  return unsigned(_mm_popcnt_u64(x));
213 #else
214  return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((unsigned)x);
215 #endif
216 #else
217  x = x - ((x >> 1) & 0x5555555555555555);
218  x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
219  x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
220  x = x + (x >> 8);
221  x = x + (x >> 16);
222  x = x + (x >> 32);
223  return x & 0xFF;
224 #endif
225 }
226 
227 inline
229  bm::id64_t u, bm::id64_t v)
230 {
231  const bm::id64_t m1 = 0x5555555555555555;
232  const bm::id64_t m2 = 0x3333333333333333;
233  const bm::id64_t m3 = 0x0F0F0F0F0F0F0F0F;
234  const bm::id64_t m4 = 0x000000FF000000FF;
235 
236  x = x - ((x >> 1) & m1);
237  y = y - ((y >> 1) & m1);
238  u = u - ((u >> 1) & m1);
239  v = v - ((v >> 1) & m1);
240  x = (x & m2) + ((x >> 2) & m2);
241  y = (y & m2) + ((y >> 2) & m2);
242  u = (u & m2) + ((u >> 2) & m2);
243  v = (v & m2) + ((v >> 2) & m2);
244  x = x + y;
245  u = u + v;
246  x = (x & m3) + ((x >> 4) & m3);
247  u = (u & m3) + ((u >> 4) & m3);
248  x = x + u;
249  x = x + (x >> 8);
250  x = x + (x >> 16);
251  x = x & m4;
252  x = x + (x >> 32);
253  return x & 0x000001FF;
254 }
255 
256 
257 // --------------------------------------------------------------
258 // Functions for bit-scanning
259 // --------------------------------------------------------------
260 
261 /*!
262  \brief Templated algorithm to unpacks octet based word into list of ON bit indexes
263  \param w - value
264  \param func - bit functor
265 
266  @ingroup bitfunc
267 */
268 template<typename T, typename F>
269 void bit_for_each_4(T w, F& func)
270 {
271  for (unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
272  {
273  switch (w & 15) // 1111
274  {
275  case 0: // 0000
276  break;
277  case 1: // 0001
278  func(sub_octet);
279  break;
280  case 2: // 0010
281  func(sub_octet + 1);
282  break;
283  case 3: // 0011
284  func(sub_octet, sub_octet + 1);
285  break;
286  case 4: // 0100
287  func(sub_octet + 2);
288  break;
289  case 5: // 0101
290  func(sub_octet, sub_octet + 2);
291  break;
292  case 6: // 0110
293  func(sub_octet + 1, sub_octet + 2);
294  break;
295  case 7: // 0111
296  func(sub_octet, sub_octet + 1, sub_octet + 2);
297  break;
298  case 8: // 1000
299  func(sub_octet + 3);
300  break;
301  case 9: // 1001
302  func(sub_octet, sub_octet + 3);
303  break;
304  case 10: // 1010
305  func(sub_octet + 1, sub_octet + 3);
306  break;
307  case 11: // 1011
308  func(sub_octet, sub_octet + 1, sub_octet + 3);
309  break;
310  case 12: // 1100
311  func(sub_octet + 2, sub_octet + 3);
312  break;
313  case 13: // 1101
314  func(sub_octet, sub_octet + 2, sub_octet + 3);
315  break;
316  case 14: // 1110
317  func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
318  break;
319  case 15: // 1111
320  func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
321  break;
322  default:
323  BM_ASSERT(0);
324  break;
325  }
326 
327  } // for
328 }
329 
330 
331 /*!
332  \brief Templated algorithm to unpacks word into list of ON bit indexes
333  \param w - value
334  \param func - bit functor
335 
336  @ingroup bitfunc
337 */
338 template<typename T, typename F>
339 void bit_for_each(T w, F& func)
340 {
341  // Note: 4-bit table method works slower than plain check approach
342  for (unsigned octet = 0; w != 0; w >>= 8, octet += 8)
343  {
344  if (w & 1) func(octet + 0);
345  if (w & 2) func(octet + 1);
346  if (w & 4) func(octet + 2);
347  if (w & 8) func(octet + 3);
348  if (w & 16) func(octet + 4);
349  if (w & 32) func(octet + 5);
350  if (w & 64) func(octet + 6);
351  if (w & 128) func(octet + 7);
352 
353  } // for
354 }
355 
356 /*! @brief Adaptor to copy 1 bits to array
357  @internal
358 */
359 template<typename B> class copy_to_array_functor
360 {
361 public:
362  copy_to_array_functor(B* bits): bp_(bits)
363  {}
364 
365  B* ptr() { return bp_; }
366 
367  void operator()(unsigned bit_idx) { *bp_++ = (B)bit_idx; }
368 
369  void operator()(unsigned bit_idx0,
370  unsigned bit_idx1)
371  {
372  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
373  bp_+=2;
374  }
375 
376  void operator()(unsigned bit_idx0,
377  unsigned bit_idx1,
378  unsigned bit_idx2)
379  {
380  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
381  bp_+=3;
382  }
383 
384  void operator()(unsigned bit_idx0,
385  unsigned bit_idx1,
386  unsigned bit_idx2,
387  unsigned bit_idx3)
388  {
389  bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
390  bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
391  bp_+=4;
392  }
393 
394 private:
396  copy_to_array_functor& operator=(const copy_to_array_functor&);
397 private:
398  B* bp_;
399 };
400 
401 
402 /*!
403  \brief Unpacks word into list of ON bit indexes
404  \param w - value
405  \param bits - pointer on the result array
406  \return number of bits in the list
407 
408  @ingroup bitfunc
409 */
410 template<typename T,typename B> unsigned bit_list(T w, B* bits)
411 {
412  copy_to_array_functor<B> func(bits);
413  bit_for_each(w, func);
414  return (unsigned)(func.ptr() - bits);
415 }
416 
417 
418 
419 /*!
420  \brief Unpacks word into list of ON bit indexes (quad-bit based)
421  \param w - value
422  \param bits - pointer on the result array
423  \return number of bits in the list
424 
425  @ingroup bitfunc
426 */
427 template<typename T,typename B> unsigned bit_list_4(T w, B* bits)
428 {
429  copy_to_array_functor<B> func(bits);
430  bit_for_each_4(w, func);
431  return (unsigned)(func.ptr() - bits);
432 }
433 
434 /*!
435  \brief Unpacks word into list of ON bit indexes using popcnt method
436  \param w - value
437  \param bits - pointer on the result array
438  \param offs - value to add to bit position (programmed shift)
439  \return number of bits in the list
440 
441  @ingroup bitfunc
442  @internal
443 */
444 template<typename B>
445 unsigned short bitscan_popcnt(bm::id_t w, B* bits, unsigned short offs)
446 {
447  unsigned pos = 0;
448  while (w)
449  {
450  bm::id_t t = w & -w;
451  bits[pos++] = (B)(bm::word_bitcount(t - 1) + offs);
452  w &= w - 1;
453  }
454  return (unsigned short)pos;
455 }
456 
457 /*!
458  \brief Unpacks word into list of ON bit indexes using popcnt method
459  \param w - value
460  \param bits - pointer on the result array
461  \return number of bits in the list
462 
463  @ingroup bitfunc
464  @internal
465 */
466 template<typename B>
467 unsigned short bitscan_popcnt(bm::id_t w, B* bits)
468 {
469  unsigned pos = 0;
470  while (w)
471  {
472  bm::id_t t = w & -w;
473  bits[pos++] = (B)(bm::word_bitcount(t - 1));
474  w &= w - 1;
475  }
476  return (unsigned short)pos;
477 }
478 
479 
480 /*!
481  \brief Unpacks 64-bit word into list of ON bit indexes using popcnt method
482  \param w - value
483  \param bits - pointer on the result array
484  \param offs - bit address offset to add (0 - default)
485  \return number of bits in the list
486 
487  @ingroup bitfunc
488 */
489 template<typename B>
490 unsigned short bitscan_popcnt64(bm::id64_t w, B* bits)
491 {
492  unsigned short pos = 0;
493  while (w)
494  {
495  bm::id64_t t = w & -w;
496  bits[pos++] = (B) bm::word_bitcount64(t - 1);
497  w &= w - 1;
498  }
499  return pos;
500 }
501 
502 template<typename V, typename B>
503 unsigned short bitscan(V w, B* bits)
504 {
505  if (bm::conditional<sizeof(V) == 8>::test())
506  {
507  return bm::bitscan_popcnt64(w, bits);
508  }
509  else
510  {
511  return bm::bitscan_popcnt((bm::word_t)w, bits);
512  }
513 }
514 
515 // --------------------------------------------------------------
516 // Functions for select
517 // --------------------------------------------------------------
518 
519 /**
520  \brief word find index of the rank-th bit set by bit-testing
521  \param w - 64-bit work to search
522  \param rank - rank to select (should be > 0)
523 
524  \return selected value (inxed of bit set)
525 */
526 inline
527 unsigned word_select64_linear(bm::id64_t w, unsigned rank)
528 {
529  BM_ASSERT(w);
530  BM_ASSERT(rank);
531 
532  for (unsigned count = 0; w; w >>=1ull, ++count)
533  {
534  rank -= unsigned(w & 1ull);
535  if (!rank)
536  return count;
537  }
538  BM_ASSERT(0); // shoud not be here if rank is achievable
539  return ~0u;
540 }
541 
542 /**
543  \brief word find index of the rank-th bit set by bit-testing
544  \param w - 64-bit work to search
545  \param rank - rank to select (should be > 0)
546 
547  \return selected value (inxed of bit set)
548 */
549 inline
550 unsigned word_select64_bitscan(bm::id64_t w, unsigned rank)
551 {
552  BM_ASSERT(w);
553  BM_ASSERT(rank);
554  BM_ASSERT(rank <= bm::word_bitcount64(w));
555 
556  do
557  {
558  --rank;
559  if (!rank)
560  break;
561  w &= w - 1;
562  } while (1);
563 
564  bm::id64_t t = w & -w;
565  unsigned count = bm::word_bitcount64(t - 1);
566  return count;
567 }
568 
569 /**
570  \brief word find index of the rank-th bit set by bit-testing
571  \param w - 64-bit work to search
572  \param rank - rank to select (should be > 0)
573 
574  \return selected value (inxed of bit set)
575 */
576 inline
577 unsigned word_select64(bm::id64_t w, unsigned rank)
578 {
579 #if defined(BMI2_SELECT64)
580  return BMI2_SELECT64(w, rank);
581 #else
582  #if defined(BMI1_SELECT64)
583  return BMI2_SELECT64(w, rank);
584  #else
585  return bm::word_select64_bitscan(w, rank);
586  #endif
587 #endif
588 }
589 
590 // --------------------------------------------------------------
591 // Functions for bit-block digest calculation
592 // --------------------------------------------------------------
593 
594 /*!
595  \brief Compute digest mask for word address in block
596 
597  @return digest bit-mask
598 
599  @ingroup bitfunc
600  @internal
601 */
604 {
605  bm::id64_t mask(1ull);
606  return mask << (w_idx / bm::set_block_digest_wave_size);
607 }
608 
609 /**
610  \brief Compute digest mask for [from..to] positions
611  \param from - range from (in bit-block coordinates)
612  \param to - range to (in bit-block coordinates)
613 
614  @ingroup bitfunc
615  @internal
616 */
618 bm::id64_t digest_mask(unsigned from, unsigned to)
619 {
620  BM_ASSERT(from <= to);
621 
622  bm::id64_t digest_from = from >> bm::set_block_digest_pos_shift;
623  bm::id64_t digest_to = to >> bm::set_block_digest_pos_shift;;
624  const bm::id64_t maskFF(~0ull);
625  return ((maskFF) >> (63 - (digest_to - digest_from))) << digest_from;
626 }
627 
628 /*!
629  \brief check if all digest bits for the range [from..to] are 0
630 
631  \param digest - current digest
632  \param bitpos_from - range from (in bit-block coordinates)
633  \param bitpos_to - range to (in bit-block coordinates)
634 
635  @return true if all range is zero
636 
637  @ingroup bitfunc
638  @internal
639 */
640 inline
641 bool check_zero_digest(bm::id64_t digest, unsigned bitpos_from, unsigned bitpos_to)
642 {
643  bm::id64_t mask = bm::digest_mask(bitpos_from, bitpos_to);
644  return !(digest & mask);
645 }
646 
647 /*!
648  \brief Init block with 000111000 pattren based on digest
649  \param block - Bit block [out]
650  \param digest - digest (used for block initialization)
651 
652  @ingroup bitfunc
653  @internal
654 */
655 inline
656 void block_init_digest0(bm::word_t* const block, bm::id64_t digest)
657 {
658  unsigned off;
659  for (unsigned i = 0; i < 64; ++i)
660  {
662  bm::word_t mask = (digest & 1) ? ~0u : 0u;
663 #if defined(VECT_BLOCK_SET_DIGEST)
664  VECT_BLOCK_SET_DIGEST(&block[off], mask);
665 #else
666  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
667  {
668  block[off+j+0] = block[off+j+1] =
669  block[off+j+2] = block[off+j+3] = mask;
670  } // for j
671 #endif
672  digest >>= 1ull;
673  } // for
674 }
675 
676 /*!
677  \brief Compute digest for 64 non-zero areas
678  \param block - Bit block
679 
680  @return digest bit-mask (0 means all block is empty)
681 
682  @ingroup bitfunc
683  @internal
684 */
685 inline
687 {
688  bm::id64_t digest0 = 0;
689  unsigned off;
690 
691  for (unsigned i = 0; i < 64; ++i)
692  {
694  #if defined(VECT_IS_DIGEST_ZERO)
695  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
696  digest0 |= bm::id64_t(!all_zero) << i;
697  #else
698  const bm::id64_t mask(1ull);
699  for (unsigned j = 0; j < bm::set_block_digest_wave_size; j+=4)
700  {
701  bm::word_t w =
702  block[off+j+0] | block[off+j+1] | block[off+j+2] | block[off+j+3];
703  if (w)
704  {
705  digest0 |= (mask << i);
706  break;
707  }
708  } // for j
709  #endif
710 
711  } // for i
712  return digest0;
713 }
714 
715 /*!
716  \brief Compute digest for 64 non-zero areas based on existing digest
717  (function revalidates zero areas)
718  \param block - bit block
719  \param digest - start digest
720 
721  @return digest bit-mask (0 means all block is empty)
722 
723  @ingroup bitfunc
724  @internal
725 */
726 inline
728 {
729  const bm::id64_t mask(1ull);
730  bm::id64_t d = digest;
731 
732  while (d)
733  {
734  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
735 
736  unsigned wave = bm::word_bitcount64(t - 1);
737  unsigned off = wave * bm::set_block_digest_wave_size;
738 
739  #if defined(VECT_IS_DIGEST_ZERO)
740  bool all_zero = VECT_IS_DIGEST_ZERO(&block[off]);
741  digest &= all_zero ? ~(mask << wave) : digest;
742  #else
744  (const bm::bit_block_t::bunion_t*)(&block[off]);
745  bm::id64_t w64 = 0;
746  unsigned j = 0;
747  do
748  {
749  w64 |=
750  src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
751  j += 4;
752  } while ((j < bm::set_block_digest_wave_size/2) & !w64);
753  digest &= w64 ? digest : ~(mask << wave);
754  #endif
755 
756  d = bm::bmi_bslr_u64(d); // d &= d - 1;
757  } // while
758 
759  BM_ASSERT(bm::calc_block_digest0(block) == digest);
760  return digest;
761 }
762 
763 // --------------------------------------------------------------
764 
765 
766 /// Returns true if set operation is constant (bitcount)
767 inline
769 {
770  return (int(op) >= int(set_COUNT));
771 }
772 
773 /**
774  Convert set operation to operation
775 */
776 inline
778 {
779  BM_ASSERT(op == set_AND ||
780  op == set_OR ||
781  op == set_SUB ||
782  op == set_XOR);
783  return (bm::operation) op;
784 }
785 
786 //---------------------------------------------------------------------
787 
788 /**
789  Structure carries pointer on bit block with all bits 1
790  @ingroup bitfunc
791 */
792 template<bool T> struct all_set
793 {
795  {
798 
800  {
801  ::memset(_p, 0xFF, sizeof(_p)); // set FULL BLOCK content (all 1s)
802  if (bm::conditional<sizeof(void*) == 8>::test())
803  {
804  const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
805  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
806  }
807  else
808  {
809  const unsigned magic_mask = 0xFFFFfffe;
810  ::memcpy(&_p_fullp, &magic_mask, sizeof(magic_mask));
811  }
812  }
813  };
814 
815  // version with minimal branching, super-scalar friendly
816  //
817 
818  inline
819  static bm::id64_t block_type(const bm::word_t* bp)
820  {
821  bm::id64_t type;
822  if (bm::conditional<sizeof(void*) == 8>::test())
823  {
824  bm::id64_t w = reinterpret_cast<unsigned long long>(bp);
825  type = (w & 3) | // FULL BLOCK or GAP
826  ((bp == _block._p) << 1);
827  type = type ? type : w;
828  }
829  else
830  {
831  unsigned w = reinterpret_cast<unsigned long>(bp);
832  type = (w & 3) | // FULL BLOCK or GAP
833  ((bp == _block._p) << 1);
834  type = type ? type : w;
835  }
836  return type;
837  }
838 
840  static bool is_full_block(const bm::word_t* bp)
841  { return (bp == _block._p || bp == _block._p_fullp); }
842 
844  static bool is_valid_block_addr(const bm::word_t* bp)
845  { return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
846 
848 };
849 
850 
851 template<bool T> typename all_set<T>::all_set_block all_set<T>::_block;
852 
853 /// XOR swap two scalar variables
854 template<typename W>
855 void xor_swap(W& x, W& y)
856 {
857  BM_ASSERT(&x != &y);
858  x ^= y;
859  y ^= x;
860  x ^= y;
861 }
862 
863 
864 //---------------------------------------------------------------------
865 
866 /*!
867  \brief Lexicographical comparison of two words as bit strings (reference)
868  Auxiliary implementation for testing and reference purposes.
869  \param w1 - First word.
870  \param w2 - Second word.
871  \return <0 - less, =0 - equal, >0 - greater.
872 
873  @ingroup bitfunc
874 */
875 template<typename T> int wordcmp0(T w1, T w2)
876 {
877  while (w1 != w2)
878  {
879  int res = (w1 & 1) - (w2 & 1);
880  if (res != 0) return res;
881  w1 >>= 1;
882  w2 >>= 1;
883  }
884  return 0;
885 }
886 
887 
888 /*
889 template<typename T> int wordcmp(T w1, T w2)
890 {
891  T diff = w1 ^ w2;
892  return diff ? ((w1 & diff & (diff ^ (diff - 1)))? 1 : -1) : 0;
893 }
894 */
895 /*!
896  \brief Lexicographical comparison of two words as bit strings.
897  Auxiliary implementation for testing and reference purposes.
898  \param a - First word.
899  \param b - Second word.
900  \return <0 - less, =0 - equal, >0 - greater.
901 
902  @ingroup bitfunc
903 */
904 
905 template<typename T> int wordcmp(T a, T b)
906 {
907  T diff = a ^ b;
908  return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
909 }
910 
911 
912 // Low bit extraction
913 // x & (x ^ (x-1))
914 
915 
916 // ----------------------------------------------------------------------
917 
918 
919 /*! @brief Returns "true" if all bits in the block are 0
920  @ingroup bitfunc
921 */
922 inline
924 {
925 #if defined(VECT_IS_ZERO_BLOCK)
926  return VECT_IS_ZERO_BLOCK(start);
927 #else
928  const bm::wordop_t* BMRESTRICT blk = (bm::wordop_t*) (start);
929  const bm::wordop_t* BMRESTRICT blk_end = (bm::wordop_t*)(start + bm::set_block_size);
930  do
931  {
932  if (blk[0] | blk[1] | blk[2] | blk[3])
933  return false;
934  blk += 4;
935  } while (blk < blk_end);
936  return true;
937 #endif
938 }
939 
940 // ----------------------------------------------------------------------
941 
942 /*!
943  \brief Checks if GAP block is all-zero.
944  \param buf - GAP buffer pointer.
945  \returns true if all-zero.
946 
947  @ingroup gapfunc
948 */
951 {
952  // (almost) branchless variant:
953  return (!(*buf & 1u)) & (!(bm::gap_max_bits - 1 - buf[1]));
954 }
955 
956 /*!
957  \brief Checks if GAP block is all-one.
958  \param buf - GAP buffer pointer.
959  \param set_max - max possible bitset length
960  \returns true if all-one.
961 
962  @ingroup gapfunc
963 */
966 {
967  return ((*buf & 1u) && (buf[1] == bm::gap_max_bits - 1));
968 }
969 
970 /*!
971  \brief Returs GAP block length.
972  \param buf - GAP buffer pointer.
973  \returns GAP block length.
974 
975  @ingroup gapfunc
976 */
979 {
980  return (bm::gap_word_t)((*buf >> 3) + 1);
981 }
982 
983 
984 /*!
985  \brief Returs GAP block capacity
986  \param buf - GAP buffer pointer
987  \param glevel_len - pointer on GAP header word
988  \returns GAP block capacity.
989 
990  @ingroup gapfunc
991 */
992 template<typename T>
993 unsigned gap_capacity(const T* buf, const T* glevel_len)
994 {
995  return glevel_len[(*buf >> 1) & 3];
996 }
997 
998 
999 /*!
1000  \brief Returs GAP block capacity limit.
1001  \param buf - GAP buffer pointer.
1002  \param glevel_len - GAP lengths table (gap_len_table)
1003  \returns GAP block limit.
1004 
1005  @ingroup gapfunc
1006 */
1007 template<typename T>
1008 unsigned gap_limit(const T* buf, const T* glevel_len)
1009 {
1010  return glevel_len[(*buf >> 1) & 3]-4;
1011 }
1012 
1013 
1014 /*!
1015  \brief Returs GAP blocks capacity level.
1016  \param buf - GAP buffer pointer.
1017  \returns GAP block capacity level.
1018 
1019  @ingroup gapfunc
1020 */
1021 template<typename T>
1022 T gap_level(const T* buf)
1023 {
1024  return T((*buf >> 1) & 3u);
1025 }
1026 
1027 
1028 /*!
1029  \brief GAP block find the last set bit
1030 
1031  \param buf - GAP buffer pointer.
1032  \param last - index of the last 1 bit
1033 
1034  \return 0 if 1 bit was NOT found
1035 
1036  @ingroup gapfunc
1037 */
1038 template<typename T>
1039 unsigned gap_find_last(const T* buf, unsigned* last)
1040 {
1041  BM_ASSERT(last);
1042 
1043  T is_set = (*buf) & 1u;
1044  T end = T((*buf) >> 3u);
1045 
1046  BM_ASSERT(buf[end] == bm::gap_max_bits - 1);
1047 
1048  is_set ^= T((end-1) & 1u);
1049  if (is_set)
1050  {
1051  *last = buf[end];
1052  return is_set;
1053  }
1054  *last = buf[--end];
1055  return end;
1056 }
1057 
1058 /*!
1059  \brief GAP block find the first set bit
1060 
1061  \param buf - GAP buffer pointer.
1062  \param first - index of the first 1 bit
1063 
1064  \return 0 if 1 bit was NOT found
1065 
1066  @ingroup gapfunc
1067 */
1068 template<typename T>
1069 unsigned gap_find_first(const T* buf, unsigned* first)
1070 {
1071  BM_ASSERT(first);
1072 
1073  T is_set = (*buf) & 1u;
1074  if (is_set)
1075  {
1076  *first = 0;
1077  return is_set;
1078  }
1079  if (buf[1] == bm::gap_max_bits - 1)
1080  return 0;
1081  *first = buf[1] + 1;
1082  return 1;
1083 }
1084 
1085 
1086 
1087 /*
1088  \brief Binary search for the block where bit = pos located.
1089  \param buf - GAP buffer pointer.
1090  \param pos - index of the element.
1091  \param is_set - output. GAP value (0 or 1).
1092  \return GAP index.
1093  @ingroup gapfunc
1094 */
1095 template<typename T>
1096 unsigned gap_bfind(const T* buf, unsigned pos, unsigned* is_set)
1097 {
1098  BM_ASSERT(pos < bm::gap_max_bits);
1099  *is_set = (*buf) & 1;
1100 
1101  unsigned start = 1;
1102  unsigned end = 1 + ((*buf) >> 3);
1103 
1104  while ( start != end )
1105  {
1106  unsigned curr = (start + end) >> 1;
1107  if ( buf[curr] < pos )
1108  start = curr + 1;
1109  else
1110  end = curr;
1111  }
1112  *is_set ^= ((start-1) & 1);
1113  return start;
1114 }
1115 
1116 
1117 /*!
1118  \brief Tests if bit = pos is true.
1119  \param buf - GAP buffer pointer.
1120  \param pos - index of the element.
1121  \return true if position is in "1" gap
1122  @ingroup gapfunc
1123 */
1124 template<typename T> unsigned gap_test(const T* buf, unsigned pos)
1125 {
1126  BM_ASSERT(pos < bm::gap_max_bits);
1127 
1128  unsigned start = 1;
1129  unsigned end = 1 + ((*buf) >> 3);
1130  if (end - start < 10)
1131  {
1132  unsigned sv = *buf & 1;
1133  unsigned sv1= sv ^ 1;
1134  if (buf[1] >= pos) return sv;
1135  if (buf[2] >= pos) return sv1;
1136  if (buf[3] >= pos) return sv;
1137  if (buf[4] >= pos) return sv1;
1138  if (buf[5] >= pos) return sv;
1139  if (buf[6] >= pos) return sv1;
1140  if (buf[7] >= pos) return sv;
1141  if (buf[8] >= pos) return sv1;
1142  if (buf[9] >= pos) return sv;
1143  BM_ASSERT(0);
1144  }
1145  else
1146  {
1147  while (start != end)
1148  {
1149  unsigned curr = (start + end) >> 1;
1150  if (buf[curr] < pos)
1151  start = curr + 1;
1152  else
1153  end = curr;
1154  }
1155  }
1156  return ((*buf) & 1) ^ ((--start) & 1);
1157 }
1158 
1159 /*!
1160  \brief Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
1161  \param buf - GAP buffer pointer.
1162  \param pos - index of the element.
1163  \return true if position is in "1" gap
1164  @ingroup gapfunc
1165 */
1166 template<typename T>
1167 unsigned gap_test_unr(const T* buf, const unsigned pos)
1168 {
1169  BM_ASSERT(pos < bm::gap_max_bits);
1170 
1171  if (pos == 0) // quick answer possible
1172  {
1173  return (*buf) & 1;
1174  }
1175 #if defined(BMSSE2OPT)
1176  unsigned start = 1;
1177  unsigned end = 1 + ((*buf) >> 3);
1178  unsigned dsize = end - start;
1179 
1180  if (dsize < 17)
1181  {
1182  start = bm::sse2_gap_find(buf + 1, (bm::gap_word_t)pos, dsize);
1183  unsigned res = ((*buf) & 1) ^ ((start) & 1);
1184  BM_ASSERT(buf[start + 1] >= pos);
1185  BM_ASSERT(buf[start] < pos || (start == 0));
1186  BM_ASSERT(res == bm::gap_test(buf, pos));
1187  return res;
1188  }
1189  unsigned arr_end = end;
1190  while (start != end)
1191  {
1192  unsigned curr = (start + end) >> 1;
1193  if (buf[curr] < pos)
1194  start = curr + 1;
1195  else
1196  end = curr;
1197 
1198  unsigned size = end - start;
1199  if (size < 16)
1200  {
1201  size += (end != arr_end);
1202  unsigned idx = bm::sse2_gap_find(buf + start, (bm::gap_word_t)pos, size);
1203  start += idx;
1204 
1205  BM_ASSERT(buf[start] >= pos);
1206  BM_ASSERT(buf[start - 1] < pos || (start == 1));
1207  break;
1208  }
1209  }
1210 
1211  unsigned res = ((*buf) & 1) ^ ((--start) & 1);
1212 
1213  BM_ASSERT(res == bm::gap_test(buf, pos));
1214  return res;
1215 //#endif
1216 #elif defined(BMSSE42OPT)
1217  unsigned start = 1;
1218  unsigned end = 1 + ((*buf) >> 3);
1219  unsigned dsize = end - start;
1220 
1221  if (dsize < 17)
1222  {
1223  start = bm::sse4_gap_find(buf+1, (bm::gap_word_t)pos, dsize);
1224  unsigned res = ((*buf) & 1) ^ ((start) & 1);
1225  BM_ASSERT(buf[start+1] >= pos);
1226  BM_ASSERT(buf[start] < pos || (start==0));
1227  BM_ASSERT(res == bm::gap_test(buf, pos));
1228  return res;
1229  }
1230  unsigned arr_end = end;
1231  while (start != end)
1232  {
1233  unsigned curr = (start + end) >> 1;
1234  if (buf[curr] < pos)
1235  start = curr + 1;
1236  else
1237  end = curr;
1238 
1239  unsigned size = end - start;
1240  if (size < 16)
1241  {
1242  size += (end != arr_end);
1243  unsigned idx = bm::sse4_gap_find(buf + start, (bm::gap_word_t)pos, size);
1244  start += idx;
1245 
1246  BM_ASSERT(buf[start] >= pos);
1247  BM_ASSERT(buf[start - 1] < pos || (start == 1));
1248  break;
1249  }
1250  }
1251 
1252  unsigned res = ((*buf) & 1) ^ ((--start) & 1);
1253 
1254  BM_ASSERT(res == bm::gap_test(buf, pos));
1255 #elif defined(BMAVX2OPT)
1256  unsigned res = bm::avx2_gap_test(buf, pos);
1257  BM_ASSERT(res == bm::gap_test(buf, pos));
1258 #else
1259  unsigned res = bm::gap_test(buf, pos);
1260 #endif
1261  return res;
1262 }
1263 
1264 
1265 /*! For each non-zero block executes supplied function.
1266  \internal
1267 */
1268 template<class T, class F>
1269 void for_each_nzblock(T*** root, unsigned size1, F& f)
1270 {
1271  for (unsigned i = 0; i < size1; ++i)
1272  {
1273  T** blk_blk = root[i];
1274  if (!blk_blk)
1275  {
1276  f.on_empty_top(i);
1277  continue;
1278  }
1279 
1280  unsigned non_empty_top = 0;
1281  unsigned r = i * bm::set_array_size;
1282  unsigned j = 0;
1283  do
1284  {
1285 #if defined(BM64_AVX2) || defined(BM64_AVX512)
1286  if (!avx2_test_all_zero_wave(blk_blk + j))
1287  {
1288  non_empty_top = 1;
1289  T* blk0 = blk_blk[j + 0];
1290  T* blk1 = blk_blk[j + 1];
1291  T* blk2 = blk_blk[j + 2];
1292  T* blk3 = blk_blk[j + 3];
1293 
1294  unsigned block_idx = r + j + 0;
1295  if (blk0)
1296  f(blk0, block_idx);
1297  else
1298  f.on_empty_block(block_idx);
1299 
1300  if (blk1)
1301  f(blk1, block_idx + 1);
1302  else
1303  f.on_empty_block(block_idx + 1);
1304 
1305  if (blk2)
1306  f(blk2, block_idx + 2);
1307  else
1308  f.on_empty_block(block_idx + 2);
1309 
1310  if (blk3)
1311  f(blk3, block_idx + 3);
1312  else
1313  f.on_empty_block(block_idx + 3);
1314  }
1315  else
1316  {
1317  f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1318  f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1319  }
1320  j += 4;
1321 #elif defined(BM64_SSE4)
1322  if (!sse42_test_all_zero_wave((blk_blk + j)))
1323  {
1324  non_empty_top = 1;
1325  T* blk0 = blk_blk[j + 0];
1326  T* blk1 = blk_blk[j + 1];
1327 
1328  unsigned block_idx = r + j + 0;
1329  if (blk0)
1330  f(blk0, block_idx);
1331  else
1332  f.on_empty_block(block_idx);
1333 
1334  ++block_idx;
1335  if (blk1)
1336  f(blk1, block_idx);
1337  else
1338  f.on_empty_block(block_idx);
1339  }
1340  else
1341  {
1342  f.on_empty_block(r + j + 0);
1343  f.on_empty_block(r + j + 1);
1344  }
1345  j += 2;
1346 #else
1347  if (blk_blk[j])
1348  {
1349  f(blk_blk[j], r + j);
1350  non_empty_top = 1;
1351  }
1352  else
1353  f.on_empty_block(r + j);
1354  ++j;
1355 #endif
1356  } while (j < bm::set_array_size);
1357 
1358  if (non_empty_top == 0)
1359  f.on_empty_top(i);
1360  } // for i
1361 }
1362 
1363 /*! For each non-zero block executes supplied function.
1364 */
1365 template<class T, class F>
1366 void for_each_nzblock2(T*** root, unsigned size1, F& f)
1367 {
1368 #ifdef BM64_SSE4
1369  for (unsigned i = 0; i < size1; ++i)
1370  {
1371  T** blk_blk;
1372  if ((blk_blk = root[i])!=0)
1373  {
1374  {
1375  __m128i w0;
1376  for (unsigned j = 0; j < bm::set_array_size; j+=4)
1377  {
1378  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1379  if (!_mm_testz_si128(w0, w0))
1380  {
1381  T* blk0 = blk_blk[j + 0];
1382  T* blk1 = blk_blk[j + 1];
1383 
1384  if (blk0)
1385  f(blk0);
1386  if (blk1)
1387  f(blk1);
1388  }
1389  w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1390  if (!_mm_testz_si128(w0, w0))
1391  {
1392  T* blk0 = blk_blk[j + 2];
1393  T* blk1 = blk_blk[j + 3];
1394  if (blk0)
1395  f(blk0);
1396  if (blk1)
1397  f(blk1);
1398  }
1399  } // for j
1400  }
1401  }
1402  } // for i
1403 #elif defined(BM64_AVX2) || defined(BM64_AVX512)
1404  for (unsigned i = 0; i < size1; ++i)
1405  {
1406  T** blk_blk;
1407  if ((blk_blk = root[i]) != 0)
1408  {
1409  {
1410  for (unsigned j = 0; j < bm::set_array_size; j += 4)
1411  {
1412  __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1413  if (!_mm256_testz_si256(w0, w0))
1414  {
1415  // as a variant could use: blk0 = (T*)_mm256_extract_epi64(w0, 0);
1416  // but it measures marginally slower
1417  T* blk0 = blk_blk[j + 0];
1418  T* blk1 = blk_blk[j + 1];
1419  T* blk2 = blk_blk[j + 2];
1420  T* blk3 = blk_blk[j + 3];
1421  if (blk0)
1422  f(blk0);
1423  if (blk1)
1424  f(blk1);
1425  if (blk2)
1426  f(blk2);
1427  if (blk3)
1428  f(blk3);
1429  }
1430  } // for j
1431  }
1432  }
1433  } // for i
1434 
1435 #else // non-SIMD mode
1436  for (unsigned i = 0; i < size1; ++i)
1437  {
1438  T** blk_blk;
1439  if ((blk_blk = root[i])!=0)
1440  {
1441  unsigned j = 0;
1442  do
1443  {
1444  if (blk_blk[j])
1445  f(blk_blk[j]);
1446  if (blk_blk[j+1])
1447  f(blk_blk[j+1]);
1448  if (blk_blk[j+2])
1449  f(blk_blk[j+2]);
1450  if (blk_blk[j+3])
1451  f(blk_blk[j+3]);
1452  j += 4;
1453  } while (j < bm::set_array_size);
1454  }
1455  } // for i
1456 #endif
1457 }
1458 
1459 
1460 /*! For each non-zero block executes supplied function-predicate.
1461  Function returns if function-predicate returns true
1462 */
1463 template<class T, class F>
1464 bool for_each_nzblock_if(T*** root, unsigned size1, F& f)
1465 {
1466  unsigned block_idx = 0;
1467  for (unsigned i = 0; i < size1; ++i)
1468  {
1469  T** blk_blk = root[i];
1470  if (!blk_blk)
1471  {
1472  block_idx += bm::set_array_size;
1473  continue;
1474  }
1475 
1476  for (unsigned j = 0;j < bm::set_array_size; ++j, ++block_idx)
1477  {
1478  if (blk_blk[j])
1479  if (f(blk_blk[j], block_idx)) return true;
1480  }
1481  }
1482  return false;
1483 }
1484 
1485 /*! For each block executes supplied function.
1486 */
1487 template<class T, class F>
1488 void for_each_block(T*** root, unsigned size1, F& f)
1489 {
1490  unsigned block_idx = 0;
1491 
1492  for (unsigned i = 0; i < size1; ++i)
1493  {
1494  T** blk_blk = root[i];
1495 
1496  if (blk_blk)
1497  {
1498  for (unsigned j = 0;j < bm::set_array_size; ++j, ++block_idx)
1499  {
1500  f(blk_blk[j], block_idx);
1501  }
1502  }
1503  else
1504  {
1505  for (unsigned j = 0;j < bm::set_array_size; ++j, ++block_idx)
1506  {
1507  f(0, block_idx);
1508  }
1509  }
1510  }
1511 }
1512 
1513 
1514 
1515 /*! Special BM optimized analog of STL for_each
1516 */
1517 template<class T, class F> F bmfor_each(T first, T last, F f)
1518 {
1519  do
1520  {
1521  f(*first);
1522  ++first;
1523  } while (first < last);
1524  return f;
1525 }
1526 
1527 /*! Computes SUM of all elements of the sequence
1528 */
1529 template<class T> T sum_arr(T* first, T* last)
1530 {
1531  T sum = 0;
1532  while (first < last)
1533  {
1534  sum += *first;
1535  ++first;
1536  }
1537  return sum;
1538 }
1539 
1540 
1541 
1542 
1543 /*!
1544  \brief Calculates number of bits ON in GAP buffer.
1545  \param buf - GAP buffer pointer.
1546  \param dsize - buffer size
1547  \return Number of non-zero bits.
1548  @ingroup gapfunc
1549 */
1550 template<typename T> unsigned gap_bit_count(const T* buf, unsigned dsize=0)
1551 {
1552  const T* pcurr = buf;
1553  if (dsize == 0)
1554  dsize = (*pcurr >> 3);
1555 
1556  const T* pend = pcurr + dsize;
1557 
1558  unsigned bits_counter = 0;
1559  ++pcurr;
1560 
1561  if (*buf & 1)
1562  {
1563  bits_counter += *pcurr + 1;
1564  ++pcurr;
1565  }
1566  ++pcurr; // set GAP to 1
1567 
1568  while (pcurr <= pend)
1569  {
1570  bits_counter += *pcurr - *(pcurr-1);
1571  pcurr += 2; // jump to the next positive GAP
1572  }
1573 
1574  return bits_counter;
1575 }
1576 
1577 /*!
1578  \brief Calculates number of bits ON in GAP buffer. Loop unrolled version.
1579  \param buf - GAP buffer pointer.
1580  \return Number of non-zero bits.
1581  @ingroup gapfunc
1582 */
1583 template<typename T> unsigned gap_bit_count_unr(const T* buf)
1584 {
1585  const T* pcurr = buf;
1586  unsigned dsize = (*pcurr >> 3);
1587 
1588  unsigned cnt = 0;
1589  pcurr = buf + 1; // set up start position
1590  T first_one = *buf & 1;
1591  if (first_one)
1592  {
1593  cnt += *pcurr + 1;
1594  ++pcurr;
1595  }
1596  ++pcurr; // set GAP to 1
1597 
1598  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
1599  if (dsize > 34)
1600  {
1601  const unsigned unr_factor = 32;
1602  unsigned waves = (dsize-2) / unr_factor;
1603  pcurr = avx2_gap_sum_arr(pcurr, waves, &cnt);
1604  }
1605  #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
1606  if (dsize > 18)
1607  {
1608  const unsigned unr_factor = 16;
1609  unsigned waves = (dsize - 2) / unr_factor;
1610  pcurr = sse2_gap_sum_arr(pcurr, waves, &cnt);
1611  }
1612  #else
1613  if (dsize > 10)
1614  {
1615  const unsigned unr_factor = 8;
1616  unsigned waves = (dsize - 2) / unr_factor;
1617  for (unsigned i = 0; i < waves; i += unr_factor)
1618  {
1619  cnt += pcurr[0] - pcurr[0 - 1];
1620  cnt += pcurr[2] - pcurr[2 - 1];
1621  cnt += pcurr[4] - pcurr[4 - 1];
1622  cnt += pcurr[6] - pcurr[6 - 1];
1623 
1624  pcurr += unr_factor;
1625  } // for
1626  }
1627  #endif
1628 
1629  const T* pend = buf + dsize;
1630  for ( ; pcurr <= pend ; pcurr+=2)
1631  {
1632  cnt += *pcurr - *(pcurr - 1);
1633  }
1634  BM_ASSERT(cnt == gap_bit_count(buf));
1635  return cnt;
1636 }
1637 
1638 
1639 
1640 /*!
1641  \brief Counts 1 bits in GAP buffer in the closed [left, right] range.
1642  \param buf - GAP buffer pointer.
1643  \param left - leftmost bit index to start from
1644  \param right- rightmost bit index
1645  \return Number of non-zero bits.
1646  @ingroup gapfunc
1647 */
1648 template<typename T>
1649 unsigned gap_bit_count_range(const T* const buf, unsigned left, unsigned right)
1650 {
1651  BM_ASSERT(left <= right);
1652 
1653  const T* pcurr = buf;
1654  const T* pend = pcurr + (*pcurr >> 3);
1655 
1656  unsigned bits_counter = 0;
1657  unsigned is_set;
1658  unsigned start_pos = bm::gap_bfind(buf, left, &is_set);
1659  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
1660 
1661  pcurr = buf + start_pos;
1662  if (right <= *pcurr) // we are in the target gap right now
1663  {
1664  bits_counter = unsigned(right - left + 1u) & is_set; // & is_set == if(is_set)
1665  return bits_counter;
1666  }
1667  bits_counter += unsigned(*pcurr - left + 1u) & is_set;
1668 
1669  unsigned prev_gap = *pcurr++;
1670  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1671  {
1672  bits_counter += (*pcurr - prev_gap) & is_set;
1673  if (pcurr == pend)
1674  return bits_counter;
1675  prev_gap = *pcurr++;
1676  }
1677  bits_counter += unsigned(right - prev_gap) & is_set;
1678  return bits_counter;
1679 }
1680 
1681 /*!
1682  \brief GAP block find position for the rank
1683 
1684  \param block - bit block buffer pointer
1685  \param rank - rank to find (must be > 0)
1686  \param nbit_from - start bit position in block
1687  \param nbit_pos - found position
1688 
1689  \return 0 if position with rank was found, or
1690  the remaining rank (rank - population count)
1691 
1692  @ingroup gapfunc
1693 */
1694 template<typename T>
1695 bm::id_t gap_find_rank(const T* const block,
1696  bm::id_t rank,
1697  unsigned nbit_from,
1698  unsigned& nbit_pos)
1699 {
1700  BM_ASSERT(block);
1701  BM_ASSERT(rank);
1702 
1703  const T* pcurr = block;
1704  const T* pend = pcurr + (*pcurr >> 3);
1705 
1706  unsigned bits_counter = 0;
1707  unsigned is_set;
1708  unsigned start_pos = bm::gap_bfind(block, nbit_from, &is_set);
1709  is_set = ~(is_set - 1u); // 0xFFF.. if true (mask for branchless code)
1710 
1711  pcurr = block + start_pos;
1712  bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
1713  if (bits_counter >= rank) // found!
1714  {
1715  nbit_pos = nbit_from + rank - 1u;
1716  return 0;
1717  }
1718  rank -= bits_counter;
1719  unsigned prev_gap = *pcurr++;
1720  for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
1721  {
1722  bits_counter = (*pcurr - prev_gap) & is_set;
1723  if (bits_counter >= rank) // found!
1724  {
1725  nbit_pos = prev_gap + rank;
1726  return 0;
1727  }
1728  rank -= bits_counter;
1729  prev_gap = *pcurr++;
1730  } // for
1731 
1732  return rank;
1733 }
1734 
1735 
1736 
1737 /*!
1738  \brief Counts 1 bits in GAP buffer in the closed [0, right] range.
1739  \param buf - GAP buffer pointer.
1740  \param right- rightmost bit index
1741  \return Number of non-zero bits.
1742  @ingroup gapfunc
1743 */
1744 template<typename T>
1745 unsigned gap_bit_count_to(const T* const buf, T right)
1746 {
1747  const T* pcurr = buf;
1748  const T* pend = pcurr + (*pcurr >> 3);
1749 
1750  unsigned bits_counter = 0;
1751  unsigned is_set = ~((unsigned(*buf) & 1u) - 1u); // 0xFFF.. if true (mask for branchless code)
1752  BM_ASSERT(is_set == 0u || is_set == ~0u);
1753  pcurr = buf + 1;
1754 
1755  if (right <= *pcurr) // we are in the target block right now
1756  {
1757  bits_counter = (right + 1u) & is_set; // & is_set == if (is_set)
1758  return bits_counter;
1759  }
1760  bits_counter += (*pcurr + 1u) & is_set;
1761 
1762  unsigned prev_gap = *pcurr++;
1763  for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
1764  {
1765  bits_counter += (*pcurr - prev_gap) & is_set;
1766  if (pcurr == pend)
1767  return bits_counter;
1768  prev_gap = *pcurr++;
1769  }
1770  bits_counter += (right - prev_gap) & is_set;
1771  return bits_counter;
1772 }
1773 
1774 
1775 /*!
1776  D-GAP block for_each algorithm
1777 
1778  D-Gap Functor is called for each element but last one.
1779 
1780  \param gap_buf - GAP buffer
1781  \param func - functor object
1782 
1783 */
1784 template<class T, class Func>
1785 void for_each_dgap(const T* gap_buf, Func& func)
1786 {
1787  const T* pcurr = gap_buf;
1788  const T* pend = pcurr + (*pcurr >> 3);
1789  ++pcurr;
1790 
1791  T prev = *pcurr;
1792  func((T)(prev + 1)); // first element incremented to avoid 0
1793  ++pcurr;
1794  do
1795  {
1796  func((T)(*pcurr - prev)); // all others are [N] - [N-1]
1797  prev = *pcurr;
1798  } while (++pcurr < pend);
1799 }
1800 
1801 /** d-Gap copy functor
1802  @internal
1803 */
1804 template<typename T> struct d_copy_func
1805 {
1806  d_copy_func(T* dg_buf) : dgap_buf_(dg_buf) {}
1807  void operator()(T dgap) { *dgap_buf_++ = dgap; }
1808 
1810 };
1811 
1812 /*!
1813  \brief Convert GAP buffer into D-GAP buffer
1814 
1815  Delta GAP representation is DGAP[N] = GAP[N] - GAP[N-1]
1816 
1817  \param gap_buf - GAP buffer
1818  \param dgap_buf - Delta-GAP buffer
1819  \param copy_head - flag to copy GAP header
1820 
1821  \internal
1822 
1823  @ingroup gapfunc
1824 */
1825 template<typename T>
1826 T* gap_2_dgap(const T* gap_buf, T* dgap_buf, bool copy_head=true)
1827 {
1828  if (copy_head) // copy GAP header
1829  {
1830  *dgap_buf++ = *gap_buf;
1831  }
1832 
1833  d_copy_func<T> copy_func(dgap_buf);
1834  for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
1835  return copy_func.dgap_buf_;
1836 }
1837 
1838 /*!
1839  \brief Convert D-GAP buffer into GAP buffer
1840 
1841  GAP representation is GAP[N] = DGAP[N] + DGAP[N-1]
1842 
1843  \param dgap_buf - Delta-GAP buffer
1844  \param gap_header - GAP header word
1845  \param gap_buf - GAP buffer
1846 
1847  \internal
1848  @ingroup gapfunc
1849 */
1850 template<typename T>
1851 void dgap_2_gap(const T* dgap_buf, T* gap_buf, T gap_header=0)
1852 {
1853  const T* pcurr = dgap_buf;
1854  unsigned len;
1855  if (!gap_header) // GAP header is already part of the stream
1856  {
1857  len = *pcurr >> 3;
1858  *gap_buf++ = *pcurr++; // copy GAP header
1859  }
1860  else // GAP header passed as a parameter
1861  {
1862  len = gap_header >> 3;
1863  *gap_buf++ = gap_header; // assign GAP header
1864  }
1865  --len; // last element is actually not encoded
1866  const T* pend = pcurr + len;
1867 
1868  *gap_buf = *pcurr++; // copy first element
1869  if (*gap_buf == 0)
1870  *gap_buf = 65535; // fix +1 overflow
1871  else
1872  *gap_buf = T(*gap_buf - 1);
1873 
1874  for (++gap_buf; pcurr < pend; ++pcurr)
1875  {
1876  T prev = *(gap_buf-1); // don't remove temp(undef expression!)
1877  *gap_buf++ = T(*pcurr + prev);
1878  }
1879  *gap_buf = 65535; // add missing last element
1880 }
1881 
1882 
1883 /*!
1884  \brief Lexicographical comparison of GAP buffers.
1885  \param buf1 - First GAP buffer pointer.
1886  \param buf2 - Second GAP buffer pointer.
1887  \return <0 - less, =0 - equal, >0 - greater.
1888 
1889  @ingroup gapfunc
1890 */
1891 template<typename T> int gapcmp(const T* buf1, const T* buf2)
1892 {
1893  const T* pcurr1 = buf1;
1894  const T* pend1 = pcurr1 + (*pcurr1 >> 3);
1895  unsigned bitval1 = *buf1 & 1;
1896  ++pcurr1;
1897 
1898  const T* pcurr2 = buf2;
1899  unsigned bitval2 = *buf2 & 1;
1900  ++pcurr2;
1901 
1902  while (pcurr1 <= pend1)
1903  {
1904  if (*pcurr1 == *pcurr2)
1905  {
1906  if (bitval1 != bitval2)
1907  {
1908  return (bitval1) ? 1 : -1;
1909  }
1910  }
1911  else
1912  {
1913  if (bitval1 == bitval2)
1914  {
1915  if (bitval1)
1916  {
1917  return (*pcurr1 < *pcurr2) ? -1 : 1;
1918  }
1919  else
1920  {
1921  return (*pcurr1 < *pcurr2) ? 1 : -1;
1922  }
1923  }
1924  else
1925  {
1926  return (bitval1) ? 1 : -1;
1927  }
1928  }
1929 
1930  ++pcurr1; ++pcurr2;
1931 
1932  bitval1 ^= 1;
1933  bitval2 ^= 1;
1934  }
1935 
1936  return 0;
1937 }
1938 
1939 
1940 /*!
1941  \brief Abstract operation for GAP buffers.
1942  Receives functor F as a template argument
1943  \param dest - destination memory buffer.
1944  \param vect1 - operand 1 GAP encoded buffer.
1945  \param vect1_mask - XOR mask for starting bitflag for vector1
1946  can be 0 or 1 (1 inverts the vector)
1947  \param vect2 - operand 2 GAP encoded buffer.
1948  \param vect2_mask - same as vect1_mask
1949  \param f - operation functor.
1950  \param dlen - destination length after the operation
1951 
1952  \note Internal function.
1953  @internal
1954 
1955  @ingroup gapfunc
1956 */
1957 template<typename T, class F>
1958 void gap_buff_op(T* BMRESTRICT dest,
1959  const T* BMRESTRICT vect1,
1960  unsigned vect1_mask,
1961  const T* BMRESTRICT vect2,
1962  unsigned vect2_mask,
1963  F& f,
1964  unsigned& dlen)
1965 {
1966  const T* cur1 = vect1;
1967  const T* cur2 = vect2;
1968 
1969  T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
1970  T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
1971 
1972  T bitval = (T) f(bitval1, bitval2);
1973  T bitval_prev = bitval;
1974 
1975  T* res = dest;
1976  *res = bitval;
1977  ++res;
1978 
1979  while (1)
1980  {
1981  bitval = (T) f(bitval1, bitval2);
1982 
1983  // Check if GAP value changes and we need to
1984  // start the next one.
1985  if (bitval != bitval_prev)
1986  {
1987  ++res;
1988  bitval_prev = bitval;
1989  }
1990 
1991  if (*cur1 < *cur2)
1992  {
1993  *res = *cur1;
1994  ++cur1;
1995  bitval1 ^= 1;
1996  }
1997  else // >=
1998  {
1999  *res = *cur2;
2000  if (*cur2 < *cur1)
2001  {
2002  bitval2 ^= 1;
2003  }
2004  else // equal
2005  {
2006  if (*cur2 == (bm::gap_max_bits - 1))
2007  {
2008  break;
2009  }
2010 
2011  ++cur1;
2012  bitval1 ^= 1;
2013  bitval2 ^= 1;
2014  }
2015  ++cur2;
2016  }
2017 
2018  } // while
2019 
2020  dlen = (unsigned)(res - dest);
2021  *dest = (T)((*dest & 7) + (dlen << 3));
2022 }
2023 
2024 /*!
2025  \brief Abstract distance test operation for GAP buffers.
2026  Receives functor F as a template argument
2027  \param vect1 - operand 1 GAP encoded buffer.
2028  \param vect1_mask - XOR mask for starting bitflag for vector1
2029  can be 0 or 1 (1 inverts the vector)
2030  \param vect2 - operand 2 GAP encoded buffer.
2031  \param vect2_mask - same as vect1_mask
2032  \param f - operation functor.
2033  \note Internal function.
2034  \return non zero value if operation result returns any 1 bit
2035 
2036  @ingroup gapfunc
2037 */
2038 template<typename T, class F>
2039 unsigned gap_buff_any_op(const T* BMRESTRICT vect1,
2040  unsigned vect1_mask,
2041  const T* BMRESTRICT vect2,
2042  unsigned vect2_mask,
2043  F f)
2044 {
2045  const T* cur1 = vect1;
2046  const T* cur2 = vect2;
2047 
2048  unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2049  unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2050 
2051  unsigned bitval = f(bitval1, bitval2);
2052  if (bitval)
2053  return bitval;
2054  unsigned bitval_prev = bitval;
2055 
2056  while (1)
2057  {
2058  bitval = f(bitval1, bitval2);
2059  if (bitval)
2060  return bitval;
2061 
2062  if (bitval != bitval_prev)
2063  bitval_prev = bitval;
2064 
2065  if (*cur1 < *cur2)
2066  {
2067  ++cur1;
2068  bitval1 ^= 1;
2069  }
2070  else // >=
2071  {
2072  if (*cur2 < *cur1)
2073  {
2074  bitval2 ^= 1;
2075  }
2076  else // equal
2077  {
2078  if (*cur2 == (bm::gap_max_bits - 1))
2079  {
2080  break;
2081  }
2082 
2083  ++cur1;
2084  bitval1 ^= 1;
2085  bitval2 ^= 1;
2086  }
2087  ++cur2;
2088  }
2089 
2090  } // while
2091 
2092  return 0;
2093 }
2094 
2095 
2096 
2097 /*!
2098  \brief Abstract distance(similarity) operation for GAP buffers.
2099  Receives functor F as a template argument
2100  \param vect1 - operand 1 GAP encoded buffer.
2101  \param vect2 - operand 2 GAP encoded buffer.
2102  \param f - operation functor.
2103  \note Internal function.
2104 
2105  @ingroup gapfunc
2106 */
2107 template<typename T, class F>
2108 unsigned gap_buff_count_op(const T* vect1, const T* vect2, F f)
2109 {
2110  unsigned count;// = 0;
2111  const T* cur1 = vect1;
2112  const T* cur2 = vect2;
2113 
2114  unsigned bitval1 = (*cur1++ & 1);
2115  unsigned bitval2 = (*cur2++ & 1);
2116  unsigned bitval = count = f(bitval1, bitval2);
2117  unsigned bitval_prev = bitval;
2118 
2119  //if (bitval) ++count;
2120 
2121  T res, res_prev;
2122  res = res_prev = 0;
2123 
2124  while (1)
2125  {
2126  bitval = f(bitval1, bitval2);
2127 
2128  // Check if GAP value changes and we need to
2129  // start the next one.
2130  if (bitval != bitval_prev)
2131  {
2132  bitval_prev = bitval;
2133  res_prev = res;
2134  }
2135 
2136  if (*cur1 < *cur2)
2137  {
2138  res = *cur1;
2139  if (bitval)
2140  {
2141  count += res - res_prev;
2142  res_prev = res;
2143  }
2144  ++cur1;
2145  bitval1 ^= 1;
2146  }
2147  else // >=
2148  {
2149  res = *cur2;
2150  if (bitval)
2151  {
2152  count += res - res_prev;
2153  res_prev = res;
2154  }
2155  if (*cur2 < *cur1)
2156  {
2157  bitval2 ^= 1;
2158  }
2159  else // equal
2160  {
2161  if (*cur2 == (bm::gap_max_bits - 1))
2162  {
2163  break;
2164  }
2165 
2166  ++cur1;
2167  bitval1 ^= 1;
2168  bitval2 ^= 1;
2169  }
2170  ++cur2;
2171  }
2172 
2173  } // while
2174 
2175  return count;
2176 }
2177 
2178 
2179 
2180 /*!
2181  \brief Sets or clears bit in the GAP buffer.
2182 
2183  \param val - new bit value
2184  \param buf - GAP buffer.
2185  \param pos - Index of bit to set.
2186  \param is_set - (OUT) flag if bit was actually set.
2187 
2188  \return New GAP buffer length.
2189 
2190  @ingroup gapfunc
2191 */
2192 template<typename T>
2193 unsigned gap_set_value(unsigned val,
2194  T* BMRESTRICT buf,
2195  unsigned pos,
2196  unsigned* BMRESTRICT is_set)
2197 {
2198  BM_ASSERT(pos < bm::gap_max_bits);
2199  unsigned curr = gap_bfind(buf, pos, is_set);
2200 
2201  T end = (T)(*buf >> 3);
2202  if (*is_set == val)
2203  {
2204  *is_set = 0;
2205  return end;
2206  }
2207  *is_set = 1;
2208 
2209  T* pcurr = buf + curr;
2210  T* pprev = pcurr - 1;
2211  T* pend = buf + end;
2212 
2213  // Special case, first bit GAP operation. There is no platform beside it.
2214  // initial flag must be inverted.
2215  if (pos == 0)
2216  {
2217  *buf ^= 1;
2218  if ( buf[1] ) // We need to insert a 1 bit platform here.
2219  {
2220  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2221  buf[1] = 0;
2222  ++end;
2223  }
2224  else // Only 1 bit in the GAP. We need to delete the first GAP.
2225  {
2226  pprev = buf + 1;
2227  pcurr = pprev + 1;
2228  do
2229  {
2230  *pprev++ = *pcurr++;
2231  } while (pcurr < pend);
2232  --end;
2233  }
2234  }
2235  else if (curr > 1 && ((unsigned)(*pprev))+1 == pos) // Left border bit
2236  {
2237  ++(*pprev);
2238  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2239  {
2240  --end;
2241  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2242  {
2243  --end;
2244  ++pcurr;
2245  do
2246  {
2247  *pprev++ = *pcurr++;
2248  } while (pcurr < pend);
2249  }
2250  }
2251  }
2252  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2253  {
2254  --(*pcurr);
2255  if (pcurr == pend)
2256  {
2257  ++end;
2258  }
2259  }
2260  else // Worst case we need to split current block.
2261  {
2262  ::memmove(pcurr+2, pcurr,(end - curr + 1)*sizeof(T));
2263  *pcurr++ = (T)(pos - 1);
2264  *pcurr = (T)pos;
2265  end = (T)(end + 2);
2266  }
2267 
2268  // Set correct length word.
2269  *buf = (T)((*buf & 7) + (end << 3));
2270 
2271  buf[end] = bm::gap_max_bits - 1;
2272  return end;
2273 }
2274 
2275 /*!
2276  \brief Add new value to the end of GAP buffer.
2277 
2278  \param buf - GAP buffer.
2279  \param pos - Index of bit to set.
2280 
2281  \return New GAP buffer length.
2282 
2283  @ingroup gapfunc
2284 */
2285 template<typename T>
2286 unsigned gap_add_value(T* buf, unsigned pos)
2287 {
2288  BM_ASSERT(pos < bm::gap_max_bits);
2289 
2290  T end = (T)(*buf >> 3);
2291  T curr = end;
2292  T* pcurr = buf + end;
2293  T* pend = pcurr;
2294  T* pprev = pcurr - 1;
2295 
2296  // Special case, first bit GAP operation. There is no platform beside it.
2297  // initial flag must be inverted.
2298  if (pos == 0)
2299  {
2300  *buf ^= 1;
2301  if ( buf[1] ) // We need to insert a 1 bit platform here.
2302  {
2303  ::memmove(&buf[2], &buf[1], (end - 1) * sizeof(gap_word_t));
2304  buf[1] = 0;
2305  ++end;
2306  }
2307  else // Only 1 bit in the GAP. We need to delete the first GAP.
2308  {
2309  pprev = buf + 1;
2310  pcurr = pprev + 1;
2311  do
2312  {
2313  *pprev++ = *pcurr++;
2314  } while (pcurr < pend);
2315  --end;
2316  }
2317  }
2318  else if (((unsigned)(*pprev))+1 == pos && (curr > 1) ) // Left border bit
2319  {
2320  ++(*pprev);
2321  if (*pprev == *pcurr) // Curr. GAP to be merged with prev.GAP.
2322  {
2323  --end;
2324  if (pcurr != pend) // GAP merge: 2 GAPS to be deleted
2325  {
2326  // TODO: should never get here...
2327  --end;
2328  ++pcurr;
2329  do
2330  {
2331  *pprev++ = *pcurr++;
2332  } while (pcurr < pend);
2333  }
2334  }
2335  }
2336  else if (*pcurr == pos) // Rightmost bit in the GAP. Border goes left.
2337  {
2338  --(*pcurr);
2339  if (pcurr == pend)
2340  {
2341  ++end;
2342  }
2343  }
2344  else // Worst case we need to split current block.
2345  {
2346  *pcurr++ = (T)(pos - 1);
2347  *pcurr = (T)pos;
2348  end = (T)(end+2);
2349  }
2350 
2351  // Set correct length word.
2352  *buf = (T)((*buf & 7) + (end << 3));
2353 
2354  buf[end] = bm::gap_max_bits - 1;
2355  return end;
2356 }
2357 
2358 /*!
2359  @brief Right shift GAP block by 1 bit
2360  @param buf - block pointer
2361  @param co_falg - carry over from the previous block
2362 
2363  @return carry over bit (1 or 0)
2364  @ingroup gapfunc
2365 */
2366 template<typename T>
2367 bool gap_shift_r1(T* buf, unsigned co_flag, unsigned* new_len)
2368 {
2369  BM_ASSERT(new_len);
2370  bool co;
2371  // 1: increment all GAP values by 1
2372  {
2373  unsigned bitval = *buf & 1;
2374  if (buf[1] == bm::gap_max_bits-1) // full GAP block
2375  {
2376  co = bitval;
2377  }
2378  else
2379  {
2380  unsigned len = (*buf >> 3);
2381  unsigned i = 1;
2382  for (; i < len; ++i)
2383  {
2384  buf[i]++;
2385  bitval ^= 1;
2386  } // for i
2387  BM_ASSERT(buf[i] == bm::gap_max_bits-1);
2388  if (buf[i-1] == bm::gap_max_bits-1) // last element shifts out
2389  {
2390  // Set correct length word
2391  --len;
2392  *buf = (T)((*buf & 7) + (len << 3));
2393  }
2394  co = bitval;
2395  }
2396  }
2397  // set bit position 0 with carry-in flag
2398  {
2399  unsigned is_set;
2400  *new_len = bm::gap_set_value(co_flag, buf, 0, &is_set);
2401  }
2402  return co;
2403 }
2404 
2405 /*!
2406  \brief Convert array to GAP buffer.
2407 
2408  \param buf - GAP buffer.
2409  \param arr - array of values to set
2410  \param len - length of the array
2411 
2412  \return New GAP buffer length.
2413 
2414  @ingroup gapfunc
2415 */
2416 
2417 template<typename T>
2418 unsigned gap_set_array(T* buf, const T* arr, unsigned len)
2419 {
2420  *buf = (T)((*buf & 6u) + (1u << 3)); // gap header setup
2421 
2422  T* pcurr = buf + 1;
2423 
2424  unsigned i = 0;
2425  T curr = arr[i];
2426  if (curr != 0) // need to add the first gap: (0 to arr[0]-1)
2427  {
2428  *pcurr = (T)(curr - 1);
2429  ++pcurr;
2430  }
2431  else
2432  {
2433  ++(*buf); // GAP starts with 1
2434  }
2435  T prev = curr;
2436  T acc = prev;
2437 
2438  for (i = 1; i < len; ++i)
2439  {
2440  curr = arr[i];
2441  if (curr == prev + 1)
2442  {
2443  ++acc;
2444  prev = curr;
2445  }
2446  else
2447  {
2448  *pcurr++ = acc;
2449  acc = curr;
2450  *pcurr++ = (T)(curr-1);
2451  }
2452  prev = curr;
2453  }
2454  *pcurr = acc;
2455  if (acc != bm::gap_max_bits - 1)
2456  {
2457  ++pcurr;
2458  *pcurr = bm::gap_max_bits - 1;
2459  }
2460 
2461  unsigned end = unsigned(pcurr - buf);
2462 
2463  *buf = (T)((*buf & 7) + (end << 3));
2464  return end+1;
2465 }
2466 
2467 
2468 //------------------------------------------------------------------------
2469 
2470 /**
2471  \brief Compute number of GAPs in bit-array
2472  \param arr - array of BITs
2473  \param len - array length
2474 
2475  @ingroup gapfunc
2476 */
2477 template<typename T>
2478 unsigned bit_array_compute_gaps(const T* arr,
2479  unsigned len)
2480 {
2481  unsigned gap_count = 1;
2482  T prev = arr[0];
2483  if (prev > 0)
2484  ++gap_count;
2485  for (unsigned i = 1; i < len; ++i)
2486  {
2487  T curr = arr[i];
2488  if (curr != prev + 1)
2489  {
2490  gap_count += 2;
2491  }
2492  prev = curr;
2493  }
2494  return gap_count;
2495 }
2496 
2497 
2498 //------------------------------------------------------------------------
2499 
2500 /**
2501  \brief Searches for the next 1 bit in the GAP block
2502  \param buf - GAP buffer
2503  \param nbit - bit index to start checking from.
2504  \param prev - returns previously checked value
2505 
2506  \return 0 if not found
2507 
2508  @ingroup gapfunc
2509 */
2510 template<typename T>
2511 unsigned gap_find_in_block(const T* buf,
2512  unsigned nbit,
2513  bm::id_t* prev)
2514 {
2515  BM_ASSERT(nbit < bm::gap_max_bits);
2516 
2517  unsigned bitval;
2518  unsigned gap_idx = bm::gap_bfind(buf, nbit, &bitval);
2519 
2520  if (bitval) // positive block.
2521  {
2522  return 1u;
2523  }
2524 
2525  unsigned val = buf[gap_idx] + 1;
2526  *prev += val - nbit;
2527 
2528  return (val != bm::gap_max_bits); // no bug here.
2529 }
2530 
2531 /*!
2532  \brief Set 1 bit in a block
2533 
2534  @ingroup bitfunc
2535 */
2537 void set_bit(unsigned* dest, unsigned bitpos)
2538 {
2539  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2540  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2541  nbit &= bm::set_word_mask;
2542  dest[nword] |= unsigned(1u << nbit);
2543 }
2544 
2545 /*!
2546  \brief Test 1 bit in a block
2547 
2548  @ingroup bitfunc
2549 */
2551 unsigned test_bit(const unsigned* block, unsigned bitpos)
2552 {
2553  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2554  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2555  nbit &= bm::set_word_mask;
2556  return (block[nword] >> nbit) & 1u;
2557 }
2558 
2559 
2560 /*!
2561  \brief Sets bits to 1 in the bitblock.
2562  \param dest - Bitset buffer.
2563  \param bitpos - Offset of the start bit.
2564  \param bitcount - number of bits to set.
2565 
2566  @ingroup bitfunc
2567 */
2568 inline
2569 void or_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
2570 {
2571  const unsigned maskFF = ~0u;
2572 
2573  dest += unsigned(bitpos >> bm::set_word_shift); // nword
2574  bitpos &= bm::set_word_mask;
2575 
2576  if (bitcount == 1u) // special case (only 1 bit to set)
2577  {
2578  *dest |= (1u << bitpos);
2579  return;
2580  }
2581 
2582  if (bitpos) // starting pos is not aligned
2583  {
2584  unsigned mask_r = maskFF << bitpos;
2585  unsigned right_margin = bitpos + bitcount;
2586  if (right_margin < 32)
2587  {
2588  *dest |= (maskFF >> (32 - right_margin)) & mask_r;
2589  return;
2590  }
2591  *dest++ |= mask_r;
2592  bitcount -= 32 - bitpos;
2593  }
2594  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
2595  dest[0] = dest[1] = maskFF;
2596  if (bitcount >= 32)
2597  {
2598  *dest++ = maskFF; bitcount -= 32;
2599  }
2600  if (bitcount)
2601  {
2602  *dest |= maskFF >> (32 - bitcount);
2603  }
2604 }
2605 
2606 
2607 /*!
2608  \brief SUB (AND NOT) bit interval to 1 in the bitblock.
2609  \param dest - Bitset buffer.
2610  \param bitpos - Offset of the start bit.
2611  \param bitcount - number of bits to set.
2612 
2613  @ingroup bitfunc
2614 */
2615 inline
2616 void sub_bit_block(unsigned* dest, unsigned bitpos, unsigned bitcount)
2617 {
2618  const unsigned maskFF = ~0u;
2619 
2620  dest += unsigned(bitpos >> bm::set_word_shift); // nword
2621  bitpos &= bm::set_word_mask;
2622 
2623  if (bitcount == 1u) // special case (only 1 bit to set)
2624  {
2625  *dest &= ~(1u << bitpos);
2626  return;
2627  }
2628 
2629  if (bitpos) // starting pos is not aligned
2630  {
2631  unsigned mask_r = maskFF << bitpos;
2632  unsigned right_margin = bitpos + bitcount;
2633  if (right_margin < 32)
2634  {
2635  *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
2636  return;
2637  }
2638  *dest++ &= ~mask_r;
2639  bitcount -= 32 - bitpos;
2640  }
2641  for ( ;bitcount >= 64; bitcount-=64, dest+=2)
2642  dest[0] = dest[1] = 0u;
2643  if (bitcount >= 32)
2644  {
2645  *dest++ = 0u; bitcount -= 32;
2646  }
2647  if (bitcount)
2648  {
2649  *dest &= ~(maskFF >> (32 - bitcount));
2650  }
2651 }
2652 
2653 
2654 
2655 /*!
2656  \brief XOR bit interval to 1 in the bitblock.
2657  \param dest - Bitset buffer.
2658  \param bitpos - Offset of the start bit.
2659  \param bitcount - number of bits to set.
2660 
2661  @ingroup bitfunc
2662 */
2663 inline void xor_bit_block(unsigned* dest,
2664  unsigned bitpos,
2665  unsigned bitcount)
2666 {
2667  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
2668  unsigned nword = unsigned(nbit >> bm::set_word_shift);
2669  nbit &= bm::set_word_mask;
2670 
2671  bm::word_t* word = dest + nword;
2672 
2673  if (bitcount == 1) // special case (only 1 bit to set)
2674  {
2675  *word ^= unsigned(1 << nbit);
2676  return;
2677  }
2678 
2679  if (nbit) // starting position is not aligned
2680  {
2681  unsigned right_margin = nbit + bitcount;
2682 
2683  // here we checking if we setting bits only in the current
2684  // word. Example: 00111000000000000000000000000000 (32 bits word)
2685 
2686  if (right_margin < 32)
2687  {
2688  unsigned mask =
2690  bm::block_set_table<true>::_left[right_margin-1];
2691  *word ^= mask;
2692  return;
2693  }
2694  *word ^= block_set_table<true>::_right[nbit];
2695  bitcount -= 32 - nbit;
2696  ++word;
2697  }
2698  for ( ;bitcount >= 64; bitcount-=64, word+=2)
2699  {
2700  word[0] ^= ~0u; word[1] ^= ~0u;
2701  }
2702  if (bitcount >= 32)
2703  {
2704  *word++ ^= ~0u; bitcount -= 32;
2705  }
2706  if (bitcount)
2707  *word ^= block_set_table<true>::_left[bitcount-1];
2708 }
2709 
2710 
2711 /*!
2712  \brief SUB (AND NOT) GAP block to bitblock.
2713  \param dest - bitblock buffer pointer.
2714  \param pcurr - GAP buffer pointer.
2715 
2716  @ingroup gapfunc
2717 */
2718 template<typename T>
2719 void gap_sub_to_bitset(unsigned* dest, const T* pcurr)
2720 {
2721  BM_ASSERT(dest && pcurr);
2722 
2723  const T* pend = pcurr + (*pcurr >> 3);
2724  if (*pcurr & 1) // Starts with 1
2725  {
2726  bm::sub_bit_block(dest, 0, 1 + pcurr[1]);
2727  ++pcurr;
2728  }
2729  for (pcurr += 2; pcurr <= pend; pcurr += 2)
2730  {
2731  BM_ASSERT(*pcurr > pcurr[-1]);
2732  bm::sub_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
2733  }
2734 }
2735 
2736 
2737 /*!
2738  \brief SUB (AND NOT) GAP block to bitblock with digest assist
2739 
2740  \param dest - bitblock buffer pointer.
2741  \param pcurr - GAP buffer pointer.
2742  \param digest0 - digest of 0 strides inside bit block
2743 
2744  @ingroup gapfunc
2745 */
2746 template<typename T>
2747 void gap_sub_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0)
2748 {
2749  BM_ASSERT(dest && pcurr);
2750 
2751  const T* pend = pcurr + (*pcurr >> 3);
2752  if (*pcurr & 1) // Starts with 1
2753  {
2754  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
2755  if (!all_zero)
2756  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
2757  pcurr += 3;
2758  }
2759  else
2760  pcurr += 2;
2761 
2762  // wind forward to digest start
2763  {
2764  unsigned tz = bm::count_trailing_zeros_u64(digest0);
2765  unsigned start_pos = tz << set_block_digest_pos_shift;
2766  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
2767  {
2768  if (*pcurr >= start_pos)
2769  break;
2770  }
2771  }
2772 
2773  unsigned lz = bm::count_leading_zeros_u64(digest0);
2774  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
2775 
2776  unsigned bc, pos;
2777  T prev;
2778  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "1" again
2779  {
2780  BM_ASSERT(*pcurr > *(pcurr-1));
2781  prev = pcurr[-1];
2782  bc = *pcurr - prev;
2783  pos = 1u + prev;
2784 
2785  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
2786  if (!all_zero)
2787  bm::sub_bit_block(dest, pos, bc);
2788 
2789  if (pos > stop_pos)
2790  break; // early break is possible based on digest tail
2791 
2792  } // for
2793 }
2794 
2795 
2796 
2797 /*!
2798  \brief XOR GAP block to bitblock.
2799  \param dest - bitblock buffer pointer.
2800  \param pcurr - GAP buffer pointer.
2801 
2802  @ingroup gapfunc
2803 */
2804 template<typename T>
2805 void gap_xor_to_bitset(unsigned* dest, const T* pcurr)
2806 {
2807  BM_ASSERT(dest && pcurr);
2808 
2809  const T* pend = pcurr + (*pcurr >> 3);
2810  if (*pcurr & 1) // Starts with 1
2811  {
2812  bm::xor_bit_block(dest, 0, 1 + pcurr[1]);
2813  ++pcurr;
2814  }
2815  for (pcurr += 2; pcurr <= pend; pcurr += 2)
2816  {
2817  BM_ASSERT(*pcurr > pcurr[-1]);
2818  bm::xor_bit_block(dest, 1 + pcurr[-1], *pcurr - pcurr[-1]);
2819  }
2820 }
2821 
2822 
2823 /*!
2824  \brief Adds(OR) GAP block to bitblock.
2825  \param dest - bitblock buffer pointer.
2826  \param pcurr - GAP buffer pointer.
2827 
2828  @ingroup gapfunc
2829 */
2830 template<typename T>
2831 void gap_add_to_bitset(unsigned* dest, const T* pcurr)
2832 {
2833  BM_ASSERT(dest && pcurr);
2834 
2835  const T* pend = pcurr + (*pcurr >> 3);
2836  if (*pcurr & 1) // Starts with 1
2837  {
2838  bm::or_bit_block(dest, 0, 1 + pcurr[1]);
2839  pcurr += 3;
2840  }
2841  else
2842  pcurr += 2;
2843 
2844  unsigned bc, pos;
2845  for (; pcurr <= pend; )
2846  {
2847  BM_ASSERT(*pcurr > pcurr[-1]);
2848  pos = 1u + pcurr[-1];
2849  bc = *pcurr - pcurr[-1];
2850  pcurr += 2;
2851  bm::or_bit_block(dest, pos, bc);
2852  }
2853 }
2854 
2855 /*!
2856  \brief ANDs GAP block to bitblock.
2857  \param dest - bitblock buffer pointer.
2858  \param pcurr - GAP buffer pointer.
2859 
2860  @ingroup gapfunc
2861 */
2862 template<typename T>
2863 void gap_and_to_bitset(unsigned* dest, const T* pcurr)
2864 {
2865  BM_ASSERT(dest && pcurr);
2866 
2867  const T* pend = pcurr + (*pcurr >> 3);
2868  if (!(*pcurr & 1) ) // Starts with 0
2869  {
2870  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
2871  pcurr += 3;
2872  }
2873  else
2874  pcurr += 2;
2875 
2876  unsigned bc, pos;
2877  for (; pcurr <= pend; ) // now we are in GAP "0" again
2878  {
2879  BM_ASSERT(*pcurr > *(pcurr-1));
2880  pos = 1u + pcurr[-1];
2881  bc = *pcurr - pcurr[-1];
2882  pcurr += 2;
2883  bm::sub_bit_block(dest, pos, bc);
2884  }
2885 }
2886 
2887 
2888 /*!
2889  \brief ANDs GAP block to bitblock with digest assist
2890  \param dest - bitblock buffer pointer.
2891  \param pcurr - GAP buffer pointer.
2892  \param digest0 - digest of 0 strides for the destination
2893 
2894  @ingroup gapfunc
2895 */
2896 template<typename T>
2897 void gap_and_to_bitset(unsigned* dest, const T* pcurr, bm::id64_t digest0)
2898 {
2899  BM_ASSERT(dest && pcurr);
2900  if (!digest0)
2901  return;
2902 
2903  const T* pend = pcurr + (*pcurr >> 3);
2904  if (!(*pcurr & 1) ) // Starts with 0
2905  {
2906  bool all_zero = bm::check_zero_digest(digest0, 0, pcurr[1]+1);
2907  if (!all_zero)
2908  bm::sub_bit_block(dest, 0, pcurr[1] + 1); // (not AND) - SUB [0] gaps
2909  pcurr += 3;
2910  }
2911  else
2912  pcurr += 2;
2913 
2914  // wind forward to digest start
2915  {
2916  unsigned tz = bm::count_trailing_zeros_u64(digest0);
2917  unsigned start_pos = tz << set_block_digest_pos_shift;
2918  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0"
2919  {
2920  if (*pcurr >= start_pos)
2921  break;
2922  }
2923  }
2924 
2925  unsigned lz = bm::count_leading_zeros_u64(digest0);
2926  unsigned stop_pos = (64u - lz) << set_block_digest_pos_shift;
2927 
2928  unsigned bc, pos;
2929  T prev;
2930  for (; pcurr <= pend; pcurr += 2) // now we are in GAP "0" again
2931  {
2932  BM_ASSERT(*pcurr > *(pcurr-1));
2933 
2934  prev = pcurr[-1];
2935  bc = *pcurr - prev;
2936  pos = 1u + prev;
2937 
2938  bool all_zero = bm::check_zero_digest(digest0, prev, *pcurr);
2939  if (!all_zero)
2940  bm::sub_bit_block(dest, pos, bc);
2941 
2942  if (pos > stop_pos) // early break is possible based on digest tail
2943  break;
2944 
2945  } // for
2946 }
2947 
2948 
2949 /*!
2950  \brief Compute bitcount of bit block AND masked by GAP block
2951  \param block - bitblock buffer pointer
2952  \param pcurr - GAP buffer pointer
2953  \return bitcount - cardinality of the AND product
2954 
2955  @ingroup gapfunc
2956 */
2957 template<typename T>
2958 bm::id_t gap_bitset_and_count(const unsigned* block, const T* pcurr)
2959 {
2960  BM_ASSERT(block);
2961  const T* pend = pcurr + (*pcurr >> 3);
2962  bm::id_t count = 0;
2963  if (*pcurr & 1) // Starts with 1
2964  {
2965  count += bm::bit_block_calc_count_range(block, 0, pcurr[1]);
2966  ++pcurr;
2967  }
2968  for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
2969  {
2970  count += bm::bit_block_calc_count_range(block, pcurr[-1]+1, *pcurr);
2971  }
2972  return count;
2973 }
2974 
2975 
2976 /*!
2977  \brief Bitcount test of bit block AND masked by GAP block.
2978  \param block - bitblock buffer pointer
2979  \param pcurr - GAP buffer pointer
2980  \return non-zero value if AND produces any result
2981 
2982  @ingroup gapfunc
2983 */
2984 template<typename T>
2985 bm::id_t gap_bitset_and_any(const unsigned* block, const T* pcurr)
2986 {
2987  BM_ASSERT(block);
2988 
2989  const T* pend = pcurr + (*pcurr >> 3);
2990  bm::id_t count = 0;
2991  if (*pcurr & 1) // Starts with 1
2992  {
2993  count = bm::bit_block_any_range(block, 0, pcurr[1]);
2994  ++pcurr;
2995  }
2996  for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
2997  {
2998  count = bm::bit_block_any_range(block, pcurr[-1]+1, *pcurr);
2999  }
3000  return count;
3001 }
3002 
3003 
3004 
3005 /*!
3006  \brief Compute bitcount of bit block SUB masked by GAP block.
3007  \param block - bitblock buffer pointer.
3008  \param buf - GAP buffer pointer.
3009  \return bit-count result of AND NOT operation
3010 
3011  @ingroup gapfunc
3012 */
3013 template<typename T>
3014 bm::id_t gap_bitset_sub_count(const unsigned* block, const T* buf)
3015 {
3016  BM_ASSERT(block);
3017 
3018  const T* pcurr = buf;
3019  const T* pend = pcurr + (*pcurr >> 3);
3020  ++pcurr;
3021 
3022  bm::id_t count = 0;
3023 
3024  if (!(*buf & 1)) // Starts with 0
3025  {
3026  count += bit_block_calc_count_range(block, 0, *pcurr);
3027  ++pcurr;
3028  }
3029  ++pcurr; // now we are in GAP "0" again
3030 
3031  for (;pcurr <= pend; pcurr+=2)
3032  {
3033  count += bm::bit_block_calc_count_range(block, *(pcurr-1)+1, *pcurr);
3034  }
3035  return count;
3036 }
3037 
3038 
3039 /*!
3040  \brief Compute bitcount test of bit block SUB masked by GAP block
3041  \param block - bitblock buffer pointer
3042  \param buf - GAP buffer pointer
3043  \return non-zero value if AND NOT produces any 1 bits
3044 
3045  @ingroup gapfunc
3046 */
3047 template<typename T>
3048 bm::id_t gap_bitset_sub_any(const unsigned* block, const T* buf)
3049 {
3050  BM_ASSERT(block);
3051 
3052  const T* pcurr = buf;
3053  const T* pend = pcurr + (*pcurr >> 3);
3054  ++pcurr;
3055 
3056  bm::id_t count = 0;
3057 
3058  if (!(*buf & 1)) // Starts with 0
3059  {
3060  count += bit_block_any_range(block, 0, *pcurr);
3061  if (count)
3062  return count;
3063  ++pcurr;
3064  }
3065  ++pcurr; // now we are in GAP "0" again
3066 
3067  for (; !count && pcurr <= pend; pcurr+=2)
3068  {
3069  count += bm::bit_block_any_range(block, *(pcurr-1)+1, *pcurr);
3070  }
3071  return count;
3072 }
3073 
3074 
3075 
3076 /*!
3077  \brief Compute bitcount of bit block XOR masked by GAP block
3078  \param block - bitblock buffer pointer
3079  \param buf - GAP buffer pointer
3080  \return bit count value of XOR operation
3081 
3082  @ingroup gapfunc
3083 */
3084 template<typename T>
3085 bm::id_t gap_bitset_xor_count(const unsigned* block, const T* buf)
3086 {
3087  BM_ASSERT(block);
3088 
3089  const T* pcurr = buf;
3090  const T* pend = pcurr + (*pcurr >> 3);
3091  ++pcurr;
3092 
3093  unsigned bitval = *buf & 1;
3094 
3095  bm::id_t count = bm::bit_block_calc_count_range(block, 0, *pcurr);
3096  if (bitval)
3097  {
3098  count = *pcurr + 1 - count;
3099  }
3100 
3101  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3102  {
3103  T prev = (T)(*(pcurr-1)+1);
3104  bm::id_t c = bit_block_calc_count_range(block, prev, *pcurr);
3105 
3106  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3107  c = (*pcurr - prev + 1) - c;
3108  count += c;
3109  }
3110  return count;
3111 }
3112 
3113 /*!
3114  \brief Compute bitcount test of bit block XOR masked by GAP block.
3115  \param block - bitblock buffer pointer
3116  \param buf - GAP buffer pointer
3117  \return non-zero value if XOR returns anything
3118 
3119  @ingroup gapfunc
3120 */
3121 template<typename T>
3122 bm::id_t gap_bitset_xor_any(const unsigned* block, const T* buf)
3123 {
3124  BM_ASSERT(block);
3125 
3126  const T* pcurr = buf;
3127  const T* pend = pcurr + (*pcurr >> 3);
3128  ++pcurr;
3129 
3130  unsigned bitval = *buf & 1;
3131 
3132  bm::id_t count = bit_block_any_range(block, 0, *pcurr);
3133  if (bitval)
3134  count = *pcurr + 1 - count;
3135 
3136  for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
3137  {
3138  T prev = (T)(*(pcurr-1)+1);
3139  bm::id_t c = bit_block_any_range(block, prev, *pcurr);
3140 
3141  if (bitval) // 1 gap; means Result = Total_Bits - BitCount;
3142  c = (*pcurr - prev + 1) - c;
3143  count += c;
3144  }
3145  return count;
3146 }
3147 
3148 
3149 
3150 /*!
3151  \brief Compute bitcount of bit block OR masked by GAP block.
3152  \param block - bitblock buffer pointer.
3153  \param buf - GAP buffer pointer.
3154  \return bit count of OR
3155 
3156  @ingroup gapfunc
3157 */
3158 template<typename T>
3159 bm::id_t gap_bitset_or_count(const unsigned* block, const T* buf)
3160 {
3161  BM_ASSERT(block);
3162 
3163  const T* pcurr = buf;
3164  const T* pend = pcurr + (*pcurr >> 3);
3165  ++pcurr;
3166 
3167  unsigned bitval = *buf & 1;
3168 
3169  bm::id_t count = bitval ? *pcurr + 1
3170  : bm::bit_block_calc_count_range(block, 0, *pcurr);
3171  for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
3172  {
3173  T prev = (T)(*(pcurr-1)+1);
3174  bm::id_t c =
3175  bitval ? (*pcurr - prev + 1)
3176  : bm::bit_block_calc_count_range(block, prev, *pcurr);
3177  count += c;
3178  }
3179  return count;
3180 }
3181 
3182 /*!
3183  \brief Compute bitcount test of bit block OR masked by GAP block
3184  \param block - bitblock buffer pointer
3185  \param buf - GAP buffer pointer
3186  \return non zero value if union (OR) returns anything
3187 
3188  @ingroup gapfunc
3189 */
3190 template<typename T>
3191 bm::id_t gap_bitset_or_any(const unsigned* block, const T* buf)
3192 {
3193  bool b = !bm::gap_is_all_zero(buf) ||
3194  !bm::bit_is_all_zero(block);
3195  return b;
3196 }
3197 
3198 
3199 
3200 /*!
3201  \brief Bitblock memset operation.
3202 
3203  \param dst - destination block.
3204  \param value - value to set.
3205 
3206  @ingroup bitfunc
3207 */
3208 inline
3210 {
3211 #ifdef BMVECTOPT
3212  VECT_SET_BLOCK(dst, value);
3213 #else
3214  ::memset(dst, int(value), bm::set_block_size * sizeof(bm::word_t));
3215 #endif
3216 }
3217 
3218 
3219 /*!
3220  \brief GAP block to bitblock conversion.
3221  \param dest - bitblock buffer pointer.
3222  \param buf - GAP buffer pointer.
3223 
3224  @ingroup gapfunc
3225 */
3226 template<typename T>
3227 void gap_convert_to_bitset(unsigned* dest, const T* buf)
3228 {
3229  bm::bit_block_set(dest, 0);
3230  bm::gap_add_to_bitset(dest, buf);
3231 }
3232 
3233 
3234 /*!
3235  \brief GAP block to bitblock conversion.
3236  \param dest - bitblock buffer pointer.
3237  \param buf - GAP buffer pointer.
3238  \param dest_len - length/size of the destination buffer.
3239 
3240  @ingroup gapfunc
3241 */
3242 template<typename T>
3243 void gap_convert_to_bitset(unsigned* dest, const T* buf, unsigned dest_len)
3244 {
3245  ::memset(dest, 0, dest_len * sizeof(unsigned));
3246  bm::gap_add_to_bitset(dest, buf);
3247 }
3248 
3249 
3250 /*!
3251  \brief Smart GAP block to bitblock conversion.
3252 
3253  Checks if GAP block is ALL-ZERO or ALL-ON. In those cases returns
3254  pointer on special static bitblocks.
3255 
3256  \param dest - bitblock buffer pointer.
3257  \param buf - GAP buffer pointer.
3258  \param set_max - max possible bitset length
3259 
3260  @ingroup gapfunc
3261 */
3262 template<typename T>
3263 unsigned* gap_convert_to_bitset_smart(unsigned* dest,
3264  const T* buf,
3265  id_t set_max)
3266 {
3267  if (buf[1] == set_max - 1)
3268  return (buf[0] & 1) ? FULL_BLOCK_REAL_ADDR : 0;
3269 
3270  bm::gap_convert_to_bitset(dest, buf);
3271  return dest;
3272 }
3273 
3274 
3275 /*!
3276  \brief Calculates sum of all words in GAP block. (For debugging purposes)
3277  \note For debugging and testing ONLY.
3278  \param buf - GAP buffer pointer.
3279  \return Sum of all words.
3280 
3281  @ingroup gapfunc
3282  @internal
3283 */
3284 template<typename T> unsigned gap_control_sum(const T* buf)
3285 {
3286  unsigned end = *buf >> 3;
3287 
3288  const T* pcurr = buf;
3289  const T* pend = pcurr + (*pcurr >> 3);
3290  ++pcurr;
3291 
3292  if (*buf & 1) // Starts with 1
3293  {
3294  ++pcurr;
3295  }
3296  ++pcurr; // now we are in GAP "1" again
3297 
3298  while (pcurr <= pend)
3299  {
3300  BM_ASSERT(*pcurr > *(pcurr-1));
3301  pcurr += 2;
3302  }
3303  return buf[end];
3304 }
3305 
3306 
3307 /*!
3308  \brief Sets all bits to 0 or 1 (GAP)
3309  \param buf - GAP buffer pointer.
3310  \param set_max - max possible bitset length
3311  \param value - value to set
3312 
3313  @ingroup gapfunc
3314 */
3315 template<class T> void gap_set_all(T* buf,
3316  unsigned set_max,
3317  unsigned value)
3318 {
3319  BM_ASSERT(value == 0 || value == 1);
3320  *buf = (T)((*buf & 6u) + (1u << 3) + value);
3321  *(++buf) = (T)(set_max - 1);
3322 }
3323 
3324 
3325 /*!
3326  \brief Init gap block so it has block in it (can be whole block)
3327  \param buf - GAP buffer pointer.
3328  \param from - one block start
3329  \param to - one block end
3330  \param value - (block value)1 or 0
3331 
3332  @ingroup gapfunc
3333 */
3334 template<class T>
3336  T from,
3337  T to,
3338  T value)
3339  //unsigned set_max)
3340 {
3341  BM_ASSERT(value == 0 || value == 1);
3342  const unsigned set_max = bm::bits_in_block;
3343 
3344  unsigned gap_len;
3345  if (from == 0)
3346  {
3347  if (to == set_max - 1)
3348  {
3349  bm::gap_set_all(buf, set_max, value);
3350  }
3351  else
3352  {
3353  gap_len = 2;
3354  buf[1] = (T)to;
3355  buf[2] = (T)(set_max - 1);
3356  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3357  }
3358  return;
3359  }
3360  // from != 0
3361 
3362  value = !value;
3363  if (to == set_max - 1)
3364  {
3365  gap_len = 2;
3366  buf[1] = (T)(from - 1);
3367  buf[2] = (T)(set_max - 1);
3368  }
3369  else
3370  {
3371  gap_len = 3;
3372  buf[1] = (T) (from - 1);
3373  buf[2] = (T) to;
3374  buf[3] = (T)(set_max - 1);
3375  }
3376  buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
3377 }
3378 
3379 
3380 /*!
3381  \brief Inverts all bits in the GAP buffer.
3382  \param buf - GAP buffer pointer.
3383 
3384  @ingroup gapfunc
3385 */
3386 template<typename T> void gap_invert(T* buf)
3387 {
3388  *buf ^= 1;
3389 }
3390 
3391 
3392 #ifdef __GNUG__
3393 #pragma GCC diagnostic push
3394 #pragma GCC diagnostic ignored "-Wconversion"
3395 #endif
3396 
3397 /*!
3398  \brief Sets GAP block capacity level.
3399  \param buf - GAP buffer pointer.
3400  \param level new GAP block capacity level.
3401 
3402  @ingroup gapfunc
3403 */
3404 template<typename T>
3405 void set_gap_level(T* buf, int level)
3406 {
3407  BM_ASSERT(level >= 0);
3408  BM_ASSERT(unsigned(level) < bm::gap_levels);
3409 
3410  *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
3411 }
3412 #ifdef __GNUG__
3413 #pragma GCC diagnostic pop
3414 #endif
3415 
3416 
3417 
3418 /*!
3419  \brief Calculates GAP block capacity level.
3420  \param len - GAP buffer length.
3421  \param glevel_len - GAP lengths table
3422  \return GAP block capacity level.
3423  -1 if block does not fit any level.
3424  @ingroup gapfunc
3425 */
3426 template<typename T>
3427 inline int gap_calc_level(unsigned len, const T* glevel_len)
3428 {
3429  if (len <= unsigned(glevel_len[0]-4)) return 0;
3430  if (len <= unsigned(glevel_len[1]-4)) return 1;
3431  if (len <= unsigned(glevel_len[2]-4)) return 2;
3432  if (len <= unsigned(glevel_len[3]-4)) return 3;
3433 
3434  BM_ASSERT(bm::gap_levels == 4);
3435  return -1;
3436 }
3437 
3438 /*! @brief Returns number of free elements in GAP block array.
3439  Difference between GAP block capacity on this level and actual GAP length.
3440 
3441  @param buf - GAP buffer pointer
3442  @param glevel_len - GAP lengths table
3443 
3444  @return Number of free GAP elements
3445  @ingroup gapfunc
3446 */
3447 template<typename T>
3448 inline unsigned gap_free_elements(const T* buf, const T* glevel_len)
3449 {
3450  unsigned len = gap_length(buf);
3451  unsigned capacity = gap_capacity(buf, glevel_len);
3452  return capacity - len;
3453 }
3454 
3455 
3456 /*!
3457  \brief Lexicographical comparison of BIT buffers.
3458  \param buf1 - First buffer pointer.
3459  \param buf2 - Second buffer pointer.
3460  \param len - Buffer length in elements (T).
3461  \return <0 - less, =0 - equal, >0 - greater.
3462 
3463  @ingroup bitfunc
3464 */
3465 template<typename T>
3466 int bitcmp(const T* buf1, const T* buf2, unsigned len)
3467 {
3468  BM_ASSERT(len);
3469  const T* pend1 = buf1 + len;
3470  do
3471  {
3472  T w1 = *buf1++;
3473  T w2 = *buf2++;
3474  T diff = w1 ^ w2;
3475 
3476  if (diff)
3477  {
3478  return (w1 & diff & -diff) ? 1 : -1;
3479  }
3480 
3481  } while (buf1 < pend1);
3482 
3483  return 0;
3484 }
3485 
3486 
3487 
3488 
3489 /*!
3490  \brief Converts bit block to GAP.
3491  \param dest - Destinatio GAP buffer.
3492  \param block - Source bitblock buffer.
3493  \param dest_len - length of the dest. buffer.
3494  \return New length of GAP block or 0 if conversion failed
3495  (insufficicent space).
3496 
3497  @ingroup gapfunc
3498 */
3499 inline
3501  const unsigned* BMRESTRICT block,
3502  unsigned dest_len)
3503 {
3504  const unsigned* BMRESTRICT block_end = block + bm::set_block_size;
3505  gap_word_t* BMRESTRICT pcurr = dest;
3506  gap_word_t* BMRESTRICT end = dest + dest_len;
3507 
3508  unsigned bitval = (*block) & 1u;
3509  *pcurr++ = bm::gap_word_t(bitval);
3510  *pcurr = 0;
3511  unsigned bit_idx = 0;
3512 
3513  do
3514  {
3515  unsigned val = *block;
3516  while (!val || val == ~0u)
3517  {
3518  if (bitval != unsigned(bool(val)))
3519  {
3520  *pcurr++ = (gap_word_t)(bit_idx-1);
3521  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3522  if (pcurr == end)
3523  return 0; // OUT of target memory
3524  bitval ^= 1u;
3525  }
3526  bit_idx += unsigned(sizeof(*block) * 8);
3527  if (++block >= block_end)
3528  goto complete;
3529  val = *block;
3530  } // while
3531 
3532  // process "0100011" word
3533  //
3534  unsigned bits_consumed = 0;
3535  do
3536  {
3537  unsigned tz = 1u;
3538  if (bitval != (val & 1u))
3539  {
3540  *pcurr++ = (gap_word_t)(bit_idx-1);
3541  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3542  if (pcurr == end)
3543  return 0; // OUT of target memory
3544  bitval ^= 1u;
3545  }
3546  else // match, find the next idx
3547  {
3548  tz = bm::bit_scan_forward32(bitval ? ~val : val);
3549  // alternative:
3550  // tz = bm::count_trailing_zeros(bitval ? ~val : val);
3551  }
3552 
3553  bits_consumed += tz;
3554  bit_idx += tz;
3555  val >>= tz;
3556 
3557  if (!val)
3558  {
3559  if (bits_consumed < 32u)
3560  {
3561  *pcurr++ = (gap_word_t)(bit_idx-1);
3562  BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
3563  if (pcurr == end)
3564  return 0; // OUT of target memory
3565  bitval ^= 1u;
3566  bit_idx += 32u - bits_consumed;
3567  }
3568  break;
3569  }
3570  } while (1);
3571 
3572  } while(++block < block_end);
3573 
3574 complete:
3575  *pcurr = (gap_word_t)(bit_idx-1);
3576  unsigned len = (unsigned)(pcurr - dest);
3577  *dest = (gap_word_t)((*dest & 7) + (len << 3));
3578  return len;
3579 }
3580 
3581 inline
3583  const unsigned* BMRESTRICT block,
3584  unsigned dest_len)
3585 {
3586 #if defined(VECT_BIT_TO_GAP)
3587  return VECT_BIT_TO_GAP(dest, block, dest_len);
3588 #else
3589  return bm::bit_block_to_gap(dest, block, dest_len);
3590 #endif
3591 }
3592 
3593 
3594 
3595 
3596 
3597 /*!
3598  \brief Iterate gap block as delta-bits with a functor
3599  @ingroup gapfunc
3600 */
3601 template<class T, class F>
3602 void for_each_gap_dbit(const T* buf, F& func)
3603 {
3604  const T* pcurr = buf;
3605  const T* pend = pcurr + (*pcurr >> 3);
3606 
3607  ++pcurr;
3608 
3609  unsigned prev = 0;
3610  unsigned first_inc;
3611 
3612  if (*buf & 1)
3613  {
3614  first_inc = 0;
3615  unsigned to = *pcurr;
3616  for (unsigned i = 0; i <= to; ++i)
3617  {
3618  func(1);
3619  }
3620  prev = to;
3621  ++pcurr;
3622  }
3623  else
3624  {
3625  first_inc = 1;
3626  }
3627  ++pcurr; // set GAP to 1
3628 
3629  while (pcurr <= pend)
3630  {
3631  unsigned from = *(pcurr-1)+1;
3632  unsigned to = *pcurr;
3633  if (first_inc)
3634  {
3635  func(from - prev + first_inc);
3636  first_inc = 0;
3637  }
3638  else
3639  {
3640  func(from - prev);
3641  }
3642 
3643  for (unsigned i = from+1; i <= to; ++i)
3644  {
3645  func(1);
3646  }
3647  prev = to;
3648  pcurr += 2; // jump to the next positive GAP
3649  }
3650 }
3651 
3652 /*!
3653  \brief Convert gap block into array of ints corresponding to 1 bits
3654  @ingroup gapfunc
3655 */
3656 template<typename D, typename T>
3658  const T* BMRESTRICT buf,
3659  unsigned dest_len,
3660  bool invert = false)
3661 {
3662  BMREGISTER const T* BMRESTRICT pcurr = buf;
3663  BMREGISTER const T* pend = pcurr + (*pcurr >> 3);
3664 
3665  D* BMRESTRICT dest_curr = dest;
3666  ++pcurr;
3667 
3668  int bitval = (*buf) & 1;
3669  if (invert)
3670  bitval = !bitval; // invert the GAP buffer
3671 
3672  if (bitval)
3673  {
3674  if (unsigned(*pcurr + 1) >= dest_len)
3675  return 0; // insufficient space
3676  dest_len -= *pcurr;
3677  T to = *pcurr;
3678  for (T i = 0; ;++i)
3679  {
3680  *dest_curr++ = i;
3681  if (i == to) break;
3682  }
3683  ++pcurr;
3684  }
3685  ++pcurr; // set GAP to 1
3686 
3687  while (pcurr <= pend)
3688  {
3689  unsigned pending = *pcurr - *(pcurr-1);
3690  if (pending >= dest_len)
3691  return 0;
3692  dest_len -= pending;
3693  T from = (T)(*(pcurr-1)+1);
3694  T to = *pcurr;
3695  for (T i = from; ;++i)
3696  {
3697  *dest_curr++ = i;
3698  if (i == to) break;
3699  }
3700  pcurr += 2; // jump to the next positive GAP
3701  }
3702  return (D) (dest_curr - dest);
3703 }
3704 
3705 
3706 
3707 /*!
3708  @brief Bitcount for bit block
3709 
3710  Function calculates number of 1 bits in the given array of words.
3711  Make sure the addresses are aligned.
3712 
3713  @ingroup bitfunc
3714 */
3715 inline
3717 {
3718  const bm::word_t* block_end = block + bm::set_block_size;
3719  bm::id_t count = 0;
3720 
3721 #ifdef BMVECTOPT
3722  count = VECT_BITCOUNT(block, block_end);
3723 #else
3724 #ifdef BM64OPT
3725  // 64-bit optimized algorithm. No sparse vect opt.
3726  // instead it uses 4-way parallel pipelined version
3727 
3728  const bm::id64_t* b1 = (bm::id64_t*) block;
3729  const bm::id64_t* b2 = (bm::id64_t*) block_end;
3730  do
3731  {
3732  count += bitcount64_4way(b1[0], b1[1], b1[2], b1[3]);
3733  b1 += 4;
3734  } while (b1 < b2);
3735 #else
3736  // For 32 bit code the fastest method is
3737  // to use bitcount table for each byte in the block.
3738  // As optimization for sparse bitsets used bits accumulator
3739  // to collect ON bits using bitwise OR.
3740  bm::word_t acc = *block++;
3741  do
3742  {
3743  bm::word_t in = *block++;
3744  bm::word_t acc_prev = acc;
3745  acc |= in;
3746  if (acc_prev &= in) // accumulator miss: counting bits
3747  {
3748  BM_INCWORD_BITCOUNT(count, acc);
3749  acc = acc_prev;
3750  }
3751  } while (block < block_end);
3752 
3753  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
3754 
3755 #endif
3756 #endif
3757  return count;
3758 }
3759 
3760 /*!
3761  @brief Bitcount for bit block
3762 
3763  Function calculates number of 1 bits in the given array of words.
3764  uses digest to understand zero areas
3765 
3766  @ingroup bitfunc
3767 */
3768 inline
3769 bm::id_t bit_block_count(const bm::word_t* const block, bm::id64_t digest)
3770 {
3771  bm::id_t count = 0;
3772  bm::id64_t d = digest;
3773  while (d)
3774  {
3775  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
3776 
3777  unsigned wave = bm::word_bitcount64(t - 1);
3778  unsigned off = wave * bm::set_block_digest_wave_size;
3779 
3780  const bm::bit_block_t::bunion_t* BMRESTRICT src_u =
3781  (const bm::bit_block_t::bunion_t*)(&block[off]);
3782  unsigned j = 0;
3783  do
3784  {
3785  count += bm::word_bitcount64(src_u->w64[j+0]) +
3786  bm::word_bitcount64(src_u->w64[j+1]) +
3787  bm::word_bitcount64(src_u->w64[j+2]) +
3788  bm::word_bitcount64(src_u->w64[j+3]);
3789  j += 4;
3790  } while (j < bm::set_block_digest_wave_size/2);
3791 
3792  d = bm::bmi_bslr_u64(d); // d &= d - 1;
3793  } // while
3794  return count;
3795 }
3796 
3797 
3798 
3799 /*!
3800  @brief Bitcount for bit string
3801 
3802  Added for back-compatibility purposes, not block aligned,
3803  not SIMD accelerated
3804 
3805  @ingroup bitfunc
3806 */
3807 inline
3809  const bm::word_t* block_end)
3810 {
3811  bm::id_t count = 0;
3812  bm::word_t acc = *block++;
3813  do
3814  {
3815  bm::word_t in = *block++;
3816  bm::word_t acc_prev = acc;
3817  acc |= in;
3818  if (acc_prev &= in) // accumulator miss: counting bits
3819  {
3820  BM_INCWORD_BITCOUNT(count, acc);
3821  acc = acc_prev;
3822  }
3823  } while (block < block_end);
3824 
3825  BM_INCWORD_BITCOUNT(count, acc); // count-in remaining accumulator
3826  return count;
3827 }
3828 
3829 
3830 
3831 /*!
3832  Function calculates number of times when bit value changed
3833  (1-0 or 0-1).
3834 
3835  For 001 result is 2
3836  010 - 3
3837  011 - 2
3838  111 - 1
3839 
3840  @ingroup bitfunc
3841 */
3842 inline
3844 {
3845  unsigned count = 1;
3846  w ^= (w >> 1);
3847 
3848  count += bm::word_bitcount(w);
3849  count -= (w >> ((sizeof(w) * 8) - 1));
3850  return count;
3851 }
3852 
3853 
3854 /*!
3855  Function calculates number of times when bit value changed
3856  @internal
3857 */
3858 inline
3859 unsigned bit_block_change32(const bm::word_t* block)
3860 {
3861  unsigned gap_count = 1;
3862 
3863  bm::word_t w, w0, w_prev, w_l;
3864  w = w0 = *block;
3865 
3866  const int w_shift = int(sizeof(w) * 8 - 1);
3867  w ^= (w >> 1);
3868  BM_INCWORD_BITCOUNT(gap_count, w);
3869  gap_count -= (w_prev = (w0 >> w_shift)); // negative value correction
3870 
3871  const bm::word_t* block_end = block + bm::set_block_size;
3872  for (++block ;block < block_end; ++block)
3873  {
3874  w = w0 = *block;
3875  ++gap_count;
3876  if (!w)
3877  {
3878  gap_count -= !w_prev;
3879  w_prev = 0;
3880  }
3881  else
3882  {
3883  w ^= (w >> 1);
3884  BM_INCWORD_BITCOUNT(gap_count, w);
3885 
3886  w_l = w0 & 1;
3887  gap_count -= (w0 >> w_shift); // negative value correction
3888  gap_count -= !(w_prev ^ w_l); // word border correction
3889 
3890  w_prev = (w0 >> w_shift);
3891  }
3892  } // for
3893  return gap_count;
3894 }
3895 
3896 
3897 /*!
3898  Function calculates number of times when bit value changed
3899  (1-0 or 0-1) in the bit block.
3900 
3901  @param block - bit-block start pointer
3902  @return number of 1-0, 0-1 transitions
3903 
3904  @ingroup bitfunc
3905 */
3906 inline
3907 unsigned bit_block_calc_change(const bm::word_t* block)
3908 {
3909 #if defined(VECT_BLOCK_CHANGE)
3910  return VECT_BLOCK_CHANGE(block);
3911 #else
3912  return bm::bit_block_change32(block);
3913 #endif
3914 }
3915 
3916 
3917 /*!
3918  Function calculates number of 1 bits in the given array of words in
3919  the range between left anf right bits (borders included)
3920  Make sure the addr is aligned.
3921 
3922  @ingroup bitfunc
3923 */
3924 inline
3926  bm::word_t left,
3927  bm::word_t right)
3928 {
3929  BM_ASSERT(left <= right);
3930  BM_ASSERT(right <= bm::gap_max_bits-1);
3931 
3932  unsigned nword, nbit, bitcount, count;
3933  nbit = left & bm::set_word_mask;
3934  const bm::word_t* word =
3935  block + (nword = unsigned(left >> bm::set_word_shift));
3936  if (left == right) // special case (only 1 bit to check)
3937  {
3938  return (*word >> nbit) & 1u;
3939  }
3940 
3941  count = 0;
3942  bitcount = right - left + 1u;
3943  if (nbit) // starting position is not aligned
3944  {
3945  unsigned right_margin = nbit + right - left;
3946  if (right_margin < 32)
3947  {
3948  unsigned mask =
3950  block_set_table<true>::_left[right_margin];
3951  return bm::word_bitcount(*word & mask);
3952  }
3953  count = bm::word_bitcount(*word & block_set_table<true>::_right[nbit]);
3954  bitcount -= 32 - nbit;
3955  ++word;
3956  }
3957 
3958  // now when we are word aligned, we can count bits the usual way
3959  //
3960  #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512)
3961  for ( ;bitcount >= 64; bitcount-=64)
3962  {
3963  bm::id64_t w64 = *((bm::id64_t*)word); // x86 unaligned(!) read
3964  count += unsigned(_mm_popcnt_u64(w64));
3965  word += 2;
3966  }
3967  if (bitcount >= 32)
3968  {
3969  count += bm::word_bitcount(*word++);
3970  bitcount-=32;
3971  }
3972  #else
3973  for ( ;bitcount >= 32; bitcount-=32, ++word)
3974  count += bm::word_bitcount(*word);
3975  #endif
3976  BM_ASSERT(bitcount < 32);
3977 
3978  if (bitcount) // we have a tail to count
3979  {
3980  count += bm::word_bitcount(
3981  *word & block_set_table<true>::_left[bitcount-1]);
3982  }
3983  return count;
3984 }
3985 
3986 /*!
3987  Function calculates number of 1 bits in the given array of words in
3988  the range between 0 anf right bits (borders included)
3989  Make sure the addr is aligned.
3990 
3991  @ingroup bitfunc
3992 */
3993 inline
3995  bm::word_t right)
3996 {
3997  BM_ASSERT(block);
3998  if (!right) // special case, first bit check
3999  return *block & 1u;
4000  bm::id_t count = 0;
4001 
4002  unsigned bitcount = right + 1;
4003 
4004  // AVX2 or 64-bit loop unroll
4005  #if defined(BMAVX2OPT) || defined(BMAVX512OPT)
4006  BM_AVX2_POPCNT_PROLOG
4007 
4008  __m256i cnt = _mm256_setzero_si256();
4009  bm::id64_t* cnt64;
4010 
4011  for ( ;bitcount >= 256; bitcount -= 256)
4012  {
4013  const __m256i* src = (__m256i*)block;
4014  __m256i xmm256 = _mm256_load_si256(src);
4015  BM_AVX2_BIT_COUNT(bc, xmm256);
4016  cnt = _mm256_add_epi64(cnt, bc);
4017 
4018  block += 8;
4019  }
4020  cnt64 = (bm::id64_t*)&cnt;
4021  count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
4022  #endif
4023 
4024  for ( ;bitcount >= 64; bitcount -= 64)
4025  {
4026  bm::id64_t* p = (bm::id64_t*)block;
4027  count += bm::word_bitcount64(*p);
4028  block += 2;
4029  }
4030  if (bitcount >= 32)
4031  {
4032  count += bm::word_bitcount(*block++);
4033  bitcount-=32;
4034  }
4035 
4036  if (bitcount) // we have a tail to count
4037  {
4038  count +=
4039  bm::word_bitcount(*block & block_set_table<true>::_left[bitcount-1]);
4040  }
4041  return count;
4042 }
4043 
4044 
4045 
4046 /*!
4047  Cyclic rotation of bit-block left by 1 bit
4048  @ingroup bitfunc
4049 */
4050 inline
4052 {
4053  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4054  for (unsigned i = 0; i < bm::set_block_size-1; ++i)
4055  {
4056  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4057  }
4058  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4059 }
4060 
4061 /*!
4062  @brief Unrolled cyclic rotation of bit-block left by 1 bit
4063  @param block - bit-block pointer
4064  @ingroup bitfunc
4065 */
4066 inline
4068 {
4069  bm::word_t co_flag = (block[0] >> 31) & 1; // carry over bit
4070  const unsigned unroll_factor = 4;
4071  bm::word_t w0, w1, w2, w3;
4072 
4073  unsigned i;
4074  for (i = 0; i < bm::set_block_size - unroll_factor; i += unroll_factor)
4075  {
4076  w0 = block[i + 1] >> 31;
4077  w1 = block[i + 2] >> 31;
4078  w2 = block[i + 3] >> 31;
4079  w3 = block[i + 4] >> 31;
4080 
4081  block[0 + i] = (block[0 + i] << 1) | w0;
4082  block[1 + i] = (block[1 + i] << 1) | w1;
4083  block[2 + i] = (block[2 + i] << 1) | w2;
4084  block[3 + i] = (block[3 + i] << 1) | w3;
4085  }
4086  block[i] = (block[i] << 1) | (block[i + 1] >> 31);
4087  block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
4088  block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
4089  block[set_block_size - 1] = (block[set_block_size - 1] << 1) | co_flag;
4090 }
4091 
4092 /*!
4093  @brief insert bit into position and shift the rest right with carryover
4094 
4095  @param block - bit-block pointer
4096  @param bitpos - bit position to insert
4097  @param value - bit value (0|1) to insert
4098 
4099  @return carry over value
4100  @ingroup bitfunc
4101 */
4102 inline
4103 bm::word_t bit_block_insert(bm::word_t* block, unsigned bitpos, bool value)
4104 {
4105  BM_ASSERT(block);
4106  BM_ASSERT(bitpos < 65536);
4107 
4108  unsigned nbit = unsigned(bitpos & bm::set_block_mask);
4109  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4110  nbit &= bm::set_word_mask;
4111 
4112  bm::word_t co_flag = 0;
4113  if (nbit)
4114  {
4115  unsigned r_mask = bm::block_set_table<true>::_right[nbit];
4116  bm::word_t w = block[nword] & r_mask;
4117  bm::word_t wl = block[nword] & ~r_mask;
4118  co_flag = w >> 31;
4119  w <<= 1u;
4120  block[nword] = w | (unsigned(value) << nbit) | wl;
4121  ++nword;
4122  }
4123  else
4124  {
4125  co_flag = value;
4126  }
4127 
4128  for (unsigned i = nword; i < bm::set_block_size; ++i)
4129  {
4130  bm::word_t w = block[i];
4131  bm::word_t co_flag1 = w >> 31;
4132  w = (w << 1u) | co_flag;
4133  block[i] = w;
4134  co_flag = co_flag1;
4135  } // for i
4136  return co_flag;
4137 }
4138 
4139 /*!
4140  @brief Right bit-shift bitblock by 1 bit (reference)
4141  @param block - bit-block pointer
4142  @param empty_acc - [out] contains 0 if block is empty
4143  @param co_flag - carry over from the previous block
4144 
4145  @return carry over bit (1 or 0)
4146  @ingroup bitfunc
4147 */
4148 inline
4150  bm::word_t* empty_acc, bm::word_t co_flag)
4151 {
4152  BM_ASSERT(block);
4153  BM_ASSERT(empty_acc);
4154 
4155  bm::word_t acc = 0;
4156  for (unsigned i = 0; i < bm::set_block_size; ++i)
4157  {
4158  bm::word_t w = block[i];
4159  bm::word_t co_flag1 = w >> 31;
4160  acc |= w = (w << 1u) | co_flag;
4161  block[i] = w;
4162  co_flag = co_flag1;
4163  }
4164  *empty_acc = acc;
4165  return co_flag;
4166 }
4167 
4168 /*!
4169  @brief Right bit-shift of bit-block by 1 bit (loop unrolled)
4170  @param block - bit-block pointer
4171  @param empty_acc - [out] contains 0 if block is empty
4172  @param co_flag - carry over from the previous block
4173 
4174  @return carry over bit (1 or 0)
4175  @ingroup bitfunc
4176 */
4177 inline
4179  bm::word_t* empty_acc, bm::word_t co_flag)
4180 {
4181  BM_ASSERT(block);
4182  BM_ASSERT(empty_acc);
4183  #if defined(VECT_SHIFT_R1)
4184  return VECT_SHIFT_R1(block, empty_acc, co_flag);
4185  #else
4186  return bm::bit_block_shift_r1(block, empty_acc, co_flag);
4187  #endif
4188 }
4189 
4190 
4191 /*!
4192  @brief Right bit-shift of bit-block by 1 bit (reference) + AND
4193  @param block - bit-block pointer
4194  @param co_flag - carry over from the previous block
4195  @param mask_block - mask bit-block pointer
4196  @param digest - block digest
4197 
4198  @return carry over bit (1 or 0)
4199  @ingroup bitfunc
4200 */
4201 inline
4203  bm::word_t co_flag,
4204  const bm::word_t* BMRESTRICT mask_block,
4205  bm::id64_t* BMRESTRICT digest)
4206 {
4207  BM_ASSERT(block);
4208  BM_ASSERT(mask_block);
4209  BM_ASSERT(digest && *digest);
4210 
4211 
4212  bm::id64_t d = *digest;
4213 
4214  unsigned di = 0;
4215  if (!co_flag)
4216  {
4217  bm::id64_t t = d & -d;
4218  di = bm::word_bitcount64(t - 1); // find start bit-index
4219  }
4220 
4221  for (; di < 64; ++di)
4222  {
4223  const unsigned d_base = di * bm::set_block_digest_wave_size;
4224  bm::id64_t dmask = (1ull << di);
4225  if (d & dmask) // digest stride not empty
4226  {
4227  bm::word_t acc = 0;
4228  for (unsigned i = d_base; i < d_base + bm::set_block_digest_wave_size; ++i)
4229  {
4231 
4232  bm::word_t w = block[i];
4233  bm::word_t co_flag1 = w >> 31;
4234  w = (w << 1u) | co_flag;
4235  acc |= block[i] = w & mask_block[i];
4236  co_flag = co_flag1;
4237  }
4238  if (!acc)
4239  d &= ~dmask; // update digest: clear stride bit
4240  }
4241  else // stride is empty
4242  {
4243  BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
4244  BM_ASSERT(block[d_base]==0);
4245 
4246  if (co_flag) // there is carry-over
4247  {
4248  BM_ASSERT(co_flag == 1);
4249  BM_ASSERT(block[d_base] == 0);
4250 
4251  block[d_base] = co_flag & mask_block[d_base];
4252  if (block[d_base])
4253  d |= dmask; // update d
4254  co_flag = 0;
4255  }
4256  }
4257  } // for di
4258 
4259  *digest = d;
4260  return co_flag;
4261 }
4262 
4263 /*!
4264  @brief Right bit-shift bitblock by 1 bit (reference) + AND
4265  @param block - bit-block pointer
4266  @param co_flag - carry over from the previous block
4267  @param mask_block - mask bit-block pointer
4268  @param digest - block digest
4269 
4270  @return carry over bit (1 or 0)
4271  @ingroup bitfunc
4272 */
4273 inline
4275  bm::word_t co_flag,
4276  const bm::word_t* BMRESTRICT mask_block,
4277  bm::id64_t* BMRESTRICT digest)
4278 {
4279  BM_ASSERT(block);
4280  BM_ASSERT(mask_block);
4281  BM_ASSERT(digest);
4282 
4283  #if defined(VECT_SHIFT_R1_AND)
4284  return VECT_SHIFT_R1_AND(block, co_flag, mask_block, digest);
4285  #else
4286  return bm::bit_block_shift_r1_and(block, co_flag, mask_block, digest);
4287  #endif
4288 }
4289 
4290 
4291 /*!
4292  Function calculates if there is any number of 1 bits
4293  in the given array of words in the range between left anf right bits
4294  (borders included). Make sure the addresses are aligned.
4295 
4296  @ingroup bitfunc
4297 */
4298 inline
4300  bm::word_t left,
4301  bm::word_t right)
4302 {
4303  BM_ASSERT(left <= right);
4304 
4305  unsigned nbit = left; // unsigned(left & bm::set_block_mask);
4306  unsigned nword = unsigned(nbit >> bm::set_word_shift);
4307  nbit &= bm::set_word_mask;
4308 
4309  const bm::word_t* word = block + nword;
4310 
4311  if (left == right) // special case (only 1 bit to check)
4312  {
4313  return (*word >> nbit) & 1;
4314  }
4315  unsigned acc;
4316  unsigned bitcount = right - left + 1;
4317 
4318  if (nbit) // starting position is not aligned
4319  {
4320  unsigned right_margin = nbit + (right - left);
4321  if (right_margin < 32)
4322  {
4323  unsigned mask =
4325  block_set_table<true>::_left[right_margin];
4326  acc = *word & mask;
4327  return acc;
4328  }
4329  else
4330  {
4331  acc = *word & block_set_table<true>::_right[nbit];
4332  if (acc)
4333  return acc;
4334  bitcount -= 32 - nbit;
4335  }
4336  ++word;
4337  }
4338 
4339  // now when we are word aligned, we can check bits the usual way
4340  for ( ;bitcount >= 32; bitcount -= 32)
4341  {
4342  acc = *word++;
4343  if (acc)
4344  return acc;
4345  }
4346 
4347  if (bitcount) // we have a tail to count
4348  {
4349  acc = (*word) & block_set_table<true>::_left[bitcount-1];
4350  if (acc)
4351  return acc;
4352  }
4353 
4354  return 0;
4355 }
4356 
4357 // ----------------------------------------------------------------------
4358 
4359 /*! Function inverts block of bits
4360  @ingroup bitfunc
4361 */
4362 template<typename T> void bit_invert(T* start)
4363 {
4365 #ifdef BMVECTOPT
4366  VECT_INVERT_BLOCK(start);
4367 #else
4368  T* end = (T*)((unsigned*)(start) + bm::set_block_size);
4369  do
4370  {
4371  start[0] = ~start[0];
4372  start[1] = ~start[1];
4373  start[2] = ~start[2];
4374  start[3] = ~start[3];
4375  start+=4;
4376  } while (start < end);
4377 #endif
4378 }
4379 
4380 // ----------------------------------------------------------------------
4381 
4382 /*! @brief Returns "true" if all bits in the block are 1
4383  @ingroup bitfunc
4384 */
4385 inline
4386 bool is_bits_one(const bm::wordop_t* start)
4387 {
4388 #if defined(BMSSE42OPT) || defined(BMAVX2OPT)
4389  return VECT_IS_ONE_BLOCK(start);
4390 #else
4391  const bm::word_t* BMRESTRICT src_end = (bm::word_t*)start + bm::set_block_size;
4392  const bm::wordop_t* end = (const bm::wordop_t*)src_end;
4393  do
4394  {
4395  bm::wordop_t tmp =
4396  start[0] & start[1] & start[2] & start[3];
4397  if (tmp != bm::all_bits_mask)
4398  return false;
4399  start += 4;
4400  } while (start < end);
4401  return true;
4402 #endif
4403 }
4404 
4405 // ----------------------------------------------------------------------
4406 
4407 // GAP blocks manipulation functions:
4408 
4409 /*! \brief GAP and functor */
4410 BMFORCEINLINE unsigned and_op(unsigned v1, unsigned v2)
4411 {
4412  return v1 & v2;
4413 }
4414 
4415 
4416 /*! \brief GAP xor functor */
4417 BMFORCEINLINE unsigned xor_op(unsigned v1, unsigned v2)
4418 {
4419  return v1 ^ v2;
4420 }
4421 
4422 
4423 /*! \brief GAP or functor */
4424 BMFORCEINLINE unsigned or_op(unsigned v1, unsigned v2)
4425 {
4426  return v1 | v2;
4427 }
4428 
4429 /*! \brief GAP or functor */
4430 BMFORCEINLINE unsigned sub_op(unsigned v1, unsigned v2)
4431 {
4432  return v1 & ~v2;
4433 }
4434 
4435 
4436 /*!
4437  \brief GAP AND operation.
4438 
4439  Function performs AND logical operation on gap vectors.
4440  If possible function put the result into vect1 and returns this
4441  pointer. Otherwise result is put into tmp_buf, which should be
4442  twice of the vector size.
4443 
4444  \param vect1 - operand 1
4445  \param vect2 - operand 2
4446  \param tmp_buf - pointer on temporary buffer
4447  \param dsize - out size of the destination
4448  \return Result pointer (tmp_buf OR vect1)
4449 
4450  @ingroup gapfunc
4451 */
4454  const gap_word_t* BMRESTRICT vect2,
4455  gap_word_t* BMRESTRICT tmp_buf,
4456  unsigned& dsize)
4457 {
4458  gap_buff_op(tmp_buf, vect1, 0, vect2, 0, and_op, dsize);
4459  return tmp_buf;
4460 }
4461 
4462 /*!
4463  \brief GAP AND operation test.
4464 
4465  Function performs AND logical operation on gap vectors.
4466  If possible function put the result into vect1 and returns this
4467  pointer. Otherwise result is put into tmp_buf, which should be
4468  twice of the vector size.
4469 
4470  \param vect1 - operand 1
4471  \param vect2 - operand 2
4472  \return non zero value if operation returns any 1 bit
4473 
4474  @ingroup gapfunc
4475 */
4478  const gap_word_t* BMRESTRICT vect2)
4479 {
4480  return gap_buff_any_op(vect1, 0, vect2, 0, and_op);
4481 }
4482 
4483 
4484 /*!
4485  \brief GAP bitcount AND operation test.
4486 
4487  \param vect1 - operand 1
4488  \param vect2 - operand 2
4489  \return bitcount of vect1 AND vect2
4490 
4491  @ingroup gapfunc
4492 */
4494 unsigned gap_count_and(const gap_word_t* BMRESTRICT vect1,
4495  const gap_word_t* BMRESTRICT vect2)
4496 {
4497  return gap_buff_count_op(vect1, vect2, and_op);
4498 }
4499 
4500 
4501 
4502 /*!
4503  \brief GAP XOR operation.
4504 
4505  Function performs XOR logical operation on gap vectors.
4506  If possible function put the result into vect1 and returns this
4507  pointer. Otherwise result is put into tmp_buf, which should be
4508  twice of the vector size.
4509 
4510  \param vect1 - operand 1
4511  \param vect2 - operand 2
4512  \param tmp_buf - pointer on temporary buffer
4513  \param dsize - out destination size
4514  \return Result pointer (tmp_buf)
4515 
4516  @ingroup gapfunc
4517 */
4520  const gap_word_t* BMRESTRICT vect2,
4521  gap_word_t* BMRESTRICT tmp_buf,
4522  unsigned& dsize)
4523 {
4524  gap_buff_op(tmp_buf, vect1, 0, vect2, 0, bm::xor_op, dsize);
4525  return tmp_buf;
4526 }
4527 
4528 
4529 /*!
4530  \brief GAP XOR operation test.
4531 
4532  Function performs AND logical operation on gap vectors.
4533  If possible function put the result into vect1 and returns this
4534  pointer. Otherwise result is put into tmp_buf, which should be
4535  twice of the vector size.
4536 
4537  \param vect1 - operand 1
4538  \param vect2 - operand 2
4539  \return non zero value if operation returns any 1 bit
4540 
4541  @ingroup gapfunc
4542 */
4545  const gap_word_t* BMRESTRICT vect2)
4546 {
4547  return gap_buff_any_op(vect1, 0, vect2, 0, bm::xor_op);
4548 }
4549 
4550 /*!
4551  \brief GAP bitcount XOR operation test.
4552 
4553  \param vect1 - operand 1
4554  \param vect2 - operand 2
4555  \return bitcount of vect1 XOR vect2
4556 
4557  @ingroup gapfunc
4558 */
4560 unsigned gap_count_xor(const gap_word_t* BMRESTRICT vect1,
4561  const gap_word_t* BMRESTRICT vect2)
4562 {
4563  return gap_buff_count_op(vect1, vect2, bm::xor_op);
4564 }
4565 
4566 
4567 /*!
4568  \brief GAP OR operation.
4569 
4570  Function performs OR logical oparation on gap vectors.
4571  If possible function put the result into vect1 and returns this
4572  pointer. Otherwise result is put into tmp_buf, which should be
4573  twice of the vector size.
4574 
4575  \param vect1 - operand 1
4576  \param vect2 - operand 2
4577  \param tmp_buf - pointer on temporary buffer
4578  \param dsize - out destination size
4579 
4580  \return Result pointer (tmp_buf)
4581 
4582  @ingroup gapfunc
4583 */
4584 inline
4586  const gap_word_t* BMRESTRICT vect2,
4587  gap_word_t* BMRESTRICT tmp_buf,
4588  unsigned& dsize)
4589 {
4590  gap_buff_op(tmp_buf, vect1, 1, vect2, 1, bm::and_op, dsize);
4591  gap_invert(tmp_buf);
4592  return tmp_buf;
4593 }
4594 
4595 /*!
4596  \brief GAP bitcount OR operation test.
4597 
4598  \param vect1 - operand 1
4599  \param vect2 - operand 2
4600  \return bitcount of vect1 OR vect2
4601 
4602  @ingroup gapfunc
4603 */
4605 unsigned gap_count_or(const gap_word_t* BMRESTRICT vect1,
4606  const gap_word_t* BMRESTRICT vect2)
4607 {
4608  return gap_buff_count_op(vect1, vect2, bm::or_op);
4609 }
4610 
4611 
4612 
4613 /*!
4614  \brief GAP SUB (AND NOT) operation.
4615 
4616  Function performs SUB logical operation on gap vectors.
4617  If possible function put the result into vect1 and returns this
4618  pointer. Otherwise result is put into tmp_buf, which should be
4619  twice of the vector size.
4620 
4621  \param vect1 - operand 1
4622  \param vect2 - operand 2
4623  \param tmp_buf - pointer on temporary buffer
4624  \param dsize - out destination size
4625 
4626  \return Result pointer (tmp_buf)
4627 
4628  @ingroup gapfunc
4629 */
4631  const gap_word_t* BMRESTRICT vect2,
4632  gap_word_t* BMRESTRICT tmp_buf,
4633  unsigned& dsize)
4634 {
4635  gap_buff_op(tmp_buf, vect1, 0, vect2, 1, and_op, dsize);
4636  return tmp_buf;
4637 }
4638 
4639 
4640 /*!
4641  \brief GAP SUB operation test.
4642 
4643  Function performs AND logical operation on gap vectors.
4644  If possible function put the result into vect1 and returns this
4645  pointer. Otherwise result is put into tmp_buf, which should be
4646  twice of the vector size.
4647 
4648  \param vect1 - operand 1
4649  \param vect2 - operand 2
4650  \return non zero value if operation returns any 1 bit
4651 
4652  @ingroup gapfunc
4653 */
4656  const gap_word_t* BMRESTRICT vect2)
4657 {
4658  return gap_buff_any_op(vect1, 0, vect2, 1, bm::and_op);
4659 }
4660 
4661 
4662 /*!
4663 \brief GAP bitcount SUB (AND NOT) operation test.
4664 
4665 \param vect1 - operand 1
4666 \param vect2 - operand 2
4667 \return bitcount of vect1 SUB (AND NOT) vect2
4668 
4669 @ingroup gapfunc
4670 */
4672 unsigned gap_count_sub(const gap_word_t* BMRESTRICT vect1,
4673  const gap_word_t* BMRESTRICT vect2)
4674 {
4675  return gap_buff_count_op(vect1, vect2, bm::sub_op);
4676 }
4677 
4678 
4679 // ----------------------------------------------------------------------
4680 
4681 // BIT blocks manipulation functions:
4682 
4683 
4684 /*!
4685  \brief Bitblock copy operation.
4686 
4687  \param dst - destination block.
4688  \param src - source block.
4689 
4690  @ingroup bitfunc
4691 */
4692 inline
4694 {
4695 #ifdef BMVECTOPT
4696  VECT_COPY_BLOCK(dst, src);
4697 #else
4698  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
4699 #endif
4700 }
4701 
4702 /*!
4703  \brief Bitblock copy/stream operation.
4704 
4705  \param dst - destination block.
4706  \param src - source block.
4707 
4708  @ingroup bitfunc
4709 */
4710 inline
4712 {
4713 #ifdef VECT_STREAM_BLOCK
4714  VECT_STREAM_BLOCK(dst, src);
4715 #else
4716  ::memcpy(dst, src, bm::set_block_size * sizeof(bm::word_t));
4717 #endif
4718 }
4719 
4720 
4721 /*!
4722  \brief Plain bitblock AND operation.
4723  Function does not analyse availability of source and destination blocks.
4724 
4725  \param dst - destination block.
4726  \param src - source block.
4727 
4728  \return 0 if AND operation did not produce anything (no 1s in the output)
4729 
4730  @ingroup bitfunc
4731 */
4732 inline
4734 {
4735  BM_ASSERT(dst);
4736  BM_ASSERT(src);
4737  BM_ASSERT(dst != src);
4738 
4739 #ifdef BMVECTOPT
4740  bm::id64_t acc = VECT_AND_BLOCK(dst, src);
4741 #else
4742  unsigned arr_sz = bm::set_block_size / 2;
4743 
4746 
4747  bm::id64_t acc = 0;
4748  for (unsigned i = 0; i < arr_sz; i+=4)
4749  {
4750  acc |= dst_u->w64[i] &= src_u->w64[i];
4751  acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
4752  acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
4753  acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
4754  }
4755 #endif
4756  return acc;
4757 }
4758 
4759 /*!
4760  \brief digest based bit-block AND
4761 
4762  \param dst - destination block.
4763  \param src - source block.
4764  \param digest - known digest of dst block
4765 
4766  \return new digest
4767 
4768  @ingroup bitfunc
4769 */
4770 inline
4772  const bm::word_t* BMRESTRICT src,
4773  bm::id64_t digest)
4774 {
4775  BM_ASSERT(dst);
4776  BM_ASSERT(src);
4777  BM_ASSERT(dst != src);
4778 
4779  const bm::id64_t mask(1ull);
4780  bm::id64_t d = digest;
4781  while (d)
4782  {
4783  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4784 
4785  unsigned wave = bm::word_bitcount64(t - 1);
4786  unsigned off = wave * bm::set_block_digest_wave_size;
4787 
4788  #if defined(VECT_AND_DIGEST)
4789  bool all_zero = VECT_AND_DIGEST(&dst[off], &src[off]);
4790  if (all_zero)
4791  digest &= ~(mask << wave);
4792  #else
4793  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
4795 
4796  bm::id64_t acc = 0;
4797  unsigned j = 0;
4798  do
4799  {
4800  acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
4801  acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
4802  acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
4803  acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
4804  j+=4;
4805  } while (j < bm::set_block_digest_wave_size/2);
4806 
4807  if (!acc) // all zero
4808  digest &= ~(mask << wave);
4809  #endif
4810 
4811  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4812  } // while
4813 
4814  return digest;
4815 }
4816 
4817 
4818 /*!
4819  \brief digest based bit-block AND 5-way
4820 
4821  \return new digest
4822 
4823  @ingroup bitfunc
4824 */
4825 inline
4827  const bm::word_t* BMRESTRICT src0,
4828  const bm::word_t* BMRESTRICT src1,
4829  const bm::word_t* BMRESTRICT src2,
4830  const bm::word_t* BMRESTRICT src3,
4831  bm::id64_t digest)
4832 {
4833  BM_ASSERT(dst);
4834  BM_ASSERT(src0 && src1 && src2 && src3);
4835 
4836  const bm::id64_t mask(1ull);
4837  bm::id64_t d = digest;
4838  while (d)
4839  {
4840  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4841 
4842  unsigned wave = bm::word_bitcount64(t - 1);
4843  unsigned off = wave * bm::set_block_digest_wave_size;
4844 
4845 #if defined(VECT_AND_DIGEST_5WAY)
4846  bool all_zero = VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
4847  if (all_zero)
4848  digest &= ~(mask << wave);
4849 #else
4850  const bm::bit_block_t::bunion_t* BMRESTRICT src_u0 = (const bm::bit_block_t::bunion_t*)(&src0[off]);
4851  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 = (const bm::bit_block_t::bunion_t*)(&src1[off]);
4852  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 = (const bm::bit_block_t::bunion_t*)(&src2[off]);
4853  const bm::bit_block_t::bunion_t* BMRESTRICT src_u3 = (const bm::bit_block_t::bunion_t*)(&src3[off]);
4855 
4856  bm::id64_t acc = 0;
4857  unsigned j = 0;
4858  do
4859  {
4860  acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
4861  acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
4862  acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
4863  acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
4864  j += 4;
4865  } while (j < bm::set_block_digest_wave_size / 2);
4866 
4867  if (!acc) // all zero
4868  digest &= ~(mask << wave);
4869 #endif
4870 
4871  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4872  } // while
4873 
4874  return digest;
4875 }
4876 
4877 
4878 /*!
4879  \brief digest based bit-block AND
4880 
4881  dst = src1 AND src2
4882 
4883  \param dst - destination block.
4884  \param src1 - source block.
4885  \param src2 - source block.
4886  \param digest - known initial digest
4887 
4888  \return new digest
4889 
4890  @ingroup bitfunc
4891 */
4892 inline
4894  const bm::word_t* BMRESTRICT src1,
4895  const bm::word_t* BMRESTRICT src2,
4896  bm::id64_t digest)
4897 {
4898  BM_ASSERT(dst);
4899  BM_ASSERT(src1 && src2);
4900  BM_ASSERT(dst != src1 && dst != src2);
4901 
4902  const bm::id64_t mask(1ull);
4903  bm::id64_t d = digest;
4904  while (d)
4905  {
4906  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
4907 
4908  unsigned wave = bm::word_bitcount64(t - 1);
4909  unsigned off = wave * bm::set_block_digest_wave_size;
4910 
4911  #if defined(VECT_AND_DIGEST_2WAY)
4912  bool all_zero = VECT_AND_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
4913  if (all_zero)
4914  digest &= ~(mask << wave);
4915  #else
4916  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
4917  (const bm::bit_block_t::bunion_t*)(&src1[off]);
4918  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
4919  (const bm::bit_block_t::bunion_t*)(&src2[off]);
4921  (bm::bit_block_t::bunion_t*)(&dst[off]);
4922 
4923  bm::id64_t acc = 0;
4924  unsigned j = 0;
4925  do
4926  {
4927  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
4928  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
4929  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
4930  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
4931  j+=4;
4932  } while (j < bm::set_block_digest_wave_size/2);
4933 
4934  if (!acc) // all zero
4935  digest &= ~(mask << wave);
4936  #endif
4937 
4938  d = bm::bmi_bslr_u64(d); // d &= d - 1;
4939  } // while
4940 
4941  return digest;
4942 }
4943 
4944 
4945 
4946 /*!
4947  \brief Function ANDs two bitblocks and computes the bitcount.
4948  Function does not analyse availability of source blocks.
4949 
4950  \param src1 - first bit block
4951  \param src2 - second bit block
4952 
4953  @ingroup bitfunc
4954 */
4955 inline
4957  const bm::word_t* BMRESTRICT src2)
4958 {
4959  unsigned count;
4960  const bm::word_t* src1_end = src1 + bm::set_block_size;
4961 #ifdef BMVECTOPT
4962  count = VECT_BITCOUNT_AND(src1, src1_end, src2);
4963 #else
4964  count = 0;
4965 # ifdef BM64OPT
4966  const bm::id64_t* b1 = (bm::id64_t*) src1;
4967  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
4968  const bm::id64_t* b2 = (bm::id64_t*) src2;
4969  do
4970  {
4971  count += bitcount64_4way(b1[0] & b2[0],
4972  b1[1] & b2[1],
4973  b1[2] & b2[2],
4974  b1[3] & b2[3]);
4975  b1 += 4;
4976  b2 += 4;
4977  } while (b1 < b1_end);
4978 # else
4979  do
4980  {
4981  BM_INCWORD_BITCOUNT(count, src1[0] & src2[0]);
4982  BM_INCWORD_BITCOUNT(count, src1[1] & src2[1]);
4983  BM_INCWORD_BITCOUNT(count, src1[2] & src2[2]);
4984  BM_INCWORD_BITCOUNT(count, src1[3] & src2[3]);
4985 
4986  src1+=4;
4987  src2+=4;
4988  } while (src1 < src1_end);
4989 # endif
4990 #endif
4991  return count;
4992 }
4993 
4994 
4995 /*!
4996  \brief Function ANDs two bitblocks and tests for any bit.
4997  Function does not analyse availability of source blocks.
4998 
4999  \param src1 - first bit block
5000  \param src2 - second bit block
5001 
5002  @ingroup bitfunc
5003 */
5004 inline
5005 unsigned bit_block_and_any(const bm::word_t* src1,
5006  const bm::word_t* src2)
5007 {
5008  unsigned count = 0;
5009  const bm::word_t* src1_end = src1 + bm::set_block_size;
5010  do
5011  {
5012  count = (src1[0] & src2[0]) |
5013  (src1[1] & src2[1]) |
5014  (src1[2] & src2[2]) |
5015  (src1[3] & src2[3]);
5016 
5017  src1+=4; src2+=4;
5018  } while ((src1 < src1_end) && !count);
5019  return count;
5020 }
5021 
5022 
5023 
5024 
5025 /*!
5026  \brief Function XORs two bitblocks and computes the bitcount.
5027  Function does not analyse availability of source blocks.
5028 
5029  \param src1 - first bit block
5030  \param src2 - second bit block
5031 
5032  @ingroup bitfunc
5033 */
5034 inline
5036  const bm::word_t* BMRESTRICT src2)
5037 {
5038  unsigned count;
5039  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5040 #ifdef BMVECTOPT
5041  count = VECT_BITCOUNT_XOR(src1, src1_end, src2);
5042 #else
5043  count = 0;
5044 # ifdef BM64OPT
5045  const bm::id64_t* b1 = (bm::id64_t*) src1;
5046  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5047  const bm::id64_t* b2 = (bm::id64_t*) src2;
5048  do
5049  {
5050  count += bitcount64_4way(b1[0] ^ b2[0],
5051  b1[1] ^ b2[1],
5052  b1[2] ^ b2[2],
5053  b1[3] ^ b2[3]);
5054  b1 += 4;
5055  b2 += 4;
5056  } while (b1 < b1_end);
5057 # else
5058  do
5059  {
5060  BM_INCWORD_BITCOUNT(count, src1[0] ^ src2[0]);
5061  BM_INCWORD_BITCOUNT(count, src1[1] ^ src2[1]);
5062  BM_INCWORD_BITCOUNT(count, src1[2] ^ src2[2]);
5063  BM_INCWORD_BITCOUNT(count, src1[3] ^ src2[3]);
5064 
5065  src1+=4;
5066  src2+=4;
5067  } while (src1 < src1_end);
5068 # endif
5069 #endif
5070  return count;
5071 }
5072 
5073 
5074 /*!
5075  \brief Function XORs two bitblocks and and tests for any bit.
5076  Function does not analyse availability of source blocks.
5077 
5078  \param src1 - first bit block.
5079  \param src2 - second bit block.
5080 
5081  @ingroup bitfunc
5082 */
5083 inline
5085  const bm::word_t* BMRESTRICT src2)
5086 {
5087  unsigned count = 0;
5088  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5089  do
5090  {
5091  count = (src1[0] ^ src2[0]) |
5092  (src1[1] ^ src2[1]) |
5093  (src1[2] ^ src2[2]) |
5094  (src1[3] ^ src2[3]);
5095 
5096  src1+=4; src2+=4;
5097  } while (!count && (src1 < src1_end));
5098  return count;
5099 }
5100 
5101 /*!
5102  \brief Function SUBs two bitblocks and computes the bitcount.
5103  Function does not analyse availability of source blocks.
5104 
5105  \param src1 - first bit block.
5106  \param src2 - second bit block.
5107 
5108  @ingroup bitfunc
5109 */
5110 inline
5112  const bm::word_t* BMRESTRICT src2)
5113 {
5114  unsigned count;
5115  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5116 #ifdef BMVECTOPT
5117  count = VECT_BITCOUNT_SUB(src1, src1_end, src2);
5118 #else
5119  count = 0;
5120 # ifdef BM64OPT
5121  const bm::id64_t* b1 = (bm::id64_t*) src1;
5122  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5123  const bm::id64_t* b2 = (bm::id64_t*) src2;
5124  do
5125  {
5126  count += bitcount64_4way(b1[0] & ~b2[0],
5127  b1[1] & ~b2[1],
5128  b1[2] & ~b2[2],
5129  b1[3] & ~b2[3]);
5130  b1 += 4;
5131  b2 += 4;
5132  } while (b1 < b1_end);
5133 # else
5134  do
5135  {
5136  BM_INCWORD_BITCOUNT(count, src1[0] & ~src2[0]);
5137  BM_INCWORD_BITCOUNT(count, src1[1] & ~src2[1]);
5138  BM_INCWORD_BITCOUNT(count, src1[2] & ~src2[2]);
5139  BM_INCWORD_BITCOUNT(count, src1[3] & ~src2[3]);
5140 
5141  src1+=4;
5142  src2+=4;
5143  } while (src1 < src1_end);
5144 # endif
5145 #endif
5146  return count;
5147 }
5148 
5149 /*!
5150  \brief Function SUBs two bitblocks and and tests for any bit.
5151  Function does not analyse availability of source blocks.
5152 
5153  \param src1 - first bit block.
5154  \param src2 - second bit block.
5155 
5156  @ingroup bitfunc
5157 */
5158 inline
5160  const bm::word_t* BMRESTRICT src2)
5161 {
5162  unsigned count = 0;
5163  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5164 
5165  do
5166  {
5167  count = (src1[0] & ~src2[0]) |
5168  (src1[1] & ~src2[1]) |
5169  (src1[2] & ~src2[2]) |
5170  (src1[3] & ~src2[3]);
5171 
5172  src1+=4; src2+=4;
5173  } while ((src1 < src1_end) && (count == 0));
5174  return count;
5175 }
5176 
5177 
5178 /*!
5179  \brief Function ORs two bitblocks and computes the bitcount.
5180  Function does not analyse availability of source blocks.
5181 
5182  \param src1 - first bit block
5183  \param src2 - second bit block.
5184 
5185  @ingroup bitfunc
5186 */
5187 inline
5188 unsigned bit_block_or_count(const bm::word_t* src1,
5189  const bm::word_t* src2)
5190 {
5191  unsigned count;
5192  const bm::word_t* src1_end = src1 + bm::set_block_size;
5193 #ifdef BMVECTOPT
5194  count = VECT_BITCOUNT_OR(src1, src1_end, src2);
5195 #else
5196  count = 0;
5197 # ifdef BM64OPT
5198  const bm::id64_t* b1 = (bm::id64_t*) src1;
5199  const bm::id64_t* b1_end = (bm::id64_t*) src1_end;
5200  const bm::id64_t* b2 = (bm::id64_t*) src2;
5201  do
5202  {
5203  count += bitcount64_4way(b1[0] | b2[0],
5204  b1[1] | b2[1],
5205  b1[2] | b2[2],
5206  b1[3] | b2[3]);
5207  b1 += 4;
5208  b2 += 4;
5209  } while (b1 < b1_end);
5210 # else
5211  do
5212  {
5213  BM_INCWORD_BITCOUNT(count, src1[0] | src2[0]);
5214  BM_INCWORD_BITCOUNT(count, src1[1] | src2[1]);
5215  BM_INCWORD_BITCOUNT(count, src1[2] | src2[2]);
5216  BM_INCWORD_BITCOUNT(count, src1[3] | src2[3]);
5217 
5218  src1+=4;
5219  src2+=4;
5220  } while (src1 < src1_end);
5221 # endif
5222 #endif
5223  return count;
5224 }
5225 
5226 /*!
5227  \brief Function ORs two bitblocks and and tests for any bit.
5228  Function does not analyse availability of source blocks.
5229 
5230  \param src1 - first bit block.
5231  \param src2 - second bit block.
5232 
5233  @ingroup bitfunc
5234 */
5235 inline
5237  const bm::word_t* BMRESTRICT src2)
5238 {
5239  unsigned count = 0;
5240  const bm::word_t* BMRESTRICT src1_end = src1 + bm::set_block_size;
5241  do
5242  {
5243  count = (src1[0] | src2[0]) |
5244  (src1[1] | src2[1]) |
5245  (src1[2] | src2[2]) |
5246  (src1[3] | src2[3]);
5247 
5248  src1+=4; src2+=4;
5249  } while (!count && (src1 < src1_end));
5250  return count;
5251 }
5252 
5253 
5254 
5255 
5256 /*!
5257  \brief bitblock AND operation.
5258 
5259  \param dst - destination block.
5260  \param src - source block.
5261 
5262  \returns pointer on destination block.
5263  If returned value equal to src means that block mutation requested.
5264  NULL is valid return value.
5265 
5266  @ingroup bitfunc
5267 */
5269  const bm::word_t* BMRESTRICT src)
5270 {
5271  BM_ASSERT(dst || src);
5272 
5273  bm::word_t* ret = dst;
5274 
5275  if (IS_VALID_ADDR(dst)) // The destination block already exists
5276  {
5277  if (!IS_VALID_ADDR(src))
5278  {
5279  if (IS_EMPTY_BLOCK(src))
5280  {
5281  //If the source block is zero
5282  //just clean the destination block
5283  return 0;
5284  }
5285  }
5286  else
5287  {
5288  // Regular operation AND on the whole block.
5289  //
5290  auto any = bm::bit_block_and(dst, src);
5291  if (!any)
5292  ret = 0;
5293  }
5294  }
5295  else // The destination block does not exist yet
5296  {
5297  if(!IS_VALID_ADDR(src))
5298  {
5299  if(IS_EMPTY_BLOCK(src))
5300  {
5301  // The source block is empty.
5302  // One argument empty - all result is empty.
5303  return 0;
5304  }
5305  // Here we have nothing to do.
5306  // Src block is all ON, dst block remains as it is
5307  }
5308  else // destination block does not exists, src - valid block
5309  {
5310  if (IS_FULL_BLOCK(dst))
5311  return const_cast<bm::word_t*>(src);
5312  // Nothng to do.
5313  // Dst block is all ZERO no combination required.
5314  }
5315  }
5316 
5317  return ret;
5318 }
5319 
5320 
5321 /*!
5322  \brief Performs bitblock AND operation and calculates bitcount of the result.
5323 
5324  \param src1 - first bit block.
5325  \param src2 - second bit block.
5326 
5327  \returns bitcount value
5328 
5329  @ingroup bitfunc
5330 */
5331 inline
5333  const bm::word_t* BMRESTRICT src2)
5334 {
5335  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
5336  return 0;
5337  return bit_block_and_count(src1, src2);
5338 }
5339 
5340 /*!
5341  \brief Performs bitblock AND operation test.
5342 
5343  \param src1 - first bit block.
5344  \param src2 - second bit block.
5345 
5346  \returns non zero if there is any value
5347 
5348  @ingroup bitfunc
5349 */
5350 inline
5352  const bm::word_t* BMRESTRICT src2)
5353 {
5354  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
5355  return 0;
5356  return bit_block_and_any(src1, src2);
5357 }
5358 
5359 
5360 
5361 /*!
5362  \brief Performs bitblock SUB operation and calculates bitcount of the result.
5363 
5364  \param src1 - first bit block.
5365  \param src2 - second bit block
5366 
5367  \returns bitcount value
5368 
5369  @ingroup bitfunc
5370 */
5371 inline
5373  const bm::word_t* BMRESTRICT src2)
5374 {
5375  if (IS_EMPTY_BLOCK(src1))
5376  return 0;
5377 
5378  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
5379  {
5380  return bit_block_count(src1);
5381  }
5382  return bit_block_sub_count(src1, src2);
5383 }
5384 
5385 
5386 /*!
5387  \brief Performs inverted bitblock SUB operation and calculates
5388  bitcount of the result.
5389 
5390  \param src1 - first bit block.
5391  \param src2 - second bit block
5392 
5393  \returns bitcount value
5394 
5395  @ingroup bitfunc
5396 */
5397 inline
5399  const bm::word_t* BMRESTRICT src2)
5400 {
5401  return bit_operation_sub_count(src2, src1);
5402 }
5403 
5404 
5405 /*!
5406  \brief Performs bitblock test of SUB operation.
5407 
5408  \param src1 - first bit block.
5409  \param src2 - second bit block
5410 
5411  \returns non zero value if there are any bits
5412 
5413  @ingroup bitfunc
5414 */
5415 inline
5417  const bm::word_t* BMRESTRICT src2)
5418 {
5419  if (IS_EMPTY_BLOCK(src1))
5420  return 0;
5421 
5422  if (IS_EMPTY_BLOCK(src2)) // nothing to diff
5423  return !bit_is_all_zero(src1);
5424  return bit_block_sub_any(src1, src2);
5425 }
5426 
5427 
5428 
5429 /*!
5430  \brief Performs bitblock OR operation and calculates bitcount of the result.
5431 
5432  \param src1 - first bit block.
5433  \param src2 - second bit block.
5434 
5435  \returns bitcount value
5436 
5437  @ingroup bitfunc
5438 */
5439 inline
5441  const bm::word_t* BMRESTRICT src2)
5442 {
5443  if (IS_EMPTY_BLOCK(src1))
5444  {
5445  if (!IS_EMPTY_BLOCK(src2))
5446  return bit_block_count(src2);
5447  else
5448  return 0; // both blocks are empty
5449  }
5450  else
5451  {
5452  if (IS_EMPTY_BLOCK(src2))
5453  return bit_block_count(src1);
5454  }
5455 
5456  return bit_block_or_count(src1, src2);
5457 }
5458 
5459 /*!
5460  \brief Performs bitblock OR operation test.
5461 
5462  \param src1 - first bit block.
5463  \param src2 - second bit block.
5464 
5465  \returns non zero value if there are any bits
5466 
5467  @ingroup bitfunc
5468 */
5469 inline
5471  const bm::word_t* BMRESTRICT src2)
5472 {
5473  if (IS_EMPTY_BLOCK(src1))
5474  {
5475  if (!IS_EMPTY_BLOCK(src2))
5476  return !bit_is_all_zero(src2);
5477  else
5478  return 0; // both blocks are empty
5479  }
5480  else
5481  {
5482  if (IS_EMPTY_BLOCK(src2))
5483  return !bit_is_all_zero(src1);
5484  }
5485 
5486  return bit_block_or_any(src1, src2);
5487 }
5488 
5489 /*!
5490  \brief Plain bitblock OR operation.
5491  Function does not analyse availability of source and destination blocks.
5492 
5493  \param dst - destination block.
5494  \param src - source block.
5495 
5496  @ingroup bitfunc
5497 */
5498 inline
5500  const bm::word_t* BMRESTRICT src)
5501 {
5502 #ifdef BMVECTOPT
5503  return VECT_OR_BLOCK(dst, src);
5504 #else
5505  const bm::wordop_t* BMRESTRICT wrd_ptr = (wordop_t*)src;
5506  const bm::wordop_t* BMRESTRICT wrd_end = (wordop_t*)(src + bm::set_block_size);
5507  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5508 
5509  bm::wordop_t acc = 0;
5510  const bm::wordop_t not_acc = acc = ~acc;
5511 
5512  do
5513  {
5514  acc &= (dst_ptr[0] |= wrd_ptr[0]);
5515  acc &= (dst_ptr[1] |= wrd_ptr[1]);
5516  acc &= (dst_ptr[2] |= wrd_ptr[2]);
5517  acc &= (dst_ptr[3] |= wrd_ptr[3]);
5518 
5519  dst_ptr+=4;wrd_ptr+=4;
5520 
5521  } while (wrd_ptr < wrd_end);
5522  return acc == not_acc;
5523 #endif
5524 }
5525 
5526 /*!
5527  \brief 2 way (target := source1 | source2) bitblock OR operation.
5528  \param dst - dest block [out]
5529  \param src1 - source 1
5530  \param src2 - source 2
5531 
5532  @return 1 if produced block of ALL ones
5533  @ingroup bitfunc
5534 */
5535 inline
5537  const bm::word_t* BMRESTRICT src1,
5538  const bm::word_t* BMRESTRICT src2)
5539 {
5540 #ifdef BMVECTOPT
5541  return VECT_OR_BLOCK_2WAY(dst, src1, src2);
5542 #else
5543  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
5544  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
5545  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
5546  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5547 
5548  bm::wordop_t acc = 0;
5549  const bm::wordop_t not_acc = acc = ~acc;
5550  do
5551  {
5552  acc &= (dst_ptr[0] = wrd_ptr1[0] | wrd_ptr2[0]);
5553  acc &= (dst_ptr[1] = wrd_ptr1[1] | wrd_ptr2[1]);
5554  acc &= (dst_ptr[2] = wrd_ptr1[2] | wrd_ptr2[2]);
5555  acc &= (dst_ptr[3] = wrd_ptr1[3] | wrd_ptr2[3]);
5556 
5557  dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
5558 
5559  } while (wrd_ptr1 < wrd_end1);
5560  return acc == not_acc;
5561 #endif
5562 }
5563 
5564 
5565 /*!
5566  \brief 2 way (target := source1 ^ source2) bitblock XOR operation.
5567  \param dst - dest block [out]
5568  \param src1 - source 1
5569  \param src2 - source 2
5570 
5571  @return OR accumulator
5572  @ingroup bitfunc
5573 */
5574 inline
5576  const bm::word_t* BMRESTRICT src1,
5577  const bm::word_t* BMRESTRICT src2)
5578 {
5579 #ifdef BMVECTOPT
5580  return VECT_XOR_BLOCK_2WAY(dst, src1, src2);
5581 #else
5582  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
5583  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
5584  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
5585  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5586 
5587  bm::wordop_t acc = 0;
5588  do
5589  {
5590  acc |= (dst_ptr[0] = wrd_ptr1[0] ^ wrd_ptr2[0]);
5591  acc |= (dst_ptr[1] = wrd_ptr1[1] ^ wrd_ptr2[1]);
5592  acc |= (dst_ptr[2] = wrd_ptr1[2] ^ wrd_ptr2[2]);
5593  acc |= (dst_ptr[3] = wrd_ptr1[3] ^ wrd_ptr2[3]);
5594 
5595  dst_ptr+=4; wrd_ptr1+=4; wrd_ptr2+=4;
5596 
5597  } while (wrd_ptr1 < wrd_end1);
5598  return acc;
5599 #endif
5600 }
5601 
5602 
5603 /*!
5604  \brief 3 way (target | source1 | source2) bitblock OR operation.
5605  Function does not analyse availability of source and destination blocks.
5606 
5607  \param dst - sst-dest block [in,out]
5608  \param src1 - source 1
5609  \param src2 - source 2
5610 
5611  @return 1 if produced block of ALL ones
5612 
5613  @ingroup bitfunc
5614 */
5615 inline
5617  const bm::word_t* BMRESTRICT src1,
5618  const bm::word_t* BMRESTRICT src2)
5619 {
5620 #ifdef BMVECTOPT
5621  return VECT_OR_BLOCK_3WAY(dst, src1, src2);
5622 #else
5623  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
5624  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
5625  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
5626  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5627 
5628  bm::wordop_t acc = 0;
5629  const bm::wordop_t not_acc = acc = ~acc;
5630  do
5631  {
5632  acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0]);
5633  acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1]);
5634  acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2]);
5635  acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3]);
5636 
5637  dst_ptr+=4; wrd_ptr1+=4;wrd_ptr2+=4;
5638 
5639  } while (wrd_ptr1 < wrd_end1);
5640  return acc == not_acc;
5641 #endif
5642 }
5643 
5644 
5645 /*!
5646  \brief 5 way (target, source1, source2) bitblock OR operation.
5647  Function does not analyse availability of source and destination blocks.
5648 
5649  \param dst - destination block.
5650  \param src1 - source1, etc
5651 
5652  @return 1 if produced block of ALL ones
5653 
5654  @ingroup bitfunc
5655 */
5656 inline
5658  const bm::word_t* BMRESTRICT src1,
5659  const bm::word_t* BMRESTRICT src2,
5660  const bm::word_t* BMRESTRICT src3,
5661  const bm::word_t* BMRESTRICT src4)
5662 {
5663 #ifdef BMVECTOPT
5664  return VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4);
5665 #else
5666  const bm::wordop_t* BMRESTRICT wrd_ptr1 = (wordop_t*)src1;
5667  const bm::wordop_t* BMRESTRICT wrd_end1 = (wordop_t*)(src1 + set_block_size);
5668  const bm::wordop_t* BMRESTRICT wrd_ptr2 = (wordop_t*)src2;
5669  const bm::wordop_t* BMRESTRICT wrd_ptr3 = (wordop_t*)src3;
5670  const bm::wordop_t* BMRESTRICT wrd_ptr4 = (wordop_t*)src4;
5671  bm::wordop_t* BMRESTRICT dst_ptr = (wordop_t*)dst;
5672 
5673  bm::wordop_t acc = 0;
5674  const bm::wordop_t not_acc = acc = ~acc;
5675  do
5676  {
5677  acc &= (dst_ptr[0] |= wrd_ptr1[0] | wrd_ptr2[0] | wrd_ptr3[0] | wrd_ptr4[0]);
5678  acc &= (dst_ptr[1] |= wrd_ptr1[1] | wrd_ptr2[1] | wrd_ptr3[1] | wrd_ptr4[1]);
5679  acc &= (dst_ptr[2] |= wrd_ptr1[2] | wrd_ptr2[2] | wrd_ptr3[2] | wrd_ptr4[2]);
5680  acc &= (dst_ptr[3] |= wrd_ptr1[3] | wrd_ptr2[3] | wrd_ptr3[3] | wrd_ptr4[3]);
5681 
5682  dst_ptr+=4;
5683  wrd_ptr1+=4;wrd_ptr2+=4;wrd_ptr3+=4;wrd_ptr4+=4;
5684 
5685  } while (wrd_ptr1 < wrd_end1);
5686  return acc == not_acc;
5687 #endif
5688 }
5689 
5690 
5691 
5692 
5693 /*!
5694  \brief Block OR operation. Makes analysis if block is 0 or FULL.
5695 
5696  \param dst - destination block.
5697  \param src - source block.
5698 
5699  \returns pointer on destination block.
5700  If returned value equal to src means that block mutation requested.
5701  NULL is valid return value.
5702 
5703  @ingroup bitfunc
5704 */
5705 inline
5707  const bm::word_t* BMRESTRICT src)
5708 {
5709  BM_ASSERT(dst || src);
5710 
5711  bm::word_t* ret = dst;
5712 
5713  if (IS_VALID_ADDR(dst)) // The destination block already exists
5714  {
5715  if (!IS_VALID_ADDR(src))
5716  {
5717  if (IS_FULL_BLOCK(src))
5718  {
5719  // if the source block is all set
5720  // just set the destination block
5721  ::memset(dst, 0xFF, bm::set_block_size * sizeof(bm::word_t));
5722  }
5723  }
5724  else
5725  {
5726  // Regular operation OR on the whole block
5727  bm::bit_block_or(dst, src);
5728  }
5729  }
5730  else // The destination block does not exist yet
5731  {
5732  if (!IS_VALID_ADDR(src))
5733  {
5734  if (IS_FULL_BLOCK(src))
5735  {
5736  // The source block is all set, because dst does not exist
5737  // we can simply replace it.
5738  return const_cast<bm::word_t*>(FULL_BLOCK_FAKE_ADDR);
5739  }
5740  }
5741  else
5742  {
5743  if (dst == 0)
5744  {
5745  // The only case when we have to allocate the new block:
5746  // Src is all zero and Dst does not exist
5747  return const_cast<bm::word_t*>(src);
5748  }
5749  }
5750  }
5751  return ret;
5752 }
5753 
5754 /*!
5755  \brief Plain bitblock SUB (AND NOT) operation.
5756  Function does not analyse availability of source and destination blocks.
5757 
5758  \param dst - destination block.
5759  \param src - source block.
5760 
5761  \return 0 if SUB operation did not produce anything (no 1s in the output)
5762 
5763  @ingroup bitfunc
5764 */
5765 inline
5767  const bm::word_t* BMRESTRICT src)
5768 {
5769 #ifdef BMVECTOPT
5770  bm::id64_t acc = VECT_SUB_BLOCK(dst, src);
5771  return acc;
5772 #else
5773  unsigned arr_sz = bm::set_block_size / 2;
5774 
5777 
5778  bm::id64_t acc = 0;
5779  for (unsigned i = 0; i < arr_sz; i+=4)
5780  {
5781  acc |= dst_u->w64[i] &= ~src_u->w64[i];
5782  acc |= dst_u->w64[i+1] &= ~src_u->w64[i+1];
5783  acc |= dst_u->w64[i+2] &= ~src_u->w64[i+2];
5784  acc |= dst_u->w64[i+3] &= ~src_u->w64[i+3];
5785  }
5786  return acc;
5787 #endif
5788 }
5789 
5790 /*!
5791  \brief digest based bitblock SUB (AND NOT) operation
5792 
5793  \param dst - destination block.
5794  \param src - source block.
5795  \param digest - known digest of dst block
5796 
5797  \return new digest
5798 
5799  @ingroup bitfunc
5800 */
5801 inline
5803  const bm::word_t* BMRESTRICT src,
5804  bm::id64_t digest)
5805 {
5806  BM_ASSERT(dst);
5807  BM_ASSERT(src);
5808  BM_ASSERT(dst != src);
5809 
5810  const bm::id64_t mask(1ull);
5811 
5812  bm::id64_t d = digest;
5813  while (d)
5814  {
5815  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
5816 
5817  unsigned wave = bm::word_bitcount64(t - 1);
5818  unsigned off = wave * bm::set_block_digest_wave_size;
5819 
5820  #if defined(VECT_SUB_DIGEST)
5821  bool all_zero = VECT_SUB_DIGEST(&dst[off], &src[off]);
5822  if (all_zero)
5823  digest &= ~(mask << wave);
5824  #else
5825  const bm::bit_block_t::bunion_t* BMRESTRICT src_u = (const bm::bit_block_t::bunion_t*)(&src[off]);
5827 
5828  bm::id64_t acc = 0;
5829  unsigned j = 0;
5830  do
5831  {
5832  acc |= dst_u->w64[j+0] &= ~src_u->w64[j+0];
5833  acc |= dst_u->w64[j+1] &= ~src_u->w64[j+1];
5834  acc |= dst_u->w64[j+2] &= ~src_u->w64[j+2];
5835  acc |= dst_u->w64[j+3] &= ~src_u->w64[j+3];
5836  j+=4;
5837  } while (j < bm::set_block_digest_wave_size/2);
5838 
5839  if (!acc) // all zero
5840  digest &= ~(mask << wave);
5841  #endif
5842 
5843  d = bm::bmi_bslr_u64(d); // d &= d - 1;
5844  } // while
5845 
5846  return digest;
5847 }
5848 
5849 /*!
5850  \brief digest based bitblock SUB (AND NOT) operation (3 operand)
5851 
5852  \param dst - destination block.
5853  \param src1 - source block.
5854  \param src1 - source block.
5855  \param digest - known digest of dst block
5856 
5857  \return new digest
5858 
5859  @ingroup bitfunc
5860 */
5861 inline
5863  const bm::word_t* BMRESTRICT src1,
5864  const bm::word_t* BMRESTRICT src2,
5865  bm::id64_t digest)
5866 {
5867  BM_ASSERT(dst);
5868  BM_ASSERT(src1 && src2);
5869  BM_ASSERT(dst != src1 && dst != src2);
5870 
5871  const bm::id64_t mask(1ull);
5872 
5873  bm::id64_t d = digest;
5874  while (d)
5875  {
5876  bm::id64_t t = bm::bmi_blsi_u64(d); // d & -d;
5877 
5878  unsigned wave = bm::word_bitcount64(t - 1);
5879  unsigned off = wave * bm::set_block_digest_wave_size;
5880 
5881  #if defined(VECT_SUB_DIGEST_2WAY)
5882  bool all_zero = VECT_SUB_DIGEST_2WAY(&dst[off], &src1[off], &src2[off]);
5883  if (all_zero)
5884  digest &= ~(mask << wave);
5885  #else
5886  const bm::bit_block_t::bunion_t* BMRESTRICT src_u1 =
5887  (const bm::bit_block_t::bunion_t*)(&src1[off]);
5888  const bm::bit_block_t::bunion_t* BMRESTRICT src_u2 =
5889  (const bm::bit_block_t::bunion_t*)(&src2[off]);
5891  (bm::bit_block_t::bunion_t*)(&dst[off]);
5892 
5893  bm::id64_t acc = 0;
5894  unsigned j = 0;
5895  do
5896  {
5897  acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & ~src_u2->w64[j+0];
5898  acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & ~src_u2->w64[j+1];
5899  acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & ~src_u2->w64[j+2];
5900  acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & ~src_u2->w64[j+3];
5901  j+=4;
5902  } while (j < bm::set_block_digest_wave_size/2);
5903 
5904  if (!acc) // all zero
5905  digest &= ~(mask << wave);
5906  #endif
5907 
5908  d = bm::bmi_bslr_u64(d); // d &= d - 1;
5909  } // while
5910 
5911  return digest;
5912 }
5913 
5914 
5915 
5916 /*!
5917  \brief bitblock SUB operation.
5918 
5919  \param dst - destination block.
5920  \param src - source block.
5921 
5922  \returns pointer on destination block.
5923  If returned value equal to src means that block mutation requested.
5924  NULL is valid return value.
5925 
5926  @ingroup bitfunc
5927 */
5928 inline
5930  const bm::word_t* BMRESTRICT src)
5931 {
5932  BM_ASSERT(dst || src);
5933 
5934  bm::word_t* ret = dst;
5935  if (IS_VALID_ADDR(dst)) // The destination block already exists
5936  {
5937  if (!IS_VALID_ADDR(src))
5938  {
5939  if (IS_FULL_BLOCK(src))
5940  {
5941  // If the source block is all set
5942  // just clean the destination block
5943  return 0;
5944  }
5945  }
5946  else
5947  {
5948  auto any = bm::bit_block_sub(dst, src);
5949  if (!any)
5950  ret = 0;
5951  }
5952  }
5953  else // The destination block does not exist yet
5954  {
5955  if (!IS_VALID_ADDR(src))
5956  {
5957  if (IS_FULL_BLOCK(src))
5958  {
5959  // The source block is full
5960  return 0;
5961  }
5962  }
5963  else
5964  {
5965  if (IS_FULL_BLOCK(dst))
5966  {
5967  // The only case when we have to allocate the new block:
5968  // dst is all set and src exists
5969  return const_cast<bm::word_t*>(src);
5970  }
5971  }
5972  }
5973  return ret;
5974 }
5975 
5976 
5977 /*!
5978  \brief Plain bitblock XOR operation.
5979  Function does not analyse availability of source and destination blocks.
5980 
5981  \param dst - destination block.
5982  \param src - source block.
5983 
5984  @ingroup bitfunc
5985 */
5986 inline
5988  const bm::word_t* BMRESTRICT src)
5989 {
5990  BM_ASSERT(dst);
5991  BM_ASSERT(src);
5992  BM_ASSERT(dst != src);
5993 
5994 #ifdef BMVECTOPT
5995  bm::id64_t acc = VECT_XOR_BLOCK(dst, src);
5996 #else
5997  unsigned arr_sz = bm::set_block_size / 2;
5998 
6001 
6002  bm::id64_t acc = 0;
6003  for (unsigned i = 0; i < arr_sz; i+=4)
6004  {
6005  acc |= dst_u->w64[i] ^= src_u->w64[i];
6006  acc |= dst_u->w64[i+1] ^= src_u->w64[i+1];
6007  acc |= dst_u->w64[i+2] ^= src_u->w64[i+2];
6008  acc |= dst_u->w64[i+3] ^= src_u->w64[i+3];
6009  }
6010 #endif
6011  return acc;
6012 }
6013 
6014 /*!
6015  \brief bitblock AND NOT with constant ~0 mask operation.
6016 
6017  \param dst - destination block.
6018  \param src - source block.
6019 
6020  @ingroup bitfunc
6021 */
6022 inline
6024  const bm::word_t* BMRESTRICT src)
6025 {
6026  const bm::word_t* BMRESTRICT src_end = src + bm::set_block_size;
6027 #ifdef BMVECTOPT
6028  VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, ~0u);
6029 #else
6030  bm::wordop_t* dst_ptr = (wordop_t*)dst;
6031  const bm::wordop_t* wrd_ptr = (wordop_t*) src;
6032  const bm::wordop_t* wrd_end = (wordop_t*) src_end;
6033 
6034  do
6035  {
6036  dst_ptr[0] = bm::all_bits_mask & ~wrd_ptr[0];
6037  dst_ptr[1] = bm::all_bits_mask & ~wrd_ptr[1];
6038  dst_ptr[2] = bm::all_bits_mask & ~wrd_ptr[2];
6039  dst_ptr[3] = bm::all_bits_mask & ~wrd_ptr[3];
6040  dst_ptr+=4; wrd_ptr+=4;
6041  } while (wrd_ptr < wrd_end);
6042 #endif
6043 }
6044 
6045 /*!
6046  \brief bitblock XOR operation.
6047 
6048  \param dst - destination block.
6049  \param src - source block.
6050 
6051  \returns pointer on destination block.
6052  If returned value equal to src means that block mutation requested.
6053  NULL is valid return value.
6054 
6055  @ingroup bitfunc
6056 */
6057 inline
6059  const bm::word_t* BMRESTRICT src)
6060 {
6061  BM_ASSERT(dst || src);
6062  if (src == dst) return 0; // XOR rule
6063 
6064  bm::word_t* ret = dst;
6065 
6066  if (IS_VALID_ADDR(dst)) // The destination block already exists
6067  {
6068  if (!src) return dst;
6069 
6070  bit_block_xor(dst, src);
6071  }
6072  else // The destination block does not exist yet
6073  {
6074  if (!src) return dst; // 1 xor 0 = 1
6075 
6076  // Here we have two cases:
6077  // if dest block is full or zero if zero we need to copy the source
6078  // otherwise XOR loop against 0xFF...
6079  //BM_ASSERT(dst == 0);
6080  return const_cast<bm::word_t*>(src); // src is the final result
6081  }
6082  return ret;
6083 }
6084 
6085 /*!
6086  \brief Performs bitblock XOR operation and calculates bitcount of the result.
6087 
6088  \param src1 - bit block start ptr
6089  \param src1_end - bit block end ptr
6090  \param src2 - second bit block
6091 
6092  \returns bitcount value
6093 
6094  @ingroup bitfunc
6095 */
6096 inline
6098  const bm::word_t* BMRESTRICT src2)
6099 {
6100  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6101  {
6102  if (IS_EMPTY_BLOCK(src1) && IS_EMPTY_BLOCK(src2))
6103  return 0;
6104  const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1;
6105  return bit_block_count(block);
6106  }
6107  return bit_block_xor_count(src1, src2);
6108 }
6109 
6110 /*!
6111  \brief Performs bitblock XOR operation test.
6112 
6113  \param src1 - bit block start ptr
6114  \param src2 - second bit block ptr
6115 
6116  \returns non zero value if there are bits
6117 
6118  @ingroup bitfunc
6119 */
6120 inline
6122  const bm::word_t* BMRESTRICT src2)
6123 {
6124  if (IS_EMPTY_BLOCK(src1) || IS_EMPTY_BLOCK(src2))
6125  {
6126  if (IS_EMPTY_BLOCK(src1) && IS_EMPTY_BLOCK(src2))
6127  return 0;
6128  const bm::word_t* block = IS_EMPTY_BLOCK(src1) ? src2 : src1;
6129  return !bit_is_all_zero(block);
6130  }
6131  return bit_block_xor_any(src1, src2);
6132 }
6133 
6134 
6135 
6136 /**
6137  \brief Inspects block for full zero words
6138 
6139  \param blk - bit block pointer
6140  \param data_size - data size
6141 
6142  \return size of all non-zero words
6143 
6144  @ingroup bitfunc
6145 */
6146 
6147 template<class T>
6148 unsigned bit_count_nonzero_size(const T* blk,
6149  unsigned data_size)
6150 {
6151  BM_ASSERT(blk && data_size);
6152  unsigned count = 0;
6153  const T* blk_end = blk + data_size - 2;
6154 
6155  do
6156  {
6157  if (*blk == 0)
6158  {
6159  // scan fwd to find 0 island length
6160  const T* blk_j = blk + 1;
6161  for (; blk_j < blk_end; ++blk_j)
6162  {
6163  if (*blk_j != 0)
6164  break;
6165  }
6166  blk = blk_j-1;
6167  count += (unsigned)sizeof(gap_word_t);
6168  }
6169  else
6170  {
6171  // scan fwd to find non-0 island length
6172  const T* blk_j = blk + 1;
6173  for ( ; blk_j < blk_end; ++blk_j)
6174  {
6175  if (*blk_j == 0)
6176  {
6177  // look ahead to identify and ignore short 0-run
6178  if (blk_j[1] | blk_j[2])
6179  {
6180  // skip zero word
6181  ++blk_j;
6182  continue;
6183  }
6184  break;
6185  }
6186  }
6187  count += unsigned(sizeof(gap_word_t));
6188  // count all bit-words now
6189  count += unsigned(blk_j - blk) * unsigned(sizeof(T));
6190  blk = blk_j;
6191  }
6192  ++blk;
6193  }
6194  while(blk < blk_end);
6195 
6196  return count + unsigned(2 * sizeof(T));
6197 }
6198 
6199 
6200 /**
6201  \brief Searches for the next 1 bit in the BIT block
6202  \param data - BIT buffer
6203  \param nbit - bit index to start checking from
6204  \param prev - returns previously checked value
6205 
6206  @ingroup bitfunc
6207 */
6208 inline
6210  unsigned nbit,
6211  bm::id_t* prev)
6212 {
6213  bm::id_t p = *prev;
6214  int found = 0;
6215 
6216  for(;;)
6217  {
6218  unsigned nword = nbit >> bm::set_word_shift;
6219  if (nword >= bm::set_block_size)
6220  break;
6221 
6222  bm::word_t val = data[nword] >> (p & bm::set_word_mask);
6223  if (val)
6224  {
6225  unsigned trail_z = bm::bit_scan_forward32(val);
6226  p += trail_z;
6227  ++found;
6228  break;
6229  }
6230  else
6231  {
6232  p += (bm::set_word_mask + 1) - (nbit & bm::set_word_mask);
6233  nbit += (bm::set_word_mask + 1) - (nbit & bm::set_word_mask);
6234  }
6235  }
6236  *prev = p;
6237  return found;
6238 }
6239 
6240 /*!
6241  \brief BIT block find the last set bit (backward search)
6242 
6243  \param block - bit block buffer pointer
6244  \param last - index of the last 1 bit (out)
6245  \return 0 is not found
6246 
6247  @ingroup bitfunc
6248 */
6249 inline
6250 unsigned bit_find_last(const bm::word_t* block, unsigned* last)
6251 {
6252  BM_ASSERT(block);
6253  BM_ASSERT(last);
6254 
6255  for (unsigned i = bm::set_block_size-1; true; --i)
6256  {
6257  bm::word_t w = block[i];
6258  if (w)
6259  {
6260  unsigned idx = bm::bit_scan_reverse(w);
6261  *last = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6262  return w;
6263  }
6264  if (i == 0)
6265  break;
6266  } // for i
6267  return 0u;
6268 }
6269 
6270 /*!
6271  \brief BIT block find the first set bit
6272 
6273  \param block - bit block buffer pointer
6274  \param first - index of the first 1 bit (out)
6275  \return 0 if not found
6276 
6277  @ingroup bitfunc
6278 */
6279 inline
6280 unsigned bit_find_first(const bm::word_t* block, unsigned* first)
6281 {
6282  BM_ASSERT(block);
6283  BM_ASSERT(first);
6284 
6285  for (unsigned i = 0; i < bm::set_block_size; ++i)
6286  {
6287  bm::word_t w = block[i];
6288  if (w)
6289  {
6290  unsigned idx = bit_scan_forward32(w); // trailing zeros
6291  *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6292  return w;
6293  }
6294  } // for i
6295  return 0u;
6296 }
6297 
6298 /*!
6299  \brief BIT block find the first set bit
6300 
6301  \param block - bit block buffer pointer
6302  \param first - index of the first 1 bit (out)
6303  \param digest - known digest of dst block
6304 
6305  \return 0 if not found
6306 
6307  @ingroup bitfunc
6308 */
6309 inline
6310 unsigned bit_find_first(const bm::word_t* block,
6311  unsigned* first,
6312  bm::id64_t digest)
6313 {
6314  BM_ASSERT(block);
6315  BM_ASSERT(first);
6316  BM_ASSERT(digest);
6317 
6318  bm::id64_t t = bm::bmi_blsi_u64(digest); // d & -d;
6319 
6320  unsigned wave = bm::word_bitcount64(t - 1);
6321  unsigned off = wave * bm::set_block_digest_wave_size;
6322  for (unsigned i = off; i < bm::set_block_size; ++i)
6323  {
6324  bm::word_t w = block[i];
6325  if (w)
6326  {
6327  unsigned idx = bit_scan_forward32(w); // trailing zeros
6328  *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6329  return w;
6330  }
6331  } // for i
6332  return 0u;
6333 }
6334 
6335 
6336 /*!
6337  \brief BIT block find the first set bit if only 1 bit is set
6338 
6339  \param block - bit block buffer pointer
6340  \param first - index of the first 1 bit (out)
6341  \param digest - known digest of dst block
6342 
6343  \return 0 if not found
6344 
6345  @ingroup bitfunc
6346 */
6347 inline
6349  unsigned* first,
6350  bm::id64_t digest)
6351 {
6352  BM_ASSERT(block);
6353  BM_ASSERT(first);
6354 
6355  unsigned bc = bm::word_bitcount64(digest);
6356  if (bc != 1)
6357  return false;
6358 
6359  bool found = false;
6360  bm::id64_t t = bm::bmi_blsi_u64(digest); // d & -d;
6361 
6362  unsigned wave = bm::word_bitcount64(t - 1);
6363  unsigned off = wave * bm::set_block_digest_wave_size;
6364  unsigned i;
6365  for (i = off; i < off + bm::set_block_digest_wave_size; ++i)
6366  {
6367  bm::word_t w = block[i];
6368  if (w)
6369  {
6370  bc = bm::word_bitcount(w);
6371  if (bc != 1)
6372  return false;
6373 
6374  unsigned idx = bit_scan_forward32(w); // trailing zeros
6375  *first = unsigned(idx + (i * 8u * sizeof(bm::word_t)));
6376  found = true;
6377  break;
6378  }
6379  } // for i
6380 
6381  // check if all other bits are zero
6382  for (++i; i < off + bm::set_block_digest_wave_size; ++i)
6383  {
6384  if (block[i])
6385  return false;
6386  }
6387  return found;
6388 }
6389 
6390 
6391 /*!
6392  \brief BIT block find position for the rank
6393 
6394  \param block - bit block buffer pointer
6395  \param rank - rank to find (must be > 0)
6396  \param nbit_from - start bit position in block
6397  \param nbit_pos - (out)found position
6398 
6399  \return 0 if position with rank was found, or
6400  the remaining rank (rank - population count)
6401 
6402  @ingroup bitfunc
6403 */
6404 inline
6405 bm::id_t bit_find_rank(const bm::word_t* const block,
6406  bm::id_t rank,
6407  unsigned nbit_from,
6408  unsigned& nbit_pos)
6409 {
6410  BM_ASSERT(block);
6411  BM_ASSERT(rank);
6412 
6413  unsigned nword = nbit_from >> bm::set_word_shift;
6414 
6415  BM_ASSERT(nword < bm::set_block_size);
6416 
6417  unsigned pos = nbit_from;
6418  bm::id_t nbit = (nbit_from & bm::set_word_mask);
6419 
6420  if (nbit)
6421  {
6422  bm::id_t w = block[nword];
6423  w >>= nbit;
6424  unsigned bc = bm::word_bitcount(w);
6425  if (bc < rank) // skip this
6426  {
6427  rank -= bc; pos += unsigned(32u - nbit);
6428  ++nword;
6429  }
6430  else // target word
6431  {
6432  unsigned idx = bm::word_select64(w, rank);
6433  nbit_pos = pos + idx;
6434  return 0;
6435  }
6436  }
6437 
6438  if (bm::conditional<sizeof(void*) == 8>::test()) // 64-bit fast scan
6439  {
6440  for (; nword < bm::set_block_size-1; nword+=2)
6441  {
6442  bm::id64_t w = (bm::id64_t(block[nword+1]) << 32) | bm::id64_t(block[nword]);
6443  bm::id_t bc = bm::word_bitcount64(w);
6444  if (bc >= rank) // target
6445  {
6446  unsigned idx = bm::word_select64(w, rank);
6447  nbit_pos = pos + idx;
6448  return 0;
6449  }
6450  rank -= bc;
6451  pos += 64u;
6452  }
6453  }
6454 
6455  for (; nword < bm::set_block_size; ++nword)
6456  {
6457  bm::id_t w = block[nword];
6458  bm::id_t bc = bm::word_bitcount(w);
6459  if (rank > bc)
6460  {
6461  rank -= bc; pos += 32u;
6462  continue;
6463  }
6464  unsigned idx = bm::word_select64(w, rank);
6465  nbit_pos = pos + idx;
6466  return 0;
6467  } // for nword
6468  return rank;
6469 }
6470 
6471 /**
6472  \brief Find rank in block (GAP or BIT)
6473 
6474  \param block - bit block buffer pointer
6475  \param rank - rank to find (must be > 0)
6476  \param nbit_from - start bit position in block
6477  \param nbit_pos - found position
6478 
6479  \return 0 if position with rank was found, or
6480  the remaining rank (rank - population count)
6481 
6482  @internal
6483 */
6484 inline
6486  bm::id_t rank,
6487  unsigned nbit_from,
6488  unsigned& nbit_pos)
6489 {
6490  if (BM_IS_GAP(block))
6491  {
6492  const bm::gap_word_t* const gap_block = BMGAP_PTR(block);
6493  rank = bm::gap_find_rank(gap_block, rank, nbit_from, nbit_pos);
6494  }
6495  else
6496  {
6497  rank = bm::bit_find_rank(block, rank, nbit_from, nbit_pos);
6498  }
6499  return rank;
6500 }
6501 
6502 
6503 
6504 /*!
6505  @brief Choose best representation for a bit-block
6506  @ingroup bitfunc
6507 */
6508 inline
6510  unsigned total_possible_bitcount,
6511  unsigned gap_count,
6512  unsigned block_size)
6513 {
6514  unsigned arr_size = unsigned(sizeof(bm::gap_word_t) * bit_count + sizeof(bm::gap_word_t));
6515  unsigned gap_size = unsigned(sizeof(bm::gap_word_t) * gap_count + sizeof(bm::gap_word_t));
6516  unsigned inv_arr_size = unsigned(sizeof(bm::gap_word_t) * (total_possible_bitcount - bit_count) + sizeof(bm::gap_word_t));
6517 
6518  if ((gap_size < block_size) && (gap_size < arr_size) && (gap_size < inv_arr_size))
6519  {
6520  return bm::set_gap;
6521  }
6522 
6523  if (arr_size < inv_arr_size)
6524  {
6525  if ((arr_size < block_size) && (arr_size < gap_size))
6526  {
6527  return bm::set_array1;
6528  }
6529  }
6530  else
6531  {
6532  if ((inv_arr_size < block_size) && (inv_arr_size < gap_size))
6533  {
6534  return bm::set_array0;
6535  }
6536  }
6537  return bm::set_bitset;
6538 }
6539 
6540 /*!
6541  @brief Convert bit block into an array of ints corresponding to 1 bits
6542  @ingroup bitfunc
6543 */
6544 template<typename T>
6546  const unsigned* BMRESTRICT src,
6547  bm::id_t bits,
6548  unsigned dest_len,
6549  unsigned mask = 0)
6550 {
6551  T* BMRESTRICT pcurr = dest;
6552  for (unsigned bit_idx=0; bit_idx < bits; ++src,bit_idx += unsigned(sizeof(*src) * 8))
6553  {
6554  unsigned val = *src ^ mask; // invert value by XOR 0xFF..
6555  if (val == 0)
6556  continue;
6557  if (pcurr + sizeof(val)*8 >= dest + dest_len) // insufficient space
6558  return 0;
6559  // popscan loop to decode bits in a word
6560  while (val)
6561  {
6562  bm::id_t t = val & -val;
6563  *pcurr++ = (T)(bm::word_bitcount(t - 1) + bit_idx);
6564  val &= val - 1;
6565  }
6566  } // for
6567  return (T)(pcurr - dest);
6568 }
6569 
6570 /**
6571  \brief Checks all conditions and returns true if block consists of only 0 bits
6572  \param blk - Blocks's pointer
6573  \param deep_scan - flag to do full bit block verification (scan)
6574  when deep scan is not requested result can be approximate
6575  \returns true if all bits are in the block are 0
6576 
6577  @internal
6578 */
6579 inline
6580 bool check_block_zero(const bm::word_t* blk, bool deep_scan)
6581 {
6582  if (!blk) return true;
6583 
6584  bool ret;
6585  if (BM_IS_GAP(blk))
6586  ret = gap_is_all_zero(BMGAP_PTR(blk));
6587  else
6588  ret = deep_scan ? bm::bit_is_all_zero(blk) : 0;
6589  return ret;
6590 }
6591 
6592 
6593 /**
6594  \brief Checks if block has only 1 bits
6595  \param blk - Block's pointer
6596  \param deep_scan - flag to do full bit block verification (scan)
6597  when deep scan is not requested result can be approximate
6598  \return true if block consists of 1 bits.
6599 
6600  @internal
6601 */
6602 inline
6603 bool check_block_one(const bm::word_t* blk, bool deep_scan)
6604 {
6605  if (blk == 0) return false;
6606 
6607  if (BM_IS_GAP(blk))
6608  return bm::gap_is_all_one(BMGAP_PTR(blk));
6609 
6610  if (IS_FULL_BLOCK(blk))
6611  return true;
6612 
6613  if (!deep_scan)
6614  return false; // block exists - presume it has 0 bits
6615 
6616  return bm::is_bits_one((wordop_t*)blk);
6617 }
6618 
6619 
6620 
6621 /*! @brief Calculates memory overhead for number of gap blocks sharing
6622  the same memory allocation table (level lengths table).
6623  @ingroup gapfunc
6624 */
6625 template<typename T>
6626 unsigned gap_overhead(const T* length,
6627  const T* length_end,
6628  const T* glevel_len)
6629 {
6630  BM_ASSERT(length && length_end && glevel_len);
6631 
6632  unsigned overhead = 0;
6633  for (;length < length_end; ++length)
6634  {
6635  unsigned len = *length;
6636  int level = gap_calc_level(len, glevel_len);
6637  BM_ASSERT(level >= 0 && level < (int)bm::gap_levels);
6638  unsigned capacity = glevel_len[level];
6639  BM_ASSERT(capacity >= len);
6640  overhead += capacity - len;
6641  }
6642  return overhead;
6643 }
6644 
6645 
6646 /*! @brief Finds optimal gap blocks lengths.
6647  @param length - first element of GAP lengths array
6648  @param length_end - end of the GAP lengths array
6649  @param glevel_len - destination GAP lengths array
6650  @ingroup gapfunc
6651 */
6652 template<typename T>
6653 bool improve_gap_levels(const T* length,
6654  const T* length_end,
6655  T* glevel_len)
6656 {
6657  BM_ASSERT(length && length_end && glevel_len);
6658 
6659  size_t lsize = size_t(length_end - length);
6660 
6661  BM_ASSERT(lsize);
6662 
6663  gap_word_t max_len = 0;
6664  unsigned i;
6665  for (i = 0; i < lsize; ++i)
6666  {
6667  if (length[i] > max_len)
6668  max_len = length[i];
6669  }
6670  if (max_len < 5 || lsize <= bm::gap_levels)
6671  {
6672  glevel_len[0] = T(max_len + 4);
6673  for (i = 1; i < bm::gap_levels; ++i)
6674  {
6675  glevel_len[i] = bm::gap_max_buff_len;
6676  }
6677  return true;
6678  }
6679 
6680  glevel_len[bm::gap_levels-1] = T(max_len + 5);
6681 
6682  unsigned min_overhead = gap_overhead(length, length_end, glevel_len);
6683  bool is_improved = false;
6684 
6685  // main problem solving loop
6686  //
6687  for (i = bm::gap_levels-2; ; --i)
6688  {
6689  unsigned opt_len = 0;
6690  unsigned j;
6691  bool imp_flag = false;
6692  gap_word_t gap_saved_value = glevel_len[i];
6693  for (j = 0; j < lsize; ++j)
6694  {
6695  glevel_len[i] = T(length[j] + 4);
6696  unsigned ov = gap_overhead(length, length_end, glevel_len);
6697  if (ov <= min_overhead)
6698  {
6699  min_overhead = ov;
6700  opt_len = length[j]+4;
6701  imp_flag = true;
6702  }
6703  }
6704  if (imp_flag)
6705  {
6706  glevel_len[i] = (T)opt_len; // length[opt_idx]+4;
6707  is_improved = true;
6708  }
6709  else
6710  {
6711  glevel_len[i] = gap_saved_value;
6712  }
6713  if (i == 0)
6714  break;
6715  }
6716 
6717  // Remove duplicates
6718  //
6719  T val = *glevel_len;
6720  T* gp = glevel_len;
6721  T* res = glevel_len;
6722  for (i = 0; i < bm::gap_levels; ++i)
6723  {
6724  if (val != *gp)
6725  {
6726  val = *gp;
6727  *++res = val;
6728  }
6729  ++gp;
6730  }
6731 
6732  // Filling the "unused" part with max. possible value
6733  while (++res < (glevel_len + bm::gap_levels))
6734  {
6735  *res = bm::gap_max_buff_len;
6736  }
6737 
6738  return is_improved;
6739 
6740 }
6741 
6742 
6743 
6744 /**
6745  Bit-block get adapter, takes bitblock and represents it as a
6746  get_32() accessor function
6747  \internal
6748 */
6750 {
6751 public:
6752  bitblock_get_adapter(const bm::word_t* bit_block) : b_(bit_block) {}
6753 
6755  bm::word_t get_32() { return *b_++; }
6756 private:
6757  const bm::word_t* b_;
6758 };
6759 
6760 
6761 /**
6762  Bit-block store adapter, takes bitblock and saves results into it
6763  \internal
6764 */
6766 {
6767 public:
6768  bitblock_store_adapter(bm::word_t* bit_block) : b_(bit_block) {}
6770  void push_back(bm::word_t w) { *b_++ = w; }
6771 private:
6772  bm::word_t* b_;
6773 };
6774 
6775 /**
6776  Bit-block sum adapter, takes values and sums it
6777  /internal
6778 */
6780 {
6781 public:
6782  bitblock_sum_adapter() : sum_(0) {}
6784  void push_back(bm::word_t w) { this->sum_+= w; }
6785  /// Get accumulated sum
6786  bm::word_t sum() const { return this->sum_; }
6787 private:
6788  bm::word_t sum_;
6789 };
6790 
6791 /**
6792  Adapter to get words from a range stream
6793  (see range serialized bit-block)
6794  \internal
6795 */
6796 template<class DEC> class decoder_range_adapter
6797 {
6798 public:
6799  decoder_range_adapter(DEC& dec, unsigned from_idx, unsigned to_idx)
6800  : decoder_(dec),
6801  from_(from_idx),
6802  to_(to_idx),
6803  cnt_(0)
6804  {}
6805 
6807  {
6808  if (cnt_ < from_ || cnt_ > to_)
6809  {
6810  ++cnt_; return 0;
6811  }
6812  ++cnt_;
6813  return decoder_.get_32();
6814  }
6815 
6816 private:
6817  DEC& decoder_;
6818  unsigned from_;
6819  unsigned to_;
6820  unsigned cnt_;
6821 };
6822 
6823 
6824 /*!
6825  Abstract recombination algorithm for two bit-blocks
6826  Bit blocks can come as dserialization decoders or bit-streams
6827 */
6828 template<class It1, class It2, class BinaryOp, class Encoder>
6829 void bit_recomb(It1& it1, It2& it2,
6830  BinaryOp& op,
6831  Encoder& enc,
6832  unsigned block_size = bm::set_block_size)
6833 {
6834  for (unsigned i = 0; i < block_size; ++i)
6835  {
6836  bm::word_t w1 = it1.get_32();
6837  bm::word_t w2 = it2.get_32();
6838  bm::word_t w = op(w1, w2);
6839  enc.push_back( w );
6840  } // for
6841 }
6842 
6843 /// Bit AND functor
6844 template<typename W> struct bit_AND
6845 {
6846  W operator()(W w1, W w2) { return w1 & w2; }
6847 };
6848 
6849 /// Bit OR functor
6850 template<typename W> struct bit_OR
6851 {
6852  W operator()(W w1, W w2) { return w1 | w2; }
6853 };
6854 
6855 /// Bit SUB functor
6856 template<typename W> struct bit_SUB
6857 {
6858  W operator()(W w1, W w2) { return w1 & ~w2; }
6859 };
6860 
6861 /// Bit XOR functor
6862 template<typename W> struct bit_XOR
6863 {
6864  W operator()(W w1, W w2) { return w1 ^ w2; }
6865 };
6866 
6867 /// Bit ASSIGN functor
6868 template<typename W> struct bit_ASSIGN
6869 {
6870  W operator()(W, W w2) { return w2; }
6871 };
6872 
6873 /// Bit COUNT functor
6874 template<typename W> struct bit_COUNT
6875 {
6876  W operator()(W w1, W w2)
6877  {
6878  w1 = 0;
6879  BM_INCWORD_BITCOUNT(w1, w2);
6880  return w1;
6881  }
6882 };
6883 
6884 /// Bit COUNT AND functo