1 #ifndef BMFUNC__H__INCLUDED__ 2 #define BMFUNC__H__INCLUDED__ 32 # pragma warning( disable: 4146 ) 71 memory_used += mem_used;
72 max_serialize_mem += mem_used;
79 size_t mem_used = (capacity *
sizeof(
gap_word_t));
80 memory_used += mem_used;
81 max_serialize_mem += (unsigned)(length *
sizeof(
gap_word_t));
83 gap_cap_overhead += (capacity - length) *
sizeof(
gap_word_t);
86 if (capacity == gap_levels[i])
98 bit_blocks = gap_blocks = ptr_sub_blocks = bv_count = 0;
99 max_serialize_mem = memory_used = gap_cap_overhead = 0;
101 gaps_by_level[i] = 0ull;
107 bit_blocks += st.bit_blocks;
108 gap_blocks += st.gap_blocks;
109 ptr_sub_blocks += st.ptr_sub_blocks;
110 bv_count += st.bv_count;
111 max_serialize_mem += st.max_serialize_mem + 8;
112 memory_used += st.memory_used;
113 gap_cap_overhead += st.gap_cap_overhead;
120 template<
typename First,
typename Second>
126 pair(First f, Second s) : first(f), second(s) {}
134 unsigned short bits[65];
146 template<
typename BI_TYPE>
158 template<
typename RTYPE>
168 template<
typename RTYPE>
171 RTYPE idx = bm::get_super_block_start<RTYPE>(i);
201 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT) 204 #if defined(BM_USE_GCC_BUILD) 205 return (
bm::id_t)__builtin_popcount(w);
221 tmp = n - ((n >> 1) & 033333333333)
222 - ((n >> 2) & 011111111111);
223 return ((tmp + (tmp >> 3)) & 030707070707) % 63;
234 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) || defined(BMAVX512OPT) 235 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) 236 return unsigned(_mm_popcnt_u64(x));
238 return _mm_popcnt_u32(x >> 32) + _mm_popcnt_u32((
unsigned)x);
241 #if defined(BM_USE_GCC_BUILD) 242 return (
unsigned)__builtin_popcountll(x);
244 #if (defined(__arm__)) // 32-bit 247 x = x - ((x >> 1) & 0x5555555555555555);
248 x = (x & 0x3333333333333333) + ((x >> 2) & 0x3333333333333333);
249 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0F;
272 x = x - ((x >> 1) & m1);
273 y = y - ((y >> 1) & m1);
274 u = u - ((u >> 1) & m1);
275 v = v - ((v >> 1) & m1);
276 x = (x & m2) + ((x >> 2) & m2);
277 y = (y & m2) + ((y >> 2) & m2);
278 u = (u & m2) + ((u >> 2) & m2);
279 v = (v & m2) + ((v >> 2) & m2);
282 x = (x & m3) + ((x >> 4) & m3);
283 u = (u & m3) + ((u >> 4) & m3);
289 return x & 0x000001FFU;
304 template<
typename T,
typename F>
307 for (
unsigned sub_octet = 0; w != 0; w >>= 4, sub_octet += 4)
320 func(sub_octet, sub_octet + 1);
326 func(sub_octet, sub_octet + 2);
329 func(sub_octet + 1, sub_octet + 2);
332 func(sub_octet, sub_octet + 1, sub_octet + 2);
338 func(sub_octet, sub_octet + 3);
341 func(sub_octet + 1, sub_octet + 3);
344 func(sub_octet, sub_octet + 1, sub_octet + 3);
347 func(sub_octet + 2, sub_octet + 3);
350 func(sub_octet, sub_octet + 2, sub_octet + 3);
353 func(sub_octet + 1, sub_octet + 2, sub_octet + 3);
356 func(sub_octet, sub_octet + 1, sub_octet + 2, sub_octet + 3);
374 template<
typename T,
typename F>
378 for (
unsigned octet = 0; w != 0; w >>= 8, octet += 8)
380 if (w & 1) func(octet + 0);
381 if (w & 2) func(octet + 1);
382 if (w & 4) func(octet + 2);
383 if (w & 8) func(octet + 3);
384 if (w & 16) func(octet + 4);
385 if (w & 32) func(octet + 5);
386 if (w & 64) func(octet + 6);
387 if (w & 128) func(octet + 7);
401 for (
unsigned sub_octet = 0; w; w >>= 4, sub_octet+=4)
408 bits[cnt++] = 0 + sub_octet;
411 bits[cnt++] = 1 + sub_octet;
414 bits[cnt] = 0 + sub_octet;
415 bits[cnt+1] = 1 + sub_octet;
419 bits[cnt++] = 2 + sub_octet;
422 bits[cnt+0] = 0 + sub_octet;
423 bits[cnt+1] = 2 + sub_octet;
427 bits[cnt+0] = 1 + sub_octet;
428 bits[cnt+1] = 2 + sub_octet;
432 bits[cnt+0] = 0 + sub_octet;
433 bits[cnt+1] = 1 + sub_octet;
434 bits[cnt+2] = 2 + sub_octet;
438 bits[cnt++] = 3 + sub_octet;
441 bits[cnt+0] = 0 + sub_octet;
442 bits[cnt+1] = 3 + sub_octet;
446 bits[cnt+0] = 1 + sub_octet;
447 bits[cnt+1] = 3 + sub_octet;
451 bits[cnt+0] = 0 + sub_octet;
452 bits[cnt+1] = 1 + sub_octet;
453 bits[cnt+2] = 3 + sub_octet;
457 bits[cnt+0] = 2 + sub_octet;
458 bits[cnt+1] = 3 + sub_octet;
462 bits[cnt+0] = 0 + sub_octet;
463 bits[cnt+1] = 2 + sub_octet;
464 bits[cnt+2] = 3 + sub_octet;
468 bits[cnt+0] = 1 + sub_octet;
469 bits[cnt+1] = 2 + sub_octet;
470 bits[cnt+2] = 3 + sub_octet;
474 bits[cnt+0] = 0 + sub_octet;
475 bits[cnt+1] = 1 + sub_octet;
476 bits[cnt+2] = 2 + sub_octet;
477 bits[cnt+3] = 3 + sub_octet;
487 #ifdef BM_NONSTANDARD_EXTENTIONS 495 unsigned bitscan_nibble_gcc(
unsigned w,
unsigned* bits) BMNOEXCEPT
497 static void* d_table[] = { &&l0,
498 &&l1, &&l3_1, &&l3, &&l7_1, &&l5, &&l7_0, &&l7, &&l15_1,
499 &&l9, &&l11_0, &&l11, &&l15_0, &&l13, &&l14, &&l15 };
502 for (
unsigned sub_octet = 0; w; w >>= 4, sub_octet+=4)
504 goto *d_table[w & 15];
506 bits[cnt++] = sub_octet;
509 bits[cnt++] = sub_octet;
511 bits[cnt++] = 1 + sub_octet;
514 bits[cnt++] = sub_octet;
517 bits[cnt++] = sub_octet;
519 bits[cnt++] = 1 + sub_octet;
521 bits[cnt++] = 2 + sub_octet;
524 bits[cnt++] = sub_octet;
528 bits[cnt++] = sub_octet;
530 bits[cnt++] = 1 + sub_octet;
531 bits[cnt++] = 3 + sub_octet;
534 bits[cnt++] = sub_octet;
537 bits[cnt++] = 1 + sub_octet;
540 bits[cnt++] = 0 + sub_octet;
541 bits[cnt++] = 1 + sub_octet;
543 bits[cnt++] = 2 + sub_octet;
545 bits[cnt++] = 3 + sub_octet;
567 void operator()(
unsigned bit_idx) BMNOEXCEPT { *bp_++ = (B)bit_idx; }
570 unsigned bit_idx1) BMNOEXCEPT
572 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
578 unsigned bit_idx2) BMNOEXCEPT
580 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1; bp_[2] = (B)bit_idx2;
587 unsigned bit_idx3) BMNOEXCEPT
589 bp_[0] = (B)bit_idx0; bp_[1] = (B)bit_idx1;
590 bp_[2] = (B)bit_idx2; bp_[3] = (B)bit_idx3;
610 template<
typename T,
typename B>
615 return (
unsigned)(func.
ptr() - bits);
628 template<
typename T,
typename B>
633 return (
unsigned)(func.
ptr() - bits);
657 return (
unsigned short)pos;
679 return (
unsigned short)pos;
693 unsigned short pos = 0;
715 unsigned short pos = 0;
724 template<
typename B,
typename OT>
725 unsigned short bitscan_bsf(
unsigned w, B* bits, OT offs) BMNOEXCEPT
727 unsigned short pos = 0;
748 unsigned short pos = 0;
770 unsigned short pos = 0;
784 template<
typename V,
typename B>
785 unsigned short bitscan(V w, B* bits) BMNOEXCEPT
787 #if (defined(__arm__) || defined(__aarch64__)) 819 for (
unsigned count = 0; w; w >>=1ull, ++count)
821 rank -= unsigned(w & 1ull);
955 #if defined(BMI2_SELECT64) 956 return BMI2_SELECT64(w, rank);
958 #if defined(BMI1_SELECT64) 959 return BMI2_SELECT64(w, rank);
961 #if (defined(__arm__) || defined(__aarch64__)) 982 #if defined(BMI2_SELECT64) 983 return BMI2_SELECT64(w, rank);
985 #if defined(BMI1_SELECT64) 986 return BMI2_SELECT64(w, rank);
988 #if (defined(__arm__) || defined(__aarch64__)) 1025 for (
unsigned i = from; i <= to; ++i)
1026 m |= (1ull << (i / 1024));
1048 mask = (mask >> (63 - (digest_to - digest_from))) << digest_from;
1070 unsigned bitpos_from,
unsigned bitpos_to) BMNOEXCEPT
1073 return !(digest & mask);
1088 for (
unsigned i = 0; i < 64; ++i)
1092 #if defined(VECT_BLOCK_SET_DIGEST) 1097 block[off+j+0] = block[off+j+1] =
1098 block[off+j+2] = block[off+j+3] = mask;
1120 for (
unsigned i = 0; i < 64; ++i)
1123 #if defined(VECT_IS_DIGEST_ZERO) 1130 bm::word_t w = block[off+j+0] | block[off+j+1] |
1131 block[off+j+2] | block[off+j+3];
1134 digest0 |= (mask << i);
1169 #if defined(VECT_IS_DIGEST_ZERO) 1171 digest &= all_zero ? ~(mask << wave) : digest;
1180 src_u->w64[j+0] | src_u->w64[j+1] | src_u->w64[j+2] | src_u->w64[j+3];
1182 }
while ((j < bm::set_block_digest_wave_size/2) & !w64);
1183 digest &= w64 ? digest : ~(mask << wave);
1233 ::memset(_p, 0xFF,
sizeof(_p));
1236 const unsigned long long magic_mask = 0xFFFFfffeFFFFfffe;
1237 ::memcpy(&_p_fullp, &magic_mask,
sizeof(magic_mask));
1239 _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
1243 const unsigned magic_mask = 0xFFFFfffe;
1244 ::memcpy(&_p_fullp, &magic_mask,
sizeof(magic_mask));
1246 _s[i] = reinterpret_cast<bm::word_t*>(magic_mask);
1259 bm::id64_t w =
reinterpret_cast<unsigned long long>(bp);
1261 ((bp == _block._p) << 1);
1262 type = type ? type : w;
1266 unsigned w =
reinterpret_cast<unsigned long>(bp);
1268 ((bp == _block._p) << 1);
1269 type = type ? type : w;
1276 {
return (bp == _block._p || bp == _block._p_fullp); }
1280 {
return (bp && !(bp == _block._p || bp == _block._p_fullp)); }
1289 template<
typename W>
1303 template<
typename N>
1311 const unsigned unroll_factor = 4;
1312 const unsigned len = (size - start);
1313 const unsigned len_unr = len - (len % unroll_factor);
1317 for (k = 0; k < len_unr; k+=unroll_factor)
1328 *pos = k + start + 1;
1333 *pos = k + start + 2;
1338 *pos = k + start + 3;
1344 for (; k < len; ++k)
1353 for (; start < size; ++start)
1383 int res = (w1 & 1) - (w2 & 1);
1384 if (res != 0)
return res;
1411 return diff? ( (a & diff & -diff)? 1 : -1 ) : 0;
1428 #if defined(VECT_IS_ZERO_BLOCK) 1435 if (blk[0] | blk[1] | blk[2] | blk[3])
1438 }
while (blk < blk_end);
1493 template<
typename T>
1497 return glevel_len[(*buf >> 1) & 3];
1509 template<
typename T>
1513 return glevel_len[(*buf >> 1) & 3]-4;
1524 template<
typename T>
1527 return T((*buf >> 1) & 3u);
1541 template<
typename T>
1547 T is_set = (*buf) & 1u;
1548 T end = T((*buf) >> 3u);
1552 is_set ^= T((end-1) & 1u);
1572 template<
typename T>
1578 T is_set = (*buf) & 1u;
1586 *first = buf[1] + 1;
1600 template<
typename T>
1602 unsigned pos,
unsigned*
BMRESTRICT is_set) BMNOEXCEPT
1605 #undef VECT_GAP_BFIND // TODO: VECTOR bfind causes performance degradation 1606 #ifdef VECT_GAP_BFIND 1609 *is_set = (*buf) & 1;
1612 unsigned end = 1 + ((*buf) >> 3);
1614 while ( start != end )
1616 unsigned curr = (start + end) >> 1;
1617 if ( buf[curr] < pos )
1622 *is_set ^= ((start-1) & 1);
1635 template<
typename T>
1641 unsigned end = 1 + ((*buf) >> 3);
1642 if (end - start < 10)
1644 unsigned sv = *buf & 1;
1645 unsigned sv1= sv ^ 1;
1646 if (buf[1] >= pos)
return sv;
1647 if (buf[2] >= pos)
return sv1;
1648 if (buf[3] >= pos)
return sv;
1649 if (buf[4] >= pos)
return sv1;
1650 if (buf[5] >= pos)
return sv;
1651 if (buf[6] >= pos)
return sv1;
1652 if (buf[7] >= pos)
return sv;
1653 if (buf[8] >= pos)
return sv1;
1654 if (buf[9] >= pos)
return sv;
1659 while (start != end)
1661 unsigned curr = (start + end) >> 1;
1662 if (buf[curr] < pos)
1668 return ((*buf) & 1) ^ ((--start) & 1);
1678 template<
typename T>
1687 #if defined(BMSSE2OPT) 1690 #elif defined(BMSSE42OPT) 1693 #elif defined(BMAVX2OPT) 1705 template<
typename T,
typename N,
typename F>
1707 N top_size, N nb_from, N nb_to, F& f) BMNOEXCEPT
1710 if (nb_from > nb_to)
1715 unsigned j_to = unsigned(nb_to & bm::set_array_mask);
1717 if (i_from >= top_size)
1719 if (i_to >= top_size)
1721 i_to = unsigned(top_size-1);
1725 for (
unsigned i = i_from; i <= i_to; ++i)
1727 T** blk_blk = root[i];
1732 unsigned j = (i == i_from) ? j_from : 0;
1733 if (!j && (i != i_to))
1740 if ((i == i_to) && (j == j_to))
1747 unsigned j = (i == i_from) ? j_from : 0;
1752 if ((i == i_to) && (j == j_to))
1762 template<
class T,
class F>
1765 typedef typename F::size_type size_type;
1766 for (
unsigned i = 0; i < size1; ++i)
1768 T** blk_blk = root[i];
1774 f.on_non_empty_top(i);
1782 }
while (++j < bm::set_sub_array_size);
1786 unsigned non_empty_top = 0;
1791 #if defined(BM64_AVX2) || defined(BM64_AVX512) 1795 T* blk0 = blk_blk[j + 0];
1796 T* blk1 = blk_blk[j + 1];
1797 T* blk2 = blk_blk[j + 2];
1798 T* blk3 = blk_blk[j + 3];
1800 size_type block_idx = r + j + 0;
1804 f.on_empty_block(block_idx);
1807 f(blk1, block_idx + 1);
1809 f.on_empty_block(block_idx + 1);
1812 f(blk2, block_idx + 2);
1814 f.on_empty_block(block_idx + 2);
1817 f(blk3, block_idx + 3);
1819 f.on_empty_block(block_idx + 3);
1823 f.on_empty_block(r + j + 0); f.on_empty_block(r + j + 1);
1824 f.on_empty_block(r + j + 2); f.on_empty_block(r + j + 3);
1827 #elif defined(BM64_SSE4) 1831 T* blk0 = blk_blk[j + 0];
1832 T* blk1 = blk_blk[j + 1];
1834 size_type block_idx = r + j + 0;
1838 f.on_empty_block(block_idx);
1844 f.on_empty_block(block_idx);
1848 f.on_empty_block(r + j + 0);
1849 f.on_empty_block(r + j + 1);
1855 f(blk_blk[j], r + j);
1859 f.on_empty_block(r + j);
1862 }
while (j < bm::set_sub_array_size);
1864 if (non_empty_top == 0)
1871 template<
class T,
class F>
1875 for (
unsigned i = 0; i < size1; ++i)
1878 if ((blk_blk = root[i])!=0)
1884 f(FULL_BLOCK_FAKE_ADDR);
1892 w0 = _mm_loadu_si128((__m128i*)(blk_blk + j));
1893 if (!_mm_testz_si128(w0, w0))
1895 T* blk0 = blk_blk[j + 0];
1896 T* blk1 = blk_blk[j + 1];
1903 w0 = _mm_loadu_si128((__m128i*)(blk_blk + j + 2));
1904 if (!_mm_testz_si128(w0, w0))
1906 T* blk0 = blk_blk[j + 2];
1907 T* blk1 = blk_blk[j + 3];
1917 #elif defined(BM64_AVX2) || defined(BM64_AVX512) 1918 for (
unsigned i = 0; i < size1; ++i)
1921 if ((blk_blk = root[i]) != 0)
1927 f(FULL_BLOCK_FAKE_ADDR);
1934 __m256i w0 = _mm256_loadu_si256((__m256i*)(blk_blk + j));
1935 if (!_mm256_testz_si256(w0, w0))
1939 T* blk0 = blk_blk[j + 0];
1940 T* blk1 = blk_blk[j + 1];
1941 T* blk2 = blk_blk[j + 2];
1942 T* blk3 = blk_blk[j + 3];
1957 #else // non-SIMD mode 1958 for (
unsigned i = 0; i < size1; ++i)
1961 if ((blk_blk = root[i])!=0)
1967 f(FULL_BLOCK_FAKE_ADDR);
1993 template<
typename T,
typename BI,
typename F>
1997 for (BI i = 0; i < size1; ++i)
1999 T** blk_blk = root[i];
2009 if (f(FULL_BLOCK_FAKE_ADDR, block_idx))
2018 if (f(blk_blk[j], block_idx))
2027 template<
class T,
class F,
typename BLOCK_IDX>
2030 BLOCK_IDX block_idx = start;
2031 for (
unsigned i = 0; i < size1; ++i)
2033 T** blk_blk = root[i];
2040 f(FULL_BLOCK_FAKE_ADDR, block_idx);
2046 f(blk_blk[j], block_idx);
2069 }
while (first < last);
2075 template<
typename T>
2079 for (;first < last; ++first)
2093 template<
typename T>
2095 T* arr0, T* arr1, T& arr0_cnt, T& arr1_cnt) BMNOEXCEPT
2097 const T* pcurr = buf;
2098 unsigned len = (*pcurr >> 3);
2099 const T* pend = pcurr + len;
2103 unsigned is_set = (*buf & 1);
2109 arr1[cnt1] = *pcurr;
2114 arr0[cnt0] = *pcurr;
2121 while (pcurr <= pend)
2124 T delta = *pcurr - prev;
2153 template<
typename T>
2156 const T* pcurr = buf;
2158 dsize = (*pcurr >> 3);
2160 const T* pend = pcurr + dsize;
2162 unsigned bits_counter = 0;
2167 bits_counter += *pcurr + 1;
2170 for (++pcurr; pcurr <= pend; pcurr += 2)
2171 bits_counter += *pcurr - *(pcurr-1);
2172 return bits_counter;
2181 template<
typename T>
2184 const T* pcurr = buf;
2185 unsigned dsize = (*pcurr >> 3);
2189 T first_one = *buf & 1;
2197 #if defined(BMAVX2OPT) || defined(BMAVX512OPT) 2200 const unsigned unr_factor = 32;
2201 unsigned waves = (dsize-2) / unr_factor;
2204 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT) 2207 const unsigned unr_factor = 16;
2208 unsigned waves = (dsize - 2) / unr_factor;
2214 const unsigned unr_factor = 8;
2215 unsigned waves = (dsize - 2) / unr_factor;
2216 for (
unsigned i = 0; i < waves; i += unr_factor)
2218 cnt += pcurr[0] - pcurr[0 - 1];
2219 cnt += pcurr[2] - pcurr[2 - 1];
2220 cnt += pcurr[4] - pcurr[4 - 1];
2221 cnt += pcurr[6] - pcurr[6 - 1];
2223 pcurr += unr_factor;
2228 const T* pend = buf + dsize;
2229 for ( ; pcurr <= pend ; pcurr+=2)
2231 cnt += *pcurr - *(pcurr - 1);
2247 template<
typename T>
2249 unsigned left,
unsigned right) BMNOEXCEPT
2254 const T* pcurr = buf;
2255 const T* pend = pcurr + (*pcurr >> 3);
2257 unsigned bits_counter = 0;
2260 is_set = ~(is_set - 1u);
2262 pcurr = buf + start_pos;
2263 if (right <= *pcurr)
2265 bits_counter = unsigned(right - left + 1u) & is_set;
2266 return bits_counter;
2268 bits_counter += unsigned(*pcurr - left + 1u) & is_set;
2270 unsigned prev_gap = *pcurr++;
2271 for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
2273 bits_counter += (*pcurr - prev_gap) & is_set;
2275 return bits_counter;
2276 prev_gap = *pcurr++;
2278 bits_counter += unsigned(right - prev_gap) & is_set;
2279 return bits_counter;
2290 template<
typename T>
2292 unsigned left,
unsigned right) BMNOEXCEPT
2301 const T*
const pcurr = buf + start_pos;
2302 return (right <= *pcurr);
2313 template<
typename T>
2315 unsigned left,
unsigned right) BMNOEXCEPT
2322 const T*
const pcurr = buf + start_pos;
2326 if (right <= *pcurr)
2342 template<
typename T>
2344 unsigned left,
unsigned right) BMNOEXCEPT
2353 const T* pcurr = buf + start_pos;
2354 if (!is_set || (right != *pcurr) || (start_pos <= 1))
2357 if (*pcurr != left-1)
2371 template<
typename T>
2373 unsigned nbit,
unsigned*
BMRESTRICT pos) BMNOEXCEPT
2382 *pos = buf[start_pos];
2396 template<
typename T>
2398 unsigned nbit,
unsigned*
BMRESTRICT pos) BMNOEXCEPT
2411 *pos = buf[start_pos]+1;
2424 template<
typename T>
2426 unsigned nbit,
unsigned*
BMRESTRICT pos) BMNOEXCEPT
2442 *pos = buf[start_pos];
2461 template<
typename T,
typename SIZE_TYPE>
2465 unsigned& nbit_pos) BMNOEXCEPT
2470 const T* pcurr = block;
2471 const T* pend = pcurr + (*pcurr >> 3);
2473 unsigned bits_counter = 0;
2475 unsigned start_pos =
bm::gap_bfind(block, nbit_from, &is_set);
2476 is_set = ~(is_set - 1u);
2478 pcurr = block + start_pos;
2479 bits_counter += unsigned(*pcurr - nbit_from + 1u) & is_set;
2480 if (bits_counter >= rank)
2482 nbit_pos = nbit_from + unsigned(rank) - 1u;
2485 rank -= bits_counter;
2486 unsigned prev_gap = *pcurr++;
2487 for (is_set ^= ~0u; pcurr <= pend; is_set ^= ~0u)
2489 bits_counter = (*pcurr - prev_gap) & is_set;
2490 if (bits_counter >= rank)
2492 nbit_pos = prev_gap + unsigned(rank);
2495 rank -= bits_counter;
2496 prev_gap = *pcurr++;
2513 template<
typename T>
2515 bool is_corrected=
false) BMNOEXCEPT
2517 const T* pcurr = buf;
2518 const T* pend = pcurr + (*pcurr >> 3);
2520 unsigned bits_counter = 0;
2521 unsigned is_set = ~((unsigned(*buf) & 1u) - 1u);
2522 BM_ASSERT(is_set == 0u || is_set == ~0u);
2525 if (right <= *pcurr)
2527 bits_counter = (right + 1u) & is_set;
2528 bits_counter -= (is_set & unsigned(is_corrected));
2529 return bits_counter;
2531 bits_counter += (*pcurr + 1u) & is_set;
2533 unsigned prev_gap = *pcurr++;
2534 for (is_set ^= ~0u; right > *pcurr; is_set ^= ~0u)
2536 bits_counter += (*pcurr - prev_gap) & is_set;
2539 bits_counter -= (is_set & unsigned(is_corrected));
2540 return bits_counter;
2542 prev_gap = *pcurr++;
2544 bits_counter += (right - prev_gap) & is_set;
2545 bits_counter -= (is_set & unsigned(is_corrected));
2546 return bits_counter;
2559 template<
class T,
class Func>
2562 const T* pcurr = gap_buf;
2563 const T* pend = pcurr + (*pcurr >> 3);
2567 func((T)(prev + 1));
2571 func((T)(*pcurr - prev));
2573 }
while (++pcurr < pend);
2600 template<
typename T>
2602 T*
BMRESTRICT dgap_buf,
bool copy_head=
true) BMNOEXCEPT
2606 *dgap_buf++ = *gap_buf;
2610 for_each_dgap<T, d_copy_func<T> >(gap_buf, copy_func);
2626 template<
typename T>
2628 T*
BMRESTRICT gap_buf, T gap_header=0) BMNOEXCEPT
2630 const T* pcurr = dgap_buf;
2635 *gap_buf++ = *pcurr++;
2639 len = gap_header >> 3;
2640 *gap_buf++ = gap_header;
2643 const T* pend = pcurr + len;
2645 *gap_buf = *pcurr++;
2649 *gap_buf = T(*gap_buf - 1);
2651 for (++gap_buf; pcurr < pend; ++pcurr)
2653 T prev = *(gap_buf-1);
2654 *gap_buf++ = T(*pcurr + prev);
2668 template<
typename T>
2669 int gapcmp(
const T* buf1,
const T* buf2) BMNOEXCEPT
2671 const T* pcurr1 = buf1;
2672 const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2673 unsigned bitval1 = *buf1 & 1;
2676 const T* pcurr2 = buf2;
2677 unsigned bitval2 = *buf2 & 1;
2680 while (pcurr1 <= pend1)
2682 if (*pcurr1 == *pcurr2)
2684 if (bitval1 != bitval2)
2686 return (bitval1) ? 1 : -1;
2691 if (bitval1 == bitval2)
2695 return (*pcurr1 < *pcurr2) ? -1 : 1;
2699 return (*pcurr1 < *pcurr2) ? 1 : -1;
2704 return (bitval1) ? 1 : -1;
2724 template<
typename T>
2731 const T* pcurr1 = buf1;
2732 const T* pend1 = pcurr1 + (*pcurr1 >> 3);
2733 const T* pcurr2 = buf2;
2734 for (++pcurr1, ++pcurr2; pcurr1 <= pend1; ++pcurr1, ++pcurr2)
2736 if (*pcurr1 != *pcurr2)
2738 *pos = 1 + ((*pcurr1 < *pcurr2) ? *pcurr1 : *pcurr2);
2764 template<
typename T,
class F>
2767 unsigned vect1_mask,
2769 unsigned vect2_mask,
2772 const T* cur1 = vect1;
2773 const T* cur2 = vect2;
2775 T bitval1 = (T)((*cur1++ & 1) ^ vect1_mask);
2776 T bitval2 = (T)((*cur2++ & 1) ^ vect2_mask);
2778 T bitval = (T) F::op(bitval1, bitval2);
2779 T bitval_prev = bitval;
2785 T c1 = *cur1; T c2 = *cur2;
2788 bitval = (T) F::op(bitval1, bitval2);
2793 res += (bitval != bitval_prev);
2794 bitval_prev = bitval;
2814 bitval1 ^= 1; bitval2 ^= 1;
2820 dlen = (unsigned)(res - dest);
2821 *dest = (T)((*dest & 7) + (dlen << 3));
2913 template<
typename T,
class F>
2915 unsigned vect1_mask,
2919 const T* cur1 = vect1;
2920 const T* cur2 = vect2;
2922 unsigned bitval1 = (*cur1++ & 1) ^ vect1_mask;
2923 unsigned bitval2 = (*cur2++ & 1) ^ vect2_mask;
2925 unsigned bitval = F::op(bitval1, bitval2);
2928 unsigned bitval_prev = bitval;
2932 bitval = F::op(bitval1, bitval2);
2936 if (bitval != bitval_prev)
2937 bitval_prev = bitval;
2957 bitval1 ^= 1; bitval2 ^= 1;
2978 template<
typename T,
class F>
2982 const T* cur1 = vect1;
2983 const T* cur2 = vect2;
2985 unsigned bitval1 = (*cur1++ & 1);
2986 unsigned bitval2 = (*cur2++ & 1);
2987 unsigned bitval = count = F::op(bitval1, bitval2);
2988 unsigned bitval_prev = bitval;
2995 bitval = F::op(bitval1, bitval2);
2998 if (bitval != bitval_prev)
3000 bitval_prev = bitval;
3009 count += res - res_prev;
3012 ++cur1; bitval1 ^= 1;
3019 count += res - res_prev;
3032 bitval1 ^= 1; bitval2 ^= 1;
3044 #pragma GCC diagnostic push 3045 #pragma GCC diagnostic ignored "-Wconversion" 3060 template<
typename T>
3069 T end = (T)(*buf >> 3);
3077 T* pcurr = buf + curr;
3078 T* pprev = pcurr - 1;
3079 T* pend = buf + end;
3088 ::memmove(&buf[2], &buf[1], (end - 1) *
sizeof(
gap_word_t));
3094 pprev = buf + 1; pcurr = pprev + 1;
3099 if (curr > 1 && ((
unsigned)(*pprev))+1 == pos)
3102 if (*pprev == *pcurr)
3110 do { *pprev++ = *pcurr++; }
while (pcurr < pend);
3118 end += (pcurr == pend);
3123 ::memmove(pcurr+2, pcurr, (end - curr + 1)*(
sizeof(T)));
3125 pcurr[0] = (T)(pos-1);
3130 *buf = (T)((*buf & 7) + (end << 3));
3146 template<
typename T>
3149 unsigned pos) BMNOEXCEPT
3154 T end = (T)(*buf >> 3);
3158 T* pcurr = buf + curr;
3159 T* pprev = pcurr - 1;
3160 T* pend = buf + end;
3169 ::memmove(&buf[2], &buf[1], (end - 1) *
sizeof(
gap_word_t));
3175 pprev = buf + 1; pcurr = pprev + 1;
3180 if (curr > 1 && ((
unsigned)(*pprev))+1 == pos)
3183 if (*pprev == *pcurr)
3191 do { *pprev++ = *pcurr++; }
while (pcurr < pend);
3199 end += (pcurr == pend);
3204 ::memmove(pcurr+2, pcurr, (end - curr + 1)*(
sizeof(T)));
3206 pcurr[0] = (T)(pos-1);
3211 *buf = (T)((*buf & 7) + (end << 3));
3226 template<
typename T>
3231 T end = (T)(*buf >> 3);
3233 T* pcurr = buf + end;
3235 T* pprev = pcurr - 1;
3244 ::memmove(&buf[2], &buf[1], (end - 1) *
sizeof(
gap_word_t));
3250 pprev = buf + 1; pcurr = pprev + 1;
3252 do { *pprev++ = *pcurr++; }
while (pcurr < pend);
3255 else if (((
unsigned)(*pprev))+1 == pos && (curr > 1) )
3258 if (*pprev == *pcurr)
3264 else if (*pcurr == pos)
3267 end += (pcurr == pend);
3271 pcurr[0] = (T)(pos-1);
3277 *buf = (T)((*buf & 7) + (end << 3));
3283 #pragma GCC diagnostic pop 3296 template<
typename T>
3298 unsigned co_flag,
unsigned*
BMRESTRICT new_len) BMNOEXCEPT
3304 unsigned bitval = *buf & 1;
3311 unsigned len = (*buf >> 3);
3313 for (; i < len; ++i)
3323 *buf = (T)((*buf & 7) + (len << 3));
3345 template<
typename T>
3347 unsigned co_flag,
unsigned*
BMRESTRICT new_len) BMNOEXCEPT
3354 unsigned bitval = *buf & 1;
3364 BM_ASSERT(bitval ==
unsigned(*buf & 1u));
3376 unsigned len = (*buf >> 3);
3378 for (; i < len; ++i)
3404 template<
typename T>
3407 *buf = (T)((*buf & 6u) + (1u << 3));
3415 *pcurr = (T)(curr - 1);
3425 for (i = 1; i < len; ++i)
3428 if (curr == prev + 1)
3437 *pcurr++ = (T)(curr-1);
3448 unsigned gap_len = unsigned(pcurr - buf);
3449 BM_ASSERT(gap_len == ((gap_len << 3) >> 3));
3451 *buf = (T)((*buf & 7) + (gap_len << 3));
3465 template<
typename T>
3468 unsigned gap_count = 1;
3472 for (
unsigned i = 1; i < len; ++i)
3475 if (curr != prev + 1)
3497 template<
typename T>
3512 unsigned val = buf[gap_idx] + 1;
3525 void set_bit(
unsigned* dest,
unsigned bitpos) BMNOEXCEPT
3530 dest[nword] |= unsigned(1u << nbit);
3543 dest[nword] &= ~(unsigned(1u << nbit));
3552 unsigned test_bit(
const unsigned* block,
unsigned bitpos) BMNOEXCEPT
3557 return (block[nword] >> nbit) & 1u;
3570 void or_bit_block(
unsigned* dest,
unsigned bitpos,
unsigned bitcount) BMNOEXCEPT
3572 const unsigned maskFF = ~0u;
3579 *dest |= (1u << bitpos);
3585 unsigned mask_r = maskFF << bitpos;
3586 unsigned right_margin = bitpos + bitcount;
3587 if (right_margin < 32)
3589 *dest |= (maskFF >> (32 - right_margin)) & mask_r;
3593 bitcount -= 32 - bitpos;
3595 for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3596 dest[0] = dest[1] = maskFF;
3599 *dest++ = maskFF; bitcount -= 32;
3603 *dest |= maskFF >> (32 - bitcount);
3617 void sub_bit_block(
unsigned* dest,
unsigned bitpos,
unsigned bitcount) BMNOEXCEPT
3619 const unsigned maskFF = ~0u;
3626 *dest &= ~(1u << bitpos);
3632 unsigned mask_r = maskFF << bitpos;
3633 unsigned right_margin = bitpos + bitcount;
3634 if (right_margin < 32)
3636 *dest &= ~((maskFF >> (32 - right_margin)) & mask_r);
3640 bitcount -= 32 - bitpos;
3642 for ( ;bitcount >= 64; bitcount-=64, dest+=2)
3643 dest[0] = dest[1] = 0u;
3646 *dest++ = 0u; bitcount -= 32;
3650 *dest &= ~(maskFF >> (32 - bitcount));
3666 unsigned bitcount) BMNOEXCEPT
3676 *word ^= unsigned(1 << nbit);
3682 unsigned right_margin = nbit + bitcount;
3687 if (right_margin < 32)
3691 unsigned mask = mask_r & mask_l;
3696 bitcount -= 32 - nbit;
3699 for ( ;bitcount >= 64; bitcount-=64, word+=2)
3701 word[0] ^= ~0u; word[1] ^= ~0u;
3705 *word++ ^= ~0u; bitcount -= 32;
3721 template<
typename T>
3727 const T* pend = pcurr + (*pcurr >> 3);
3733 for (pcurr += 2; pcurr <= pend; pcurr += 2)
3750 template<
typename T>
3756 const T* pend = pcurr + (*pcurr >> 3);
3771 for (; pcurr <= pend; pcurr += 2)
3773 if (*pcurr >= start_pos)
3783 for (; pcurr <= pend; pcurr += 2)
3809 template<
typename T>
3815 const T* pend = pcurr + (*pcurr >> 3);
3821 for (pcurr += 2; pcurr <= pend; pcurr += 2)
3837 template<
typename T>
3839 const T*
BMRESTRICT pcurr,
unsigned len) BMNOEXCEPT
3843 const T* pend = pcurr + len;
3853 for (; pcurr <= pend; )
3856 pos = 1u + pcurr[-1];
3857 bc = *pcurr - pcurr[-1];
3871 template<
typename T>
3875 unsigned len = (*pcurr >> 3);
3887 template<
typename T>
3893 const T* pend = pcurr + (*pcurr >> 3);
3903 for (; pcurr <= pend; )
3906 pos = 1u + pcurr[-1];
3907 bc = *pcurr - pcurr[-1];
3922 template<
typename T>
3930 const T* pend = pcurr + (*pcurr >> 3);
3945 for (; pcurr <= pend; pcurr += 2)
3947 if (*pcurr >= start_pos)
3957 for (; pcurr <= pend; pcurr += 2)
3984 template<
typename T>
3989 const T* pend = pcurr + (*pcurr >> 3);
3996 for (pcurr +=2 ;pcurr <= pend; pcurr += 2)
4012 template<
typename T>
4018 const T* pend = pcurr + (*pcurr >> 3);
4025 for (pcurr +=2 ;!count && pcurr <= pend; pcurr += 2)
4042 template<
typename T>
4048 const T* pcurr = buf;
4049 const T* pend = pcurr + (*pcurr >> 3);
4061 for (;pcurr <= pend; pcurr+=2)
4077 template<
typename T>
4083 const T* pcurr = buf;
4084 const T* pend = pcurr + (*pcurr >> 3);
4098 for (; !count && pcurr <= pend; pcurr+=2)
4115 template<
typename T>
4121 const T* pcurr = buf;
4122 const T* pend = pcurr + (*pcurr >> 3);
4125 unsigned bitval = *buf & 1;
4130 count = *pcurr + 1 - count;
4133 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
4135 T prev = (T)(*(pcurr-1)+1);
4139 c = (*pcurr - prev + 1) - c;
4153 template<
typename T>
4159 const T* pcurr = buf;
4160 const T* pend = pcurr + (*pcurr >> 3);
4163 unsigned bitval = *buf & 1;
4167 count = *pcurr + 1 - count;
4169 for (bitval^=1, ++pcurr; !count && pcurr <= pend; bitval^=1, ++pcurr)
4171 T prev = (T)(*(pcurr-1)+1);
4175 c = (*pcurr - prev + 1) - c;
4191 template<
typename T>
4196 const T* pcurr = buf;
4197 const T* pend = pcurr + (*pcurr >> 3);
4200 unsigned bitval = *buf & 1;
4202 bm::id_t count = bitval ? *pcurr + 1
4204 for (bitval^=1, ++pcurr; pcurr <= pend; bitval^=1, ++pcurr)
4206 T prev = (T)(*(pcurr-1)+1);
4208 bitval ? (*pcurr - prev + 1)
4223 template<
typename T>
4260 template<
typename T>
4282 template<
typename T>
4285 id_t set_max) BMNOEXCEPT
4287 if (buf[1] == set_max - 1)
4303 template<
typename T>
4306 unsigned end = *buf >> 3;
4308 const T* pcurr = buf;
4309 const T* pend = pcurr + (*pcurr >> 3);
4317 while (pcurr <= pend)
4338 *buf = (T)((*buf & 6u) + (1u << 3) + value);
4339 *(++buf) = (T)(set_max - 1);
4364 if (to == set_max - 1)
4372 buf[2] = (T)(set_max - 1);
4373 buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
4380 if (to == set_max - 1)
4383 buf[1] = (T)(from - 1);
4384 buf[2] = (T)(set_max - 1);
4389 buf[1] = (T) (from - 1);
4391 buf[3] = (T)(set_max - 1);
4393 buf[0] = (T)((*buf & 6u) + (gap_len << 3) + value);
4410 #pragma GCC diagnostic push 4411 #pragma GCC diagnostic ignored "-Wconversion" 4421 template<
typename T>
4427 *buf = (T)(((level & 3) << 1) | (*buf & 1) | (*buf & ~7));
4430 #pragma GCC diagnostic pop 4443 template<
typename T>
4446 if (len <=
unsigned(glevel_len[0]-4))
return 0;
4447 if (len <=
unsigned(glevel_len[1]-4))
return 1;
4448 if (len <=
unsigned(glevel_len[2]-4))
return 2;
4449 if (len <=
unsigned(glevel_len[3]-4))
return 3;
4464 template<
typename T>
4470 return capacity - len;
4482 template<
typename T>
4483 int bitcmp(
const T* buf1,
const T* buf2,
unsigned len) BMNOEXCEPT
4486 const T* pend1 = buf1 + len;
4493 return (w1 & diff & -diff) ? 1 : -1;
4494 }
while (buf1 < pend1);
4513 #ifdef VECT_BIT_FIND_DIFF 4533 *pos = unsigned(idx + (i * 8u *
unsigned(
sizeof(
bm::wordop_t))));
4545 *pos = unsigned(idx + (i * 8u *
sizeof(
bm::word_t)));
4570 unsigned dest_len) BMNOEXCEPT
4576 unsigned bitval = (*block) & 1u;
4579 unsigned bit_idx = 0;
4583 unsigned val = *block;
4584 while (!val || val == ~0u)
4586 if (bitval !=
unsigned(
bool(val)))
4590 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4593 bit_idx += unsigned(
sizeof(*block) * 8);
4594 if (++block >= block_end)
4601 unsigned bits_consumed = 0;
4605 if (bitval != (val & 1u))
4609 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4619 bits_consumed += tz;
4625 if (bits_consumed < 32u)
4629 bit_idx += 32u - bits_consumed;
4630 BM_ASSERT((pcurr-1) == (dest+1) || *(pcurr-1) > *(pcurr-2));
4637 }
while(++block < block_end);
4641 unsigned len = (unsigned)(pcurr - dest);
4642 *dest = (
gap_word_t)((*dest & 7) + (len << 3));
4655 unsigned dest_len) BMNOEXCEPT
4657 #if defined(VECT_BIT_TO_GAP) 4669 template<
class T,
class F>
4672 const T* pcurr = buf;
4673 const T* pend = pcurr + (*pcurr >> 3);
4683 unsigned to = *pcurr;
4684 for (
unsigned i = 0; i <= to; ++i)
4697 while (pcurr <= pend)
4699 unsigned from = *(pcurr-1)+1;
4700 unsigned to = *pcurr;
4703 func(from - prev + first_inc);
4711 for (
unsigned i = from+1; i <= to; ++i)
4724 template<
typename D,
typename T>
4728 bool invert =
false) BMNOEXCEPT
4731 const T* pend = pcurr + (*pcurr >> 3);
4736 int bitval = (*buf) & 1;
4742 if (
unsigned(*pcurr + 1) >= dest_len)
4755 while (pcurr <= pend)
4757 unsigned pending = *pcurr - *(pcurr-1);
4758 if (pending >= dest_len)
4760 dest_len -= pending;
4761 T from = (T)(*(pcurr-1)+1);
4763 for (T i = from; ;++i)
4770 return (D) (dest_curr - dest);
4792 #if defined(BM_USE_GCC_BUILD) 4793 count += unsigned(__builtin_popcountll(x) + __builtin_popcountll(y)
4794 + __builtin_popcountll(u) + __builtin_popcountll(v));
4806 }
while (block < block_end);
4817 }
while (block < block_end);
4856 #ifdef VECT_BIT_COUNT_DIGEST 4879 #if defined(BM_USE_GCC_BUILD) 4880 count += unsigned(__builtin_popcountll(x) + __builtin_popcountll(y)
4881 + __builtin_popcountll(u) + __builtin_popcountll(v));
4893 }
while (j < bm::set_block_digest_wave_size/2);
4894 #else // 32-bit version 4906 }
while (blk < blk_end);
4934 count -= (w >> ((
sizeof(w) * 8) - 1));
4946 unsigned gap_count = 1;
4951 const int w_shift = int(
sizeof(w) * 8 - 1);
4954 gap_count -= (w_prev = (w0 >> w_shift));
4957 for (++block; block < block_end; ++block)
4963 gap_count -= !w_prev;
4972 gap_count -= (w0 >> w_shift);
4973 gap_count -= !(w_prev ^ w_l);
4975 w_prev = (w0 >> w_shift);
4998 #ifdef VECT_BLOCK_CHANGE_BC 5021 #if defined(VECT_BLOCK_CHANGE) 5040 unsigned nword, nbit, bitcount, temp;
5045 return (*word >> nbit) & 1u;
5049 unsigned right_margin = nbit + right - left;
5050 if (right_margin < 32)
5054 unsigned mask = mask_r & mask_l;
5055 return mask == (*word & mask);
5059 temp = *word & mask_r;
5062 bitcount = (right - left + 1u) - (32 - nbit);
5067 bitcount = right - left + 1u;
5074 #if defined(BM64OPT) || defined(BM64_SSE4) || defined(BMAVX2OPT) || defined(BMAVX512OPT) 5076 for ( ;bitcount >= 128; bitcount-=128, word+=4)
5080 if ((w64_0 ^ maskFF64) | (w64_1 ^ maskFF64))
5084 for ( ;bitcount >= 128; bitcount-=128, word+=4)
5086 bm::word_t m = (word[0] != maskFF) || (word[1] != maskFF) |
5087 (word[2] != maskFF) || (word[3] != maskFF);
5093 for ( ;bitcount >= 32; bitcount-=32, ++word)
5095 if (*word != maskFF)
5103 temp = *word & mask_l;
5129 unsigned nword, nbit, bitcount, count;
5135 return (*word >> nbit) & 1u;
5139 bitcount = right - left + 1u;
5142 unsigned right_margin = nbit + right - left;
5143 if (right_margin < 32)
5147 unsigned mask = mask_r & mask_l;
5152 bitcount -= 32 - nbit;
5158 #if defined(BM64_SSE4) || defined(BM64_AVX2) || defined(BM64_AVX512) 5159 for ( ;bitcount >= 64; bitcount-=64)
5162 count += unsigned(_mm_popcnt_u64(w64));
5171 for ( ;bitcount >= 32; bitcount-=32, ++word)
5200 unsigned bitcount = right + 1;
5203 #if defined(BMAVX2OPT) || defined(BMAVX512OPT) 5206 __m256i cnt = _mm256_setzero_si256();
5209 for ( ;bitcount >= 256; bitcount -= 256)
5211 const __m256i* src = (__m256i*)block;
5212 __m256i xmm256 = _mm256_load_si256(src);
5214 cnt = _mm256_add_epi64(cnt, bc);
5219 count += (unsigned)(cnt64[0] + cnt64[1] + cnt64[2] + cnt64[3]);
5222 for ( ;bitcount >= 64; bitcount -= 64)
5254 block[i] = (block[i] << 1) | (block[i + 1] >> 31);
5268 const unsigned unroll_factor = 4;
5274 w0 = block[i + 1] >> 31;
5275 w1 = block[i + 2] >> 31;
5276 w2 = block[i + 3] >> 31;
5277 w3 = block[i + 4] >> 31;
5279 block[0 + i] = (block[0 + i] << 1) | w0;
5280 block[1 + i] = (block[1 + i] << 1) | w1;
5281 block[2 + i] = (block[2 + i] << 1) | w2;
5282 block[3 + i] = (block[3 + i] << 1) | w3;
5284 block[i] = (block[i] << 1) | (block[i + 1] >> 31);
5285 block[i + 1] = (block[i + 1] << 1) | (block[i + 2] >> 31);
5286 block[i + 2] = (block[i + 2] << 1) | (block[i + 3] >> 31);
5302 unsigned bitpos,
bool value) BMNOEXCEPT
5319 block[nword] = w | (unsigned(value) << nbit) | wl;
5331 w = (w << 1u) | co_flag;
5362 acc |= w = (w << 1u) | co_flag;
5386 #if defined(BM64OPT) 5398 acc0 |= w = (w << 1u) | co_flag;
5404 acc1 |= w = (w << 1u) | co_flag;
5409 *empty_acc = bool(acc0 | acc1);
5433 #if defined(VECT_SHIFT_R1) 5463 acc |= w = (w >> 1u) | (co_flag << 31u);
5485 unsigned co_flag) BMNOEXCEPT
5491 w0 = block[i]; w1 = block[i-1];
5493 acc |= w0 = (w0 >> 1u) | (co_flag << 31u);
5497 acc |= w1 = (w1 >> 1u) | (co_flag << 31u);
5502 w0 = block[i]; w1 = block[i-1];
5504 acc |= w0 = (w0 >> 1u) | (co_flag << 31u);
5508 acc |= w1 = (w1 >> 1u) | (co_flag << 31u);
5533 #if defined(VECT_SHIFT_L1) 5552 bool carry_over) BMNOEXCEPT
5573 w = (w >> 1u) | (co_flag << 31u);
5585 w |= wl | (co_flag << 31u);
5590 block[nword] = (block[nword] >> 1u) | (co_flag << 31u);
5625 for (; di < 64; ++di)
5638 w = (w << 1u) | co_flag;
5639 acc |= block[i] = w & mask_block[i];
5647 BM_ASSERT(block[d_base + bm::set_block_digest_wave_size -1]==0);
5655 block[d_base] = co_flag & mask_block[d_base];
5687 #if defined(VECT_SHIFT_R1_AND) 5709 unsigned nbit = left;
5717 return (*word >> nbit) & 1;
5720 unsigned bitcount = right - left + 1;
5724 unsigned right_margin = nbit + (right - left);
5725 if (right_margin < 32)
5729 unsigned mask = mask_r & mask_l;
5730 return *word & mask;
5735 acc = *word & mask_r;
5738 bitcount -= 32 - nbit;
5746 for ( ;bitcount >= 128; bitcount-=128, word+=4)
5748 acc = word[0] | word[1] | word[2] | word[3];
5754 for ( ;bitcount >= 32; bitcount -= 32)
5762 acc |= (*word) & mask_l;
5772 template<
typename T>
5782 start[0] = ~start[0];
5783 start[1] = ~start[1];
5784 start[2] = ~start[2];
5785 start[3] = ~start[3];
5787 }
while (start < end);
5799 #if defined(BMSSE42OPT) || defined(BMAVX2OPT) 5807 start[0] & start[1] & start[2] & start[3];
5811 }
while (start < end);
5824 unsigned left,
unsigned right) BMNOEXCEPT
5845 unsigned left,
unsigned right) BMNOEXCEPT
5852 bool is_left, is_right, all_one;
5865 if (is_left ==
false)
5869 if (is_right ==
false)
5894 unsigned nbit,
unsigned*
BMRESTRICT pos) BMNOEXCEPT
5902 w &= (1u << bit_pos);
5917 w = (~block[nword]) >> bit_pos;
5922 *pos = unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t)))-1);
5932 *pos = unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t)))-1);
5990 unsigned nbit,
unsigned*
BMRESTRICT pos) BMNOEXCEPT
5998 w &= (1u << bit_pos);
6014 w = (~block[nword]) & mask_l;
6018 *pos = unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t)))+1);
6024 for (--nword;
true; --nword)
6030 *pos = unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t)))+1);
6054 unsigned nbit,
unsigned*
BMRESTRICT pos) BMNOEXCEPT
6062 w &= (1u << bit_pos);
6065 *pos = unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t))));
6077 w = block[nword] & mask_l;
6081 *pos = unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t))));
6087 for (--nword;
true; --nword)
6094 unsigned(bit_pos + (nword * 8u *
unsigned(
sizeof(
bm::word_t))));
6123 if (b && *found_nbit == 0)
6134 if (b && *found_nbit == 0)
6162 *found_nbit = nbit_from;
6178 unsigned left,
unsigned right) BMNOEXCEPT
6237 unsigned& dsize) BMNOEXCEPT
6239 bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
6240 tmp_buf, vect1, 0, vect2, 0, dsize);
6262 return gap_buff_any_op<bm::gap_word_t, bm::and_func>(vect1, 0, vect2, 0);
6279 return bm::gap_buff_count_op<bm::gap_word_t, bm::and_func>(vect1, vect2);
6304 unsigned& dsize) BMNOEXCEPT
6306 bm::gap_buff_op<bm::gap_word_t, bm::xor_func>(
6307 tmp_buf, vect1, 0, vect2, 0, dsize);
6344 return gap_buff_any_op<bm::gap_word_t, bm::xor_func>(vect1, 0, vect2, 0);
6360 return bm::gap_buff_count_op<bm::gap_word_t, bm::xor_func>(vect1, vect2);
6385 unsigned& dsize) BMNOEXCEPT
6387 bm::gap_buff_op<bm::gap_word_t, bm::and_func>(tmp_buf, vect1, 1, vect2, 1, dsize);
6405 return gap_buff_count_op<bm::gap_word_t, bm::or_func>(vect1, vect2);
6431 unsigned& dsize) BMNOEXCEPT
6433 bm::gap_buff_op<bm::gap_word_t, bm::and_func>(
6434 tmp_buf, vect1, 0, vect2, 1, dsize);
6458 bm::gap_buff_any_op<bm::gap_word_t, bm::and_func>(
6459 vect1, 0, vect2, 1);
6476 return bm::gap_buff_count_op<bm::gap_word_t, bm::sub_func>(vect1, vect2);
6516 #ifdef VECT_STREAM_BLOCK 6552 for (
unsigned i = 0; i < arr_sz; i+=4)
6554 acc |= dst_u->w64[i] &= src_u->w64[i];
6555 acc |= dst_u->w64[i+1] &= src_u->w64[i+1];
6556 acc |= dst_u->w64[i+2] &= src_u->w64[i+2];
6557 acc |= dst_u->w64[i+3] &= src_u->w64[i+3];
6592 #if defined(VECT_AND_DIGEST) 6595 digest &= ~(mask << wave);
6604 acc |= dst_u->w64[j+0] &= src_u->w64[j+0];
6605 acc |= dst_u->w64[j+1] &= src_u->w64[j+1];
6606 acc |= dst_u->w64[j+2] &= src_u->w64[j+2];
6607 acc |= dst_u->w64[j+3] &= src_u->w64[j+3];
6609 }
while (j < bm::set_block_digest_wave_size/2);
6612 digest &= ~(mask << wave);
6638 BM_ASSERT(src0 && src1 && src2 && src3);
6649 #if defined(VECT_AND_DIGEST_5WAY) 6650 bool all_zero =
VECT_AND_DIGEST_5WAY(&dst[off], &src0[off], &src1[off], &src2[off], &src3[off]);
6652 digest &= ~(mask << wave);
6664 acc |= dst_u->w64[j + 0] &= src_u0->w64[j + 0] & src_u1->w64[j + 0] & src_u2->w64[j + 0] & src_u3->w64[j + 0];
6665 acc |= dst_u->w64[j + 1] &= src_u0->w64[j + 1] & src_u1->w64[j + 1] & src_u2->w64[j + 1] & src_u3->w64[j + 1];
6666 acc |= dst_u->w64[j + 2] &= src_u0->w64[j + 2] & src_u1->w64[j + 2] & src_u2->w64[j + 2] & src_u3->w64[j + 2];
6667 acc |= dst_u->w64[j + 3] &= src_u0->w64[j + 3] & src_u1->w64[j + 3] & src_u2->w64[j + 3] & src_u3->w64[j + 3];
6669 }
while (j < bm::set_block_digest_wave_size / 2);
6672 digest &= ~(mask << wave);
6715 #if defined(VECT_AND_DIGEST_2WAY) 6718 digest &= ~(mask << wave);
6731 acc |= dst_u->w64[j+0] = src_u1->w64[j+0] & src_u2->w64[j+0];
6732 acc |= dst_u->w64[j+1] = src_u1->w64[j+1] & src_u2->w64[j+1];
6733 acc |= dst_u->w64[j+2] = src_u1->w64[j+2] & src_u2->w64[j+2];
6734 acc |= dst_u->w64[j+3] = src_u1->w64[j+3] & src_u2->w64[j+3];
6736 }
while (j < bm::set_block_digest_wave_size/2);
6739 digest &= ~(mask << wave);
6781 }
while (b1 < b1_end);
6792 }
while (src1 < src1_end);
6816 count = (src1[0] & src2[0]) |
6817 (src1[1] & src2[1]) |
6818 (src1[2] & src2[2]) |
6819 (src1[3] & src2[3]);
6822 }
while ((src1 < src1_end) && !count);
6860 }
while (b1 < b1_end);
6871 }
while (src1 < src1_end);
6895 count = (src1[0] ^ src2[0]) |
6896 (src1[1] ^ src2[1]) |
6897 (src1[2] ^ src2[2]) |
6898 (src1[3] ^ src2[3]);
6901 }
while (!count && (src1 < src1_end));