|
#define | BM_CSA256(h, l, a, b, c) |
| More...
|
|
#define | BM_AVX2_BIT_COUNT(ret, v) |
| More...
|
|
#define | BM_AVX2_DECL_LOOKUP1 |
| More...
|
|
#define | BM_AVX2_DECL_LOOKUP2 |
| More...
|
|
#define | BM_AVX2_POPCNT_PROLOG |
| More...
|
|
#define | VECT_XOR_ARR_2_MASK(dst, src, src_end, mask) avx2_xor_arr_2_mask((__m256i*)(dst), (__m256i*)(src), (__m256i*)(src_end), (bm::word_t)mask) |
|
#define | VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask) avx2_andnot_arr_2_mask((__m256i*)(dst), (__m256i*)(src), (__m256i*)(src_end), (bm::word_t)mask) |
|
#define | VECT_BITCOUNT(first, last) avx2_bit_count((__m256i*) (first), (__m256i*) (last)) |
|
#define | VECT_BITCOUNT_AND(first, last, mask) avx2_bit_count_and((__m256i*) (first), (__m256i*) (last), (__m256i*) (mask)) |
|
#define | VECT_BITCOUNT_OR(first, last, mask) avx2_bit_count_or((__m256i*) (first), (__m256i*) (last), (__m256i*) (mask)) |
|
#define | VECT_BITCOUNT_XOR(first, last, mask) avx2_bit_count_xor((__m256i*) (first), (__m256i*) (last), (__m256i*) (mask)) |
|
#define | VECT_BITCOUNT_SUB(first, last, mask) avx2_bit_count_sub((__m256i*) (first), (__m256i*) (last), (__m256i*) (mask)) |
|
#define | VECT_INVERT_BLOCK(first) avx2_invert_block((__m256i*)first); |
|
#define | VECT_AND_BLOCK(dst, src) avx2_and_block((__m256i*) dst, (const __m256i*) (src)) |
|
#define | VECT_AND_DIGEST(dst, src) avx2_and_digest((__m256i*) dst, (const __m256i*) (src)) |
|
#define | VECT_AND_DIGEST_2WAY(dst, src1, src2) avx2_and_digest_2way((__m256i*) dst, (const __m256i*) (src1), (const __m256i*) (src2)) |
|
#define | VECT_AND_OR_DIGEST_2WAY(dst, src1, src2) avx2_and_or_digest_2way((__m256i*) dst, (const __m256i*) (src1), (const __m256i*) (src2)) |
|
#define | VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4) avx2_and_digest_5way((__m256i*) dst, (const __m256i*) (src1), (const __m256i*) (src2), (const __m256i*) (src3), (const __m256i*) (src4)) |
|
#define | VECT_OR_BLOCK(dst, src) avx2_or_block((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_OR_BLOCK_3WAY(dst, src1, src2) avx2_or_block_3way((__m256i*) dst, (__m256i*) (src1), (__m256i*) (src2)) |
|
#define | VECT_OR_BLOCK_2WAY(dst, src1, src2) avx2_or_block_2way((__m256i*) dst, (__m256i*) (src1), (__m256i*) (src2)) |
|
#define | VECT_OR_BLOCK_3WAY(dst, src1, src2) avx2_or_block_3way((__m256i*) dst, (__m256i*) (src1), (__m256i*) (src2)) |
|
#define | VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4) avx2_or_block_5way((__m256i*) dst, (__m256i*) (src1), (__m256i*) (src2), (__m256i*) (src3), (__m256i*) (src4)) |
|
#define | VECT_SUB_BLOCK(dst, src) avx2_sub_block((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_SUB_DIGEST(dst, src) avx2_sub_digest((__m256i*) dst, (const __m256i*) (src)) |
|
#define | VECT_SUB_DIGEST_2WAY(dst, src1, src2) avx2_sub_digest_2way((__m256i*) dst, (const __m256i*) (src1), (const __m256i*) (src2)) |
|
#define | VECT_XOR_BLOCK(dst, src) avx2_xor_block((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_XOR_BLOCK_2WAY(dst, src1, src2) avx2_xor_block_2way((__m256i*) dst, (__m256i*) (src1), (__m256i*) (src2)) |
|
#define | VECT_COPY_BLOCK(dst, src) avx2_copy_block((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_COPY_BLOCK_UNALIGN(dst, src) avx2_copy_block_unalign((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_STREAM_BLOCK(dst, src) avx2_stream_block((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_STREAM_BLOCK_UNALIGN(dst, src) avx2_stream_block_unalign((__m256i*) dst, (__m256i*) (src)) |
|
#define | VECT_SET_BLOCK(dst, value) avx2_set_block((__m256i*) dst, (value)) |
|
#define | VECT_IS_ZERO_BLOCK(dst) avx2_is_all_zero((__m256i*) dst) |
|
#define | VECT_IS_ONE_BLOCK(dst) avx2_is_all_one((__m256i*) dst) |
|
#define | VECT_IS_DIGEST_ZERO(start) avx2_is_digest_zero((__m256i*)start) |
|
#define | VECT_BLOCK_SET_DIGEST(dst, val) avx2_block_set_digest((__m256i*)dst, val) |
|
#define | VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to) avx2_lower_bound_scan_u32(arr, target, from, to) |
|
#define | VECT_SHIFT_L1(b, acc, co) avx2_shift_l1((__m256i*)b, acc, co) |
|
#define | VECT_SHIFT_R1(b, acc, co) avx2_shift_r1((__m256i*)b, acc, co) |
|
#define | VECT_SHIFT_R1_AND(b, co, m, digest) avx2_shift_r1_and((__m256i*)b, co, (__m256i*)m, digest) |
|
#define | VECT_ARR_BLOCK_LOOKUP(idx, size, nb, start) avx2_idx_arr_block_lookup(idx, size, nb, start) |
|
#define | VECT_SET_BLOCK_BITS(block, idx, start, stop) avx2_set_block_bits3(block, idx, start, stop) |
|
#define | VECT_BLOCK_CHANGE(block, size) avx2_bit_block_calc_change((__m256i*)block, size) |
|
#define | VECT_BLOCK_XOR_CHANGE(block, xor_block, size, gc, bc) avx2_bit_block_calc_xor_change((__m256i*)block, (__m256i*)xor_block, size, gc, bc) |
|
#define | VECT_BLOCK_CHANGE_BC(block, gc, bc) avx2_bit_block_calc_change_bc((__m256i*)block, gc, bc) |
|
#define | VECT_BIT_TO_GAP(dest, src, dest_len) avx2_bit_to_gap(dest, src, dest_len) |
|
#define | VECT_BIT_FIND_FIRST(src1, pos) avx2_bit_find_first((__m256i*) src1, pos) |
|
#define | VECT_BIT_FIND_DIFF(src1, src2, pos) avx2_bit_find_first_diff((__m256i*) src1, (__m256i*) (src2), pos) |
|
#define | VECT_BIT_BLOCK_XOR(t, src, src_xor, d) avx2_bit_block_xor(t, src, src_xor, d) |
|
#define | VECT_BIT_BLOCK_XOR_2WAY(t, src_xor, d) avx2_bit_block_xor_2way(t, src_xor, d) |
|
#define | VECT_GAP_BFIND(buf, pos, is_set) avx2_gap_bfind(buf, pos, is_set) |
|
#define | VECT_BIT_COUNT_DIGEST(blk, d) avx2_bit_block_count(blk, d) |
|
|
bm::id_t | bm::avx2_bit_count (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end) |
| AVX2 Harley-Seal popcount. The algorithm is based on the paper "Faster Population Counts
using AVX2 Instructions" by Daniel Lemire, Nathan Kurz and Wojciech Mula (23 Nov 2016). More...
|
|
bm::id_t | bm::avx2_bit_block_count (const bm::word_t *const block, bm::id64_t digest) |
| Calculate population count based on digest. More...
|
|
bm::id_t | bm::avx2_bit_count_and (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| AND bit count for two aligned bit-blocks. More...
|
|
bm::id_t | bm::avx2_bit_count_or (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| More...
|
|
bm::id_t | bm::avx2_bit_count_xor (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| XOR bit count for two aligned bit-blocks. More...
|
|
bm::id_t | bm::avx2_bit_count_sub (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT block_end, const __m256i *BMRESTRICT mask_block) |
| AND NOT bit count for two aligned bit-blocks. More...
|
|
void | bm::avx2_xor_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask) |
| XOR array elements with the specified mask: dst = *src ^ mask. More...
|
|
void | bm::avx2_andnot_arr_2_mask (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end, bm::word_t mask) |
| Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask. More...
|
|
unsigned | bm::avx2_and_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AND array elements against another array dst &= *src. More...
|
|
bool | bm::avx2_and_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AND block digest stride dst &= *src. More...
|
|
bool | bm::avx2_and_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| AND block digest stride 2 way dst = *src1 & *src2. More...
|
|
bool | bm::avx2_and_or_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| AND-OR block digest stride 2 way dst |= *src1 & *src2. More...
|
|
bool | bm::avx2_and_digest_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
| AND block digest stride. More...
|
|
unsigned | bm::avx2_and_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end) |
| AND array elements against another array (unaligned) dst &= *src. More...
|
|
bool | bm::avx2_or_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| OR array elements against another array dst |= *src. More...
|
|
bool | bm::avx2_or_arr_unal (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end) |
| OR array elements against another unaligned array dst |= *src. More...
|
|
bool | bm::avx2_or_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| OR 2 arrays and copy to the destination: dst = *src1 | *src2. More...
|
|
bool | bm::avx2_or_block_3way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| OR array elements against another 2 arrays: dst |= *src1 | *src2. More...
|
|
bool | bm::avx2_or_block_5way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2, const __m256i *BMRESTRICT src3, const __m256i *BMRESTRICT src4) |
| OR array elements against another 4 arrays: dst |= *src1 | *src2 | *src3 | *src4. More...
|
|
unsigned | bm::avx2_xor_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| XOR block against another dst ^= *src. More...
|
|
unsigned | bm::avx2_xor_block_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| 3-operand XOR: dst = *src1 ^ *src2. More...
|
|
unsigned | bm::avx2_sub_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AND-NOT (SUB) array elements against another array dst &= ~*src. More...
|
|
bool | bm::avx2_sub_digest (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| SUB (AND NOT) block digest stride dst &= ~*src. More...
|
|
bool | bm::avx2_sub_digest_2way (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src1, const __m256i *BMRESTRICT src2) |
| 2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2 More...
|
|
BMFORCEINLINE void | bm::avx2_set_block (__m256i *BMRESTRICT dst, bm::word_t value) |
| AVX2 block memset dst = value. More...
|
|
void | bm::avx2_copy_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy dst = *src. More...
|
|
void | bm::avx2_copy_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy (unaligned SRC) dst = *src. More...
|
|
void | bm::avx2_stream_block (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy dst = *src. More...
|
|
void | bm::avx2_stream_block_unalign (__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src) |
| AVX2 block copy (unaligned SRC) dst = *src. More...
|
|
void | bm::avx2_invert_block (__m256i *BMRESTRICT dst) |
| Invert bit-block: dst = ~*dst (equivalent to XOR of *dst with an all-ones mask). More...
|
|
bool | bm::avx2_is_all_zero (const __m256i *BMRESTRICT block) |
| check if block is all zero bits More...
|
|
bool | bm::avx2_is_digest_zero (const __m256i *BMRESTRICT block) |
| check if digest stride is all zero bits More...
|
|
void | bm::avx2_block_set_digest (__m256i *dst, unsigned value) |
| set digest stride to 0xFF.. or 0x0 value More...
|
|
bool | bm::avx2_is_all_one (const __m256i *BMRESTRICT block) |
| check if block is all one bits More...
|
|
BMFORCEINLINE bool | bm::avx2_test_all_one_wave (const void *ptr) |
| check if wave of pointers is all 0xFF.. More...
|
|
BMFORCEINLINE bool | bm::avx2_test_all_zero_wave (const void *ptr) |
| check if wave of pointers is all NULL More...
|
|
BMFORCEINLINE bool | bm::avx2_test_all_zero_wave2 (const void *ptr0, const void *ptr1) |
| check if 2 wave of pointers are all NULL More...
|
|
BMFORCEINLINE bool | bm::avx2_test_all_eq_wave2 (const void *ptr0, const void *ptr1) |
| check if 2 wave of pointers are all the same (NULL or FULL) More...
|
|
bool | bm::avx2_shift_l1 (__m256i *block, bm::word_t *empty_acc, unsigned co1) |
| block shift left by 1 More...
|
|
bool | bm::avx2_shift_r1 (__m256i *block, bm::word_t *empty_acc, unsigned co1) |
| block shift right by 1 More...
|
|
bool | bm::avx2_shift_r1_and (__m256i *BMRESTRICT block, bm::word_t co1, const __m256i *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest) |
| fused block shift right by 1 plus AND More...
|
|
unsigned | bm::avx2_bit_block_calc_change (const __m256i *BMRESTRICT block, unsigned size) |
| More...
|
|
void | bm::avx2_bit_block_calc_xor_change (const __m256i *BMRESTRICT block, const __m256i *BMRESTRICT xor_block, unsigned size, unsigned *BMRESTRICT gcount, unsigned *BMRESTRICT bcount) |
| More...
|
|
void | bm::avx2_bit_block_calc_change_bc (const __m256i *BMRESTRICT block, unsigned *gcount, unsigned *bcount) |
| More...
|
|
bool | bm::avx2_bit_find_first_diff (const __m256i *BMRESTRICT block1, const __m256i *BMRESTRICT block2, unsigned *pos) |
| Find first bit which is different between two bit-blocks. More...
|
|
bool | bm::avx2_bit_find_first (const __m256i *BMRESTRICT block, unsigned *pos) |
| Find first bit set. More...
|
|
const bm::gap_word_t * | bm::avx2_gap_sum_arr (const bm::gap_word_t *pbuf, unsigned avx_vect_waves, unsigned *sum) |
| More...
|
|
unsigned | bm::avx2_idx_arr_block_lookup (const unsigned *idx, unsigned size, unsigned nb, unsigned start) |
| More...
|
|
void | bm::avx2_set_block_bits (bm::word_t *BMRESTRICT block, const unsigned *BMRESTRICT idx, unsigned start, unsigned stop) |
| More...
|
|
BMFORCEINLINE __m256i | bm::avx2_setbit_256 (__m256i target, __m256i source) |
| Set bits in an AVX target, by indexes (int4) from the source. More...
|
|
void | bm::avx2_set_block_bits2 (bm::word_t *BMRESTRICT block, const unsigned *BMRESTRICT idx, unsigned start, unsigned stop) |
| Experimental code to set bits via AVX strides. More...
|
|
void | bm::avx2_set_block_bits3 (bm::word_t *BMRESTRICT block, const unsigned *BMRESTRICT idx, unsigned start, unsigned stop) |
| Experimental code to set bits via AVX strides. More...
|
|
__m256i | bm::avx2_setbit_to256 (unsigned i) |
| Experimental. More...
|
|
int | bm::avx2_cmpge_u32 (__m256i vect8, unsigned value) |
| Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array. More...
|
|
int | bm::avx2_cmpge_u16 (__m256i vect16, unsigned short value) |
| Experimental (test) function to do SIMD vector search in sorted, growing array. More...
|
|
unsigned | bm::avx2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) |
| Hybrid binary search, starts as binary, then switches to scan. More...
|
|
unsigned | bm::avx2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos) |
| Hybrid binary search, starts as binary, then switches to scan. More...
|
|
unsigned | bm::avx2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) |
| lower bound (greater or equal) linear scan in ascending order sorted array More...
|
|
void | bm::avx2_bit_block_gather_scatter (unsigned *BMRESTRICT arr, const unsigned *BMRESTRICT blk, const unsigned *BMRESTRICT idx, unsigned size, unsigned start, unsigned bit_idx) |
| More...
|
|
unsigned | bm::avx2_bit_to_gap (gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) |
| Convert bit block to GAP block. More...
|
|
void | bm::avx2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) |
| Build partial XOR product of 2 bit-blocks using digest mask. More...
|
|
void | bm::avx2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT |
| Build partial XOR product of 2 bit-blocks using digest mask. More...
|
|