BitMagic-C++
Data Structures | Functions
SSE2 functions

Processor specific optimizations for SSE2 instructions (internals) More...

Data Structures

class  bm::sse_empty_guard
 SSE2 reinitialization guard class. More...
 

Functions

void bm::sse2_xor_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT
 XOR array elements to specified mask dst = *src ^ mask. More...
 
void bm::sse2_andnot_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT
 Inverts array elements and ANDs them with the specified mask dst = ~*src & mask. More...
 
unsigned bm::sse2_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND blocks dst &= *src. More...
 
unsigned bm::sse2_and_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
 AND array elements against another array (unaligned) dst &= *src. More...
 
bool bm::sse2_or_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 OR array elements against another array dst |= *src. More...
 
bool bm::sse2_or_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
 OR array elements against another array (unaligned) dst |= *src. More...
 
bool bm::sse2_or_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 OR 2 blocks and copy result to the destination dst = *src1 | src2. More...
 
bool bm::sse2_or_block_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 OR array elements against another 2 arrays dst |= *src1 | src2. More...
 
bool bm::sse2_or_block_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4. More...
 
unsigned bm::sse2_xor_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 XOR block against another dst ^= *src. More...
 
unsigned bm::sse2_xor_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 3 operand XOR dst = *src1 ^ src2 More...
 
unsigned bm::sse2_sub_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND-NOT (SUB) array elements against another array dst &= ~*src. More...
 
void bm::sse2_set_block (__m128i *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
 SSE2 block memset dst = value. More...
 
void bm::sse2_copy_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy dst = *src. More...
 
void bm::sse2_copy_block_unalign (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy (unaligned SRC) dst = *src. More...
 
void bm::sse2_stream_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy dst = *src. More...
 
void bm::sse2_stream_block_unalign (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy (unaligned src) dst = *src. More...
 
void bm::sse2_invert_block (__m128i *BMRESTRICT dst) BMNOEXCEPT
 Invert bit block dst = ~*dst (equivalently dst ^= all-ones mask). More...
 
unsigned bm::sse2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) BMNOEXCEPT
 lower bound (greater or equal) linear scan in ascending order sorted array More...
 
bm::id_t bm::sse2_bit_count (const __m128i *block, const __m128i *block_end)
  More...
 
bool bm::sse2_is_all_zero (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if block is all zero bits More...
 
bool bm::sse2_is_all_one (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if block is all ONE bits More...
 
BMFORCEINLINE bool bm::sse2_is_digest_zero (const __m128i *BMRESTRICT block) BMNOEXCEPT
 check if digest stride is all zero bits More...
 
BMFORCEINLINE void bm::sse2_block_set_digest (__m128i *dst, unsigned value) BMNOEXCEPT
 set digest stride to 0xFF.. or 0x0 value More...
 
void bm::sse2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
void bm::sse2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) BMNOEXCEPT
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
BMFORCEINLINE bool bm::sse2_and_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND block digest stride dst &= *src. More...
 
BMFORCEINLINE bool bm::sse2_and_or_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND-OR block digest stride dst |= *src1 & src2. More...
 
bool bm::sse2_and_digest_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 AND block digest stride. More...
 
BMFORCEINLINE bool bm::sse2_and_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 AND block digest stride dst = *src1 & src2. More...
 
BMFORCEINLINE bool bm::sse2_sub_digest (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SUB (AND NOT) block digest stride dst &= ~*src. More...
 
BMFORCEINLINE bool bm::sse2_sub_digest_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2 More...
 
bool bm::sse2_bit_find_first (const __m128i *BMRESTRICT block, unsigned *pos) BMNOEXCEPT
 Find first non-zero bit. More...
 
bool bm::sse2_bit_find_first_diff (const __m128i *BMRESTRICT block1, const __m128i *BMRESTRICT block2, unsigned *pos) BMNOEXCEPT
 Find first bit which is different between two bit-blocks. More...
 
bool bm::sse2_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) BMNOEXCEPT
 block shift right by 1 More...
 
bool bm::sse2_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) BMNOEXCEPT
 block shift left by 1 More...
 
unsigned bm::sse2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
 Hybrid binary search, starts as binary, then switches to linear scan. More...
 
unsigned bm::sse2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan. More...
 

Detailed Description

Processor specific optimizations for SSE2 instructions (internals)

Function Documentation

◆ sse2_and_arr_unal()

unsigned bm::sse2_and_arr_unal ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end 
)
inline

AND array elements against another array (unaligned) dst &= *src.

Returns
0 if no bits were set

Definition at line 259 of file bmsse_util.h.

References BM_ALIGN16, and BM_ALIGN16ATTR.

Referenced by bm::decoder::get_32_AND().

◆ sse2_and_block()

unsigned bm::sse2_and_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

AND blocks dst &= *src.

Returns
0 if no bits were set

Definition at line 126 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_and_digest()

BMFORCEINLINE bool bm::sse2_and_digest ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)

AND block digest stride dst &= *src.

Returns
true if stride is all zero

Definition at line 415 of file bmsse2.h.

◆ sse2_and_digest_2way()

BMFORCEINLINE bool bm::sse2_and_digest_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)

AND block digest stride dst = *src1 & src2.

Returns
true if stride is all zero

Definition at line 602 of file bmsse2.h.

◆ sse2_and_digest_5way()

bool bm::sse2_and_digest_5way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2,
const __m128i *BMRESTRICT  src3,
const __m128i *BMRESTRICT  src4 
)
inline

AND block digest stride.

Returns
true if stride is all zero

Definition at line 519 of file bmsse2.h.

◆ sse2_and_or_digest_2way()

BMFORCEINLINE bool bm::sse2_and_or_digest_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)

AND-OR block digest stride dst |= *src1 & src2.

Returns
true if stride is all zero

Definition at line 464 of file bmsse2.h.

◆ sse2_andnot_arr_2_mask()

void bm::sse2_andnot_arr_2_mask ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end,
bm::word_t  mask 
)
inline

Inverts array elements and ANDs them with the specified mask dst = ~*src & mask.

Definition at line 102 of file bmsse_util.h.

◆ sse2_bit_block_xor()

void bm::sse2_bit_block_xor ( bm::word_t *  target_block,
const bm::word_t *  block,
const bm::word_t *  xor_block,
bm::id64_t  digest 
)
inline

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target := block ^ xor_block
block- arg1
xor_block- arg2
digest- mask for each block wave to XOR (1) or just copy (0)

Definition at line 289 of file bmsse2.h.

References bm::block_waves, and bm::set_block_digest_wave_size.

◆ sse2_bit_block_xor_2way()

void bm::sse2_bit_block_xor_2way ( bm::word_t *  target_block,
const bm::word_t *  xor_block,
bm::id64_t  digest 
)
inline

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target ^= xor_block
xor_block- arg1
digest- mask for each block wave to XOR (if 1)

Definition at line 360 of file bmsse2.h.

References bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_digest_wave_size, and bm::word_bitcount64().

◆ sse2_bit_count()

bm::id_t bm::sse2_bit_count ( const __m128i *  block,
const __m128i *  block_end 
)
inline
SSE2 optimized bitcounting function implements parallel bitcounting
algorithm for SSE2 instruction set.
unsigned CalcBitCount32(unsigned b)
{
    b = (b & 0x55555555) + (b >> 1 & 0x55555555);
    b = (b & 0x33333333) + (b >> 2 & 0x33333333);
    b = (b + (b >> 4)) & 0x0F0F0F0F;
    b = b + (b >> 8);
    b = (b + (b >> 16)) & 0x0000003F;
    return b;
}

Definition at line 66 of file bmsse2.h.

References BM_ALIGN16, and BM_ALIGN16ATTR.

◆ sse2_bit_find_first()

bool bm::sse2_bit_find_first ( const __m128i *BMRESTRICT  block,
unsigned *  pos 
)
inline

Find first non-zero bit.

Definition at line 749 of file bmsse2.h.

References bm::bit_scan_forward32(), BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, and bm::set_block_size.

◆ sse2_bit_find_first_diff()

bool bm::sse2_bit_find_first_diff ( const __m128i *BMRESTRICT  block1,
const __m128i *BMRESTRICT  block2,
unsigned *  pos 
)
inline

Find first bit which is different between two bit-blocks.

Definition at line 804 of file bmsse2.h.

References bm::bit_scan_forward32(), BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, and bm::set_block_size.

◆ sse2_block_set_digest()

BMFORCEINLINE void bm::sse2_block_set_digest ( __m128i *  dst,
unsigned  value 
)

set digest stride to 0xFF.. or 0x0 value

Definition at line 268 of file bmsse2.h.

◆ sse2_copy_block()

void bm::sse2_copy_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy dst = *src.

Definition at line 838 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_copy_block_unalign()

void bm::sse2_copy_block_unalign ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy (unaligned SRC) dst = *src.

Definition at line 879 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_gap_bfind()

unsigned bm::sse2_gap_bfind ( const unsigned short *BMRESTRICT  buf,
unsigned  pos,
unsigned *BMRESTRICT  is_set 
)
inline

Hybrid binary search, starts as binary, then switches to linear scan.

Parameters
buf- GAP buffer pointer.
pos- index of the element.
is_set- output. GAP value (0 or 1).
Returns
GAP index.

Definition at line 1203 of file bmsse2.h.

References BM_ASSERT, and bm::sse2_gap_find().

Referenced by bm::sse2_gap_test().

◆ sse2_gap_test()

unsigned bm::sse2_gap_test ( const unsigned short *BMRESTRICT  buf,
unsigned  pos 
)
inline

Hybrid binary search, starts as binary, then switches to scan.

Definition at line 1251 of file bmsse2.h.

References bm::sse2_gap_bfind().

Referenced by bm::gap_test_unr().

◆ sse2_invert_block()

void bm::sse2_invert_block ( __m128i *BMRESTRICT  dst)
inline

Invert bit block dst = ~*dst (equivalently dst ^= all-ones mask).

Definition at line 1006 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_is_all_one()

bool bm::sse2_is_all_one ( const __m128i *BMRESTRICT  block)
inline

check if block is all ONE bits

Definition at line 219 of file bmsse2.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_is_all_zero()

bool bm::sse2_is_all_zero ( const __m128i *BMRESTRICT  block)
inline

check if block is all zero bits

Definition at line 194 of file bmsse2.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_is_digest_zero()

BMFORCEINLINE bool bm::sse2_is_digest_zero ( const __m128i *BMRESTRICT  block)

check if digest stride is all zero bits

Definition at line 244 of file bmsse2.h.

◆ sse2_lower_bound_scan_u32()

unsigned bm::sse2_lower_bound_scan_u32 ( const unsigned *BMRESTRICT  arr,
unsigned  target,
unsigned  from,
unsigned  to 
)
inline

lower bound (greater or equal) linear scan in ascending order sorted array

Definition at line 1099 of file bmsse_util.h.

References bm::bit_scan_forward32(), and BMRESTRICT.

◆ sse2_or_arr_unal()

bool bm::sse2_or_arr_unal ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end 
)
inline

OR array elements against another array (unaligned) dst |= *src.

Returns
true if all bits are 1

Definition at line 426 of file bmsse_util.h.

Referenced by bm::decoder::get_32_OR().

◆ sse2_or_block()

bool bm::sse2_or_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

OR array elements against another array dst |= *src.

Returns
true if all bits are 1

Definition at line 372 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_2way()

bool bm::sse2_or_block_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

OR 2 blocks and copy result to the destination dst = *src1 | src2.

Returns
true if all bits are 1

Definition at line 478 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_3way()

bool bm::sse2_or_block_3way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

OR array elements against another 2 arrays dst |= *src1 | src2.

Returns
true if all bits are 1

Definition at line 524 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_5way()

bool bm::sse2_or_block_5way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2,
const __m128i *BMRESTRICT  src3,
const __m128i *BMRESTRICT  src4 
)
inline

OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4.

Returns
true if all bits are 1

Definition at line 575 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_set_block()

void bm::sse2_set_block ( __m128i *BMRESTRICT  dst,
bm::word_t  value 
)
inline

SSE2 block memset dst = value.

Definition at line 809 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_shift_l1()

bool bm::sse2_shift_l1 ( __m128i *  block,
unsigned *  empty_acc,
unsigned  co1 
)
inline

block shift left by 1

Definition at line 921 of file bmsse2.h.

References bm::set_block_size.

◆ sse2_shift_r1()

bool bm::sse2_shift_r1 ( __m128i *  block,
unsigned *  empty_acc,
unsigned  co1 
)
inline

block shift right by 1

Definition at line 868 of file bmsse2.h.

References bm::set_block_size.

◆ sse2_stream_block()

void bm::sse2_stream_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy dst = *src.

Definition at line 921 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_stream_block_unalign()

void bm::sse2_stream_block_unalign ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy (unaligned src) dst = *src.

Definition at line 962 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_sub_block()

unsigned bm::sse2_sub_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

AND-NOT (SUB) array elements against another array dst &= ~*src.

Returns
0 if no bits were set

Definition at line 744 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_sub_digest()

BMFORCEINLINE bool bm::sse2_sub_digest ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)

SUB (AND NOT) block digest stride dst &= ~*src.

Returns
true if stride is all zero

Definition at line 652 of file bmsse2.h.

◆ sse2_sub_digest_2way()

BMFORCEINLINE bool bm::sse2_sub_digest_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)

2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2

Returns
true if stride is all zero

Definition at line 701 of file bmsse2.h.

◆ sse2_xor_arr_2_mask()

void bm::sse2_xor_arr_2_mask ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end,
bm::word_t  mask 
)
inline

XOR array elements to specified mask dst = *src ^ mask.

Definition at line 78 of file bmsse_util.h.

◆ sse2_xor_block()

unsigned bm::sse2_xor_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

XOR block against another dst ^= *src.

Returns
0 if no bits were set

Definition at line 646 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_xor_block_2way()

unsigned bm::sse2_xor_block_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

3 operand XOR dst = *src1 ^ src2

Returns
0 if no bits were set

Definition at line 693 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.