BitMagic-C++
Data Structures | Functions
SSE2 functions

Processor specific optimizations for SSE2 instructions (internals) More...

Data Structures

class  bm::sse_empty_guard
 SSE2 reinitialization guard class. More...
 

Functions

void bm::sse2_xor_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask)
 XOR array elements to specified mask dst = *src ^ mask. More...
 
void bm::sse2_andnot_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask)
 Inverts array elements and ANDs them with the specified mask dst = ~*src & mask. More...
 
unsigned bm::sse2_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 AND blocks dst &= *src. More...
 
unsigned bm::sse2_and_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
 AND array elements against another array (unaligned) dst &= *src. More...
 
bool bm::sse2_or_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 OR array elements against another array dst |= *src. More...
 
bool bm::sse2_or_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end)
 OR array elements against another array (unaligned) dst |= *src. More...
 
bool bm::sse2_or_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2)
 OR 2 blocks and copy result to the destination dst = *src1 | src2. More...
 
bool bm::sse2_or_block_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2)
 OR array elements against another 2 arrays dst |= *src1 | src2. More...
 
bool bm::sse2_or_block_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4)
 OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4. More...
 
unsigned bm::sse2_xor_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 XOR block against another dst ^= *src. More...
 
unsigned bm::sse2_xor_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2)
 3-operand XOR dst = *src1 ^ src2. More...
 
unsigned bm::sse2_sub_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 AND-NOT (SUB) array elements against another array dst &= ~*src. More...
 
BMFORCEINLINE void bm::sse2_set_block (__m128i *BMRESTRICT dst, bm::word_t value)
 SSE2 block memset dst = value. More...
 
BMFORCEINLINE void bm::sse2_copy_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 SSE2 block copy dst = *src. More...
 
BMFORCEINLINE void bm::sse2_stream_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src)
 SSE2 streaming block copy dst = *src. More...
 
void bm::sse2_invert_block (__m128i *dst)
 Invert bit block *dst = ~*dst (equivalently *dst ^= all-ones mask). More...
 
bm::id_t bm::sse2_bit_count (const __m128i *block, const __m128i *block_end)
 
unsigned bm::sse2_gap_bfind (const unsigned short *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set)
 Hybrid binary search, starts as binary, then switches to linear scan. More...
 
unsigned bm::sse2_gap_test (const unsigned short *BMRESTRICT buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan. More...
 

Detailed Description

Processor specific optimizations for SSE2 instructions (internals)

Function Documentation

◆ sse2_and_arr_unal()

unsigned bm::sse2_and_arr_unal ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end 
)
inline

AND array elements against another array (unaligned) dst &= *src.

Returns
0 if no bits were set

Definition at line 175 of file bmsse_util.h.

References BM_ALIGN16, and BM_ALIGN16ATTR.

Referenced by bm::decoder::get_32_AND().

◆ sse2_and_block()

unsigned bm::sse2_and_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

AND blocks dst &= *src.

Returns
0 if no bits were set

Definition at line 125 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_andnot_arr_2_mask()

void bm::sse2_andnot_arr_2_mask ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end,
bm::word_t  mask 
)
inline

Inverts array elements and ANDs them with the specified mask dst = ~*src & mask.

Definition at line 102 of file bmsse_util.h.

◆ sse2_bit_count()

bm::id_t bm::sse2_bit_count ( const __m128i *  block,
const __m128i *  block_end 
)
inline
SSE2 optimized bitcounting function implements parallel bitcounting
algorithm for SSE2 instruction set.
unsigned CalcBitCount32(unsigned b)
{
    b = (b & 0x55555555) + (b >> 1 & 0x55555555);
    b = (b & 0x33333333) + (b >> 2 & 0x33333333);
    b = (b + (b >> 4)) & 0x0F0F0F0F;
    b = b + (b >> 8);
    b = (b + (b >> 16)) & 0x0000003F;
    return b;
}

Definition at line 62 of file bmsse2.h.

References BM_ALIGN16, and BM_ALIGN16ATTR.

◆ sse2_copy_block()

BMFORCEINLINE void bm::sse2_copy_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)

SSE2 block copy dst = *src.

Definition at line 739 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_gap_bfind()

unsigned bm::sse2_gap_bfind ( const unsigned short *BMRESTRICT  buf,
unsigned  pos,
unsigned *BMRESTRICT  is_set 
)
inline

Hybrid binary search, starts as binary, then switches to linear scan.

Parameters
buf- GAP buffer pointer.
pos- index of the element.
is_set- output. GAP value (0 or 1).
Returns
GAP index.

Definition at line 413 of file bmsse2.h.

References BM_ASSERT, and bm::sse2_gap_find().

Referenced by bm::sse2_gap_test().

◆ sse2_gap_test()

unsigned bm::sse2_gap_test ( const unsigned short *BMRESTRICT  buf,
unsigned  pos 
)
inline

Hybrid binary search, starts as binary, then switches to scan.

Definition at line 461 of file bmsse2.h.

References bm::sse2_gap_bfind().

Referenced by bm::gap_test_unr().

◆ sse2_invert_block()

void bm::sse2_invert_block ( __m128i *  dst)
inline

Invert bit block *dst = ~*dst (equivalently *dst ^= all-ones mask).

Definition at line 824 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_arr_unal()

bool bm::sse2_or_arr_unal ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end 
)
inline

OR array elements against another array (unaligned) dst |= *src.

Returns
true if all bits are 1

Definition at line 342 of file bmsse_util.h.

Referenced by bm::decoder::get_32_OR().

◆ sse2_or_block()

bool bm::sse2_or_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

OR array elements against another array dst |= *src.

Returns
true if all bits are 1

Definition at line 288 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_2way()

bool bm::sse2_or_block_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

OR 2 blocks and copy result to the destination dst = *src1 | src2.

Returns
true if all bits are 1

Definition at line 394 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_3way()

bool bm::sse2_or_block_3way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

OR array elements against another 2 arrays dst |= *src1 | src2.

Returns
true if all bits are 1

Definition at line 440 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_5way()

bool bm::sse2_or_block_5way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2,
const __m128i *BMRESTRICT  src3,
const __m128i *BMRESTRICT  src4 
)
inline

OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4.

Returns
true if all bits are 1

Definition at line 491 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_set_block()

BMFORCEINLINE void bm::sse2_set_block ( __m128i *BMRESTRICT  dst,
bm::word_t  value 
)

SSE2 block memset dst = value.

Definition at line 710 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_stream_block()

BMFORCEINLINE void bm::sse2_stream_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)

SSE2 streaming block copy dst = *src.

Definition at line 780 of file bmsse_util.h.

References BMRESTRICT, and bm::set_block_size.

◆ sse2_sub_block()

unsigned bm::sse2_sub_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

AND-NOT (SUB) array elements against another array dst &= ~*src.

Returns
0 if no bits were set

Definition at line 660 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_xor_arr_2_mask()

void bm::sse2_xor_arr_2_mask ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end,
bm::word_t  mask 
)
inline

XOR array elements to specified mask dst = *src ^ mask.

Definition at line 78 of file bmsse_util.h.

◆ sse2_xor_block()

unsigned bm::sse2_xor_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

XOR block against another dst ^= *src.

Returns
0 if no bits were set

Definition at line 562 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_xor_block_2way()

unsigned bm::sse2_xor_block_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

3-operand XOR dst = *src1 ^ src2.

Returns
0 if no bits were set

Definition at line 609 of file bmsse_util.h.

References BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.