BitMagic-C++
|
algorithms for sparse_vector scan/search More...
#include <bmsparsevec_algo.h>
Data Structures | |
class | pipeline |
Pipeline to run multiple searches against a particular SV faster using cache optimizations. More... | |
Public Types | |
typedef SV::bvector_type | bvector_type |
typedef const bvector_type * | bvector_type_const_ptr |
typedef bvector_type * | bvector_type_ptr |
typedef SV::value_type | value_type |
typedef SV::size_type | size_type |
typedef bvector_type::allocator_type | allocator_type |
typedef allocator_type::allocator_pool_type | allocator_pool_type |
typedef bm::aggregator< bvector_type > | aggregator_type |
typedef bm::heap_vector< value_type, typename bvector_type::allocator_type, true > | remap_vector_type |
typedef bm::heap_vector< bvector_type *, allocator_type, true > | bvect_vector_type |
typedef aggregator_type::bv_count_vector_type | bv_count_vector_type |
typedef aggregator_type::run_options | run_options |
Scanner options to control execution. More... | |
Public Member Functions | |
sparse_vector_scanner () | |
More... | |
void | bind (const SV &sv, bool sorted) |
bind sparse vector for all searches More... | |
void | reset_binding () BMNOEXCEPT |
reset sparse vector binding More... | |
template<typename SV > | |
bool | find_eq_str (const typename SV::value_type *str, typename SV::size_type &pos) |
More... | |
template<typename SV > | |
bool | find_eq_str (const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos) |
template<typename SV > | |
bool | find_eq_str (const typename SV::value_type *str, typename SV::bvector_type &bv_out) |
More... | |
template<typename SV > | |
bool | find_eq_str (const SV &sv, const typename SV::value_type *str, typename SV::bvector_type &bv_out) |
Find in scalar vector | |
void | find_eq (const SV &sv, value_type value, bvector_type &bv_out) |
find all sparse vector elements EQ to search value More... | |
template<typename BII > | |
void | find_eq (const SV &sv, value_type value, BII bi) |
find all sparse vector elements EQ to search value More... | |
bool | find_eq (const SV &sv, value_type value, size_type &pos) |
find first sparse vector element More... | |
bool | bfind (const SV &sv, const value_type val, size_type &pos) |
binary search for position in the sorted sparse vector More... | |
void | find_gt (const SV &sv, value_type val, bvector_type &bv_out) |
find all elements sparse vector element greater (>) than value More... | |
void | find_ge (const SV &sv, value_type val, bvector_type &bv_out) |
find all elements sparse vector element greater or equal (>=) than value More... | |
void | find_lt (const SV &sv, value_type val, bvector_type &bv_out) |
find all elements sparse vector element less (<) than value More... | |
void | find_le (const SV &sv, value_type val, bvector_type &bv_out) |
find all elements sparse vector element less or equal (<=) than value More... | |
void | find_range (const SV &sv, value_type from, value_type to, bvector_type &bv_out) |
find all elements sparse vector element in closed range [left..right] interval More... | |
Find in bit-transposed string vector | |
enum | search_algo_params { linear_cutoff1 = 16 , linear_cutoff2 = 128 } |
typedef bm::dynamic_heap_matrix< value_type, allocator_type > | heap_matrix_type |
typedef bm::dynamic_heap_matrix< value_type, allocator_type > | matrix_search_buf_type |
typedef bm::heap_vector< bm::id64_t, typename bvector_type::allocator_type, true > | mask_vector_type |
bool | find_eq_str (const SV &sv, const value_type *str, bvector_type &bv_out) |
find sparse vector elements (string) More... | |
bool | find_eq_str (const value_type *str, bvector_type &bv_out) |
find sparse vector elementa (string) in the attached SV More... | |
bool | find_eq_str (const SV &sv, const value_type *str, size_type &pos) |
find first sparse vector element (string) More... | |
bool | find_eq_str (const value_type *str, size_type &pos) |
binary find first sparse vector element (string) Sparse vector must be attached (bind()) More... | |
bool | find_eq_str_prefix (const SV &sv, const value_type *str, bvector_type &bv_out) |
find sparse vector elements with a given prefix (string) More... | |
template<class TPipe > | |
void | find_eq_str (TPipe &pipe) |
find sparse vector elements using search pipeline More... | |
bool | bfind_eq_str (const SV &sv, const value_type *str, size_type &pos) |
binary find first sparse vector element (string) Sparse vector must be sorted. More... | |
bool | lower_bound_str (const SV &sv, const value_type *str, size_type &pos) |
lower bound search for an array position More... | |
bool | bfind_eq_str (const value_type *str, size_type &pos) |
binary find first sparse vector element (string) Sparse vector must be sorted and attached More... | |
void | find_zero (const SV &sv, bvector_type &bv_out, bool null_correct=true) |
find all sparse vector elements EQ to 0 More... | |
void | find_nonzero (const SV &sv, bvector_type &bv_out) |
Find non-zero elements Output vector is computed as a logical OR (join) of all planes. More... | |
void | find_positive (const SV &sv, bvector_type &bv_out) |
Find positive (greter than zero elements) Output vector is computed as a logical OR (join) of all planes. More... | |
void | invert (const SV &sv, bvector_type &bv_out) |
invert search result ("EQ" to "not EQ") More... | |
template<typename It > | |
void | find_eq (const SV &sv, It start, It end, bvector_type &bv_out) |
find all values A IN (C, D, E, F) More... | |
void | find_eq_with_nulls_horizontal (const SV &sv, value_type value, bvector_type &bv_out) |
For testing purposes only. More... | |
void | find_gt_horizontal (const SV &sv, value_type value, bvector_type &bv_out, bool null_correct=true) |
For testing purposes only. More... | |
void | correct_nulls (const SV &sv, bvector_type &bv_out) |
Exclude possible NULL values from the result vector. More... | |
allocator_pool_type & | get_bvector_alloc_pool () BMNOEXCEPT |
Return allocator pool for blocks (Can be used to improve performance of repeated searches with the same scanner) More... | |
static bool | remap_tosv (remap_vector_type &remap_vect_target, const value_type *str, const SV &sv) |
Remap input value into SV char encodings. More... | |
static void | aggregate_AND_OR_slices (bvector_type &bv_target, const bvector_type &bv_mask, const SV &sv, unsigned from, unsigned total_planes) |
More... | |
static constexpr int | gt_slice_limit () noexcept |
Return the slice limit for signed/unsigned vector value types. More... | |
template<typename AGG > | |
static void | add_agg_char (AGG &agg, const SV &sv, int octet_idx, bm::id64_t planes_mask, unsigned value) |
Addd char to aggregator (AND-SUB) More... | |
void | set_search_range (size_type from, size_type to) |
set search boundaries (hint for the aggregator) More... | |
void | reset_search_range () |
reset (disable) search range More... | |
bool | find_eq_with_nulls (const SV &sv, value_type value, bvector_type &bv_out, size_type search_limit=0) |
find value (may include NULL indexes) More... | |
bool | find_first_eq (const SV &sv, value_type value, size_type &idx) |
find first value (may include NULL indexes) More... | |
bool | find_first_eq (const SV &sv, const value_type *str, size_type &idx, bool remaped) |
find first string value (may include NULL indexes) More... | |
bool | find_eq_str_impl (const SV &sv, const value_type *str, bvector_type &bv_out, bool prefix_sub) |
find EQ str / prefix impl More... | |
bool | prepare_and_sub_aggregator (const SV &sv, value_type value) |
Prepare aggregator for AND-SUB (EQ) search. More... | |
bool | prepare_and_sub_aggregator (const SV &sv, const value_type *str, unsigned octet_start, bool prefix_sub) |
Prepare aggregator for AND-SUB (EQ) search (string) More... | |
void | decompress (const SV &sv, bvector_type &bv_out) |
Rank-Select decompression for RSC vectors. More... | |
int | compare_str (const SV &sv, size_type idx, const value_type *str) |
compare sv[idx] with input str More... | |
int | compare (const SV &sv, size_type idx, const value_type val) BMNOEXCEPT |
compare sv[idx] with input value More... | |
void | aggregate_OR_slices (bvector_type &bv_target, const SV &sv, unsigned from, unsigned total_planes) |
More... | |
sparse_vector_scanner (const sparse_vector_scanner &)=delete | |
void | operator= (const sparse_vector_scanner &)=delete |
algorithms for sparse_vector scan/search
Scanner uses properties of bit-vector planes to answer questions like "find all sparse vector elements equivalent to XYZ".
Class uses fast algorithms based on properties of bit-planes. This is NOT a brute force, direct scan, scanner uses search space pruning and cache optimizations to run the search.
Definition at line 499 of file bmsparsevec_algo.h.
typedef aggregator_type::run_options bm::sparse_vector_scanner< SV >::run_options |
Scanner options to control execution.
Definition at line 524 of file bmsparsevec_algo.h.
|
protected |
Enumerator | |
---|---|
linear_cutoff1 | |
linear_cutoff2 |
Definition at line 1024 of file bmsparsevec_algo.h.
|
inlinestaticprotected |
Addd char to aggregator (AND-SUB)
Definition at line 1042 of file bmsparsevec_algo.h.
References bm::bitscan(), BM_ASSERT, and bm::word_bitcount().
Referenced by bm::sparse_vector_scanner< SV >::pipeline< Opt >::add().
|
staticprotected |
Definition at line 2114 of file bmsparsevec_algo.h.
References BM_ASSERT.
|
protected |
Definition at line 2095 of file bmsparsevec_algo.h.
References BM_ASSERT.
bool bm::sparse_vector_scanner< SV >::bfind | ( | const SV & | sv, |
const value_type | val, | ||
typename SV::size_type & | pos | ||
) |
binary search for position in the sorted sparse vector
Method assumes the sparse array is sorted, if value is found pos returns its index, if not found, pos would contain index where it could be inserted to maintain the order
sv | - input sparse vector |
val | - value to search for |
pos | - output sparse vector element index (actual index if found or insertion point if not found) |
Definition at line 2463 of file bmsparsevec_algo.h.
References BM_ASSERT.
Referenced by insertion_sort().
bool bm::sparse_vector_scanner< SV >::bfind_eq_str | ( | const SV & | sv, |
const value_type * | str, | ||
size_type & | pos | ||
) |
binary find first sparse vector element (string)
Sparse vector must be sorted.
Referenced by run_benchmark().
bool bm::sparse_vector_scanner< SV >::bfind_eq_str | ( | const value_type * | str, |
size_type & | pos | ||
) |
binary find first sparse vector element (string) Sparse vector must be sorted and attached
void bm::sparse_vector_scanner< SV >::bind | ( | const SV & | sv, |
bool | sorted | ||
) |
bind sparse vector for all searches
sv | - input sparse vector to bind for searches |
sorted | - source index is sorted, build index for binary search |
Definition at line 1462 of file bmsparsevec_algo.h.
References BM_ASSERT, bm::gap_max_bits, bm::set_block_shift, and bm::sub_block3_size.
Referenced by main(), and run_benchmark().
|
protected |
compare sv[idx] with input value
Definition at line 2766 of file bmsparsevec_algo.h.
|
protected |
compare sv[idx] with input str
Definition at line 2717 of file bmsparsevec_algo.h.
References bm::set_block_mask, bm::set_block_shift, and bm::sub_block3_size.
void bm::sparse_vector_scanner< SV >::correct_nulls | ( | const SV & | sv, |
bvector_type & | bv_out | ||
) |
Exclude possible NULL values from the result vector.
sv | - input sparse vector |
bv_out | - output bit-bector of non-zero elements |
Definition at line 1564 of file bmsparsevec_algo.h.
Referenced by bm::sparse_vector_scanner< SV >::find_eq().
|
protected |
Rank-Select decompression for RSC vectors.
Definition at line 2892 of file bmsparsevec_algo.h.
References BM_ASSERT.
|
inline |
find all values A IN (C, D, E, F)
sv | - input sparse vector |
start | - start iterator (set to search) |
end | - end iterator (set to search) |
bv_out | - output bit-bector of non-zero elements |
Definition at line 888 of file bmsparsevec_algo.h.
References BM_ASSERT, bm::sparse_vector_scanner< SV >::correct_nulls(), and bm::sparse_vector_scanner< SV >::find_eq_with_nulls().
void bm::sparse_vector_scanner< SV >::find_eq | ( | const SV & | sv, |
value_type | value, | ||
BII | bi | ||
) |
find all sparse vector elements EQ to search value
Find all sparse vector elements equivalent to specified value
sv | - input sparse vector |
value | - value to search for |
bi | - back insert iterator for the search results |
Definition at line 2801 of file bmsparsevec_algo.h.
void bm::sparse_vector_scanner< SV >::find_eq | ( | const SV & | sv, |
typename SV::value_type | value, | ||
typename SV::bvector_type & | bv_out | ||
) |
find all sparse vector elements EQ to search value
Find all sparse vector elements equivalent to specified value
sv | - input sparse vector |
value | - value to search for |
bv_out | - search result bit-vector (search result is a vector of 1s when sv[i] == value) |
Definition at line 2777 of file bmsparsevec_algo.h.
Referenced by compute_frequent_kmers(), main(), and run_benchmark().
bool bm::sparse_vector_scanner< SV >::find_eq | ( | const SV & | sv, |
typename SV::value_type | value, | ||
typename SV::size_type & | pos | ||
) |
find first sparse vector element
Find all sparse vector elements equivalent to specified value. Works well if sperse vector represents unordered set
sv | - input sparse vector |
value | - value to search for |
pos | - output found sparse vector element index |
Definition at line 2834 of file bmsparsevec_algo.h.
bool bm::sparse_vector_scanner< SV >::find_eq_str | ( | const SV & | sv, |
const value_type * | str, | ||
bvector_type & | bv_out | ||
) |
find sparse vector elements (string)
sv | - sparse vector of strings to search |
str | - string to search for |
bv_out | - search result bit-vector (search result masks 1 elements) |
Referenced by main(), and run_benchmark().
bool bm::sparse_vector_scanner< SV >::find_eq_str | ( | const SV & | sv, |
const value_type * | str, | ||
size_type & | pos | ||
) |
find first sparse vector element (string)
sv | - sparse vector of strings to search |
str | - string to search for |
pos | - [out] index of the first found |
bool bm::sparse_vector_scanner< SV >::find_eq_str | ( | const typename SV::value_type * | str, |
typename SV::bvector_type & | bv_out | ||
) |
Definition at line 2202 of file bmsparsevec_algo.h.
References BM_ASSERT.
bool bm::sparse_vector_scanner< SV >::find_eq_str | ( | const typename SV::value_type * | str, |
typename SV::size_type & | pos | ||
) |
Definition at line 2143 of file bmsparsevec_algo.h.
References BM_ASSERT.
bool bm::sparse_vector_scanner< SV >::find_eq_str | ( | const value_type * | str, |
bvector_type & | bv_out | ||
) |
find sparse vector elementa (string) in the attached SV
str | - string to search for |
bv_out | - search result bit-vector (search result masks 1 elements) |
bool bm::sparse_vector_scanner< SV >::find_eq_str | ( | const value_type * | str, |
size_type & | pos | ||
) |
void bm::sparse_vector_scanner< SV >::find_eq_str | ( | TPipe & | pipe | ) |
find sparse vector elements using search pipeline
pipe | - pipeline to run |
Definition at line 2287 of file bmsparsevec_algo.h.
|
protected |
find EQ str / prefix impl
aggregator search
Definition at line 2236 of file bmsparsevec_algo.h.
References BM_ASSERT.
bool bm::sparse_vector_scanner< SV >::find_eq_str_prefix | ( | const SV & | sv, |
const value_type * | str, | ||
bvector_type & | bv_out | ||
) |
find sparse vector elements with a given prefix (string)
sv | - sparse vector of strings to search |
str | - string prefix to search for "123" is a prefix for "1234567" |
bv_out | - search result bit-vector (search result masks 1 elements) |
Definition at line 2132 of file bmsparsevec_algo.h.
Referenced by main().
|
protected |
find value (may include NULL indexes)
Definition at line 1575 of file bmsparsevec_algo.h.
Referenced by bm::sparse_vector_scanner< SV >::find_eq().
void bm::sparse_vector_scanner< SV >::find_eq_with_nulls_horizontal | ( | const SV & | sv, |
value_type | value, | ||
bvector_type & | bv_out | ||
) |
For testing purposes only.
Definition at line 1768 of file bmsparsevec_algo.h.
References bm::bitscan(), and BM_ASSERT.
|
protected |
find first string value (may include NULL indexes)
Definition at line 1630 of file bmsparsevec_algo.h.
References BM_ASSERT.
|
protected |
find first value (may include NULL indexes)
Definition at line 1606 of file bmsparsevec_algo.h.
References BM_ASSERT.
void bm::sparse_vector_scanner< SV >::find_ge | ( | const SV & | sv, |
value_type | val, | ||
bvector_type & | bv_out | ||
) |
find all elements sparse vector element greater or equal (>=) than value
sv | - input sparse vector |
value | - value to search for |
bv_out | - search result bit-vector (search result is a vector of 1s when sv[i] >= value) |
Definition at line 1831 of file bmsparsevec_algo.h.
Referenced by main().
void bm::sparse_vector_scanner< SV >::find_gt | ( | const SV & | sv, |
value_type | val, | ||
bvector_type & | bv_out | ||
) |
find all elements sparse vector element greater (>) than value
sv | - input sparse vector |
value | - value to search for |
bv_out | - search result bit-vector (search result is a vector of 1s when sv[i] > value) |
Definition at line 1820 of file bmsparsevec_algo.h.
Referenced by main().
void bm::sparse_vector_scanner< SV >::find_gt_horizontal | ( | const SV & | sv, |
value_type | value, | ||
bvector_type & | bv_out, | ||
bool | null_correct = true |
||
) |
For testing purposes only.
Definition at line 1909 of file bmsparsevec_algo.h.
References bm::bitscan(), and BM_ASSERT.
void bm::sparse_vector_scanner< SV >::find_le | ( | const SV & | sv, |
value_type | val, | ||
bvector_type & | bv_out | ||
) |
find all elements sparse vector element less or equal (<=) than value
sv | - input sparse vector |
value | - value to search for |
bv_out | - search result bit-vector (search result is a vector of 1s when sv[i] <= value) |
Definition at line 1880 of file bmsparsevec_algo.h.
Referenced by main().
void bm::sparse_vector_scanner< SV >::find_lt | ( | const SV & | sv, |
value_type | val, | ||
bvector_type & | bv_out | ||
) |
find all elements sparse vector element less (<) than value
sv | - input sparse vector |
value | - value to search for |
bv_out | - search result bit-vector (search result is a vector of 1s when sv[i] < value) |
Definition at line 1869 of file bmsparsevec_algo.h.
Referenced by main().
void bm::sparse_vector_scanner< SV >::find_nonzero | ( | const SV & | sv, |
typename SV::bvector_type & | bv_out | ||
) |
Find non-zero elements Output vector is computed as a logical OR (join) of all planes.
sv | - input sparse vector |
bv_out | - output bit-bector of non-zero elements |
Definition at line 2863 of file bmsparsevec_algo.h.
void bm::sparse_vector_scanner< SV >::find_positive | ( | const SV & | sv, |
typename SV::bvector_type & | bv_out | ||
) |
Find positive (greter than zero elements) Output vector is computed as a logical OR (join) of all planes.
sv | - input sparse vector |
bv_out | - output bit-bector of non-zero elements |
Definition at line 2877 of file bmsparsevec_algo.h.
References BM_ASSERT.
void bm::sparse_vector_scanner< SV >::find_range | ( | const SV & | sv, |
value_type | from, | ||
value_type | to, | ||
bvector_type & | bv_out | ||
) |
find all elements sparse vector element in closed range [left..right] interval
sv | - input sparse vector |
from | - range from |
to | - range to |
bv_out | - search result bit-vector (search result is a vector of 1s when sv[i] <= value) |
Definition at line 1891 of file bmsparsevec_algo.h.
References bm::xor_swap().
Referenced by main().
void bm::sparse_vector_scanner< SV >::find_zero | ( | const SV & | sv, |
bvector_type & | bv_out, | ||
bool | null_correct = true |
||
) |
find all sparse vector elements EQ to 0
sv | - input sparse vector |
bv_out | - output bit-vector (search result masks 1 elements) |
null_correct | - flag to perform NULL correction |
Definition at line 1519 of file bmsparsevec_algo.h.
References bm::id_max.
Referenced by bm::set2set_11_transform< SV >::attach_sv().
|
inline |
Return allocator pool for blocks (Can be used to improve performance of repeated searches with the same scanner)
Definition at line 940 of file bmsparsevec_algo.h.
Referenced by main().
|
inlinestaticconstexprprotectednoexcept |
Return the slice limit for signed/unsigned vector value types.
Definition at line 1010 of file bmsparsevec_algo.h.
void bm::sparse_vector_scanner< SV >::invert | ( | const SV & | sv, |
bvector_type & | bv_out | ||
) |
invert search result ("EQ" to "not EQ")
sv | - input sparse vector |
bv_out | - output bit-bector of non-zero elements |
Definition at line 1546 of file bmsparsevec_algo.h.
Referenced by main().
bool bm::sparse_vector_scanner< SV >::lower_bound_str | ( | const SV & | sv, |
const value_type * | str, | ||
size_type & | pos | ||
) |
lower bound search for an array position
Method assumes the sparse array is sorted
sv | - input sparse vector |
str | - value to search for |
pos | - output sparse vector element index |
Definition at line 2583 of file bmsparsevec_algo.h.
References BM_ASSERT.
Referenced by insertion_sort().
|
protected |
Prepare aggregator for AND-SUB (EQ) search (string)
Definition at line 1679 of file bmsparsevec_algo.h.
References BM_ASSERT.
|
protected |
Prepare aggregator for AND-SUB (EQ) search.
Definition at line 1728 of file bmsparsevec_algo.h.
References bm::bitscan(), and BM_ASSERT.
|
staticprotected |
Remap input value into SV char encodings.
Definition at line 2222 of file bmsparsevec_algo.h.
void bm::sparse_vector_scanner< SV >::reset_binding |
reset sparse vector binding
Definition at line 1510 of file bmsparsevec_algo.h.
|
protected |
reset (disable) search range
Definition at line 2924 of file bmsparsevec_algo.h.
References bm::id_max.
|
protected |
set search boundaries (hint for the aggregator)
Definition at line 2913 of file bmsparsevec_algo.h.
References BM_ASSERT.