1#ifndef BMSTRSPARSEVEC__H__INCLUDED__
2#define BMSTRSPARSEVEC__H__INCLUDED__
33#ifndef BM__H__INCLUDED__
36# error missing include (bm.h or bm64.h)
70template<
typename CharType,
typename BV,
unsigned STR_SIZE>
100 typedef bm::dynamic_heap_matrix<unsigned char, allocator_type>
118 bm::heap_vector<CharType, typename bvector_type::allocator_type, true>
134 : str_sv_(str_sv), idx_(idx)
146 str_sv_.get(idx_, this->
buf_.data(), str_sv_.effective_max_str());
147 return this->
buf_.data();
151 {
return bool(*
this) == bool(ref); }
167 : str_sv_(str_sv), idx_(idx)
179 str_sv_.get(idx_, this->
buf_.data(), str_sv_.effective_max_str());
180 return this->
buf_.data();
192 str_sv_.set(idx_, str);
196 {
return bool(*
this) == bool(ref); }
260 {
return (pos_ == it.pos_) && (sv_ == it.sv_); }
264 {
return pos_ < it.pos_; }
266 {
return pos_ <= it.pos_; }
268 {
return pos_ > it.pos_; }
270 {
return pos_ >= it.pos_; }
274 {
return this->
value(); }
278 { this->
advance();
return *
this; }
318 unsigned substr_from_;
366 bi.buf_matrix_.rows(), bi.buf_matrix_.cols());
396 { this->
add(v);
return *
this; }
400 template<
typename StrType>
403 this->
add(v.c_str());
return *
this;
460 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
604 template<
typename StrType>
607 this->
insert(idx, str.c_str());
639 template<
typename StrType>
642 if (idx >= this->
size())
653 CharType ch = str[i];
658 ch = CharType(remap_value);
666 bv_null->set_bit_no_check(idx);
674 template<
typename StrType>
701 template<
typename StrType>
717 template<
typename StrType>
721 for (
unsigned i = 0;
true; ++i)
729 unsigned char remap_value = remap_row[unsigned(ch)];
736 ch = CharType(remap_value);
1063 template<
typename CharMatrix>
1067 bool zero_mem =
true)
const
1071 0,
unsigned(str_len-1), zero_mem);
1087 template<
typename CharMatrix>
1091 unsigned substr_from,
1093 bool zero_mem =
true)
const
1099 struct sv_decode_visitor_func
1101 sv_decode_visitor_func(CharMatrix& cmatr)
BMNOEXCEPT2
1114 const unsigned i = substr_i_;
1121 }
while (++j < bits_size);
1137 auto base = bv_offset - sv_off_;
1139 const unsigned i = substr_i_;
1160 unsigned substr_i_= 0;
1173 if (max_sz < dec_size)
1175 if (rows < dec_size)
1180 sv_decode_visitor_func func(cmatr);
1182 for (
unsigned i = substr_from; i <= substr_to; ++i)
1185 func.substr_i_ = i - substr_from;
1188 for (
unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
1198 func.sv_off_ = idx_from;
1209 for (
unsigned i = 0; i < dec_size; ++i)
1211 typename CharMatrix::value_type* str = cmatr.row(i);
1227 template<
typename CharMatrix>
1232 if (idx_from < this->
size_)
1235 this->
clear_range(idx_from, idx_from + imp_size - 1);
1248 template<
typename CharMatrix>
1307 void sync(
bool force);
1339 template<
typename CharMatrix,
size_t BufSize = ins_buf_size>
1342 bool set_not_null =
true)
1346 unsigned max_str_size = 0;
1348 for (
unsigned j = 0; j < imp_size; ++j)
1350 typename CharMatrix::value_type* str = cmatr.row(j);
1351 typename CharMatrix::size_type i;
1352 typename CharMatrix::size_type cols = cmatr.cols();
1353 for (i = 0; i < cols; ++i)
1359 (unsigned)((i > max_str_size) ? i : max_str_size);
1364 unsigned char remap_value =
1370 str[i] = CharType(remap_value);
1378 unsigned_value_type ch_slice[BufSize];
1379 for (
unsigned i = 0; i < max_str_size; ++i)
1381 unsigned ch_acc = 0;
1382#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1385 for (
size_type j = 0; j < imp_size; j+=4)
1392 ch_acc |= ch0 | ch1 | ch2 | ch3;
1393 ch_slice[j] = ch0; ch_slice[j+1] = ch1;
1394 ch_slice[j+2] = ch2; ch_slice[j+3] = ch3;
1400 for (
size_type j = 0; j < imp_size; ++j)
1410 size_type idx_to = idx_from + imp_size - 1;
1414 bv_null->set_range(idx_from, idx_to);
1416 if (idx_to >= this->
size())
1417 this->
size_ = idx_to+1;
1421#pragma warning( push )
1422#pragma warning( disable : 4146 )
1425 template<
size_t BufSize = ins_buf_size>
1432 for ( ;ch_acc; ch_acc &= ch_acc - 1)
1434 unsigned n_bits = 0;
1436 unsigned mask = 1u << bi;
1437#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1440 mask |= (mask << 8) | (mask << 16) | (mask << 24);
1441 for (
size_type j = 0; j < imp_size; j+=4)
1443 unsigned ch0 = ((unsigned)ch_slice[j+0]) |
1444 ((unsigned)ch_slice[j+1] << 8) |
1445 ((unsigned)ch_slice[j+2] << 16) |
1446 ((unsigned)ch_slice[j+3] << 24);
1448 ch0 = (ch0 >> bi) | (ch0 >> (bi+7)) |
1449 (ch0 >> (bi+14)) | (ch0 >> (bi+21));
1452 for (
size_type base_idx = idx_from + j ;ch0; ch0 &= ch0 - 1)
1454 const unsigned bit_idx =
1456 bit_list[n_bits++] = base_idx + bit_idx;
1463 for (
size_type j = 0; j < imp_size; ++j)
1465 unsigned ch = unsigned(ch_slice[j]);
1475 bv->import_sorted(&
bit_list[0], n_bits,
false);
1480#pragma warning( pop )
1533 *idx_from = from; *idx_to = to;
return true;
1544 void remap(back_insert_iterator& iit);
1567template<
class CharType,
class BV,
unsigned STR_SIZE>
1576 static_assert(STR_SIZE > 1,
1577 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1583template<
class CharType,
class BV,
unsigned STR_SIZE>
1587 remap_flags_(str_sv.remap_flags_),
1588 remap_matrix1_(str_sv.remap_matrix1_),
1589 remap_matrix2_(str_sv.remap_matrix2_)
1591 static_assert(STR_SIZE > 1,
1592 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1597template<
class CharType,
class BV,
unsigned STR_SIZE>
1601 remap_flags_(str_sv.remap_flags_),
1602 remap_matrix1_(str_sv.remap_matrix1_),
1603 remap_matrix2_(str_sv.remap_matrix2_)
1607 static_assert(STR_SIZE > 1,
1608 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1614template<
class CharType,
class BV,
unsigned STR_SIZE>
1618 parent_type::swap(str_sv);
1620 remap_matrix1_.swap(str_sv.remap_matrix1_);
1621 remap_matrix2_.swap(str_sv.remap_matrix2_);
1626template<
class CharType,
class BV,
unsigned STR_SIZE>
1630 if (idx >= this->size())
1631 this->size_ = idx+1;
1632 set_value(idx, str);
1637template<
class CharType,
class BV,
unsigned STR_SIZE>
1641 if (idx >= this->size())
1643 this->size_ = idx+1;
1644 set_value(idx, str);
1647 insert_value(idx, str);
1653template<
class CharType,
class BV,
unsigned STR_SIZE>
1657 if (idx >= this->size_)
1659 this->erase_column(idx,
true);
1665template<
class CharType,
class BV,
unsigned STR_SIZE>
1668 if (idx >= this->size_)
1669 this->size_ = idx + 1;
1671 this->bmatr_.clear_column(idx, 0);
1675template<
class CharType,
class BV,
unsigned STR_SIZE>
1682 this->size_ += count;
1687template<
class CharType,
class BV,
unsigned STR_SIZE>
1691 set_value_no_null(idx, str);
1693 bv_null->set_bit_no_check(idx);
1698template<
class CharType,
class BV,
unsigned STR_SIZE>
1702 for (
unsigned i = 0;
true; ++i)
1704 CharType ch = str[i];
1707 this->clear_value_planes_from(i*8, idx);
1712 auto r = remap_matrix2_.rows();
1715 remap_matrix1_.resize(i + 1, remap_matrix1_.cols(),
true);
1716 remap_matrix2_.resize(i + 1, remap_matrix2_.cols(),
true);
1718 unsigned char remap_value = remap_matrix2_.get(i,
unsigned(ch));
1722 this->clear_value_planes_from(i*8, idx);
1725 ch = CharType(remap_value);
1727 this->bmatr_.set_octet(idx, i, (
unsigned char)ch);
1733template<
class CharType,
class BV,
unsigned STR_SIZE>
1737 insert_value_no_null(idx, str);
1738 this->insert_null(idx,
true);
1743template<
class CharType,
class BV,
unsigned STR_SIZE>
1747 for (
unsigned i = 0;
true; ++i)
1749 CharType ch = str[i];
1752 this->insert_clear_value_planes_from(i*8, idx);
1758 unsigned char remap_value = remap_matrix2_.get(i,
unsigned(ch));
1762 this->insert_clear_value_planes_from(i*8, idx);
1765 ch = CharType(remap_value);
1767 this->bmatr_.insert_octet(idx, i, (
unsigned char)ch);
1774template<
class CharType,
class BV,
unsigned STR_SIZE>
1784 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1790 remap_matrix1_.remap(str, i);
1796template<
class CharType,
class BV,
unsigned STR_SIZE>
1803 parent_type::optimize(temp_block, opt_mode, &stbv);
1811template<
class CharType,
class BV,
unsigned STR_SIZE>
1818 parent_type::calc_stat(&stbv);
1830 size_t remap_mem_usage =
sizeof(remap_flags_);
1831 remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1832 remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1834 st->memory_used += remap_mem_usage;
1837 st->max_serialize_mem += (remap_mem_usage * 2);
1843template<
class CharType,
class BV,
unsigned STR_SIZE>
1851 for (
unsigned i = 0;
true; ++i)
1853 CharType octet2 = str[i];
1854 CharType octet1 = (CharType)this->bmatr_.get_octet(idx, i);
1860 const unsigned char* remap_row = remap_matrix1_.row(i);
1861 unsigned char remap_value1 = remap_row[unsigned(octet1)];
1863 res = (remap_value1 > octet2) - (remap_value1 < octet2);
1872template<
class CharType,
class BV,
unsigned STR_SIZE>
1880 for (
unsigned i = 0;
true; ++i)
1882 CharType octet2 = str[i];
1883 CharType octet1 = (CharType)this->bmatr_.get_octet(idx, i);
1889 res = (octet1 > octet2) - (octet1 < octet2);
1899template<
class CharType,
class BV,
unsigned STR_SIZE>
1905 int res = remap_flags_ ? compare_remap(idx, str)
1906 : compare_nomap(idx, str);
1912template<
class CharType,
class BV,
unsigned STR_SIZE>
1922 for (
unsigned i = 0;
true; ++i)
1925 CharType octet2 = (CharType)this->bmatr_.get_octet(idx2, i);
1926 CharType octet1 = (CharType)this->bmatr_.get_octet(idx1, i);
1932 const unsigned char* remap_row = remap_matrix1_.row(i);
1933 unsigned char remap_value1 = remap_row[unsigned(octet1)];
1935 unsigned char remap_value2 = remap_row[unsigned(octet2)];
1937 res = (remap_value1 > remap_value2) - (remap_value1 < remap_value2);
1944 for (
unsigned i = 0;
true; ++i)
1946 CharType octet2 = (CharType)this->bmatr_.get_octet(idx2, i);
1947 CharType octet1 = (CharType)this->bmatr_.get_octet(idx1, i);
1953 res = (octet1 > octet2) - (octet1 < octet2);
1964template<
class CharType,
class BV,
unsigned STR_SIZE>
1971 CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
1972 CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
1988template<
class CharType,
class BV,
unsigned STR_SIZE>
2001template<
class CharType,
class BV,
unsigned STR_SIZE>
2006 return this->bmatr_.octet_size();
2011template<
class CharType,
class BV,
unsigned STR_SIZE>
2015 size_type max_str_len = effective_max_str();
2016 octet_matrix.resize(max_str_len, 256,
false);
2017 octet_matrix.set_zero();
2020 for(; it.
valid(); ++it)
2025 for (
unsigned i = 0;
true; ++i)
2031 octet_freq_matrix_type::value_type* row = octet_matrix.row(i);
2032 unsigned ch_idx = (
unsigned char)ch;
2041template<
class CharType,
class BV,
unsigned STR_SIZE>
2047 size_type max_str_len = effective_max_str();
2048 octet_remap_matrix1.resize(max_str_len, 256,
false);
2049 octet_remap_matrix1.set_zero();
2050 octet_remap_matrix2.resize(max_str_len, 256,
false);
2051 octet_remap_matrix2.set_zero();
2053 for (
unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
2055 typename octet_freq_matrix_type::value_type* frq_row =
2056 octet_occupancy_matrix.row(i);
2058 unsigned char* remap_row1 = octet_remap_matrix1.row(i);
2059 unsigned char* remap_row2 = octet_remap_matrix2.row(i);
2061 const typename slice_octet_matrix_type::size_type row_size =
2062 octet_occupancy_matrix.cols();
2063 for (
unsigned remap_code = 1;
true; ++remap_code)
2065 typename octet_freq_matrix_type::size_type char_idx;
2073 unsigned char ch = (
unsigned char)char_idx;
2074 remap_row1[remap_code] = ch;
2075 remap_row2[ch] = (
unsigned char)remap_code;
2076 frq_row[char_idx] = 0;
2083template<
class CharType,
class BV,
unsigned STR_SIZE>
2088 auto rows = remap_matrix1_.rows();
2089 remap_matrix2_.resize(rows, remap_matrix1_.cols(),
false);
2092 remap_matrix2_.set_zero();
2094 for (
unsigned i = 0; i < remap_matrix1_.rows(); ++i)
2096 const unsigned char* remap_row1 = remap_matrix1_.row(i);
2097 unsigned char* remap_row2 = remap_matrix2_.row(i);
2098 for (
unsigned j = 1; j < remap_matrix1_.cols(); ++j)
2102 unsigned ch_code = remap_row1[j];
2103 remap_row2[ch_code] = (
unsigned char)j;
2113template<
class CharType,
class BV,
unsigned STR_SIZE>
2120 for (
unsigned i = 0; i < buf_size; ++i)
2122 CharType ch = str[i];
2128 const unsigned char* remap_row = octet_remap_matrix2.row(i);
2129 unsigned char remap_value = remap_row[unsigned(ch)];
2132 sv_str[i] = CharType(remap_value);
2139template<
class CharType,
class BV,
unsigned MAX_STR_SIZE>
2147 for (
unsigned i = 0; i < buf_size; ++i)
2149 CharType ch = sv_str[i];
2155 const unsigned char* remap_row = octet_remap_matrix1.row(i);
2156 unsigned char remap_value = remap_row[unsigned(ch)];
2159 str[i] = CharType(remap_value);
2166template<
class CharType,
class BV,
unsigned MAX_STR_SIZE>
2170 sv_tmp(this->get_null_support());
2177template<
class CharType,
class BV,
unsigned MAX_STR_SIZE>
2184 sv_tmp(this->get_null_support());
2194template<
class CharType,
class BV,
unsigned STR_SIZE>
2200 remap_from_impl(str_sv, omatrix,
false);
2205template<
class CharType,
class BV,
unsigned STR_SIZE>
2212 const unsigned buffer_size = ins_buf_size;
2222 bm::alloc_pool_guard<typename bvector_type::allocator_pool_type, str_sparse_vector> g1, g2;
2226 g1.assign_if_not_set(pool, *
this);
2227 g2.assign_if_not_set(pool, sv);
2230 pool.set_block_limit(r + 10);
2233 this->clear_all(
true);
2241 omatrix = &occ_matrix;
2246 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
2249 buffer_matrix_type cmatr(buffer_size, str_len);
2252 for (
size_type i{0}, dsize;
true; i += dsize)
2254 dsize = str_sv.
decode(cmatr, i, buffer_size,
true);
2257 if (move_data && (dsize == ins_buf_size))
2266 this->
import(cmatr, i, dsize);
2278 *bv_null = *bv_null_arg;
2289template<
class CharType,
class BV,
unsigned STR_SIZE>
2293 recalc_remap_matrix2();
2298template<
class CharType,
class BV,
unsigned STR_SIZE>
2308 if (remap_flags_ != sv.remap_flags_)
2316 b = remap_matrix1_.equal_overlap(sv.remap_matrix1_);
2319 b = remap_matrix2_.equal_overlap(sv.remap_matrix2_);
2323 return parent_type::equal(sv, null_able);
2328template<
class CharType,
class BV,
unsigned STR_SIZE>
2336 this->clear_all(
true);
2342 this->copy_range_slices(sv, left, right, slice_null);
2343 this->resize(sv.
size());
2348template<
class CharType,
class BV,
unsigned STR_SIZE>
2354 if (this->size_ < arg_size)
2369 this->merge_matr(str_sv.
bmatr_);
2373 bv_null->set_range(0, arg_size-1);
2381template<
class CharType,
class BV,
unsigned STR_SIZE>
2388 this->keep_range_no_check(left, right, slice_null);
2393template<
class CharType,
class BV,
unsigned STR_SIZE>
2399 return it_type(
this);
2404template<
class CharType,
class BV,
unsigned STR_SIZE>
2408 parent_type::clear_all(free_mem);
2413template<
class CharType,
class BV,
unsigned STR_SIZE>
2415 const char* err_msg)
2418 throw std::range_error(err_msg);
2426template<
class CharType,
class BV,
unsigned STR_SIZE>
2428 const char* err_msg)
2432 err_msg =
"Unknown/incomparable dictionary character";
2433 throw std::domain_error(err_msg);
2445template<
class CharType,
class BV,
unsigned STR_SIZE>
2447: sv_(0), substr_from_(0), substr_to_(STR_SIZE),
2454template<
class CharType,
class BV,
unsigned STR_SIZE>
2458 substr_from_(it.substr_from_), substr_to_(it.substr_to_),
2459 pos_(it.pos_), pos_in_buf_(~
size_type(0))
2465template<
class CharType,
class BV,
unsigned STR_SIZE>
2471 substr_to_ = (unsigned) sv_->effective_max_str();
2472 buf_matrix_.resize(n_rows, substr_to_+1);
2477template<
class CharType,
class BV,
unsigned STR_SIZE>
2484 substr_to_ = (unsigned) sv_->effective_max_str();
2485 buf_matrix_.resize(n_rows, substr_to_+1);
2490template<
class CharType,
class BV,
unsigned STR_SIZE>
2494 unsigned max_str = sv_->effective_max_str();
2495 substr_from_ = from;
2499 substr_to_ = from + len;
2504 substr_to_ = substr_from_ + (len - 1);
2508 buf_matrix_.resize(n_rows, len+1,
false);
2513template<
class CharType,
class BV,
unsigned STR_SIZE>
2522 if (!buf_matrix_.is_init())
2525 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2526 substr_from_, substr_to_);
2535 return buf_matrix_.row(pos_in_buf_);
2540template<
class CharType,
class BV,
unsigned STR_SIZE>
2549 if (!buf_matrix_.is_init())
2552 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2553 substr_from_, substr_to_);
2568template<
class CharType,
class BV,
unsigned STR_SIZE>
2574 pos_ = (!sv_ || pos >= sv_->size()) ?
bm::id_max : pos;
2580template<
class CharType,
class BV,
unsigned STR_SIZE>
2588 if (pos_ >= sv_->size())
2595 if (pos_in_buf_ >= n_rows)
2605template<
class CharType,
class BV,
unsigned STR_SIZE>
2607: sv_(0), bv_null_(0), pos_in_buf_(~
size_type(0))
2612template<
class CharType,
class BV,
unsigned STR_SIZE>
2620 bv_null_ = sv_->get_null_bvect();
2621 unsigned esize = (unsigned) sv_->effective_max_str();
2622 if (esize < STR_SIZE)
2624 buf_matrix_.init_resize(n_buf_size, esize);
2628 bv_null_ = 0; prev_nb_ = 0;
2634template<
class CharType,
class BV,
unsigned STR_SIZE>
2637: sv_(bi.sv_), bv_null_(bi.bv_null_), buf_matrix_(bi.buf_matrix_.rows(), bi.buf_matrix_.cols()),
2638 pos_in_buf_(~
size_type(0)), prev_nb_(bi.prev_nb_), opt_mode_(bi.opt_mode_),
2646template<
class CharType,
class BV,
unsigned STR_SIZE>
2654template<
class CharType,
class BV,
unsigned STR_SIZE>
2659 return (pos_in_buf_ == ~
size_type(0) || !sv_);
2664template<
class CharType,
class BV,
unsigned STR_SIZE>
2678template<
class CharType,
class BV,
unsigned STR_SIZE>
2684 size_type imp_idx = sv_->size();
2685 sv_->import_no_check(buf_matrix_, imp_idx, pos_in_buf_+1,
false);
2690 sv_->optimize_block(prev_nb_, opt_mode_);
2698template<
class CharType,
class BV,
unsigned STR_SIZE>
2712 bv_null_->set_bit_no_check(sz + buf_idx + 1);
2717template<
class CharType,
class BV,
unsigned STR_SIZE>
2720 this->add_value(
"");
2725template<
class CharType,
class BV,
unsigned STR_SIZE>
2730 this->add_value(
"");
2736template<
class CharType,
class BV,
unsigned STR_SIZE>
2743 size_t slen = ::strlen(v);
2745 auto orows = omatrix_.rows();
2750 omatrix_.resize(slen, 256,
false);
2751 omatrix_.set_zero();
2755 omatrix_.resize(slen, 256,
true);
2756 for (; orows < omatrix_.rows(); ++orows)
2759 octet_freq_matrix_type::value_type* r = omatrix_.row(orows);
2760 ::memset(r, 0, 256 *
sizeof(r[0]));
2764 for (
size_t i = 0; i < slen; ++i)
2768 octet_freq_matrix_type::value_type* row = omatrix_.row(i);
2769 unsigned ch_idx = (
unsigned char)ch;
2776template<
class CharType,
class BV,
unsigned STR_SIZE>
2785 if (pos_in_buf_ >= buf_matrix_.rows()-1)
2787 if (pos_in_buf_ == ~
size_type(0) && (!buf_matrix_.is_init()))
2791 pos_in_buf_ = 0; buf_matrix_.set_zero();
2801 value_type* r = buf_matrix_.row(pos_in_buf_);
2803 typename buffer_matrix_type::size_type i;
2804 typename buffer_matrix_type::size_type cols = buf_matrix_.cols();
2805 for (i = 0; i < cols; ++i)
2813 for (cols = i;
true; ++cols)
2820 buf_matrix_.
resize(buf_matrix_.rows(), cols + 1);
2822 r = buf_matrix_.row(pos_in_buf_);
2823 cols = buf_matrix_.cols();
2824 for (; i < cols; ++i)
2835template<
class CharType,
class BV,
unsigned STR_SIZE>
2841 bool found = bv_null->find_reverse(this->
size_);
2842 this->
size_ += found;
Algorithms for bvector<> (main include)
basic bit-matrix class and utilities
Constants, lookup tables and typedefs.
#define BM_ASSERT_THROW(x, xerrcode)
Utilities for bit transposition (internal) (experimental!)
Base class for bit-transposed(bit-sliced) sparse vector construction.
void freeze_matr()
Turn on RO mode.
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
void resize(size_type new_size, bool set_null)
const value_type & const_reference
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
void clear_range(size_type left, size_type right, bool set_null)
bvector_type_ptr get_create_slice(unsigned i)
get access to bit-plane, function checks and creates a plane
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
bmatrix_type bmatr_
bit-transposed matrix
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way)
size_type size_
array size
void bit_and_rows(const bvector_type &bv)
Set AND (intersect) operation on all existing bit-slices.
std::make_unsigned< value_type >::type unsigned_value_type
void bit_sub_rows(const bvector_type &bv, bool use_null)
Set SUB (MINUS) operation on all existing bit-slices.
bvector_type * get_null_bvect() BMNOEXCEPT
void clear_value_planes_from(unsigned plane_idx, size_type idx)
bool is_nullable() const BMNOEXCEPT
check if container supports NULL(unassigned) values
Basic dense bit-matrix class.
size_type rows_not_null() const BMNOEXCEPT
void allocate_rows(size_type rsize)
allocate matrix rows of bit-vectors (new rows are NULLs)
size_type rows() const BMNOEXCEPT
void set_octet(size_type pos, size_type octet_idx, unsigned char octet)
unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT
bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT
Constant iterator designed to enumerate "ON" bits.
Bitvector Bit-vector container with runtime compression of bits.
optmode
Optimization mode Every next level means additional checks (better compression vs time)
@ opt_compress
compress blocks when possible (GAP/prefix sum)
allocator_type::allocator_pool_type allocator_pool_type
bvector_size_type size_type
sparse vector de-serializer
Back insert iterator implements buffered insert, faster than generic access assignment.
bvector_type * bv_null_
!< pointer on the parent vector
back_insert_iterator & operator*()
noop
void add_remap_stat(const value_type *v)
account new value as remap statistics
void add(const value_type *v)
add value to the container
size_type pos_in_buf_
!< value buffer
unsigned get_remap() const BMNOEXCEPT
Get current remap state flags.
octet_freq_matrix_type omatrix_
octet frequency matrix
void add_value(const value_type *v)
add value to the buffer without changing the NULL vector
back_insert_iterator & operator++()
noop
void flush()
flush the accumulated buffer.
unsigned remap_flags_
target remapping
buffer_matrix_type buf_matrix_
!< not NULL vector pointer
back_insert_iterator & operator++(int)
noop
str_sparse_vector_type * str_sparse_vector_type_ptr
bvector_type::block_idx_type block_idx_type
str_sparse_vector_type::value_type value_type
str_sparse_vector_type::size_type size_type
void add_null()
add NULL (no-value) to the container
void set_optimize(typename bvector_type::optmode opt_mode) BMNOEXCEPT
Set optimization on load option (default: false)
bvector_type::allocator_type allocator_type
back_insert_iterator & operator=(const value_type *v)
push value to the vector
bvector_type::optmode opt_mode_
!< previous block added
void set_remap(bool flag) BMNOEXCEPT
Method to configure back inserter to collect statistics on optimal character codes.
back_insert_iterator() BMNOEXCEPT
block_idx_type prev_nb_
!< buffer position
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
std::output_iterator_tag iterator_category
bool empty() const BMNOEXCEPT
return true if insertion buffer is empty
const octet_freq_matrix_type & get_octet_matrix() const noexcept
Get octet frequency matrix.
void add_null(size_type count)
add a series of consecutive NULLs (no-value) to the container
str_sparse_vector_type * sv_
allocator_type::allocator_pool_type allocator_pool_type
back_insert_iterator & operator=(const StrType &v)
push value to the vector
str_sparse_vector_type::bvector_type bvector_type
Const iterator to do quick traverse of the sparse vector.
str_sparse_vector_type * str_sparse_vector_type_ptr
std::input_iterator_tag iterator_category
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator<(const const_iterator &it) const BMNOEXCEPT
bool operator!=(const const_iterator &it) const BMNOEXCEPT
dynamic_heap_matrix< CharType, allocator_type > buffer_matrix_type
bool is_null() const BMNOEXCEPT
Get NULL status.
const value_type * operator*() const
Get current position (value)
const value_type * value() const
Get zero terminated string value at the current position.
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type allocator_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
void advance() BMNOEXCEPT
advance iterator forward by one
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
const_iterator() BMNOEXCEPT
Construct iterator (not attached to any particular vector)
bool operator<=(const const_iterator &it) const BMNOEXCEPT
str_sparse_vector_type::bvector_type bvector_type
long long difference_type
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
str_sparse_vector_type::size_type size_type
str_sparse_vector_type::value_type value_type
string_view_type get_string_view() const
Get current string as string_view.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
const_iterator & operator++(int) BMNOEXCEPT
Advance to the next available value.
void set_substr(unsigned from, unsigned len=0) BMNOEXCEPT
setup iterator to retrieve a sub-string of a string
std::basic_string_view< CharType > string_view_type
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
Reference class to access elements via common [] operator.
const_reference(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
bool is_null() const BMNOEXCEPT
const value_type * get() const BMNOEXCEPT
bool operator==(const const_reference &ref) const BMNOEXCEPT
bm::heap_vector< CharType, typename bvector_type::allocator_type, true > bufffer_type
Reference class to access elements via common [] operator.
const value_type * get() const BMNOEXCEPT
reference(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
reference & operator=(const value_type *str)
bool is_null() const BMNOEXCEPT
reference & operator=(const reference &ref)
bool operator==(const reference &ref) const BMNOEXCEPT
succinct sparse vector for strings with compression using bit-slicing (transposition) method
void insert(size_type idx, const value_type *str)
insert the specified element
void swap(str_sparse_vector &str_sv) BMNOEXCEPT
void clear_all(bool free_mem) BMNOEXCEPT
resize to zero, free memory
void calc_octet_stat(octet_freq_matrix_type &octet_matrix) const
bool is_ro() const BMNOEXCEPT
Returns true if vector is read-only.
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
void resize(size_type sz)
resize vector
void calc_stat(struct str_sparse_vector< CharType, BV, STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
bool empty() const
return true if vector is empty
void recalc_remap_matrix2()
static size_type max_str()
get maximum string length capacity
unsigned char * init_remap_buffer()
void set_null(size_type idx)
set NULL status for the specified element Vector is resized automatically
unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT
Find size of common prefix between two vector elements in octets.
void set_null(const bvector_type &bv_idx)
Set NULL all elements set as 1 in the argument vector.
void remap_from_impl(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix, bool move_data)
Remap from implementation, please note that move_data flag can violate const-ness.
void sync(bool force)
synchronize internal structures
slice_octet_matrix_type remap_matrix1_
octet remap table 1
const bvector_type * bvector_type_const_ptr
str_sparse_vector< CharType, BV, STR_SIZE > & merge(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
merge with another sparse vector using OR operation Merge is different from join(),...
static constexpr bool is_compressed() BMNOEXCEPT
various type traits
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
bm::dynamic_heap_matrix< unsigned char, allocator_type > slice_octet_matrix_type
Matrix of character remappings.
bool equal(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
void remap_from(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix=0)
Build remapping profile and load content from another sparse vector Remapped vector likely saves memo...
size_type size() const
return size of the vector
int compare_nomap(size_type idx, const value_type *str) const BMNOEXCEPT
Variant of compare for non-mapped vectors.
void build_octet_remap(slice_octet_matrix_type &octet_remap_matrix1, slice_octet_matrix_type &octet_remap_matrix2, octet_freq_matrix_type &octet_occupancy_matrix) const
void keep(const bvector_type &bv_idx)
Set NULL all elements NOT set as 1 in the argument vector.
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
void import_char_slice(const unsigned_value_type *ch_slice, unsigned ch_acc, size_type char_slice_idx, size_type idx_from, size_type imp_size)
bm::basic_bmatrix< BV > bmatrix_type
bool is_remap() const BMNOEXCEPT
Get character remapping status (true | false)
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
CharType * value_type_prt
void push_back(const StrType &str)
push back a string
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
bvector_type::allocation_policy allocation_policy_type
void clear(const bvector_type &bv_idx)
Set vector elements specified by argument bit-vector to empty Note that set to empty elements are NOT ...
void freeze()
Turn sparse vector into immutable mode Read-only (immutable) vector uses less memory and allows faste...
BV::allocator_type allocator_type
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
bm::dynamic_heap_matrix< size_t, allocator_type > octet_freq_matrix_type
Matrix of character frequencies (for optimal code remap)
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
slice_octet_matrix_type remap_matrix2_
octet remap table 2
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
parent_type::unsigned_value_type unsigned_value_type
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
void resize_internal(size_type sz)
static bool find_rank(size_type rank, size_type &pos) BMNOEXCEPT
find position of compressed element by its rank
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
bool try_get(size_type idx, StrType &str) const
get specified string element if NOT NULL Template method expects an STL-compatible type basic_string<...
void erase(size_type idx)
erase the specified element
int compare_remap(size_type idx, const value_type *str) const BMNOEXCEPT
Variant of compare for remapped vectors.
size_type effective_size() const BMNOEXCEPT
size of sparse vector (may be different for RSC)
const unsigned char * get_remap_buffer() const
bvector_type * bvector_type_ptr
str_sparse_vector(str_sparse_vector< CharType, BV, STR_SIZE > &&str_sv) BMNOEXCEPT
void insert(size_type idx, const StrType &str)
insert STL string
static void throw_bad_value(const char *err_msg)
throw domain error
void get(size_type idx, StrType &str) const
get specified string element. Template method expects an STL-compatible type basic_string<>
reference operator[](size_type idx)
Operator to get write access to an element
static constexpr bool is_str() BMNOEXCEPT
void push_back(const value_type *str)
push back a string (zero terminated)
str_sparse_vector< CharType, BV, STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
size_type decode_substr(CharMatrix &cmatr, size_type idx_from, size_type dec_size, unsigned substr_from, unsigned substr_to, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
const remap_matrix_type * get_remap_matrix() const
base_sparse_vector< CharType, BV, STR_SIZE > parent_type
void push_back_null()
push back NULL value
size_type effective_vector_max() const
get effective string length used in vector
void remap()
Build remapping profile and re-load content to save memory.
back_insert_iterator get_back_inserter()
Provide back insert iterator. Back insert iterator implements buffered insertion, which is faster,...
size_type effective_max_str() const BMNOEXCEPT
get effective string length used in vector. Calculates and returns efficiency, how close are we to the ...
void sync_size() BMNOEXCEPT
recalculate size to exclude tail NULL elements. After this call size() will return the true size of th...
slice_octet_matrix_type remap_matrix_type
bvector_type::enumerator bvector_enumerator_type
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_iterator re-positioned to specific element.
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
void copy_range(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, size_type left, size_type right, bm::null_support slice_null=bm::use_null)
copy range of values from another sparse vector
size_t remap_size() const
static void throw_range_error(const char *err_msg)
throw range error
str_sparse_vector< CharType, BV, STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all planes)
void clear() BMNOEXCEPT
resize to zero, free memory
size_type size_internal() const
static bool remap_fromsv(value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
static bool remap_tosv(value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
unsigned remap_flags_
remapping status
remap_matrix_type * get_remap_matrix()
const const_reference operator[](size_type idx) const
Operator to get read access to an element
void keep_range(size_type left, size_type right, bm::null_support slice_null=bm::use_null)
Keep only specified interval in the sparse vector, clear all other elements.
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::size_type size_type
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
null_support
NULL-able value support.
@ use_null
support "non-assigned" or "NULL" logic
@ no_null
do not support NULL values
bool find_first_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find first non-zero value in an array.
int for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplate checks.
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
@ COPY_RTABLES
copy remap tables only (without data)
bool find_max_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
const unsigned gap_max_bits
const unsigned set_block_shift
Structure with statistical information about memory allocation footprint, serialization projection,...
size_t gap_cap_overhead
gap memory overhead between length and capacity
size_t ptr_sub_blocks
Number of sub-blocks.
size_t gap_blocks
Number of GAP blocks.
size_t bit_blocks
Number of bit blocks.
size_t bv_count
Number of bit-vectors.
size_t max_serialize_mem
estimated maximum memory for serialization
size_t memory_used
memory usage for all blocks and service tables
void add(const bv_statistics &st) BMNOEXCEPT
Sum data from another structure.
Statistical information about bitset's memory allocation details.