#include <iostream>
#include <string>
#include <vector>
#include <random>
#include <algorithm>
#include "bmdbg.h"
using namespace std;
/// Generate a collection of strings from integers and shuffle it.
///
/// Starting at 10, every visited value below max_coll is converted to its
/// decimal text and appended to str_vec. The step between visited values is
/// rand() % 3, so a value may be emitted more than once (step 0) or a value
/// may be skipped (step 2). The finished collection is shuffled with a
/// std::mt19937 engine seeded from std::random_device.
///
/// @param str_vec   output vector; any previous content is discarded
/// @param max_coll  exclusive upper bound for the generated integers
static
void generate_string_set(std::vector<std::string>& str_vec,
                         const unsigned max_coll = 250000)
{
    str_vec.clear();

    // A zero step is allowed on purpose by the original formula: it yields
    // duplicate strings in the collection.
    for (unsigned i = 10; i < max_coll; i += unsigned(rand() % 3))
        str_vec.emplace_back(std::to_string(i));

    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(str_vec.begin(), str_vec.end(), g);
}
// NOTE(review): the function header that owns this body is not visible in this
// view; the body returns 0 on success and 1 on failure, so it is presumably the
// program entry point — confirm.
{
try
{
std::vector<string> str_vec;
// NOTE(review): `bit`, `str_sv` and `str_sv_sorted` are used below, but their
// declarations are not visible in this span. No visible code fills str_vec
// before the loop either — a call that populates it is presumably missing here.
{
// Assign every string of str_vec, in order, to `bit`.
for (auto it = str_vec.begin(); it != str_vec.end(); ++it)
bit = *it;
}
cout << endl;
// Index vector with one entry per element of str_sv, filled with 0, 1, 2, ...
std::vector<uint32_t> index(str_sv.
size());
std::generate(index.begin(), index.end(),
[n = 0] () mutable { return n++; });
{{
#if (1)
// Active variant: order the indexes by the strings they refer to in str_sv.
std::sort(index.begin(), index.end(),
[&str_sv](const uint32_t l, const uint32_t r)
{
// Per-thread cache of the right-hand string: it is re-read from str_sv only
// when the right-hand index differs from the one seen on the previous call.
static thread_local string last_right_str;
static thread_local uint32_t last_right = uint32_t(-1);
if (last_right != r)
{
last_right = r;
str_sv.get(last_right, last_right_str);
}
// Compare element l against the cached right-hand string.
return str_sv.
compare(l, last_right_str.c_str()) < 0;
}
);
#else
// Disabled variant: compare the two elements directly by index.
std::sort(index.begin(), index.end(),
[&str_sv](const uint32_t l, const uint32_t r)
{ return str_sv.compare(l, r) < 0; }
);
#endif
}}
{
// Read the elements of str_sv in sorted-index order through a fixed 1024-byte
// buffer and assign each one to `bit`, then flush it.
// NOTE(review): `bit` is presumably a back inserter feeding str_sv_sorted —
// its declaration is not visible here; confirm.
char buf[1024];
for (auto it = index.begin(); it != index.end(); ++it)
{
auto i = *it;
str_sv.
get(i, buf,
sizeof(buf));
bit = (const char*)buf;
}
bit.flush();
}
// Both containers must hold the same number of elements.
assert(str_sv_sorted.
size()==str_sv.
size());
// Sort the std::vector copy so it can serve as the reference ordering.
std::sort(str_vec.begin(), str_vec.end());
{
// Walk str_sv_sorted and the sorted str_vec in parallel; report the first
// mismatch on stderr and exit with status 1.
std::vector<string>::const_iterator sit = str_vec.begin();
auto it = str_sv_sorted.
begin();
auto it_end = str_sv_sorted.
end();
for (; it != it_end; ++it, ++sit)
{
string s = *it;
if (*sit != s)
{
cerr << "Mismatch at:" << s << "!=" << *sit << endl;
return 1;
}
}
}
cout << "Sort validation Ok." << endl << endl;
cout << "Memory footprint statistics:\n" << endl;
// NOTE(review): the next three output statements appear truncated in this view —
// the value being printed and the terminating ';' are not visible.
cout << "SV sorted(before remap) memory_used : "
cout << "SV unsorted memory_used : "
cout << "SV sorted(after remap) memory_used : "
}
// Any std::exception is reported on stderr and turned into exit status 1.
catch(std::exception& ex)
{
std::cerr << ex.what() << std::endl;
return 1;
}
return 0;
}
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Algorithms for bm::sparse_vector.
string sparse vector based on bit-transposed matrix
Timing utilities for benchmarking (internal)
pre-processor un-defines to avoid global space pollution (internal)
Bitvector: bit-vector container with runtime compression of bits.
Utility class to collect performance measurements and statistics.
void flush()
flush the accumulated buffer.
succinct sparse vector for strings with compression using bit-slicing (transposition) method
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
void calc_stat(struct str_sparse_vector< CharType, BV, STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
size_type size() const
return size of the vector
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
void remap()
Build remapping profile and re-load content to save memory.
back_insert_iterator get_back_inserter()
Provide back insert iterator. Back insert iterator implements buffered insertion, which is faster,...
static void generate_string_set(vector< string > &str_vec, const unsigned max_coll=250000)
generate collection of strings from integers and shuffle it
bm::str_sparse_vector< char, bvector_type, 3 > str_sv_type
size_t memory_used
memory usage for all blocks and service tables