Example of how to use collaborative compression (XOR compression) for a group of bit-transposed sparse vectors in a data-frame
#include <iostream>
#include <vector>
#include <cassert>
using namespace std;
{
{}
};
inline
{
size_t sz = 0;
return sz;
}
static
{
for (unsigned i = 0; i < 65536; i+=7)
{
}
}
static
{
for (unsigned i = 0; i < 65536; i+=7)
{
assert(v1 == 4); (void)v1;
assert(v2 == 8); (void)v2;
assert(v3 == 17); (void)v3;
assert(v0 == i % 256); (void)v0;
}
}
/// Append the bytes held by a serialization layout object to a raw buffer.
///
/// Copies exactly sv_lay.size() bytes, read from sv_lay.buf(), to the
/// location pointed to by buf_ptr.
///
/// @param buf_ptr  destination write position; the caller must make sure at
///                 least sv_lay.size() bytes are available there
/// @param sv_lay   object exposing size() and buf() for the bytes to copy
/// @return pointer just past the last byte written (buf_ptr + sv_lay.size()),
///         i.e. the position where the next chunk can be appended
template<typename SVLay>
unsigned char*
copy_buffer(
unsigned char* buf_ptr,
const SVLay& sv_lay)
{
    const auto n_bytes = sv_lay.size();
    // The next write position depends only on the byte count, so work it out up front
    unsigned char* const next_pos = buf_ptr + n_bytes;
    ::memcpy(buf_ptr, sv_lay.buf(), n_bytes);
    return next_pos;
}
static
std::vector<unsigned char>& buf,
{
size_t sz = (sizeof(size_t) * 3) +
buf.resize(sz);
unsigned char* buf_ptr = buf.data();
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
}
static
const std::vector<unsigned char>& buf,
{
assert(buf.size() > sizeof(size_t)*3);
size_t sz1, sz2, sz3, sz0;
const unsigned char* data_ptr = buf.data();
::memcpy(&sz1, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
::memcpy(&sz2, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
::memcpy(&sz3, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
::memcpy(&sz0, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
data_ptr += sz1;
data_ptr += sz2;
data_ptr += sz3;
data_ptr += sz0;
}
static
std::vector<unsigned char>& buf,
{
try
{
size_t sz = (sizeof(size_t) * 4) +
buf.resize(sz);
unsigned char* buf_ptr = buf.data();
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
{
::memcpy(buf_ptr, &s, sizeof(s));
buf_ptr += sizeof(s);
}
}
catch (...)
{
throw;
}
}
static
const std::vector<unsigned char>& buf,
{
assert(buf.size() > sizeof(size_t)*3);
try
{
size_t sz1, sz2, sz3, sz0;
const unsigned char* data_ptr = buf.data();
::memcpy(&sz1, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
::memcpy(&sz2, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
::memcpy(&sz3, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
::memcpy(&sz0, data_ptr, sizeof(size_t));
data_ptr += sizeof(size_t);
data_ptr += sz1;
data_ptr += sz2;
data_ptr += sz3;
data_ptr += sz0;
data_ptr = buf.data() + (4 * sizeof(size_t));
data_ptr += sz1;
data_ptr += sz2;
data_ptr += sz3;
data_ptr += sz0;
}
catch (...)
{
throw;
}
}
{
try
{
std::vector<unsigned char> buf0, buf2;
{
cout << "raw size: " << raw_size << endl;
cout << "Plain serializarion: " << buf0.size() << endl;
cout << "XOR data frame serialization: " << buf2.size() << endl;
}
{
}
{
}
}
catch(std::exception& ex)
{
std::cerr << ex.what() << std::endl;
return 1;
}
return 0;
}
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Sparse container sparse_vector<> for integer types using bit-transposition transform.
Compressed sparse container rsc_sparse_vector<> for integer types.
Serialization for sparse_vector<>
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
List of reference bit-vectors with their true index associations.
void add_vectors(const BMATR &bmatr)
Append basic bit-matrix to the list of reference vectors.
void add_sparse_vector(const SV &sv)
Add bit-transposed sparse vector as a bit-matrix.
Bitvector Bit-vector container with runtime compression of bits.
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
void sync(bool force)
Re-calculate rank-select index for faster access to vector.
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
size_type size() const BMNOEXCEPT
return size of the vector
sparse vector de-serializer
void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)
void deserialize_structure(SV &sv, const unsigned char *buf)
void set_xor_ref(bv_ref_vector_type *bv_ref_ptr)
Set external XOR reference vectors (data frame reference vectors)
void set_xor_ref(bool is_enabled) BMNOEXCEPT
Turn ON and OFF XOR compression of sparse vectors. Enables XOR reference compression for the sparse ve...
void compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
Calculate XOR similarity model for ref_vector; the reference vector must be associated beforehand.
void set_sim_model(const xor_sim_model_type *sim_model) BMNOEXCEPT
Attach serializer to a pre-computed similarity model.
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
size_type size() const BMNOEXCEPT
return size of the vector
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
@ use_null
support "non-assigned" or "NULL" logic
bm::sparse_vector_serializer< sparse_vector_u16 > sv16_serializer_type
static void fill_test_data(sample_data_frame &df)
generate some data just to illustrate the case
bm::sparse_vector_serializer< rsc_sparse_vector_u32 > csv_serializer_type
bm::sparse_vector< unsigned, bvector_type > sparse_vector_u32
static void deserialize_df0(sample_data_frame &df, const std::vector< unsigned char > &buf, csv_deserializer_type &csv_dser, sv16_deserializer_type &sv16_dser)
Simple (individual) de-serialization of vectors in the data-frame.
bm::sparse_vector_deserializer< rsc_sparse_vector_u32 > csv_deserializer_type
bm::sparse_vector< unsigned short, bvector_type > sparse_vector_u16
bm::rsc_sparse_vector< unsigned, sparse_vector_u32 > rsc_sparse_vector_u32
static void test_data(sample_data_frame &df)
paranoia check to make sure data frame matches pre-generated values
bm::sparse_vector_deserializer< sparse_vector_u16 > sv16_deserializer_type
unsigned char * copy_buffer(unsigned char *buf_ptr, const SVLay &sv_lay)
Copy buffer content into the buffer.
static void serialize_df0(const sample_data_frame &df, std::vector< unsigned char > &buf, csv_serializer_type &csv_ser, sv16_serializer_type &sv16_ser)
serialize with disabled XOR compression
static void deserialize_df2(sample_data_frame &df, const std::vector< unsigned char > &buf, csv_deserializer_type &csv_dser, sv16_deserializer_type &sv16_dser)
Collaborative de-serialization of vectors in the data-frame.
static void serialize_df2(const sample_data_frame &df, std::vector< unsigned char > &buf, csv_serializer_type &csv_ser, sv16_serializer_type &sv16_ser)
Data frame serialization using collaborative method (XOR compression)
size_t raw_data_size(const sample_data_frame &df)
Estimate raw data size.
layout class for serialization buffer structure
size_t size() const BMNOEXCEPT
return current serialized size
Parameters for XOR similarity search.
sample data-frame structure with multiple bm vectors
rsc_sparse_vector_u32 csv3
rsc_sparse_vector_u32 csv2
rsc_sparse_vector_u32 csv1