BitMagic-C++
bmsparsevec_serial.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_SERIAL__H__INCLUDED__
2 #define BMSPARSEVEC_SERIAL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmsparsevec_serial.h
22  \brief Serialization for sparse_vector<>
23 */
24 
25 
26 #ifndef BM__H__INCLUDED__
27 // BitMagic utility headers do not include main "bm.h" declaration
28 // #include "bm.h" or "bm64.h" explicitly
29 # error missing include (bm.h or bm64.h)
30 #endif
31 
32 #include "bmsparsevec.h"
33 #include "bmserial.h"
34 #include "bmbuffer.h"
35 #include "bmdef.h"
36 
37 namespace bm
38 {
39 
40 /** \defgroup svserial Sparse vector serialization
41  Sparse vector serialization
42  \ingroup svector
43  */
44 
45 
46 /*!
47  \brief layout class for serialization buffer structure
48 
49  Class keeps a memory block sized for the target sparse vector BLOB.
50  This class also provides access to bit-plane memory, so it becomes possible
51  to use parallel storage methods to save bit-plains into
52  different storage shards.
53 
54  \ingroup svserial
55 */
56 template<class SV>
58 {
59  typedef typename SV::value_type value_type;
60  typedef typename SV::bvector_type bvector_type;
62 
64 
66 
67  /*!
68  \brief resize capacity
69  \param capacity - new capacity
70  \return new buffer or 0 if failed
71  */
72  unsigned char* reserve(size_t capacity)
73  {
74  if (capacity == 0)
75  {
76  freemem();
77  return 0;
78  }
79  buf_.reinit(capacity);
80  return buf_.data();
81  }
82 
83  /// return current serialized size
84  size_t size() const { return buf_.size(); }
85 
86  /// Set new serialized size
87  void resize(size_t ssize) { buf_.resize(ssize); }
88 
89  /// return serialization buffer capacity
90  size_t capacity() const { return buf_.capacity(); }
91 
92  /// free memory
93  void freemem() { buf_.release(); }
94 
95  /// Set plain output pointer and size
96  void set_plain(unsigned i, unsigned char* ptr, size_t buf_size)
97  {
98  plain_ptrs_[i] = ptr;
99  plane_size_[i] = buf_size;
100  }
101 
102  /// Get plain pointer
103  const unsigned char* get_plain(unsigned i) const { return plain_ptrs_[i]; }
104 
105  /// Return serialization buffer pointer
106  const unsigned char* buf() const { return buf_.buf(); }
107  /// Return serialization buffer pointer
108  const unsigned char* data() const { return buf_.buf(); }
109 
110 private:
112  void operator=(const sparse_vector_serial_layout&);
113 protected:
114  buffer_type buf_; ///< serialization buffer
115  unsigned char* plain_ptrs_[SV::sv_plains]; ///< pointers on serialized bit-plains
116  size_t plane_size_[SV::sv_plains]; ///< serialized plain size
117 };
118 
119 // -------------------------------------------------------------------------
120 
121 /*!
122  \brief Serialize sparse vector into a memory buffer(s) structure
123 
124  Serialization format:
125 
126  | HEADER | BIT-VECTORS ... | REMAP_MATRIX
127 
128  Header structure:
129  -----------------
130  BYTE+BYTE: Magic-signature 'BM' or 'BC' (c-compressed)
131  BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
132  {
133  BYTE : Number of Bit-vector plains (total) (non-zero when < 255 plains)
134  |
135  BYTE: zero - flag of large plain matrix
136  INT64: Number of bit-vector plains
137  }
138  INT64: Vector size
139  INT64: Offset of plain 0 from the header start (value 0 means plain is empty)
140  INT64: Offset of plain 1 from
141  ...
142  INT32: reserved
143 
144 Bit-vectors:
145 ------------
146  Based on current bit-vector serialization
147 
148 Remap Matrix:
149  SubHeader | Matrix BLOB
150 
151  sub-header:
152  BYTE: 'R' (remapping) or 'N' (no remapping)
153  N - means no other info is saved on the stream
154  INT64: remap matrix size
155 
156  \ingroup svector
157  \ingroup svserial
158 */
159 template<typename SV>
161 {
162 public:
163  typedef typename SV::bvector_type bvector_type;
164  typedef const bvector_type* bvector_type_const_ptr;
165  typedef bvector_type* bvector_type_ptr;
166  typedef typename SV::value_type value_type;
167  typedef typename SV::size_type size_type;
170  typedef typename
173 
174 public:
176 
177 
178  /*! @name Compression settings */
179  //@{
180 
181  /**
182  Add skip-markers for faster range deserialization
183 
184  @param enable - if TRUE, serialization will add bookmark codes
185  @param bm_interval - bookmark interval (in number of blocks)
186  (suggested between 4 and 512)
187  a smaller interval adds more bookmarks to the skip list, which
188  increases the BLOB size
189  */
190  void set_bookmarks(bool enable, unsigned bm_interval = 256)
191  { bvs_.set_bookmarks(enable, bm_interval); }
192 
193 
194  /**
195  Enable XOR compression on vector serialization
196  @sa set_xor_ref
197  @sa disable_xor_compression
198  */
200  { set_xor_ref(true); }
201 
202  /**
203  Disable XOR compression on serialization
204  */
206  { set_xor_ref((const bv_ref_vector_type*)0); }
207 
208  /** Turn ON and OFF XOR compression of sparse vectors
209  Enables XOR reference compression for the sparse vector.
210  Default: disabled
211  Reference bit-vectors from the sparse vector itself
212  */
213  void set_xor_ref(bool is_enabled) BMNOEXCEPT;
214 
215  /** Enable external XOR serialization via external reference vectors
216  (data frame ref. vector).
217  This method is useful when we serialize a group of related
218  sparse vectors which benefit from XOR referential compression
219 
220  @param bv_ref_ptr - external reference vector
221  if NULL - resets the use of reference vector
222  */
223  void set_xor_ref(const bv_ref_vector_type* bv_ref_ptr) BMNOEXCEPT;
224 
225  /**
226  Returns the XOR reference compression status (enabled/disabled)
227  */
228  bool is_xor_ref() const BMNOEXCEPT { return is_xor_ref_; }
229 
230  //@}
231 
232 
233  /*!
234  \brief Serialize sparse vector into a memory buffer(s) structure
235 
236  \param sv - sparse vector to serialize
237  \param sv_layout - buffer structure to keep the result
238  as defined in bm::serialization_flags
239  */
240  void serialize(const SV& sv,
242 
243  /** Get access to the underlying bit-vector serializer
244  This access can be used to fine tune compression settings
245  @sa bm::serializer::set_compression_level
246  */
248  { return bvs_; }
249 
250 
251 protected:
252  void build_xor_ref_vector(const SV& sv);
253 
254  static
255  void build_plane_digest(bvector_type& digest_bv, const SV& sv);
256 
257  typedef typename SV::remap_matrix_type remap_matrix_type;
258 
259  /// serialize the remap matrix used for SV encoding
260  void encode_remap_matrix(bm::encoder& enc, const SV& sv);
261 
262  typedef bm::heap_vector<unsigned, alloc_type, true> u32_vector_type;
265 private:
267  sparse_vector_serializer& operator=(const sparse_vector_serializer&) = delete;
268 
269 protected:
271 
272  bvector_type plane_digest_bv_; ///< bv.digest of bit-planes
273  buffer_type plane_digest_buf_; ///< serialization buf
274  u32_vector_type plane_off_vect_;
275 
276  u32_vector_type remap_rlen_vect_;
277  // XOR compression member vars
279  bv_ref_vector_type bv_ref_;
280  const bv_ref_vector_type* bv_ref_ptr_;
281 };
282 
283 /**
284  sparse vector de-serializer
285 
286 */
287 template<typename SV>
289 {
290 public:
291  typedef typename SV::bvector_type bvector_type;
292  typedef const bvector_type* bvector_type_const_ptr;
293  typedef bvector_type* bvector_type_ptr;
294  typedef typename SV::value_type value_type;
295  typedef typename SV::size_type size_type;
298 
299 public:
302 
303  /** Set external XOR reference vectors
304  (data frame reference vectors)
305 
306  @param bv_ref_ptr - external reference vector
307  if NULL - resets the use of reference
308  */
309  void set_xor_ref(bv_ref_vector_type* bv_ref_ptr);
310 
311  /*!
312  Deserialize sparse vector
313 
314  @param sv - [out] target sparse vector to populate
315  @param buf - input BLOB source memory pointer
316  @param clear_sv - if true clears the target vector
317 
318  @sa deserialize_range
319  */
320  void deserialize(SV& sv,
321  const unsigned char* buf,
322  bool clear_sv = true);
323 
324  /*!
325  Deserialize sparse vector for the range [from, to]
326 
327  @param sv - [out] target sparse vector to populate
328  @param buf - input BLOB source memory pointer
329  @param from - start vector index for deserialization range
330  @param to - end vector index for deserialization range
331  @param clear_sv - if true clears the target vector
332 
333  */
334  void deserialize_range(SV& sv, const unsigned char* buf,
335  size_type from, size_type to,
336  bool clear_sv = true);
337 
338  /*!
339  Better use deserialize_range()
340  @sa deserialize_range
341  */
342  void deserialize(SV& sv, const unsigned char* buf,
343  size_type from, size_type to)
344  {
345  deserialize_range(sv, buf, from, to);
346  }
347 
348 
349 
350  /*!
351  Deserialize sparse vector using address mask vector
352  Address mask defines (by set bits) which vector elements to be extracted
353  from the compressed BLOB
354 
355  @param sv - [out] target sparse vector to populate
356  @param buf - source memory pointer
357  @param mask_bv - AND mask bit-vector (address gather vector)
358  */
359  void deserialize(SV& sv,
360  const unsigned char* buf,
361  const bvector_type& mask_bv)
362  { idx_range_set_ = false;
363  deserialize_sv(sv, buf, &mask_bv, true);
364  }
365 
366 
367  /*!
368  Load serialization descriptor, create vectors
369  but DO NOT perform full deserialization
370  @param sv - [out] target sparse vector to populate
371  @param buf - source memory pointer
372  */
373  void deserialize_structure(SV& sv,
374  const unsigned char* buf);
375 
376 
377 protected:
379 
380 
381  /// Deserialize header/version and other common info
382  ///
383  /// @return number of bit-plains
384  ///
385  unsigned load_header(bm::decoder& dec, SV& sv, unsigned char& matr_s_ser);
386 
387  void deserialize_sv(SV& sv, const unsigned char* buf,
388  const bvector_type* mask_bv,
389  bool clear_sv);
390 
391 
392  /// deserialize bit-vector plains
393  void deserialize_plains(SV& sv, unsigned plains,
394  const unsigned char* buf,
395  const bvector_type* mask_bv = 0);
396 
397  /// load offset table
398  void load_plains_off_table(const unsigned char* buf, bm::decoder& dec, unsigned plains);
399 
400  /// load NULL bit-plain (returns new plains count)
401  int load_null_plain(SV& sv,
402  int plains,
403  const unsigned char* buf,
404  const bvector_type* mask_bv);
405 
406  /// load string remap dict
407  void load_remap(SV& sv, const unsigned char* remap_buf_ptr);
408 
409  /// throw error on incorrect deserialization
410  static void raise_invalid_header();
411  /// throw error on incorrect deserialization
412  static void raise_invalid_64bit();
413  /// throw error on incorrect deserialization
414  static void raise_invalid_bitdepth();
415  /// throw error on incorrect deserialization
416  static void raise_invalid_format();
417  /// throw error on incorrect deserialization
418  static void raise_missing_remap_matrix();
419  /// setup deserializers
420  void setup_xor_compression();
421 
422  /// unset XOR compression vectors
423  void clear_xor_compression();
424 
425 private:
427  sparse_vector_deserializer& operator=(const sparse_vector_deserializer&) = delete;
428 
429  typedef bm::heap_vector<unsigned, alloc_type, true> rlen_vector_type;
430 
431 protected:
432  const unsigned char* remap_buf_ptr_;
433  alloc_type alloc_;
435  allocator_pool_type pool_;
436 
437  bvector_type plane_digest_bv_; // digest of bit-planes
440 
444  bvector_type not_null_mask_bv_;
445  bvector_type rsc_mask_bv_;
446  bm::heap_vector<size_t, alloc_type, true> off_vect_;
447  bm::heap_vector<unsigned, alloc_type, true> off32_vect_;
448  rlen_vector_type remap_rlen_vect_;
449 
450  // XOR compression variables
451  bv_ref_vector_type bv_ref_; ///< reference vector
452  bv_ref_vector_type* bv_ref_ptr_; ///< external ref
453 
454  // Range deserialization parameters
456  size_type idx_range_from_;
457  size_type idx_range_to_;
458 };
459 
460 
461 
462 /*!
463  \brief Serialize sparse vector into a memory buffer(s) structure
464 
465  \param sv - sparse vector to serialize
466  \param sv_layout - buffer structure to keep the result
467  \param temp_block - temporary buffer
468  (allocate with BM_DECLARE_TEMP_BLOCK(x) for speed)
469 
470  \ingroup svserial
471 
472  @sa serialization_flags
473  @sa sparse_vector_deserializer
474 */
475 template<class SV>
477  const SV& sv,
479  bm::word_t* temp_block = 0)
480 {
481  (void)temp_block;
482  bm::sparse_vector_serializer<SV> sv_serializer;
483  sv_serializer.serialize(sv, sv_layout);
484 }
485 
486 // -------------------------------------------------------------------------
487 
488 /*!
489  \brief Deserialize sparse vector
490  \param sv - target sparse vector
491  \param buf - source memory buffer
492  \param temp_block - temporary block buffer to avoid re-allocations
493 
494  \return 0 (error processing via std::logic_error)
495 
496  \ingroup svserial
497  @sa sparse_vector_deserializer
498 */
499 template<class SV>
501  const unsigned char* buf,
502  bm::word_t* temp_block=0)
503 {
504  (void)temp_block;
505  bm::sparse_vector_deserializer<SV> sv_deserializer;
506  sv_deserializer.deserialize(sv, buf);
507  return 0;
508 }
509 
510 // -------------------------------------------------------------------------
511 
512 /**
513  Serializer for compressed collections
514 */
515 template<class CBC>
517 {
518 public:
520  typedef typename CBC::bvector_type bvector_type;
521  typedef typename CBC::buffer_type buffer_type;
522  typedef typename CBC::statistics statistics_type;
523  typedef typename CBC::address_resolver_type address_resolver_type;
524 
525 public:
526  void serialize(const CBC& buffer_coll,
527  buffer_type& buf,
528  bm::word_t* temp_block = 0);
529 };
530 
531 /**
532  Deserializer for compressed collections
533 */
534 template<class CBC>
536 {
537 public:
539  typedef typename CBC::bvector_type bvector_type;
541  typedef typename CBC::buffer_type buffer_type;
542  typedef typename CBC::statistics statistics_type;
543  typedef typename CBC::address_resolver_type address_resolver_type;
544  typedef typename CBC::container_type container_type;
545 
546 public:
547  int deserialize(CBC& buffer_coll,
548  const unsigned char* buf,
549  bm::word_t* temp_block=0);
550 };
551 
552 
553 // -------------------------------------------------------------------------
554 
555 /**
556  \brief Serialize compressed collection into memory buffer
557 
558 Serialization format:
559 
560 
561 <pre>
562  | MAGIC_HEADER | ADDRESS_BITVECTOR | LIST_OF_BUFFER_SIZES | BUFFER(s)
563 
564  MAGIC_HEADER:
565  BYTE+BYTE: Magic-signature 'BM' or 'BC'
566  BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
567 
568  ADDRESS_BITVECTOR:
569  INT64: address bit-vector size
570  [memblock]: serialized address bit-vector
571 
572  LIST_OF_BUFFER_SIZES:
573  INT64 - buffer sizes count
574  INT64 - buffer size 0
575  INT64 - buffer size 1
576  ...
577 
578  BUFFERS:
579  [memblock]: block0
580  [memblock]: block1
581  ...
582 
583 </pre>
584 */
585 
586 template<class CBC>
588  buffer_type& buf,
589  bm::word_t* temp_block)
590 {
591  statistics_type st;
592  buffer_coll.calc_stat(&st);
593 
594  buf.resize(st.max_serialize_mem);
595 
596  // ptr where bit-plains start
597  unsigned char* buf_ptr = buf.data();
598 
599  bm::encoder enc(buf.data(), buf.capacity());
601  enc.put_8('B');
602  enc.put_8('C');
603  enc.put_8((unsigned char)bo);
604 
605  unsigned char* mbuf1 = enc.get_pos(); // bookmark position
606  enc.put_64(0); // address vector size (reservation)
607 
608  buf_ptr = enc.get_pos();
609 
610  const address_resolver_type& addr_res = buffer_coll.resolver();
611  const bvector_type& bv = addr_res.get_bvector();
612  {
613  bm::serializer<bvector_type > bvs(temp_block);
614  bvs.gap_length_serialization(false);
615 
616  size_t addr_bv_size = bvs.serialize(bv, buf_ptr, buf.size());
617  buf_ptr += addr_bv_size;
618 
619  enc.set_pos(mbuf1); // rewind to bookmark
620  enc.put_64(addr_bv_size); // save the address vector size
621  }
622  enc.set_pos(buf_ptr); // restore stream position
623  size_t coll_size = buffer_coll.size();
624 
625  enc.put_64(coll_size);
626 
627  // pass 1 (save buffer sizes)
628  {
629  for (unsigned i = 0; i < buffer_coll.size(); ++i)
630  {
631  const buffer_type& cbuf = buffer_coll.get(i);
632  size_t sz = cbuf.size();
633  enc.put_64(sz);
634  } // for i
635  }
636  // pass 2 (save buffers)
637  {
638  for (unsigned i = 0; i < buffer_coll.size(); ++i)
639  {
640  const buffer_type& cbuf = buffer_coll.get(i);
641  size_t sz = cbuf.size();
642  enc.memcpy(cbuf.buf(), sz);
643  } // for i
644  }
645  buf.resize(enc.size());
646 }
647 
648 // -------------------------------------------------------------------------
649 template<class CBC>
651  CBC& buffer_coll,
652  const unsigned char* buf,
653  bm::word_t* temp_block)
654 {
655  // TODO: implement correct processing of byte-order corect deserialization
656  // ByteOrder bo_current = globals<true>::byte_order();
657 
658  bm::decoder dec(buf);
659  unsigned char h1 = dec.get_8();
660  unsigned char h2 = dec.get_8();
661 
662  BM_ASSERT(h1 == 'B' && h2 == 'C');
663  if (h1 != 'B' && h2 != 'C') // no magic header? issue...
664  {
665  return -1;
666  }
667  //unsigned char bv_bo =
668  dec.get_8();
669 
670  // -----------------------------------------
671  // restore address resolver
672  //
673  bm::id64_t addr_bv_size = dec.get_64();
674 
675  const unsigned char* bv_buf_ptr = dec.get_pos();
676 
677  address_resolver_type& addr_res = buffer_coll.resolver();
678  bvector_type& bv = addr_res.get_bvector();
679  bv.clear();
680 
681  bm::deserialize(bv, bv_buf_ptr, temp_block);
682  addr_res.sync();
683 
684  typename bvector_type::size_type addr_cnt = bv.count();
685  dec.seek((int)addr_bv_size);
686 
687  // -----------------------------------------
688  // read buffer sizes
689  //
690  bm::id64_t coll_size = dec.get_64();
691  if (coll_size != addr_cnt)
692  {
693  return -2; // buffer size collection does not match address vector
694  }
695 
696  typedef size_t vect_size_type;
697  bm::heap_vector<bm::id64_t, allocator_type, true> buf_size_vec;
698 
699  buf_size_vec.resize(vect_size_type(coll_size));
700  {
701  for (unsigned i = 0; i < coll_size; ++i)
702  {
703  bm::id64_t sz = dec.get_64();
704  buf_size_vec[i] = sz;
705  } // for i
706  }
707 
708  {
709  container_type& buf_vect = buffer_coll.container();
710  buf_vect.resize(vect_size_type(coll_size));
711  for (unsigned i = 0; i < coll_size; ++i)
712  {
713  bm::id64_t sz = buf_size_vec[i];
714  buffer_type& b = buf_vect.at(i);
715  b.resize(sz);
716  dec.memcpy(b.data(), size_t(sz));
717  } // for i
718  }
719  buffer_coll.sync();
720  return 0;
721 }
722 
723 // -------------------------------------------------------------------------
724 //
725 // -------------------------------------------------------------------------
726 
727 template<typename SV>
729 : bv_ref_ptr_(0)
730 {
732  #ifdef BMXORCOMP
733  is_xor_ref_ = true;
734  #else
735  is_xor_ref_ = false;
736  #endif
737 }
738 
739 // -------------------------------------------------------------------------
740 
741 template<typename SV>
743  const bv_ref_vector_type* bv_ref_ptr) BMNOEXCEPT
744 {
745  bv_ref_ptr_ = bv_ref_ptr;
746  is_xor_ref_ = bool(bv_ref_ptr);
747 }
748 
749 // -------------------------------------------------------------------------
750 
751 template<typename SV>
753 {
754  bv_ref_ptr_ = 0; // reset external ref.vector
755  is_xor_ref_ = is_enabled;
756 }
757 
758 
759 // -------------------------------------------------------------------------
760 
761 template<typename SV>
763 {
764  //bv_ref_.reset();
765  bv_ref_.build(sv.get_bmatrix());
766 }
767 
768 // -------------------------------------------------------------------------
769 
770 template<typename SV>
772  const SV& sv)
773 {
774  const typename SV::remap_matrix_type* rm = sv.get_remap_matrix();
775  BM_ASSERT(rm);
776 
777  const remap_matrix_type& rmatr = *rm;
778 
779  size_t rows = rmatr.rows();
780  size_t cols = rmatr.cols();
781 
782  BM_ASSERT(cols <= 256);
783  BM_ASSERT(rows <= ~0u);
784 
785  // compute CSR capacity vector
786  remap_rlen_vect_.resize(0);
787  for (size_t r = 0; r < rows; ++r)
788  {
789  const unsigned char* BMRESTRICT remap_row = rmatr.row(r);
790  size_t cnt = bm::count_nz(remap_row, cols);
791  if (!cnt)
792  break;
793  remap_rlen_vect_.push_back(unsigned(cnt));
794  } // for r
795 
796  rows = remap_rlen_vect_.size(); // effective rows in the remap table
797 
798  size_t csr_size_max = rows * sizeof(bm::gap_word_t);
799  for (size_t r = 0; r < rows; ++r)
800  {
801  unsigned rl = remap_rlen_vect_[r];
802  csr_size_max += rl * 2;
803  } // for r
804 
805  size_t remap_size = sv.remap_size();
806 
807  if (remap_size < csr_size_max)
808  {
809  const unsigned char* matrix_buf = sv.get_remap_buffer();
810  BM_ASSERT(matrix_buf);
811  BM_ASSERT(remap_size);
812 
813  enc.put_8('R');
814  enc.put_64(remap_size);
815  enc.memcpy(matrix_buf, size_t(remap_size));
816  }
817  else
818  {
819  enc.put_8('C'); // Compressed sparse row (CSR)
820  enc.put_32(unsigned(rows));
821  enc.put_16(bm::gap_word_t(cols)); // <= 255 chars
822 
823  {
824  bm::bit_out<bm::encoder> bo(enc);
825  for (size_t r = 0; r < rows; ++r)
826  {
827  unsigned rl = remap_rlen_vect_[r];
828  bo.gamma(rl);
829  } // for r
830  }
831 
832  for (size_t r = 0; r < rows; ++r)
833  {
834  const unsigned char* BMRESTRICT row = rmatr.row(r);
835  for (size_t j = 0; j < cols; ++j)
836  {
837  unsigned char v = row[j];
838  if (v)
839  {
840  enc.put_8((unsigned char)j);
841  enc.put_8(v);
842  }
843  } // for j
844  } // for r
845  }
846 
847  enc.put_8('E'); // end of matrix (integrity check token)
848 }
849 
850 // -------------------------------------------------------------------------
851 
852 template<typename SV>
854  const SV& sv)
855 {
856  digest_bv.init();
857  digest_bv.clear(false);
858  unsigned plains = sv.stored_plains();
859  for (unsigned i = 0; i < plains; ++i)
860  {
861  typename SV::bvector_type_const_ptr bv = sv.get_plain(i);
862  if (bv)
863  digest_bv.set_bit_no_check(i);
864  } // for i
865 }
866 
867 // -------------------------------------------------------------------------
868 
869 template<typename SV>
872 {
873  bvs_.allow_stat_reset(false); // stats accumulate mode for all bit-slices
875 
878 
879  unsigned plains = sv.stored_plains();
880 
881  // ----------------------------------------------------
882  // memory pre-reservation
883  //
884  typename SV::statistics sv_stat;
885  sv.calc_stat(&sv_stat);
886  sv_stat.max_serialize_mem += plane_digest_buf_.size() + (8 * plains);
887  unsigned char* buf = sv_layout.reserve(sv_stat.max_serialize_mem);
888 
889  // ----------------------------------------------------
890  //
891  bm::encoder enc(buf, (unsigned)sv_layout.capacity());
892 
893  // header size in bytes
894  unsigned h_size = 1 + 1 + // "BM" or "BC" (magic header)
895  1 + // byte-order
896  1 + // number of bit-plains (for vector)
897  8 + // size (internal 64-bit)
898  8 + // offset to digest (64-bit)
899  4; // reserve
900  // for large plain matrixes
901  {
902  h_size += 1 + // version number
903  8; // number of plains (64-bit)
904  }
905 
906  // ----------------------------------------------------
907  // Setup XOR reference compression
908  //
909  if (is_xor_ref())
910  {
911  if (bv_ref_ptr_) // use external reference
912  {
914  }
915  else
916  {
919  }
920  }
921 
922  // ----------------------------------------------------
923  // Serialize all bvector plains
924  //
925 
926  unsigned char* buf_ptr = buf + h_size; // ptr where plains start (start+hdr)
927 
928  for (unsigned i = 0; i < plains; ++i)
929  {
930  typename SV::bvector_type_const_ptr bv = sv.get_plain(i);
931  if (!bv) // empty plain
932  {
933  sv_layout.set_plain(i, 0, 0);
934  continue;
935  }
936  if (is_xor_ref())
937  {
938  unsigned idx;
939  if (bv_ref_ptr_) // use external reference
940  idx = (unsigned)bv_ref_ptr_->find_bv(bv);
941  else
942  idx = (unsigned)bv_ref_.find_bv(bv);
943  BM_ASSERT(idx != bv_ref_.not_found());
944  bvs_.set_curr_ref_idx(idx);
945  }
946 
947  size_t buf_size = (size_t)
948  bvs_.serialize(*bv, buf_ptr, sv_stat.max_serialize_mem);
949 
950  sv_layout.set_plain(i, buf_ptr, buf_size);
951  buf_ptr += buf_size;
952  if (sv_stat.max_serialize_mem > buf_size)
953  {
954  sv_stat.max_serialize_mem -= buf_size;
955  continue;
956  }
957  BM_ASSERT(0); // TODO: throw an exception here
958  } // for i
959 
960  bvs_.set_ref_vectors(0); // dis-engage XOR ref vector
961 
962  // -----------------------------------------------------
963  // serialize the re-map matrix
964  //
965  if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
966  {
967  bm::encoder enc_m(buf_ptr, sv_stat.max_serialize_mem);
968  if (sv.is_remap())
969  encode_remap_matrix(enc_m, sv);
970  else
971  enc_m.put_8('N');
972  buf_ptr += enc_m.size(); // add encoded data size
973  }
974 
975  // ------------------------------------------------------
976  // save the digest vector
977  //
978  size_t digest_offset = size_t(buf_ptr - buf); // digest position from the start
979  ::memcpy(buf_ptr, plane_digest_buf_.buf(), plane_digest_buf_.size());
980  buf_ptr += plane_digest_buf_.size();
981  {
982  bool use_64bit = false;
983  plane_off_vect_.resize(0);
984  for (unsigned i = 0; i < plains; ++i)
985  {
986  const unsigned char* p = sv_layout.get_plain(i);
987  if (p)
988  {
989  size_t offset = size_t(p - buf);
990  if (offset > bm::id_max32)
991  {
992  use_64bit = true;
993  break;
994  }
995  plane_off_vect_.push_back(unsigned(offset));
996  }
997  } // for i
998  bm::encoder enc_o(buf_ptr, sv_stat.max_serialize_mem);
999  if (use_64bit || (plane_off_vect_.size() < 4))
1000  {
1001  enc_o.put_8('6');
1002  // save the offset table as a list of 64-bit values
1003  //
1004  for (unsigned i = 0; i < plains; ++i)
1005  {
1006  const unsigned char* p = sv_layout.get_plain(i);
1007  if (p)
1008  {
1009  size_t offset = size_t(p - buf);
1010  enc_o.put_64(offset);
1011  }
1012  } // for
1013  }
1014  else // searialize 32-bit offset table using BIC
1015  {
1016  BM_ASSERT(plane_off_vect_.size() == plane_digest_bv_.count());
1017  unsigned min_v = plane_off_vect_[0];
1018  unsigned max_v = plane_off_vect_[plane_off_vect_.size()-1];
1019 
1020  enc_o.put_8('3');
1021  enc_o.put_32(min_v);
1022  enc_o.put_32(max_v);
1023 
1024  bm::bit_out<bm::encoder> bo(enc_o);
1025  bo.bic_encode_u32_cm(plane_off_vect_.data()+1,
1026  unsigned(plane_off_vect_.size()-2),
1027  min_v, max_v);
1028  }
1029  buf_ptr += enc_o.size();
1030  }
1031 
1032 
1033 
1034  sv_layout.resize(size_t(buf_ptr - buf)); // set the true occupied size
1035 
1036  // -----------------------------------------------------
1037  // save the header
1038  //
1040 
1041  enc.put_8('B'); // magic header 'BM' - bit matrix 'BC' - bit compressed
1042  if (sv.is_compressed())
1043  enc.put_8('C');
1044  else
1045  enc.put_8('M');
1046 
1047  enc.put_8((unsigned char)bo); // byte order
1048 
1049  unsigned char matr_s_ser = 1;
1050 #ifdef BM64ADDR
1051  matr_s_ser = 2;
1052 #endif
1053 
1054  enc.put_8(0); // number of plains == 0 (legacy magic number)
1055  enc.put_8(matr_s_ser); // matrix serialization version
1056  {
1057  bm::id64_t plains_code = plains | (1ull << 63);
1058  enc.put_64(plains_code); // number of rows in the bit-matrix
1059  }
1060  enc.put_64(sv.size_internal());
1061  enc.put_64(bm::id64_t(digest_offset));
1062 }
1063 
1064 // -------------------------------------------------------------------------
1065 //
1066 // -------------------------------------------------------------------------
1067 
1068 template<typename SV>
1070  : remap_buf_ptr_(0), bv_ref_ptr_(0), idx_range_set_(false)
1071 {
1072  temp_block_ = alloc_.alloc_bit_block();
1073  not_null_mask_bv_.set_allocator_pool(&pool_);
1074  rsc_mask_bv_.set_allocator_pool(&pool_);
1075 }
1076 
1077 // -------------------------------------------------------------------------
1078 
1079 template<typename SV>
1080 void
1082 {
1083  bv_ref_ptr_ = bv_ref_ptr;
1084  if (!bv_ref_ptr_)
1086 }
1087 
1088 
1089 // -------------------------------------------------------------------------
1090 
1091 template<typename SV>
1093 {
1094  if (temp_block_)
1095  alloc_.free_bit_block(temp_block_);
1096 }
1097 
1098 // -------------------------------------------------------------------------
1099 
1100 template<typename SV>
1102 {
1105  bv_ref_.reset();
1106 }
1107 
1108 // -------------------------------------------------------------------------
1109 
1110 template<typename SV>
1112 {
1113  if (bv_ref_ptr_)
1114  {
1117  }
1118  else
1119  {
1122  }
1123 }
1124 
1125 // -------------------------------------------------------------------------
1126 
1127 template<typename SV>
1129  const unsigned char* buf,
1130  bool clear_sv)
1131 {
1132  idx_range_set_ = false;
1133  deserialize_sv(sv, buf, 0, clear_sv);
1134 }
1135 
1136 // -------------------------------------------------------------------------
1137 
1138 template<typename SV>
1140  const unsigned char* buf)
1141 {
1142  bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1143 
1144  unsigned char matr_s_ser = 0;
1145  unsigned plains = load_header(dec, sv, matr_s_ser);
1146 
1147  // bm::id64_t sv_size = dec.get_64();
1148  load_plains_off_table(buf, dec, plains); // read the offset vector of bit-plains
1149 
1150  for (unsigned i = 0; i < plains; ++i)
1151  {
1152  if (!off_vect_[i]) // empty vector
1153  continue;
1154 
1155  bvector_type* bv = sv.get_plain(i);
1156  BM_ASSERT(bv); (void)bv;
1157 
1158  } // for
1159 }
1160 
1161 // -------------------------------------------------------------------------
1162 
1163 template<typename SV>
1165  const unsigned char* buf,
1166  size_type from,
1167  size_type to,
1168  bool clear_sv)
1169 {
1170  if (clear_sv)
1171  sv.clear_all(true);
1172 
1173  idx_range_set_ = true; idx_range_from_ = from; idx_range_to_ = to;
1174 
1175  remap_buf_ptr_ = 0;
1176  bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1177 
1178  unsigned char matr_s_ser = 0;
1179  unsigned plains = load_header(dec, sv, matr_s_ser);
1180 
1181  if (!sv_size_) // empty vector
1182  return;
1183 
1184  sv.resize_internal(size_type(sv_size_));
1185  bv_ref_.reset();
1186 
1187  load_plains_off_table(buf, dec, plains); // read the offset vector of bit-plains
1188 
1190 
1191 
1192  // TODO: add range for not NULL plane
1193  plains = (unsigned)load_null_plain(sv, int(plains), buf, 0);
1194 
1195  // check if mask needs to be recalculated using the NULL (index) vector
1197  {
1198  // recalculate plains range
1199  size_type sv_left, sv_right;
1200  bool range_valid = sv.resolve_range(from, to, &sv_left, &sv_right);
1201  if (!range_valid)
1202  {
1203  sv.clear();
1204  idx_range_set_ = false;
1205  return;
1206  }
1207  else
1208  {
1209  idx_range_set_ = true; idx_range_from_ = sv_left; idx_range_to_ = sv_right;
1210  }
1211  }
1212 
1213  deserialize_plains(sv, plains, buf, 0);
1214 
1216 
1217  // load the remap matrix
1218  //
1219  if (bm::conditional<SV::is_remap_support::value>::test()) // test remap trait
1220  {
1221  if (matr_s_ser)
1223  } // if remap traits
1224 
1225  sv.sync(true); // force sync, recalculate RS index, remap tables, etc
1226 // sv.sync_size();
1227 
1228  remap_buf_ptr_ = 0;
1229 
1230  idx_range_set_ = false;
1231 }
1232 
1233 // -------------------------------------------------------------------------
1234 
/*!
    \brief Deserialize a sparse vector, optionally restricted by a gather
           mask.

    Sequence visible in the body:
     - optionally clears the target (sv.clear_all(true));
     - decodes the header (load_header) and returns early when the stored
       size (sv_size_) is 0;
     - resizes the target, resets bv_ref_ and loads the plain offset table;
     - loads the NULL plain via load_null_plain(), passing the mask along;
     - inside a conditional block, when a mask is given: ANDs the mask with
       the vector's NULL bit-vector into not_null_mask_bv_ and redirects
       mask_bv to rsc_mask_bv_ for the rest of the call;
     - deserializes the remaining plains via deserialize_plains();
     - calls sv.sync(true) and resets remap_buf_ptr_.

    \param buf      - start of the serialized BLOB
    \param mask_bv  - optional gather mask, NULL for none
    \param clear_sv - when true the target vector is cleared first

    NOTE(review): listing lines 1236 (declarator), 1259, 1264 (the condition
    guarding the mask-recalculation block), 1272 (presumably the step that
    fills rsc_mask_bv_), 1279, 1286 and 1294 (the statement executed when
    matr_s_ser is non-zero) are absent from this extract. The visible
    parameters match
    deserialize_sv(SV&, const unsigned char*, const bvector_type*, bool)
    - confirm against the full header.
*/
1235 template<typename SV>
1237  const unsigned char* buf,
1238  const bvector_type* mask_bv,
1239  bool clear_sv)
1240 {
1241  if (clear_sv)
1242  sv.clear_all(true);
1243 
1244  remap_buf_ptr_ = 0;
1245  bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1246 
1247  unsigned char matr_s_ser = 0;
1248  unsigned plains = load_header(dec, sv, matr_s_ser);
1249 
1250 // bm::id64_t sv_size = dec.get_64();
1251  if (!sv_size_)
1252  return; // empty vector
1253 
1254  sv.resize_internal(size_type(sv_size_));
1255  bv_ref_.reset();
1256 
1257  load_plains_off_table(buf, dec, plains); // read the offset vector of bit-plains
1258 
1260 
      // load_null_plain() returns the number of plains still to be decoded
1261  plains = (unsigned)load_null_plain(sv, int(plains), buf, mask_bv);
1262 
1263  // check if mask needs to be recalculated using the NULL (index) vector
1265  {
1266  if (mask_bv)
1267  {
1268  const bvector_type* bv_null = sv.get_null_bvector();
1269  BM_ASSERT(bv_null);
1270  rsc_mask_bv_.clear(true);
      // keep only the mask bits that are also set in the NULL bit-vector
1271  not_null_mask_bv_.bit_and(*bv_null, *mask_bv, bvector_type::opt_compress);
      // from here on the plains are filtered with the member mask
1273  mask_bv = &rsc_mask_bv_;
1274 
1275  // if it needs range recalculation
1276  if (idx_range_set_) // range setting is in effect
1277  {
1278  //bool rf =
1280  }
1281  }
1282  }
1283 
1284  deserialize_plains(sv, plains, buf, mask_bv);
1285 
1287 
1288 
1289  // load the remap matrix
1290  //
1291  if (bm::conditional<SV::is_remap_support::value>::test()) // test remap trait
1292  {
1293  if (matr_s_ser)
1295  } // if remap traits
1296 
1297  sv.sync(true); // force sync, recalculate RS index, remap tables, etc
1298 // sv.sync_size();
1299 
1300  remap_buf_ptr_ = 0;
1301 }
1302 
1303 // -------------------------------------------------------------------------
1304 
/*!
    \brief Decode the common BLOB header.

    Bytes are consumed from \p dec in this order: two signature bytes
    ('B', then 'M' or 'C'), one byte-order byte (read and ignored), one
    plain-count byte. A plain-count byte of 0 marks the bit-matrix layout:
    one byte of matrix serialization version and a 64-bit planes code
    follow, and the plain count becomes the planes code converted to
    unsigned. After the checks the 64-bit vector size is stored into
    sv_size_; if bit 63 of the planes code is set a 64-bit value is read
    into digest_offset_, otherwise digest_offset_ is 0.

    \param dec        - decoder to read the header from
    \param sv         - target vector; only sv.stored_plains() is queried
    \param matr_s_ser - receives the matrix serialization version; left
                        untouched when the plain-count byte is non-zero
    \return number of plains recorded in the header

    NOTE(review): listing lines 1306 (declarator), 1317, 1331 and 1337 (the
    statements guarded by the signature, 64-bit and plain-count checks) are
    absent from this extract - presumably calls to the raise_invalid_*()
    helpers; confirm against the full header.
*/
1305 template<typename SV>
1307  bm::decoder& dec, SV& sv, unsigned char& matr_s_ser)
1308 {
1309  bm::id64_t planes_code = 0;
1310  unsigned char h1 = dec.get_8();
1311  unsigned char h2 = dec.get_8();
1312 
1313  BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C'));
1314 
1315  bool sig2_ok = (h2 == 'M' || h2 == 'C');
1316  if (h1 != 'B' || !sig2_ok) //&& (h2 != 'M' || h2 != 'C')) // no magic header?
1318 
1319  unsigned char bv_bo = dec.get_8(); (void) bv_bo;
1320  unsigned plains = dec.get_8();
1321  if (plains == 0) // bit-matrix
1322  {
1323  matr_s_ser = dec.get_8(); // matrix serialization version
1324  planes_code = dec.get_64();
1325  plains = (unsigned) planes_code; // number of rows in the bit-matrix
1326  }
      // the version-2 (64-bit matrix) check is compiled only without BM64ADDR
1327  #ifdef BM64ADDR
1328  #else
1329  if (matr_s_ser == 2) // 64-bit matrix
1330  {
1332  }
1333  #endif
1334 
      // the BLOB must hold at least one plain and no more than the target stores
1335  unsigned sv_plains = sv.stored_plains();
1336  if (!plains || plains > sv_plains)
1338 
1339  sv_size_ = dec.get_64();
1340 
1341  digest_offset_ = 0;
      // top bit of the planes code flags that a digest offset follows
1342  if (planes_code & (1ull << 63))
1343  {
1344  digest_offset_ = dec.get_64();
1345  }
1346 
1347  return plains;
1348 }
1349 
1350 // -------------------------------------------------------------------------
1351 
/*!
    \brief Deserialize bit-plains [0, plains) from their recorded offsets.

    Plains are visited from the highest index down. A plain whose entry in
    off_vect_ is 0 is skipped. Every visited plain is added to bv_ref_
    unless bv_ref_ptr_ is set.

    With \p mask_bv the decoded plain is AND-ed with the mask; if no index
    range is active yet, one is taken from the mask with find_range().
    Without a mask, an active index range is applied with keep_range().

    In both modes there is a special-case branch that decodes the plain in
    full, stores the address just past its bytes in remap_buf_ptr_ and
    continues with the next plain.

    \param sv      - target sparse vector
    \param plains  - number of plains to process
    \param buf     - start of the serialized BLOB; offsets are added to it
    \param mask_bv - optional gather mask, NULL for none

    NOTE(review): listing lines 1353 (declarator), 1382 and 1399 (first
    halves of the two special-case conditions), 1392 and 1411 (statements
    executed when idx_range_set_ is true, presumably the deserializer range
    setup) and 1424 are absent from this extract - confirm against the
    full header.
*/
1352 template<typename SV>
1354  SV& sv,
1355  unsigned plains,
1356  const unsigned char* buf,
1357  const bvector_type* mask_bv)
1358 {
      // no explicit range given: derive one from the mask
1359  if (mask_bv && !idx_range_set_)
1360  idx_range_set_ = mask_bv->find_range(idx_range_from_, idx_range_to_);
1361 
1362  // read-deserialize the plains based on offsets
1363  // backward order to bring the NULL vector first
1364  //
1365  for (int i = int(plains-1); i >= 0; --i)
1366  {
1367  size_t offset = off_vect_[unsigned(i)];
1368  if (!offset) // empty vector
1369  continue;
1370  const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1371  bvector_type* bv = sv.get_plain(unsigned(i));
1372  BM_ASSERT(bv);
1373 
1374  // add the vector into the XOR reference list
1375  if (!bv_ref_ptr_)
1376  bv_ref_.add(bv, unsigned(i));
1377 
1378  if (mask_bv) // gather mask set, use AND operation deserializer
1379  {
1380  typename bvector_type::mem_pool_guard mp_g_z(pool_, *bv);
1381 
1383  && !remap_buf_ptr_) // last plain vector (special case)
1384  {
      // full decode: the returned byte count locates the data after this plain
1385  size_t read_bytes =
1386  deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1387  remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1388  bv->bit_and(*mask_bv, bvector_type::opt_compress);
1389  continue;
1390  }
1391  if (idx_range_set_)
1393  deserial_.deserialize(*bv, bv_buf_ptr);
1394 
1395  bv->bit_and(*mask_bv, bvector_type::opt_compress);
1396  }
1397  else
1398  {
1400  !remap_buf_ptr_)
1401  {
      // full decode: the returned byte count locates the data after this plain
1402  size_t read_bytes =
1403  deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1404  remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1405  if (idx_range_set_)
1406  bv->keep_range(idx_range_from_, idx_range_to_);
1407  continue;
1408  }
1409  if (idx_range_set_)
1410  {
1412  deserial_.deserialize(*bv, bv_buf_ptr);
      // drop everything outside the requested range
1413  bv->keep_range(idx_range_from_, idx_range_to_);
1414  }
1415  else
1416  {
1417  //size_t read_bytes =
1418  deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1419  }
1420  }
1421 
1422  } // for i
1423 
1425 
1426 }
1427 
1428 // -------------------------------------------------------------------------
1429 
/*!
    \brief Load the NULL bit-plain, read from the last plain slot
           (index plains-1).

    When sv.is_nullable() is false nothing is loaded and \p plains is
    returned unchanged. Otherwise, if the offset recorded for plain
    plains-1 is non-zero, that plain is deserialized and - unless
    bv_ref_ptr_ is set - added to bv_ref_. Two of the three decode branches
    also store the address just past the plain's bytes in remap_buf_ptr_.
    In the branch commented "non-compressed SV" an active index range is
    applied with keep_range() and a non-NULL \p mask_bv is AND-ed in.

    \param plains  - number of plains in the BLOB, must be > 0
    \param buf     - start of the serialized BLOB; the offset is added to it
    \param mask_bv - optional gather mask, NULL for none
    \return \p plains when the vector is not nullable, plains-1 otherwise

    NOTE(review): listing lines 1431 (declarator, including the parameter
    that declares `sv`), 1452 and 1463 (the conditions selecting the decode
    branch), 1465, 1474 and 1477 are absent from this extract - confirm
    against the full header.
*/
1430 template<typename SV>
1432  int plains,
1433  const unsigned char* buf,
1434  const bvector_type* mask_bv)
1435 {
1436  BM_ASSERT(plains > 0);
1437  if (!sv.is_nullable())
1438  return plains;
1439  int i = plains - 1;
1440  size_t offset = off_vect_[unsigned(i)];
1441  if (offset)
1442  {
1443  // TODO: improve serialization format to avoid non-range decode of
1444  // the NULL vector just to get to the offset of remap table
1445 
1446  const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1447  bvector_type* bv = sv.get_plain(unsigned(i));
1448 
1449  if (!bv_ref_ptr_)
1450  bv_ref_.add(bv, unsigned(i));
1451 
1453  {
1454  // load the whole not-NULL vector regardless of range
1455  // TODO: load [0, idx_range_to_]
1456  size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1457  remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1458  }
1459  else // non-compressed SV
1460  {
1461  // NULL plain in string vector with substitute re-map
1462  //
1464  {
1466  size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1467  remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1468  if (idx_range_set_)
1469  bv->keep_range(idx_range_from_, idx_range_to_);
1470  }
1471  else
1472  if (idx_range_set_)
1473  {
1475  deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1476  bv->keep_range(idx_range_from_, idx_range_to_);
1478  }
1479  else
1480  {
1481  deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1482  }
1483  if (mask_bv)
1484  bv->bit_and(*mask_bv, bvector_type::opt_compress);
1485  }
1486  }
      // the NULL plain is accounted for; callers decode the remaining plains
1487  return plains-1;
1488 }
1489 
1490 // -------------------------------------------------------------------------
1491 
/*!
    \brief Fill off_vect_ with the BLOB offset of every bit-plain.

    off_vect_ is resized to \p plains. Two layouts are handled:

    - digest_offset_ == 0: \p plains consecutive 64-bit offsets are read
      from \p dec.
    - digest_offset_ != 0: a digest located at buf + digest_offset_ is used
      (plane_digest_bv_); a one-byte type code follows it:
        '6' - one 64-bit offset per plain whose digest bit is set,
        '3' - 32-bit offsets: min and max are stored explicitly, the values
              between them are decoded with bic_decode_u32_cm().
      Plains whose digest bit is not set get offset 0.

    \param buf    - start of the serialized BLOB
    \param dec    - decoder positioned at the offset table (used only when
                    no digest is present)
    \param plains - number of plains in the BLOB

    NOTE(review): listing lines 1493 (declarator) and 1502 (right-hand side
    of the read_bytes initialisation, presumably the digest bit-vector
    deserialization) are absent from this extract - confirm against the
    full header.
*/
1492 template<typename SV>
1494  const unsigned char* buf, bm::decoder& dec, unsigned plains)
1495 {
1496  off_vect_.resize(plains);
1497  if (digest_offset_)
1498  {
1499  plane_digest_bv_.clear(false);
1500  const unsigned char* buf_ptr = buf + digest_offset_;
1501  size_t read_bytes =
1503  buf_ptr += read_bytes;
1504 
      // the offset list starts right after the digest
1505  bm::decoder dec_o(buf_ptr);
1506 
1507  unsigned char dtype = dec_o.get_8();
1508  switch (dtype)
1509  {
1510  case '6':
1511  for (unsigned i = 0; i < plains; ++i)
1512  {
1513  size_t offset = 0;
1514  if (plane_digest_bv_.test(i))
1515  offset = (size_t) dec_o.get_64();
1516  off_vect_[i] = offset;
1517  } // for i
1518  break;
1519  case '3':
1520  {
1521  unsigned osize = (unsigned)plane_digest_bv_.count();
1522  BM_ASSERT(osize);
1523  off32_vect_.resize(osize);
1524 
1525  unsigned min_v = dec_o.get_32();
1526  unsigned max_v = dec_o.get_32();
1527 
1528  off32_vect_[0] = min_v;
1529  off32_vect_[osize-1] = max_v;
1530 
      // NOTE(review): osize-2 is unsigned arithmetic and wraps for
      // osize < 2, while only BM_ASSERT(osize) precedes it - confirm the
      // writer never emits type '3' with fewer than two offsets
1531  bm::bit_in<bm::decoder> bi(dec_o);
1532  bi.bic_decode_u32_cm(off32_vect_.data()+1, osize-2, min_v, max_v);
1533 
      // scatter the dense 32-bit list back to per-plain slots
1534  unsigned k = 0;
1535  for (unsigned i = 0; i < plains; ++i)
1536  {
1537  if (plane_digest_bv_.test(i))
1538  {
1539  off_vect_[i] = off32_vect_[k];
1540  ++k;
1541  }
1542  else
1543  off_vect_[i] = 0;
1544  }
1545  }
1546  break;
1547  default:
1548  // TODO: raise an exception
1549  BM_ASSERT(0);
1550  } // switch
1551  }
1552  else
1553  {
1554  for (unsigned i = 0; i < plains; ++i)
1555  {
1556  size_t offset = (size_t) dec.get_64();
1557  off_vect_[i] = offset;
1558  } // for i
1559  }
1560 }
1561 
1562 // -------------------------------------------------------------------------
1563 
/*!
    \brief Load the remap table that follows the plain data.

    Does nothing when \p remap_buf_ptr is NULL. Otherwise the first byte
    selects the encoding:
      'N' - returns immediately (sv.set_remap() is not called);
      'R' - a 64-bit byte count followed by that many bytes, copied into
            the buffer returned by sv.init_remap_buffer(); the count is
            checked against sv.remap_size();
      'C' - a 32-bit row count and a 16-bit column count (checked against
            256), the gamma-coded length of each row, then for each row
            that many (index byte, value byte) pairs written as
            row[index] = value.
    For 'R' and 'C' the next byte is then compared with 'E' and
    sv.set_remap() is called.

    \param remap_buf_ptr - address of the remap section, NULL for none

    NOTE(review): listing line 1565 (declarator, including the parameter
    that declares `sv`) and the bodies of every error branch (lines 1586,
    1598, 1604, 1625, 1638, 1646) are absent from this extract - presumably
    calls to the raise_*() helpers; confirm against the full header.
    In the visible 'C' branch `rows` is not compared with the dimensions
    of the remap matrix before rmatr->row(r) is used - verify.
*/
1564 template<typename SV>
1566  const unsigned char* remap_buf_ptr)
1567 {
1568  if (!remap_buf_ptr)
1569  return;
1570 
1571  bm::decoder dec_m(remap_buf_ptr);
1572 
1573  unsigned char rh = dec_m.get_8();
1574  switch (rh)
1575  {
1576  case 'N':
1577  return;
1578  case 'R':
1579  {
1580  size_t remap_size = (size_t) dec_m.get_64();
1581  unsigned char* remap_buf = sv.init_remap_buffer();
1582  BM_ASSERT(remap_buf);
1583  size_t target_remap_size = sv.remap_size();
      // stored size must be non-zero and equal to what the target expects
1584  if (!remap_size || !remap_buf || remap_size != target_remap_size)
1585  {
1587  }
1588  dec_m.memcpy(remap_buf, remap_size);
1589  }
1590  break;
1591 
1592  case 'C': // CSR remap
1593  {
1594  sv.init_remap_buffer();
1595  typename SV::remap_matrix_type* rmatr = sv.get_remap_matrix();
1596  if (!rmatr)
1597  {
1599  }
1600  size_t rows = (size_t) dec_m.get_32();
1601  size_t cols = dec_m.get_16();
1602  if (cols > 256)
1603  {
1605  }
1606 
1607  // read gamma encoded row lens
1608  remap_rlen_vect_.resize(0);
1609  {
1610  bm::bit_in<bm::decoder> bi(dec_m);
1611  for (size_t r = 0; r < rows; ++r)
1612  {
1613  unsigned rl = bi.gamma();
1614  remap_rlen_vect_.push_back(rl);
1615  } // for r
1616  }
1617 
      // each row is stored as cnt (index, value) byte pairs
1618  for (size_t r = 0; r < rows; ++r)
1619  {
1620  unsigned char* BMRESTRICT row = rmatr->row(r);
1621  size_t cnt = remap_rlen_vect_[r];
1622  if (!cnt || cnt > 256)
1623  {
1624  // throw an exception here (format corruption!)
1626  }
1627  for (size_t j = 0; j < cnt; ++j)
1628  {
1629  unsigned idx = dec_m.get_8();
1630  unsigned char v = dec_m.get_8();
1631  row[idx] = v;
1632  } // for j
1633  } // for r
1634  }
1635  break;
1636  default:
1637  // re-map matrix code error
1639  } // switch
1640 
1641  // finalize the remap matrix read
1642  //
1643  unsigned char end_tok = dec_m.get_8();
1644  if (end_tok != 'E')
1645  {
1647  }
1648  sv.set_remap();
1649 }
1650 
1651 // -------------------------------------------------------------------------
1652 
/*!
    \brief Report a BLOB whose signature header is not recognised.

    Throws std::logic_error unless BM_NO_STL is defined, in which case
    BM_THROW(BM_ERR_SERIALFORMAT) is used instead.

    NOTE(review): listing line 1654 (the declarator) is absent from this
    extract; going by the message this is presumably raise_invalid_header()
    - confirm against the full header.
*/
1653 template<typename SV>
1655 {
1656 #ifndef BM_NO_STL
1657  throw std::logic_error("BitMagic: Invalid serialization signature header");
1658 #else
1659  BM_THROW(BM_ERR_SERIALFORMAT);
1660 #endif
1661 }
1662 
1663 // -------------------------------------------------------------------------
1664 
/*!
    \brief Report a 64-bit BLOB that the current target cannot accept.

    Throws std::logic_error unless BM_NO_STL is defined, in which case
    BM_THROW(BM_ERR_SERIALFORMAT) is used instead.

    NOTE(review): listing line 1666 (the declarator) is absent from this
    extract; going by the message this is presumably raise_invalid_64bit()
    - confirm against the full header.
*/
1665 template<typename SV>
1667 {
1668 #ifndef BM_NO_STL
1669  throw std::logic_error("BitMagic: Invalid serialization target (64-bit BLOB)");
1670 #else
1671  BM_THROW(BM_ERR_SERIALFORMAT);
1672 #endif
1673 }
1674 
1675 // -------------------------------------------------------------------------
1676 
/*!
    \brief Report a BLOB whose bit depth does not fit the target vector.

    Throws std::logic_error unless BM_NO_STL is defined, in which case
    BM_THROW(BM_ERR_SERIALFORMAT) is used instead.

    NOTE(review): listing line 1678 (the declarator) is absent from this
    extract; going by the message this is presumably
    raise_invalid_bitdepth() - confirm against the full header.
*/
1677 template<typename SV>
1679 {
1680 #ifndef BM_NO_STL
1681  throw std::logic_error("BitMagic: Invalid serialization target (bit depth)");
1682 #else
1683  BM_THROW(BM_ERR_SERIALFORMAT);
1684 #endif
1685 }
1686 
1687 // -------------------------------------------------------------------------
1688 
/*!
    \brief Report a malformed serialization BLOB.

    Throws std::logic_error unless BM_NO_STL is defined, in which case
    BM_THROW(BM_ERR_SERIALFORMAT) is used instead.

    The exception text previously read "fromat"; it is corrected to
    "format" so the message is searchable and reads correctly in logs.
    The exception type is unchanged.

    NOTE(review): listing line 1690 (the declarator) is absent from this
    extract; going by the message this is presumably raise_invalid_format()
    - confirm against the full header.
*/
1689 template<typename SV>
1691 {
1692 #ifndef BM_NO_STL
1693  throw std::logic_error("BitMagic: Invalid serialization format (BLOB corruption?)");
1694 #else
1695  BM_THROW(BM_ERR_SERIALFORMAT);
1696 #endif
1697 }
1698 
1699 // -------------------------------------------------------------------------
1700 
/*!
    \brief Report a BLOB whose remap matrix is missing or unusable.

    Throws std::logic_error unless BM_NO_STL is defined, in which case
    BM_THROW(BM_ERR_SERIALFORMAT) is used instead.

    NOTE(review): listing line 1702 (the declarator) is absent from this
    extract; going by the message this is presumably
    raise_missing_remap_matrix() - confirm against the full header.
*/
1701 template<typename SV>
1703 {
1704 #ifndef BM_NO_STL
1705  throw std::logic_error("BitMagic: Invalid serialization format (remap matrix)");
1706 #else
1707  BM_THROW(BM_ERR_SERIALFORMAT);
1708 #endif
1709 }
1710 
1711 // -------------------------------------------------------------------------
1712 
1713 } // namespace bm
1714 
1715 #endif
void set_bookmarks(bool enable, unsigned bm_interval=256)
Add skip-markers for faster range deserialization.
static void build_plane_digest(bvector_type &digest_bv, const SV &sv)
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition: encoding.h:1143
bm::serializer< bvector_type > & get_bv_serializer() BMNOEXCEPT
Get access to the underlying bit-vector serializer This access can be used to fine tune compression s...
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition: encoding.h:657
static void raise_invalid_header()
throw error on incorrect deserialization
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to, bool clear_sv=true)
bm::heap_vector< size_t, alloc_type, true > off_vect_
size_t size() const
return current serialized size
void set_range(size_type from, size_type to) BMNOEXCEPT
set deserialization range [from, to] This is NOT exact, approximate range, content outside range is n...
Definition: bmserial.h:568
serializer< bvector_type >::buffer buffer_type
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:1213
void reset()
reset the collection (resize(0))
Definition: bmxor.h:331
void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)
void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
Set current index in ref.vector collection (not a row idx or plain idx)
Definition: bmserial.h:1249
ad-hoc conditional expressions
Definition: bmutil.h:110
CBC::address_resolver_type address_resolver_type
unsigned char * plain_ptrs_[SV::sv_plains]
pointers on serialized bit-plains
const unsigned char * get_plain(unsigned i) const
Get plain pointer.
const bvector_type * bvector_type_const_ptr
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:742
void deserialize(SV &sv, const unsigned char *buf, const bvector_type &mask_bv)
size_t plane_size_[SV::sv_plains]
serialized plain size
static void raise_missing_remap_matrix()
throw error on incorrect deserialization
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the poi...
Definition: bmserial.h:3570
unsigned long long int id64_t
Definition: bmconst.h:34
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition: encoding.h:104
Sparse container sparse_vector<> for integer types using bit-transposition transform.
buffer_type buf_
serialization buffer
void set_xor_ref(bv_ref_vector_type *bv_ref_ptr)
Set external XOR reference vectors (data frame reference vectors)
void deserialize(SV &sv, const unsigned char *buf, size_type from, size_type to)
static size_type not_found() BMNOEXCEPT
not-found value for find methods
Definition: bmxor.h:362
bm::id_t size_type
Definition: bm.h:117
CBC::address_resolver_type address_resolver_type
bm::serializer< bvector_type > serializer_type
Definition: bm.h:76
bm::heap_vector< unsigned, alloc_type, true > off32_vect_
bm::operation_deserializer< bvector_type > op_deserial_
void seek(int delta) BMNOEXCEPT
change current position
Definition: encoding.h:98
Bit-vector serialization class.
Definition: bmserial.h:75
void load_remap(SV &sv, const unsigned char *remap_buf_ptr)
load string remap dict
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition: bmserial.h:1240
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:134
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition: encoding.h:429
bool is_xor_ref() const BMNOEXCEPT
Returns the XOR reference compression status (enabled/disabled)
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:251
Class for decoding data from memory buffer.
Definition: encoding.h:120
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition: bmserial.h:2860
Serializer for compressed collections.
Deserializer for compressed collections.
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
bvector_type plane_digest_bv_
bv.digest of bit-planes
static void raise_invalid_bitdepth()
throw error on incorrect deserialization
List of reference bit-vectors with their true index associations.
Definition: bmxor.h:320
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
static void raise_invalid_64bit()
throw error on incorrect deserialization
Alloc allocator_type
Definition: bm.h:110
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition: encoding.h:566
int load_null_plain(SV &sv, int plains, const unsigned char *buf, const bvector_type *mask_bv)
load NULL bit-plain (returns new plains count)
void resize(size_t ssize)
Set new serialized size.
unsigned int word_t
Definition: bmconst.h:38
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition: bmserial.h:2484
SV::remap_matrix_type remap_matrix_type
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition: encoding.h:511
unsigned char * reserve(size_t capacity)
resize capacity
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:707
bv_ref_vector_type * bv_ref_ptr_
external ref
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
const unsigned char * data() const
Return serialization buffer pointer.
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition: encoding.h:524
void set_xor_ref(bool is_enabled) BMNOEXCEPT
Turn ON and OFF XOR compression of sparse vectors Enables XOR reference compression for the sparse ve...
bvector_type::allocator_type alloc_type
bm::heap_vector< unsigned, alloc_type, true > u32_vector_type
const unsigned id_max32
Definition: bmconst.h:49
void enable_xor_compression() BMNOEXCEPT
Enable XOR compression on vector serialization.
void unset_range() BMNOEXCEPT
Disable range deserialization.
Definition: bmserial.h:577
void encode_remap_matrix(bm::encoder &enc, const SV &sv)
serialize the remap matrix used for SV encoding
static ByteOrder byte_order()
Definition: bmconst.h:464
void deserialize_structure(SV &sv, const unsigned char *buf)
unsigned short gap_word_t
Definition: bmconst.h:77
void load_plains_off_table(const unsigned char *buf, bm::decoder &dec, unsigned plains)
load offset table
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:177
void set_plain(unsigned i, unsigned char *ptr, size_t buf_size)
Set plain output pointer and size.
void add(const bvector_type *bv, size_type ref_idx)
Add reference vector.
Definition: bmxor.h:342
const bv_ref_vector_type * bv_ref_ptr_
void build_xor_ref_vector(const SV &sv)
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
void deserialize_sv(SV &sv, const unsigned char *buf, const bvector_type *mask_bv, bool clear_sv)
bm::serializer< bvector_type > bvs_
void disable_xor_compression() BMNOEXCEPT
Disable XOR compression on serialization.
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1250
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:111
serializer_type::buffer buffer_type
Definitions(internal)
bm::rank_compressor< bvector_type > rsc_compressor_
void deserialize_range(BV &bv, const unsigned char *buf, typename BV::size_type from, typename BV::size_type to, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector range deserialization from a memory BLOB.
Definition: bmserial.h:2989
void allow_stat_reset(bool allow) BMNOEXCEPT
Enable/disable statistics reset on each serialization.
Definition: bmserial.h:198
Serialize sparse vector into a memory buffer(s) structure.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
Serialize compressed collection into memory buffer.
bv_ref_vector_type bv_ref_
reference vector
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1470
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Definition: bmserial.h:3899
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:678
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs...
SZ count_nz(const VT *arr, SZ arr_size)
Find count of non-zero elements in the array.
Definition: bmfunc.h:9530
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:439
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
sparse vector de-serializer
const bvector_type * bvector_type_const_ptr
layout class for serialization buffer structure
bvector_type::allocator_type allocator_type
void build(const BMATR &bmatr)
Reset and build vector of references from a basic bit-matrix all NULL rows are skipped, not added to the ref.vector.
Definition: bmxor.h:391
const unsigned char * buf() const
Return serialization buffer pointer.
bvector_type::allocator_type alloc_type
buffer_type plane_digest_buf_
serialization buf
void compress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank compression algorithm based on two parallel iterators/enumerators set of source vector gets re-m...
Definition: bmalgo.h:514
void setup_xor_compression()
setup deserializers
unsigned load_header(bm::decoder &dec, SV &sv, unsigned char &matr_s_ser)
Deserialize header/version and other common info.
#define BM_ASSERT
Definition: bmdef.h:136
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition: encoding.h:601
const unsigned char * remap_buf_ptr_
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition: encoding.h:92
#define BMNOEXCEPT
Definition: bmdef.h:81
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition: bmserial.h:958
bm::bvector bvector_type
Definition: xsample07a.cpp:94
void clear_xor_compression()
unset XOR compression vectors
size_t capacity() const
return serialization buffer capacity
void deserialize_plains(SV &sv, unsigned plains, const unsigned char *buf, const bvector_type *mask_bv=0)
deserialize bit-vector plains
Memory encoding.
Definition: encoding.h:49
static void raise_invalid_format()
throw error on incorrect deserialization
size_type find_bv(const bvector_type *bv) const BMNOEXCEPT
Find vector index by the pointer.
Definition: bmxor.h:377
bm::deserializer< bvector_type, bm::decoder > deserial_
ByteOrder
Byte orders recognized by the library.
Definition: bmconst.h:428
byte_buffer< allocator_type > buffer
Definition: bmserial.h:85
void reset_compression_stats() BMNOEXCEPT
Reset all accumulated compression statistics.
Definition: bmserial.h:1185
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition: bmserial.h:2926
#define BMRESTRICT
Definition: bmdef.h:199
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1751