BitMagic-C++
bmserial.h
Go to the documentation of this file.
1 #ifndef BMSERIAL__H__INCLUDED__
2 #define BMSERIAL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2019 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmserial.h
22  \brief Serialization / compression of bvector<>.
23  Set theoretical operations on compressed BLOBs.
24 */
25 
26 /*!
27  \defgroup bvserial bvector<> serialization
28  Serialization for bvector<> container
29 
30  \ingroup bvector
31 */
32 
33 #ifndef BM__H__INCLUDED__
34 // BitMagic utility headers do not include main "bm.h" declaration
35 // #include "bm.h" or "bm64.h" explicitly
36 # error missing include (bm.h or bm64.h)
37 #endif
38 
39 
40 #ifdef _MSC_VER
41 #pragma warning( push )
42 #pragma warning( disable : 4311 4312 4127)
43 #endif
44 
45 
46 
47 #include "encoding.h"
48 #include "bmfunc.h"
49 #include "bmtrans.h"
50 #include "bmalgo.h"
51 #include "bmutil.h"
52 #include "bmbuffer.h"
53 #include "bmdef.h"
54 #include "bmxor.h"
55 
56 namespace bm
57 {
58 
59 const unsigned set_compression_max = 5; ///< Maximum supported compression level
60 const unsigned set_compression_default = 5; ///< Default compression level
61 
62 /**
63  Bit-vector serialization class.
64 
65  Class designed to convert sparse bit-vectors into a single block of memory
66  ready for file or database storage or network transfer.
67 
68  Reuse of this class for multiple serializations (but not across threads).
69  Class resue offers some performance advantage (helps with temp memory
70  reallocations).
71 
72  @ingroup bvserial
73 */
74 template<class BV>
76 {
77 public:
78  typedef BV bvector_type;
84 
85  typedef byte_buffer<allocator_type> buffer;
87 public:
88  /**
89  Constructor
90 
91  \param alloc - memory allocator
92  \param temp_block - temporary block for various operations
93  (if NULL it will be allocated and managed by serializer class)
94  Temp block is used as a scratch memory during serialization,
95  use of external temp block allows to avoid unnecessary re-allocations.
96 
97  Temp block attached is not owned by the class and NOT deallocated on
98  destruction.
99  */
100  serializer(const allocator_type& alloc = allocator_type(),
101  bm::word_t* temp_block = 0);
102 
103  serializer(bm::word_t* temp_block);
104 
105  ~serializer();
106 
107  /*! @name Compression level settings */
108  //@{
109 
110  // --------------------------------------------------------------------
111  /**
112  Set compression level. Higher compression takes more time to process.
113  @param clevel - compression level (0-5)
114  @sa get_compression_level
115  */
116  void set_compression_level(unsigned clevel) BMNOEXCEPT;
117 
118  /**
119  Get compression level (0-5), Default 5 (recommended)
120  0 - take as is
121  1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical
122  compression, intervals encoding
123  3 - variant of 2 with different cut-offs
124  4 - delta transforms plus Elias Gamma encoding where possible legacy)
125  5 - binary interpolated encoding (Moffat, et al)
126 
127  Recommended: use 3 or 5
128 
129  */
131  { return compression_level_; }
132 
133 
134  //@}
135 
136 
137  // --------------------------------------------------------------------
138  /*! @name Serialization Methods */
139  //@{
140 
141  /**
142  Bitvector serialization into memory block
143 
144  @param bv - input bitvector
145  @param buf - out buffer (pre-allocated)
146  No range checking is done in this method.
147  It is responsibility of caller to allocate sufficient
148  amount of memory using information from calc_stat() function.
149 
150  @param buf_size - size of the output buffer
151 
152  @return Size of serialization block.
153  @sa calc_stat
154  */
155  size_type serialize(const BV& bv,
156  unsigned char* buf, size_t buf_size);
157 
158  /**
159  Bitvector serialization into buffer object (resized automatically)
160 
161  @param bv - input bitvector
162  @param buf - output buffer object
163  @param bv_stat - input (optional) bit-vector statistics object
164  if NULL, serialize will compute the statistics
165  */
166  void serialize(const BV& bv,
167  typename serializer<BV>::buffer& buf,
168  const statistics_type* bv_stat = 0);
169 
170  /**
171  Bitvector serialization into buffer object (resized automatically)
172  Input bit-vector gets optimized and then destroyed, content is
173  NOT guaranteed after this operation.
174  Effectively it moves data into the buffer.
175 
176  The reason this operation exsists is because it is faster to do
177  all three operations in one single pass.
178  This is a destructive serialization!
179 
180  @param bv - input/output bitvector
181  @param buf - output buffer object
182  */
183  void optimize_serialize_destroy(BV& bv,
184  typename serializer<BV>::buffer& buf);
185 
186  //@}
187  // --------------------------------------------------------------------
188 
189  /**
190  Return serialization counter vector
191  @internal
192  */
194  { return compression_stat_; }
195 
196  /**
197  Set GAP length serialization (serializes GAP levels of the original vector)
198 
199  @param value - when TRUE serialized vector includes GAP levels parameters
200  */
201  void gap_length_serialization(bool value) BMNOEXCEPT;
202 
203  /**
204  Set byte-order serialization (for cross platform compatibility)
205  @param value - TRUE serialization format includes byte-order marker
206  */
207  void byte_order_serialization(bool value) BMNOEXCEPT;
208 
209  /**
210  Add skip-markers to serialization BLOB for faster range decode
211  at the expense of some BLOB size increase
212 
213  @param enable - TRUE searilization will add bookmark codes
214  @param bm_interval - bookmark interval in (number of blocks)
215  (suggested between 4 and 512)
216  smaller interval means more bookmarks added to the skip list thus
217  more increasing the BLOB size
218  */
219  void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT;
220 
221  /**
222  Attach collection of reference vectors for XOR serialization
223  (no transfer of ownership for the pointer)
224  @internal
225  */
226  void set_ref_vectors(const bv_ref_vector_type* ref_vect);
227 
228  /**
229  Set current index in rer.vector collection
230  (not a row idx or plain idx)
231  */
232  void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT;
233 
234 
235 protected:
236  /**
237  Encode serialization header information
238  */
239  void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT;
240 
241  /*! Encode GAP block */
242  void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
243 
244  /*! Encode GAP block with Elias Gamma coder */
245  void gamma_gap_block(const bm::gap_word_t* gap_block,
246  bm::encoder& enc) BMNOEXCEPT;
247 
248  /**
249  Encode GAP block as delta-array with Elias Gamma coder
250  */
251  void gamma_gap_array(const bm::gap_word_t* gap_block,
252  unsigned arr_len,
253  bm::encoder& enc,
254  bool inverted = false) BMNOEXCEPT;
255 
256  /// Encode bit-block as an array of bits
257  void encode_bit_array(const bm::word_t* block,
258  bm::encoder& enc, bool inverted) BMNOEXCEPT;
259 
260  void gamma_gap_bit_block(const bm::word_t* block,
261  bm::encoder& enc) BMNOEXCEPT;
262 
263  void gamma_arr_bit_block(const bm::word_t* block,
264  bm::encoder& enc, bool inverted) BMNOEXCEPT;
265 
266  void bienc_arr_bit_block(const bm::word_t* block,
267  bm::encoder& enc, bool inverted) BMNOEXCEPT;
268 
269  /// encode bit-block as interpolated bit block of gaps
270  void bienc_gap_bit_block(const bm::word_t* block,
271  bm::encoder& enc) BMNOEXCEPT;
272 
273  void interpolated_arr_bit_block(const bm::word_t* block,
274  bm::encoder& enc, bool inverted) BMNOEXCEPT;
275  /// encode bit-block as interpolated gap block
276  void interpolated_gap_bit_block(const bm::word_t* block,
277  bm::encoder& enc) BMNOEXCEPT;
278 
279  /**
280  Encode GAP block as an array with binary interpolated coder
281  */
282  void interpolated_gap_array(const bm::gap_word_t* gap_block,
283  unsigned arr_len,
284  bm::encoder& enc,
285  bool inverted) BMNOEXCEPT;
286  void interpolated_gap_array_v0(const bm::gap_word_t* gap_block,
287  unsigned arr_len,
288  bm::encoder& enc,
289  bool inverted) BMNOEXCEPT;
290 
291 
292  /*! Encode GAP block with using binary interpolated encoder */
294  const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT;
295 
296  /**
297  Encode BIT block with repeatable runs of zeroes
298  */
299  void encode_bit_interval(const bm::word_t* blk,
300  bm::encoder& enc,
301  unsigned size_control) BMNOEXCEPT;
302  /**
303  Encode bit-block using digest (hierarchical compression)
304  */
305  void encode_bit_digest(const bm::word_t* blk,
306  bm::encoder& enc,
307  bm::id64_t d0) BMNOEXCEPT;
308 
309  /**
310  Determine best representation for GAP block based
311  on current set compression level
312 
313  @return set_block_gap, set_block_bit_1bit, set_block_arrgap
314  set_block_arrgap_egamma, set_block_arrgap_bienc
315  set_block_arrgap_inv, set_block_arrgap_egamma_inv
316  set_block_arrgap_bienc_inv, set_block_gap_egamma
317  set_block_gap_bienc
318 
319  @internal
320  */
321  unsigned char
322  find_gap_best_encoding(const bm::gap_word_t* gap_block) BMNOEXCEPT;
323 
324  /// Determine best representation for a bit-block
325  unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT;
326 
327  /// Determine best representation for a bit-block (level 5)
328  unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT;
329 
330  /// Reset all accumulated compression statistics
332 
333  void reset_models() BMNOEXCEPT { mod_size_ = 0; }
334  void add_model(unsigned char mod, unsigned score) BMNOEXCEPT;
335 protected:
336 
337  /// Bookmark state structure
339  {
341  : ptr_(0), nb_(0),
342  nb_range_(nb_range), bm_type_(0)
343  {
344  min_bytes_range_ = nb_range * 8;
345  if (min_bytes_range_ < 512)
346  min_bytes_range_ = 512;
347 
348  if (nb_range < 15)
349  bm_type_ = 2; // 16-bit offset
350  else
351  if (nb_range < 255)
352  bm_type_ = 1; // 24-bit offset
353  }
354 
355  unsigned char* ptr_; ///< bookmark pointer
356  block_idx_type nb_; ///< bookmark block idx
357  block_idx_type nb_range_; ///< target bookmark range in blocks
358  unsigned bm_type_; ///< 0:32-bit, 1: 24-bit, 2: 16-bit
359  size_t min_bytes_range_; ///< minumal distance (bytes) between marks
360  };
361 
362  /**
363  Check if bookmark needs to be placed and if so, encode it
364  into serialization BLOB
365 
366  @param nb - block idx
367  @param bookm - bookmark state structure
368  @param enc - BLOB encoder
369  */
370  static
372  bm::encoder& enc) BMNOEXCEPT;
373 
374 private:
375  serializer(const serializer&);
376  serializer& operator=(const serializer&);
377 
378 private:
381  typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
382  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
383 
384 private:
385  bm::id64_t digest0_;
386  unsigned bit_model_d0_size_; ///< memory (bytes) by d0 method (bytes)
387  unsigned bit_model_0run_size_; ///< memory (bytes) by run-0 method (bytes)
388  block_arridx_type bit_idx_arr_;
389  unsigned scores_[bm::block_waves];
390  unsigned char models_[bm::block_waves];
391  unsigned mod_size_;
392 
393  allocator_type alloc_;
394  size_type* compression_stat_;
395  bool gap_serial_;
396  bool byte_order_serial_;
397 
398  bool sb_bookmarks_;
399  unsigned sb_range_;
400 
401  bm::word_t* temp_block_;
402  unsigned compression_level_;
403  bool own_temp_block_;
404 
405  bool optimize_; ///< flag to optimize the input vector
406  bool free_; ///< flag to free the input vector
407  allocator_pool_type pool_;
408 
409  // XOR compression
410  //
411  const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
412  bm::xor_scanner<BV> xor_scan_; ///< scanner for XOR similarity
413  size_type ref_idx_; ///< current reference index
414  bm::word_t* xor_block_; ///< xor product
415 
416 };
417 
418 /**
419  Base deserialization class
420  \ingroup bvserial
421  @internal
422 */
423 template<typename DEC, typename BLOCK_IDX>
425 {
426 protected:
427  typedef DEC decoder_type;
428  typedef BLOCK_IDX block_idx_type;
430 
431 protected:
434  {}
435 
436  /// Read GAP block from the stream
438  unsigned block_type,
439  bm::gap_word_t* dst_block,
440  bm::gap_word_t& gap_head);
441 
442  /// Read list of bit ids
443  ///
444  /// @return number of ids
445  unsigned read_id_list(decoder_type& decoder,
446  unsigned block_type,
447  bm::gap_word_t* dst_arr);
448 
449  /// Read binary interpolated list into a bit-set
451 
452  /// Read binary interpolated gap blocks into a bitset
454 
455  /// Read inverted binary interpolated list into a bit-set
457 
458  /// Read digest0-type bit-block
460 
461 
462  /// read bit-block encoded as runs
463  static
465 
466  static
467  const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; }
468 
469  /// Try to skip if skip bookmark is available within reach
470  /// @return new block idx if skip went well
471  ///
473  block_idx_type nb,
474  block_idx_type expect_nb) BMNOEXCEPT;
475 
476 protected:
477  bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use
478 
479  block_idx_type bookmark_idx_;///< last bookmark block index
480  unsigned skip_offset_; ///< bookmark to skip 256 encoded blocks
481  const unsigned char* skip_pos_; ///< decoder skip position
482 };
483 
484 /**
485  Deserializer for bit-vector
486  \ingroup bvserial
487 */
488 template<class BV, class DEC>
490  protected deseriaizer_base<DEC, typename BV::block_idx_type>
491 {
492 public:
493  typedef BV bvector_type;
495  typedef typename BV::size_type size_type;
500 
501 public:
502  deserializer();
503  ~deserializer();
504 
505  /*! Deserialize bit-vector (equivalent to logical OR)
506  @param bv - target bit-vector
507  @param buf - BLOB memory pointer
508  @param temp_block - temporary buffer [block size] (not used)
509 
510  @return number of consumed bytes
511  */
512  size_t deserialize(bvector_type& bv,
513  const unsigned char* buf,
514  bm::word_t* temp_block = 0);
515 
516  // ----------------------------------------------------------------
517  /**
518  Attach collection of reference vectors for XOR de-serialization
519  (no transfer of ownership for the pointer)
520  @internal
521  */
522  void set_ref_vectors(const bv_ref_vector_type* ref_vect);
523 
524 
525  /**
526  set deserialization range [from, to]
527  This is NOT exact, approximate range, content outside range
528  is not guaranteed to be absent
529  @sa unset_range()
530  */
532  {
533  is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
534  }
535 
536  /**
537  Disable range deserialization
538  @sa set_range()
539  */
541 
542 protected:
543  typedef typename BV::blocks_manager_type blocks_manager_type;
544 
545 protected:
546  void xor_decode(size_type x_ref_idx, bm::id64_t x_ref_d64,
547  blocks_manager_type& bman,
548  block_idx_type nb);//,
549  //bm::word_t* blk);
550 
551  void deserialize_gap(unsigned char btype, decoder_type& dec,
553  block_idx_type nb,
554  bm::word_t* blk);
555  void decode_bit_block(unsigned char btype, decoder_type& dec,
556  blocks_manager_type& bman,
557  block_idx_type nb,
558  bm::word_t* blk);
559 
560  void decode_block_bit(decoder_type& dec,
561  bvector_type& bv,
562  block_idx_type nb,
563  bm::word_t* blk);
564 
566  bvector_type& bv,
567  block_idx_type nb,
568  bm::word_t* blk);
569 
570  void decode_arrbit(decoder_type& dec,
571  bvector_type& bv,
572  block_idx_type nb,
573  bm::word_t* blk);
574 
575 protected:
576  typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
577  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
578 
579 protected:
583 
586 
587  // XOR compression
588  //
589  const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
590  bm::word_t* xor_block_; ///< xor product
592 
593  // Range deserialization settings
594  //
595  unsigned is_range_set_;
598 };
599 
600 
601 /**
602  Iterator to walk forward the serialized stream.
603 
604  \internal
605  \ingroup bvserial
606 */
607 template<class BV, class SerialIterator>
609 {
610 public:
611  typedef BV bvector_type;
613  typedef SerialIterator serial_iterator_type;
614 public:
615 
616  /// set deserialization range [from, to]
617  void set_range(size_type from, size_type to);
618 
619  /// disable range filtration
620  void unset_range() BMNOEXCEPT { is_range_set_ = false; }
621 
624  bm::word_t* temp_block,
626  bool exit_on_one = false);
627 
628 private:
629  typedef typename BV::blocks_manager_type blocks_manager_type;
630  typedef typename bvector_type::block_idx_type block_idx_type;
631 
632  /// load data from the iterator of type "id list"
633  static
634  void load_id_list(bvector_type& bv,
636  unsigned id_count,
637  bool set_clear);
638 
639  /// Finalize the deserialization (zero target vector tail or bit-count tail)
640  static
641  size_type finalize_target_vector(blocks_manager_type& bman,
642  set_operation op,
643  size_type bv_block_idx);
644 
645  /// Process (obsolete) id-list serialization format
646  static
647  size_type process_id_list(bvector_type& bv,
649  set_operation op);
650  static
651  const char* err_msg() BMNOEXCEPT
652  { return "BM::de-serialization format error"; }
653 private:
654  bool is_range_set_ = false;
655  size_type nb_range_from_ = 0;
656  size_type nb_range_to_ = 0;
657 };
658 
659 /*!
660  @brief Serialization stream iterator
661 
662  Iterates blocks and control tokens of serialized bit-stream
663  \ingroup bvserial
664  @internal
665 */
666 template<class DEC, typename BLOCK_IDX>
667 class serial_stream_iterator : protected deseriaizer_base<DEC, BLOCK_IDX>
668 {
669 public:
671  typedef BLOCK_IDX block_idx_type;
673 
674 public:
675  serial_stream_iterator(const unsigned char* buf);
677 
678  /// serialized bitvector size
679  block_idx_type bv_size() const { return bv_size_; }
680 
681  /// Returns true if end of bit-stream reached
682  bool is_eof() const { return end_of_stream_; }
683 
684  /// get next block
685  void next();
686 
687  /// skip all zero or all-one blocks
689 
690  /// read bit block, using logical operation
691  unsigned get_bit_block(bm::word_t* dst_block,
692  bm::word_t* tmp_block,
693  set_operation op);
694 
695 
696  /// Read gap block data (with head)
697  void get_gap_block(bm::gap_word_t* dst_block);
698 
699  /// Return current decoder size
700  unsigned dec_size() const { return decoder_.size(); }
701 
702  /// Get low level access to the decoder (use carefully)
704 
705  /// iterator is a state machine, this enum encodes
706  /// its key value
707  ///
709  {
711  e_list_ids, ///< plain int array
712  e_blocks, ///< stream of blocks
713  e_zero_blocks, ///< one or more zero bit blocks
714  e_one_blocks, ///< one or more all-1 bit blocks
715  e_bit_block, ///< one bit block
716  e_gap_block ///< one gap block
717 
718  };
719 
720  /// Returns iterator internal state
721  iterator_state state() const BMNOEXCEPT { return this->state_; }
722 
723  iterator_state get_state() const BMNOEXCEPT { return this->state_; }
724  /// Number of ids in the inverted list (valid for e_list_ids)
725  unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; }
726 
727  /// Get last id from the id list
728  bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; }
729 
730  /// Get current block index
731  block_idx_type block_idx() const BMNOEXCEPT { return this->block_idx_; }
732 
733 public:
734  /// member function pointer for bitset-bitset get operations
735  ///
736  typedef
739 
740  unsigned
741  get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
742  unsigned
743  get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
744  unsigned
745  get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
746  unsigned
747  get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
748  unsigned
749  get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
750  unsigned
751  get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
752  unsigned
753  get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
754  unsigned
755  get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
756  unsigned
757  get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
758  unsigned
759  get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
760  unsigned
761  get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
762  unsigned
763  get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
764  unsigned
766  {
767  return get_bit_block_COUNT(dst_block, tmp_block);
768  }
769 
770  /// Get array of bits out of the decoder into bit block
771  /// (Converts inverted list into bits)
772  /// Returns number of words (bits) being read
773  unsigned get_arr_bit(bm::word_t* dst_block,
774  bool clear_target=true) BMNOEXCEPT;
775 
776  /// Get current block type
777  unsigned get_block_type() const BMNOEXCEPT { return block_type_; }
778 
779  unsigned get_bit() BMNOEXCEPT;
780 
781  void get_inv_arr(bm::word_t* block) BMNOEXCEPT;
782 
783  /// Try to skip if skip bookmark is available within reach
784  /// @return true if skip went well
785  ///
787  {
788  block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb);
789  if (new_nb)
790  {
791  block_idx_ = new_nb; state_ = e_blocks;
792  }
793  return new_nb;
794  }
795 
796 protected:
798 
803  unsigned id_cnt_; ///< Id counter for id list
804  bm::id_t last_id_; ///< Last id from the id list
806 
807  unsigned block_type_; ///< current block type
808  block_idx_type block_idx_; ///< current block index
809  block_idx_type mono_block_cnt_; ///< number of 0 or 1 blocks
810 
813 };
814 
815 /**
816  Deserializer, performs logical operations between bit-vector and
817  serialized bit-vector. This utility class potentially provides faster
818  and/or more memory efficient operation than more conventional deserialization
819  into memory bvector and then logical operation
820 
821  \ingroup bvserial
822 */
823 template<typename BV>
825 {
826 public:
827  typedef BV bvector_type;
828  typedef typename BV::allocator_type allocator_type;
831 
832 public:
835 
836  /*!
837  \brief Deserialize bvector using buffer as set operation argument
838 
839  \param bv - target bvector
840  \param buf - serialized buffer used as as a logical operation argument
841  \param op - set algebra operation (default: OR)
842  \param exit_on_one - quick exit if set operation found some result
843 
844  \return bitcount (for COUNT_* operations)
845  */
847  const unsigned char* buf,
848  set_operation op,
849  bool exit_on_one = false);
850 
851  /*!
852  Deserialize range using mask vector (AND)
853  \param bv - target bvector (should be set ranged)
854  \param buf - serialized buffer pointer
855  \param idx_from - range of bits set for deserialization [from..to]
856  \param idx_to - range of bits [from..to]
857  */
859  const unsigned char* buf,
860  size_type idx_from,
861  size_type idx_to);
862 
863 
864 
865 
866  // -----------------------------------------------------------------
867  // obsolete methods (switch to new ones, without team_block)
868  //
869 
870  /*!
871  \brief Obsolete! Deserialize bvector using buffer as set operation argument
872 
873  \param bv - target bvector
874  \param buf - serialized buffer as a logical argument
875  \param op - set algebra operation (default: OR)
876  \param exit_on_one - quick exit if set operation found some result
877 
878  \return bitcount (for COUNT_* operations)
879  */
881  const unsigned char* buf,
882  bm::word_t*, /*temp_block,*/
884  bool exit_on_one = false)
885  { return deserialize(bv, buf, op, exit_on_one); }
886 
887  /*!
888  Deserialize range using mask vector (AND)
889  \param bv - target bvector (should be set ranged)
890  \param buf - serialized buffer pointer
891  \param idx_from - range of bits set for deserialization [from..to]
892  \param idx_to - range of bits [from..to]
893  */
895  const unsigned char* buf,
896  bm::word_t*, /* temp_block, */
897  size_type idx_from,
898  size_type idx_to)
899  { deserialize_range(bv, buf, idx_from, idx_to); }
900  // -----------------------------------------------------------------
901 
902  /**
903  Attach collection of reference vectors for XOR serialization
904  (no transfer of ownership for the pointer)
905  @internal
906  */
907  void set_ref_vectors(const bv_ref_vector_type* ref_vect)
908  { ref_vect_ = ref_vect; }
909 
910 private:
911  size_type deserialize_xor(bvector_type& bv,
912  const unsigned char* buf,
913  set_operation op,
914  bool exit_on_one);
915  static
916  size_type deserialize_xor(bvector_type& bv,
917  bvector_type& bv_tmp,
918  set_operation op);
919 
920  void deserialize_xor_range(bvector_type& bv,
921  const unsigned char* buf,
922  size_type idx_from,
923  size_type idx_to);
924 
925 private:
926  typedef typename bvector_type::block_idx_type block_idx_type;
927  typedef
928  typename BV::blocks_manager_type blocks_manager_type;
929  typedef
931  typedef
933  typedef
935 
936 private:
937  allocator_type alloc_;
938  bm::word_t* temp_block_;
939 
940  /// default stream iterator (same endian)
942  /// big-endian stream iterator
944  /// little-endian stream iterator
946 
947 
948  // XOR compression related fields
949  //
950  const bv_ref_vector_type* ref_vect_; ///< xor ref.vector
951 
952 
953 };
954 
955 
956 
957 
958 //----------------------------------------------------------------------------
959 //
960 //----------------------------------------------------------------------------
961 
962 
963 /// \internal
964 /// \ingroup bvserial
967  BM_HM_RESIZE = (1 << 1), ///< resized vector
968  BM_HM_ID_LIST = (1 << 2), ///< id list stored
969  BM_HM_NO_BO = (1 << 3), ///< no byte-order
970  BM_HM_NO_GAPL = (1 << 4), ///< no GAP levels
971  BM_HM_64_BIT = (1 << 5), ///< 64-bit vector
972  BM_HM_HXOR = (1 << 6) ///< horizontal XOR compression turned ON
973 };
974 
975 
976 // --------------------------------------------------------------------
977 // Serialization stream encoding constants
978 //
979 
980 const unsigned char set_block_end = 0; //!< End of serialization
981 const unsigned char set_block_1zero = 1; //!< One all-zero block
982 const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
983 const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
984 const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
985 const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
986 const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
987 const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
988 const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
989 const unsigned char set_block_azero = 9; //!< All other blocks zero
990 const unsigned char set_block_aone = 10; //!< All other blocks one
991 const unsigned char set_block_bit = 11; //!< Plain bit block
992 const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
993 const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
994 const unsigned char set_block_gap = 14; //!< Plain GAP block
995 const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
996 const unsigned char set_block_arrbit = 16; //!< List of bits ON
997 const unsigned char set_block_bit_interval = 17; //!< Interval block
998 const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
999 const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
1000 const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
1001 const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
1002 const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
1003 const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
1004 const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
1005 const unsigned char set_block_64zero = 25; //!< lots of zero blocks
1006 const unsigned char set_block_64one = 26; //!< lots of all-set blocks
1007 
1008 const unsigned char set_block_gap_bienc = 27; //!< Interpolated GAP block (legacy)
1009 const unsigned char set_block_arrgap_bienc = 28; //!< Interpolated GAP array
1010 const unsigned char set_block_arrgap_bienc_inv = 29; //!< Interpolated GAP array (inverted)
1011 const unsigned char set_block_arrbit_inv = 30; //!< List of bits OFF
1012 const unsigned char set_block_arr_bienc = 31; //!< Interpolated block as int array
1013 const unsigned char set_block_arr_bienc_inv = 32; //!< Interpolated inverted block int array
1014 const unsigned char set_block_bitgap_bienc = 33; //!< Interpolated bit-block as GAPs
1015 const unsigned char set_block_bit_digest0 = 34; //!< H-compression with digest mask
1016 
1017 const unsigned char set_block_ref_eq = 35; //!< block is a copy of a reference block
1018 const unsigned char set_block_xor_ref8 = 36; //!< block is masked XOR of a reference block (8-bit)
1019 const unsigned char set_block_xor_ref16 = 37; //!< block is masked XOR of a reference block (16-bit)
1020 const unsigned char set_block_xor_ref32 = 38; //!< ..... 32-bit (should never happen)
1021 const unsigned char set_block_xor_gap_ref8 = 39; //!< ..... 8-bit
1022 const unsigned char set_block_xor_gap_ref16 = 40; //!< ..... 16-bit
1023 const unsigned char set_block_xor_gap_ref32 = 41; //!< ..... 32-bit (should never happen)
1024 const unsigned char set_block_xor_chain = 42; //!< XOR chain (reserved)
1025 
1026 const unsigned char set_block_gap_bienc_v2 = 43; //!< Interpolated GAP block (v2)
1027 const unsigned char set_block_arrgap_bienc_v2 = 44; //!< //!< Interpolated GAP array (v2)
1028 const unsigned char set_block_arrgap_bienc_inv_v2 = 45; //!< Interpolated GAP array (inverted)
1029 const unsigned char set_block_bitgap_bienc_v2 = 46; //!< Interpolated bit-block as GAPs (v2 - reseved)
1030 
1031 const unsigned char set_nb_bookmark16 = 47; //<! jump ahead mark (16-bit)
1032 const unsigned char set_nb_bookmark24 = 48; //<! jump ahead mark (24-bit)
1033 const unsigned char set_nb_bookmark32 = 49; //<! jump ahead mark (32-bit)
1034 const unsigned char set_nb_sync_mark8 = 50; //!< bookmark sync point (8-bits)
1035 const unsigned char set_nb_sync_mark16 = 51;
1036 const unsigned char set_nb_sync_mark24 = 52;
1037 const unsigned char set_nb_sync_mark32 = 53;
1038 const unsigned char set_nb_sync_mark48 = 54;
1039 const unsigned char set_nb_sync_mark64 = 55; //!< ..... 64-bit (should never happen)
1040 
1041 template<class BV>
1043  bm::word_t* temp_block)
1044 : alloc_(alloc),
1045  compression_stat_(0),
1046  gap_serial_(false),
1047  byte_order_serial_(true),
1048  sb_bookmarks_(false),
1049  sb_range_(0),
1050  compression_level_(bm::set_compression_default),
1051  ref_vect_(0),
1052  ref_idx_(0),
1053  xor_block_(0)
1054 {
1055  bit_idx_arr_.resize(bm::gap_max_bits);
1056  if (temp_block == 0)
1057  {
1058  temp_block_ = alloc_.alloc_bit_block();
1059  own_temp_block_ = true;
1060  }
1061  else
1062  {
1063  temp_block_ = temp_block;
1064  own_temp_block_ = false;
1065  }
1066  compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1067  optimize_ = free_ = false;
1068 }
1069 
1070 template<class BV>
1072 : alloc_(allocator_type()),
1073  compression_stat_(0),
1074  gap_serial_(false),
1075  byte_order_serial_(true),
1076  sb_bookmarks_(false),
1077  sb_range_(0),
1078  compression_level_(bm::set_compression_default),
1079  ref_vect_(0),
1080  ref_idx_(0),
1081  xor_block_(0)
1082 {
1083  bit_idx_arr_.resize(bm::gap_max_bits);
1084  if (temp_block == 0)
1085  {
1086  temp_block_ = alloc_.alloc_bit_block();
1087  own_temp_block_ = true;
1088  }
1089  else
1090  {
1091  temp_block_ = temp_block;
1092  own_temp_block_ = false;
1093  }
1094  compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1095  optimize_ = free_ = false;
1096 }
1097 
1098 template<class BV>
1100 {
1101  if (own_temp_block_)
1102  alloc_.free_bit_block(temp_block_);
1103  if (compression_stat_)
1104  alloc_.free_bit_block((bm::word_t*)compression_stat_);
1105  if (xor_block_)
1106  alloc_.free_bit_block(xor_block_);
1107 }
1108 
1109 
1110 template<class BV>
1112 {
1113  for (unsigned i = 0; i < 256; ++i)
1114  compression_stat_[i] = 0;
1115 }
1116 
1117 
1118 template<class BV>
1120 {
1121  if (clevel <= bm::set_compression_max)
1122  compression_level_ = clevel;
1123 }
1124 
1125 template<class BV>
1127 {
1128  gap_serial_ = value;
1129 }
1130 
1131 template<class BV>
1133 {
1134  byte_order_serial_ = value;
1135 }
1136 
1137 template<class BV>
1138 void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT
1139 {
1140  sb_bookmarks_ = enable;
1141  if (enable)
1142  {
1143  if (bm_interval > 512)
1144  bm_interval = 512;
1145  else
1146  if (bm_interval < 4)
1147  bm_interval = 4;
1148  }
1149  sb_range_ = bm_interval;
1150 }
1151 
1152 template<class BV>
1154 {
1155  ref_vect_ = ref_vect;
1156  xor_scan_.set_ref_vector(ref_vect);
1157  if (!xor_block_)
1158  xor_block_ = alloc_.alloc_bit_block();
1159 }
1160 
1161 template<class BV>
1163 {
1164  ref_idx_ = ref_idx;
1165 }
1166 
1167 template<class BV>
1169 {
1170  const blocks_manager_type& bman = bv.get_blocks_manager();
1171 
1172  unsigned char header_flag = 0;
1173  if (bv.size() == bm::id_max) // no dynamic resize
1174  header_flag |= BM_HM_DEFAULT;
1175  else
1176  header_flag |= BM_HM_RESIZE;
1177 
1178  if (!byte_order_serial_)
1179  header_flag |= BM_HM_NO_BO;
1180 
1181  if (!gap_serial_)
1182  header_flag |= BM_HM_NO_GAPL;
1183 
1184  #ifdef BM64ADDR
1185  header_flag |= BM_HM_64_BIT;
1186  #endif
1187 
1188  if (ref_vect_)
1189  {
1190  // TODO: check if XOR search found anything at all
1191  header_flag |= BM_HM_HXOR; // XOR compression turned ON
1192  }
1193 
1194  enc.put_8(header_flag);
1195 
1196  if (byte_order_serial_)
1197  {
1199  enc.put_8((unsigned char)bo);
1200  }
1201  // keep GAP levels information
1202  if (gap_serial_)
1203  {
1204  enc.put_16(bman.glen(), bm::gap_levels);
1205  }
1206 
1207  // save size (only if bvector has been down-sized)
1208  if (header_flag & BM_HM_RESIZE)
1209  {
1210  #ifdef BM64ADDR
1211  enc.put_64(bv.size());
1212  #else
1213  enc.put_32(bv.size());
1214  #endif
1215  }
1216 
1217 }
1218 
1219 template<class BV>
1221  const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT
1222 {
1223  unsigned len = bm::gap_length(gap_block);
1224  if (len > 4) // BIC encoding
1225  {
1226  encoder::position_type enc_pos0 = enc.get_pos();
1227  BM_ASSERT(gap_block[len-1] == 65535);
1228 
1229  bm::gap_word_t head = gap_block[0];
1230  head &= bm::gap_word_t(~(3 << 1)); // clear the level flags
1231  bm::gap_word_t min_v = gap_block[1];
1232  bm::gap_word_t max_v = gap_block[len-2];
1233  bm::gap_word_t tail_delta = bm::gap_word_t(65535 - max_v);
1234 
1235  if (min_v < 256)
1236  head |= (1 << 1);
1237  if (tail_delta < 256)
1238  head |= (1 << 2);
1239 
1240  enc.put_8(bm::set_block_gap_bienc_v2);
1241  enc.put_16(head);
1242  if (min_v < 256)
1243  enc.put_8((unsigned char)min_v);
1244  else
1245  enc.put_16(min_v);
1246 
1247  if (tail_delta < 256)
1248  enc.put_8((unsigned char)tail_delta);
1249  else
1250  enc.put_16(tail_delta);
1251 
1252  bit_out_type bout(enc);
1253  bout.bic_encode_u16(&gap_block[2], len-4, min_v, max_v);
1254  bout.flush();
1255 
1256  // re-evaluate coding efficiency
1257  //
1258  encoder::position_type enc_pos1 = enc.get_pos();
1259  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1260  if (gamma_size > (len-1)*sizeof(gap_word_t))
1261  {
1262  enc.set_pos(enc_pos0);
1263  }
1264  else
1265  {
1266  compression_stat_[bm::set_block_gap_bienc]++;
1267  return;
1268  }
1269  }
1270 
1271  // save as plain GAP block
1272  enc.put_8(bm::set_block_gap);
1273  enc.put_16(gap_block, len-1);
1274 
1275  compression_stat_[bm::set_block_gap]++;
1276 }
1277 
1278 
1279 template<class BV>
1281  bm::encoder& enc) BMNOEXCEPT
1282 {
1283  unsigned len = gap_length(gap_block);
1284  if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
1285  {
1286  encoder::position_type enc_pos0 = enc.get_pos();
1287  {
1288  bit_out_type bout(enc);
1289  gamma_encoder_func gamma(bout);
1290 
1291  enc.put_8(bm::set_block_gap_egamma);
1292  enc.put_16(gap_block[0]);
1293 
1294  for_each_dgap(gap_block, gamma);
1295  }
1296  // re-evaluate coding efficiency
1297  //
1298  encoder::position_type enc_pos1 = enc.get_pos();
1299  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1300  if (gamma_size > (len-1)*sizeof(gap_word_t))
1301  {
1302  enc.set_pos(enc_pos0);
1303  }
1304  else
1305  {
1306  compression_stat_[bm::set_block_gap_egamma]++;
1307  return;
1308  }
1309  }
1310 
1311  // save as plain GAP block
1312  enc.put_8(bm::set_block_gap);
1313  enc.put_16(gap_block, len-1);
1314 
1315  compression_stat_[bm::set_block_gap]++;
1316 }
1317 
1318 template<class BV>
1320  unsigned arr_len,
1321  bm::encoder& enc,
1322  bool inverted) BMNOEXCEPT
1323 {
1324  unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
1326  if (compression_level_ > 3 && arr_len > 1)
1327  {
1328  encoder::position_type enc_pos0 = enc.get_pos();
1329  {
1330  bit_out_type bout(enc);
1331  enc.put_8(scode);
1332  bout.gamma(arr_len);
1333  gap_word_t prev = gap_array[0];
1334  bout.gamma(prev + 1);
1335 
1336  for (unsigned i = 1; i < arr_len; ++i)
1337  {
1338  gap_word_t curr = gap_array[i];
1339  bout.gamma(curr - prev);
1340  prev = curr;
1341  }
1342  }
1343  encoder::position_type enc_pos1 = enc.get_pos();
1344  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1345  unsigned plain_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1346  if (gamma_size >= plain_size)
1347  {
1348  enc.set_pos(enc_pos0); // rollback the bit stream
1349  }
1350  else
1351  {
1352  compression_stat_[scode]++;
1353  return;
1354  }
1355  }
1356  // save as a plain array
1357  scode = inverted ? bm::set_block_arrgap_inv : bm::set_block_arrgap;
1358  enc.put_prefixed_array_16(scode, gap_array, arr_len, true);
1359  compression_stat_[scode]++;
1360 }
1361 
1362 
1363 template<class BV>
1365  const bm::gap_word_t* gap_block,
1366  unsigned arr_len,
1367  bm::encoder& enc,
1368  bool inverted) BMNOEXCEPT
1369 {
1370  BM_ASSERT(arr_len <= 65535);
1371  unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
1373  if (arr_len > 4)
1374  {
1375  encoder::position_type enc_pos0 = enc.get_pos();
1376  {
1377  bit_out_type bout(enc);
1378 
1379  bm::gap_word_t min_v = gap_block[0];
1380  bm::gap_word_t max_v = gap_block[arr_len-1];
1381  BM_ASSERT(max_v > min_v);
1382 
1383  enc.put_8(scode);
1384  enc.put_16(min_v);
1385  enc.put_16(max_v);
1386 
1387  bout.gamma(arr_len-4);
1388  bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1389  bout.flush();
1390  }
1391  encoder::position_type enc_pos1 = enc.get_pos();
1392  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1393  unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1394  if (enc_size >= raw_size)
1395  {
1396  enc.set_pos(enc_pos0); // rollback the bit stream
1397  }
1398  else
1399  {
1400  compression_stat_[scode]++;
1401  return;
1402  }
1403  }
1404  // save as a plain array
1405  scode = inverted ? bm::set_block_arrgap_inv : bm::set_block_arrgap;
1406  enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1407  compression_stat_[scode]++;
1408 }
1409 
1410 
1411 template<class BV>
1413  unsigned arr_len,
1414  bm::encoder& enc,
1415  bool inverted) BMNOEXCEPT
1416 {
1417  BM_ASSERT(arr_len <= 65535);
1418 
1419  unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv_v2
1421  if (arr_len > 4)
1422  {
1423  bm::gap_word_t min_v = gap_block[0];
1424  bm::gap_word_t max_v = gap_block[arr_len-1];
1425  bm::gap_word_t tail = bm::gap_word_t(max_v - min_v);
1426 
1427  if (min_v >= 256 && tail >= 256)// || arr_len > 128)
1428  {
1429  interpolated_gap_array_v0(gap_block, arr_len, enc, inverted);
1430  return;
1431  }
1432 
1433  BM_ASSERT(arr_len < 16383);
1434  encoder::position_type enc_pos0 = enc.get_pos();
1435  {
1436  bit_out_type bout(enc);
1437 
1438  BM_ASSERT(max_v > min_v);
1439 
1440  enc.put_8(scode);
1441 
1442  BM_ASSERT((arr_len & (3 << 14)) == 0);
1443  arr_len <<= 2;
1444  if (min_v < 256)
1445  arr_len |= 1;
1446  if (tail < 256)
1447  arr_len |= (1 << 1);
1448 
1449  enc.put_16(bm::gap_word_t(arr_len));
1450  if (min_v < 256)
1451  enc.put_8((unsigned char)min_v);
1452  else
1453  enc.put_16(min_v);
1454 
1455  if (tail < 256)
1456  enc.put_8((unsigned char)tail);
1457  else
1458  enc.put_16(tail);
1459 
1460  arr_len >>= 2;
1461 
1462  bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1463  bout.flush();
1464  }
1465  encoder::position_type enc_pos1 = enc.get_pos();
1466  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1467  unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1468  if (enc_size >= raw_size)
1469  {
1470  enc.set_pos(enc_pos0); // rollback the bit stream
1471  }
1472  else
1473  {
1474  compression_stat_[scode]++;
1475  return;
1476  }
1477  }
1478  // save as a plain array
1479  scode = inverted ? bm::set_block_arrgap_inv : bm::set_block_arrgap;
1480  enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1481  compression_stat_[scode]++;
1482 }
1483 
1484 
1485 
1486 template<class BV>
1487 void serializer<BV>::add_model(unsigned char mod, unsigned score) BMNOEXCEPT
1488 {
1489  BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
1490  scores_[mod_size_] = score; models_[mod_size_] = mod;
1491  ++mod_size_;
1492 }
1493 
1494 template<class BV>
1495 unsigned char
1497 {
1498  unsigned bc, bit_gaps;
1499 
1500  add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1501 
1502  bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1503  add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1504 
1505  bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1506  if (!d0)
1507  {
1508  add_model(bm::set_block_azero, 0);
1509  return bm::set_block_azero;
1510  }
1511  unsigned d0_bc = word_bitcount64(d0);
1512  bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1513  if (d0 != ~0ull)
1514  add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1515 
1516 
1517  bm::bit_block_change_bc(block, &bit_gaps, &bc);
1518  BM_ASSERT(bm::bit_block_count(block) == bc);
1519  BM_ASSERT(bm::bit_block_calc_change(block) == bit_gaps);
1520 
1521  if (bc == 1)
1522  {
1523  add_model(bm::set_block_bit_1bit, 16);
1524  return bm::set_block_bit_1bit;
1525  }
1526  unsigned inverted_bc = bm::gap_max_bits - bc;
1527  if (!inverted_bc)
1528  {
1529  add_model(bm::set_block_aone, 0);
1530  return bm::set_block_aone;
1531  }
1532  unsigned arr_size =
1533  unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1534  unsigned arr_size_inv =
1535  unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1536 
1537  add_model(bm::set_block_arrbit, arr_size*8);
1538  add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1539  const unsigned bie_bits_per_int = 4;
1540 
1541  if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1542  add_model(bm::set_block_gap_bienc,
1543  32 + (bit_gaps-1) * bie_bits_per_int);
1544  if (bc < bit_gaps && bc < bm::gap_equiv_len)
1545  add_model(bm::set_block_arrgap_bienc, 16*3 + bc*bie_bits_per_int);
1546  else
1547  if (inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1548  add_model(bm::set_block_arrgap_bienc_inv, 16*3 + inverted_bc*bie_bits_per_int);
1549  else
1550  if (bc >= bm::gap_equiv_len && bc < bie_cut_off)
1551  add_model(bm::set_block_arr_bienc, 16*3 + bc * bie_bits_per_int);
1552  else
1553  if (inverted_bc > 3 && inverted_bc >= bm::gap_equiv_len && inverted_bc < bie_cut_off)
1554  add_model(bm::set_block_arr_bienc_inv, 16*3 + inverted_bc * bie_bits_per_int);
1555 
1556  if (bit_gaps >= bm::gap_max_buff_len && bit_gaps < bie_cut_off)
1557  add_model(bm::set_block_bitgap_bienc, 16*4 + (bit_gaps-2) * bie_bits_per_int);
1558  else
1559  {
1560  if (bit_gaps < bm::gap_max_buff_len) // GAP block
1561  {
1562  bit_gaps -= bit_gaps > 2 ? 2 : 0;
1563  add_model(bm::set_block_bitgap_bienc, 16*4 + bit_gaps * bie_bits_per_int);
1564  }
1565  }
1566 
1567  // find the best representation based on computed approx.models
1568  //
1569  unsigned min_score = bm::gap_max_bits;
1570  unsigned char model = bm::set_block_bit;
1571  for (unsigned i = 0; i < mod_size_; ++i)
1572  {
1573  if (scores_[i] < min_score)
1574  {
1575  min_score = scores_[i];
1576  model = models_[i];
1577  }
1578  }
1579  return model;
1580 }
1581 
1582 template<class BV>
1583 unsigned char
1585 {
1586  reset_models();
1587 
1588  if (compression_level_ >= 5)
1589  return find_bit_best_encoding_l5(block);
1590 
1591  unsigned bc, bit_gaps;
1592 
1593  // heuristics and hard-coded rules to determine
1594  // the best representation for bit-block
1595  //
1596  add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1597 
1598  if (compression_level_ <= 1)
1599  return bm::set_block_bit;
1600 
1601  // check if it is a very sparse block with some areas of dense areas
1602  bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1603  if (compression_level_ <= 5)
1604  add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1605 
1606  if (compression_level_ >= 2)
1607  {
1608  bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1609  if (!d0)
1610  {
1611  add_model(bm::set_block_azero, 0);
1612  return bm::set_block_azero;
1613  }
1614  unsigned d0_bc = word_bitcount64(d0);
1615  bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1616  if (d0 != ~0ull)
1617  add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1618 
1619  if (compression_level_ >= 4)
1620  {
1621  bm::bit_block_change_bc(block, &bit_gaps, &bc);
1622  }
1623  else
1624  {
1625  bc = bm::bit_block_count(block);
1626  bit_gaps = 65535;
1627  }
1628  BM_ASSERT(bc);
1629 
1630  if (bc == 1)
1631  {
1632  add_model(bm::set_block_bit_1bit, 16);
1633  return bm::set_block_bit_1bit;
1634  }
1635  unsigned inverted_bc = bm::gap_max_bits - bc;
1636  if (!inverted_bc)
1637  {
1638  add_model(bm::set_block_aone, 0);
1639  return bm::set_block_aone;
1640  }
1641 
1642  if (compression_level_ >= 3)
1643  {
1644  unsigned arr_size =
1645  unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1646  unsigned arr_size_inv =
1647  unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1648 
1649  add_model(bm::set_block_arrbit, arr_size*8);
1650  add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1651 
1652  if (compression_level_ >= 4)
1653  {
1654  const unsigned gamma_bits_per_int = 6;
1655  //unsigned bit_gaps = bm::bit_block_calc_change(block);
1656 
1657  if (compression_level_ == 4)
1658  {
1659  if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1660  add_model(bm::set_block_gap_egamma,
1661  16 + (bit_gaps-1) * gamma_bits_per_int);
1662  if (bc < bit_gaps && bc < bm::gap_equiv_len)
1663  add_model(bm::set_block_arrgap_egamma,
1664  16 + bc * gamma_bits_per_int);
1665  if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1667  16 + inverted_bc * gamma_bits_per_int);
1668  }
1669  } // level >= 3
1670  } // level >= 3
1671  } // level >= 2
1672 
1673  // find the best representation based on computed approx.models
1674  //
1675  unsigned min_score = bm::gap_max_bits;
1676  unsigned char model = bm::set_block_bit;
1677  for (unsigned i = 0; i < mod_size_; ++i)
1678  {
1679  if (scores_[i] < min_score)
1680  {
1681  min_score = scores_[i];
1682  model = models_[i];
1683  }
1684  }
1685  return model;
1686 }
1687 
1688 template<class BV>
1689 unsigned char
1691 {
1692  // heuristics and hard-coded rules to determine
1693  // the best representation for d-GAP block
1694  //
1695  if (compression_level_ <= 2)
1696  return bm::set_block_gap;
1697  unsigned len = bm::gap_length(gap_block);
1698  if (len == 2)
1699  return bm::set_block_gap;
1700  unsigned bc = bm::gap_bit_count_unr(gap_block);
1701  if (bc == 1)
1702  return bm::set_block_bit_1bit;
1703  if (bc < len)
1704  {
1705  if (compression_level_ < 4)
1706  return bm::set_block_arrgap;
1707  if (compression_level_ == 4)
1710  }
1711  unsigned inverted_bc = bm::gap_max_bits - bc;
1712  if (inverted_bc < len)
1713  {
1714  if (compression_level_ < 4)
1715  return bm::set_block_arrgap_inv;
1716  if (compression_level_ == 4)
1719  }
1720  if (len < 6)
1721  {
1722  return bm::set_block_gap;
1723  }
1724 
1725  if (compression_level_ == 4)
1726  return bm::set_block_gap_egamma;
1727  return bm::set_block_gap_bienc;
1728 }
1729 
1730 
1731 
1732 
1733 template<class BV>
1735 {
1736  gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
1737 
1738  gap_word_t arr_len;
1739  bool invert = false;
1740 
1741  unsigned char enc_choice = find_gap_best_encoding(gap_block);
1742  switch (enc_choice)
1743  {
1744  case bm::set_block_gap:
1745  gamma_gap_block(gap_block, enc); // TODO: use plain encode (non-gamma)
1746  break;
1747 
1749  arr_len = bm::gap_convert_to_arr(gap_temp_block,
1750  gap_block,
1751  bm::gap_equiv_len-10);
1752  BM_ASSERT(arr_len == 1);
1754  enc.put_16(gap_temp_block[0]);
1755  compression_stat_[bm::set_block_bit_1bit]++;
1756  break;
1759  invert = true;
1761  // fall through
1762  case bm::set_block_arrgap:
1764  // fall through
1766  arr_len = gap_convert_to_arr(gap_temp_block,
1767  gap_block,
1768  bm::gap_equiv_len-10,
1769  invert);
1770  BM_ASSERT(arr_len);
1771  gamma_gap_array(gap_temp_block, arr_len, enc, invert);
1772  break;
1774  interpolated_encode_gap_block(gap_block, enc);
1775  break;
1777  invert = true;
1779  // fall through
1781  arr_len = gap_convert_to_arr(gap_temp_block,
1782  gap_block,
1783  bm::gap_equiv_len-64,
1784  invert);
1785  BM_ASSERT(arr_len);
1786  interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
1787  break;
1788  default:
1789  gamma_gap_block(gap_block, enc);
1790  } // switch
1791 }
1792 
1793 template<class BV>
1795  bm::encoder& enc,
1796  unsigned //size_control
1797  ) BMNOEXCEPT
1798 {
1799  enc.put_8(bm::set_block_bit_0runs);
1800  enc.put_8((blk[0]==0) ? 0 : 1); // encode start
1801 
1802  unsigned i, j;
1803  for (i = 0; i < bm::set_block_size; ++i)
1804  {
1805  if (blk[i] == 0)
1806  {
1807  // scan fwd to find 0 island length
1808  for (j = i+1; j < bm::set_block_size; ++j)
1809  {
1810  if (blk[j] != 0)
1811  break;
1812  }
1813  BM_ASSERT(j-i);
1814  enc.put_16((gap_word_t)(j-i));
1815  i = j - 1;
1816  }
1817  else
1818  {
1819  // scan fwd to find non-0 island length
1820  for (j = i+1; j < bm::set_block_size; ++j)
1821  {
1822  if (blk[j] == 0)
1823  {
1824  // look ahead to identify and ignore short 0-run
1825  if (((j+1 < bm::set_block_size) && blk[j+1]) ||
1826  ((j+2 < bm::set_block_size) && blk[j+2]))
1827  {
1828  ++j; // skip zero word
1829  continue;
1830  }
1831  break;
1832  }
1833  }
1834  BM_ASSERT(j-i);
1835  enc.put_16((gap_word_t)(j-i));
1836  enc.put_32(blk + i, j - i); // stream all bit-words now
1837 
1838  i = j - 1;
1839  }
1840  }
1841  compression_stat_[bm::set_block_bit_0runs]++;
1842 }
1843 
1844 
1845 template<class BV>
1847  bm::encoder& enc,
1849 {
1850  // evaluate a few "sure" models here and pick the best
1851  //
1852  if (d0 != ~0ull)
1853  {
1854  if (bit_model_0run_size_ < bit_model_d0_size_)
1855  {
1856  encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
1857  return;
1858  }
1859 
1860  // encode using digest0 method
1861  //
1862  enc.put_8(bm::set_block_bit_digest0);
1863  enc.put_64(d0);
1864 
1865  while (d0)
1866  {
1867  bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
1868 
1869  unsigned wave = bm::word_bitcount64(t - 1);
1870  unsigned off = wave * bm::set_block_digest_wave_size;
1871 
1872  unsigned j = 0;
1873  do
1874  {
1875  enc.put_32(block[off+j+0]);
1876  enc.put_32(block[off+j+1]);
1877  enc.put_32(block[off+j+2]);
1878  enc.put_32(block[off+j+3]);
1879  j += 4;
1880  } while (j < bm::set_block_digest_wave_size);
1881 
1882  d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
1883  } // while
1884 
1885  compression_stat_[bm::set_block_bit_digest0]++;
1886  }
1887  else
1888  {
1889  if (bit_model_0run_size_ < unsigned(bm::set_block_size*sizeof(bm::word_t)))
1890  {
1891  encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
1892  return;
1893  }
1894 
1895  enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
1896  compression_stat_[bm::set_block_bit]++;
1897  }
1898 }
1899 
1900 
1901 
1902 template<class BV>
1903 void serializer<BV>::serialize(const BV& bv,
1904  typename serializer<BV>::buffer& buf,
1905  const statistics_type* bv_stat)
1906 {
1907  statistics_type stat;
1908  if (!bv_stat)
1909  {
1910  bv.calc_stat(&stat);
1911  bv_stat = &stat;
1912  }
1913 
1914  buf.resize(bv_stat->max_serialize_mem, false); // no-copy resize
1915  optimize_ = free_ = false;
1916 
1917  size_type slen = this->serialize(bv, buf.data(), buf.size());
1918  BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
1919  BM_ASSERT(slen);
1920 
1921  buf.resize(slen);
1922 }
1923 
1924 template<class BV>
1926  typename serializer<BV>::buffer& buf)
1927 {
1928  statistics_type st;
1929  optimize_ = true;
1930  if (!ref_vect_) // do not use block-free if XOR compression is setup
1931  free_ = true; // set the destructive mode
1932 
1933  typename bvector_type::mem_pool_guard mp_g_z;
1934  mp_g_z.assign_if_not_set(pool_, bv);
1935 
1936  bv.optimize(temp_block_, BV::opt_compress, &st);
1937  serialize(bv, buf, &st);
1938 
1939  optimize_ = free_ = false; // restore the default mode
1940 }
1941 
1942 template<class BV>
1944  bm::encoder& enc,
1945  bool inverted) BMNOEXCEPT
1946 {
1947  unsigned arr_len;
1948  unsigned mask = inverted ? ~0u : 0u;
1949  // TODO: get rid of max bits
1950  arr_len = bm::bit_convert_to_arr(bit_idx_arr_.data(),
1951  block,
1954  mask);
1955  if (arr_len)
1956  {
1957  unsigned char scode =
1959  enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len, true);
1960  compression_stat_[scode]++;
1961  return;
1962  }
1963  encode_bit_digest(block, enc, digest0_);
1964 }
1965 
1966 template<class BV>
1968  bm::encoder& enc) BMNOEXCEPT
1969 {
1970  unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
1971  BM_ASSERT(len); (void)len;
1972  gamma_gap_block(bit_idx_arr_.data(), enc);
1973 }
1974 
1975 template<class BV>
1977  bm::encoder& enc,
1978  bool inverted) BMNOEXCEPT
1979 {
1980  unsigned mask = inverted ? ~0u : 0u;
1981  unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
1982  block,
1985  mask);
1986  if (arr_len)
1987  {
1988  gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
1989  return;
1990  }
1991  enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
1992  compression_stat_[bm::set_block_bit]++;
1993 }
1994 
1995 template<class BV>
1997  bm::encoder& enc,
1998  bool inverted) BMNOEXCEPT
1999 {
2000  unsigned mask = inverted ? ~0u : 0u;
2001  unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
2002  block,
2005  mask);
2006  if (arr_len)
2007  {
2008  interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2009  return;
2010  }
2011  encode_bit_digest(block, enc, digest0_);
2012 }
2013 
2014 template<class BV>
2016  bm::encoder& enc) BMNOEXCEPT
2017 {
2018  unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2019  BM_ASSERT(len); (void)len;
2020  interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
2021 }
2022 
2023 
2024 template<class BV>
2026  bm::encoder& enc) BMNOEXCEPT
2027 {
2028  unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2029  BM_ASSERT(len); (void)len;
2030  BM_ASSERT(len <= bie_cut_off);
2031 
2032  const unsigned char scode = bm::set_block_bitgap_bienc;
2033 
2034  encoder::position_type enc_pos0 = enc.get_pos();
2035  {
2036  bit_out_type bout(enc);
2037 
2038  bm::gap_word_t head = (bit_idx_arr_[0] & 1); // isolate start flag
2039  bm::gap_word_t min_v = bit_idx_arr_[1];
2040 
2041  BM_ASSERT(bit_idx_arr_[len] == 65535);
2042  BM_ASSERT(bit_idx_arr_[len] > min_v);
2043 
2044  enc.put_8(scode);
2045 
2046  enc.put_8((unsigned char)head);
2047  enc.put_16(bm::gap_word_t(len));
2048  enc.put_16(min_v);
2049  bout.bic_encode_u16(&bit_idx_arr_[2], len-2, min_v, 65535);
2050  bout.flush();
2051  }
2052  encoder::position_type enc_pos1 = enc.get_pos();
2053  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2054  unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2055  if (enc_size >= raw_size)
2056  {
2057  enc.set_pos(enc_pos0); // rollback the bit stream
2058  }
2059  else
2060  {
2061  compression_stat_[scode]++;
2062  return;
2063  }
2064  encode_bit_digest(block, enc, digest0_);
2065 }
2066 
2067 
2068 
2069 
2070 
2071 template<class BV>
2072 void
2074  bm::encoder& enc,
2075  bool inverted) BMNOEXCEPT
2076 {
2077  unsigned mask = inverted ? ~0u : 0u;
2078  unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
2079  block,
2082  mask);
2083  if (arr_len)
2084  {
2085  unsigned char scode =
2087 
2088  encoder::position_type enc_pos0 = enc.get_pos();
2089  {
2090  bit_out_type bout(enc);
2091 
2092  bm::gap_word_t min_v = bit_idx_arr_[0];
2093  bm::gap_word_t max_v = bit_idx_arr_[arr_len-1];
2094  BM_ASSERT(max_v > min_v);
2095 
2096  enc.put_8(scode);
2097  enc.put_16(min_v);
2098  enc.put_16(max_v);
2099  enc.put_16(bm::gap_word_t(arr_len));
2100  bout.bic_encode_u16(&bit_idx_arr_[1], arr_len-2, min_v, max_v);
2101  bout.flush();
2102  }
2103  encoder::position_type enc_pos1 = enc.get_pos();
2104  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2105  unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2106  if (enc_size >= raw_size)
2107  {
2108  enc.set_pos(enc_pos0); // rollback the bit stream
2109  }
2110  else
2111  {
2112  if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
2113  {
2114  enc.set_pos(enc_pos0); // rollback the bit stream
2115  }
2116  else
2117  {
2118  compression_stat_[scode]++;
2119  return;
2120  }
2121  }
2122  }
2123  encode_bit_digest(block, enc, digest0_);
2124 }
2125 
2126 
2127 
2128 #define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \
2129  if (nb == 1u) \
2130  enc.put_8(B_1ZERO); \
2131  else if (nb < 256u) \
2132  { \
2133  enc.put_8(B_8ZERO); \
2134  enc.put_8((unsigned char)nb); \
2135  } \
2136  else if (nb < 65536u) \
2137  { \
2138  enc.put_8(B_16ZERO); \
2139  enc.put_16((unsigned short)nb); \
2140  } \
2141  else if (nb < bm::id_max32) \
2142  { \
2143  enc.put_8(B_32ZERO); \
2144  enc.put_32(unsigned(nb)); \
2145  } \
2146  else \
2147  {\
2148  enc.put_8(B_64ZERO); \
2149  enc.put_64(nb); \
2150  }
2151 
2152 
2153 template<class BV>
2155  bookmark_state& bookm,
2156  bm::encoder& enc) BMNOEXCEPT
2157 {
2158  BM_ASSERT(bookm.nb_range_);
2159 
2160  block_idx_type nb_delta = nb - bookm.nb_;
2161  if (bookm.ptr_ && nb_delta >= bookm.nb_range_)
2162  {
2163  unsigned char* curr = enc.get_pos();
2164  size_t bytes_delta = size_t(curr - bookm.ptr_);
2165  if (bytes_delta > bookm.min_bytes_range_)
2166  {
2167  enc.set_pos(bookm.ptr_); // rewind back and save the skip
2168  switch (bookm.bm_type_)
2169  {
2170  case 0: // 32-bit mark
2171  bytes_delta -= sizeof(unsigned);
2172  if (bytes_delta < 0xFFFFFFFF) // range overflow check
2173  enc.put_32(unsigned(bytes_delta));
2174  // if range is somehow off, bookmark remains 0 (NULL)
2175  break;
2176  case 1: // 24-bit mark
2177  bytes_delta -= (sizeof(unsigned)-1);
2178  if (bytes_delta < 0xFFFFFF)
2179  enc.put_24(unsigned(bytes_delta));
2180  break;
2181  case 2: // 16-bit mark
2182  bytes_delta -= sizeof(unsigned short);
2183  if (bytes_delta < 0xFFFF)
2184  enc.put_16((unsigned short)bytes_delta);
2185  break;
2186  default:
2187  BM_ASSERT(0);
2188  break;
2189  } // switch
2190 
2191  enc.set_pos(curr); // restore and save the sync mark
2192 
2193  if (nb_delta < 0xFF)
2194  {
2195  enc.put_8(set_nb_sync_mark8);
2196  enc.put_8((unsigned char) nb_delta);
2197  }
2198  else
2199  if (nb_delta < 0xFFFF)
2200  {
2201  enc.put_8(set_nb_sync_mark16);
2202  enc.put_16((unsigned short) nb_delta);
2203  }
2204  else
2205  if (nb_delta < 0xFFFFFF)
2206  {
2207  enc.put_8(set_nb_sync_mark24);
2208  enc.put_24(unsigned(nb_delta));
2209  }
2210  else
2211  if (nb_delta < ~0U)
2212  {
2213  enc.put_8(set_nb_sync_mark32);
2214  enc.put_32(unsigned(nb_delta));
2215  }
2216  else
2217  {
2218  #ifdef BM64ADDR
2219  if (nb_delta < 0xFFFFFFFFFFFFUL)
2220  {
2221  enc.put_8(set_nb_sync_mark48);
2222  enc.put_48(nb_delta);
2223  }
2224  else
2225  {
2226  enc.put_8(set_nb_sync_mark64);
2227  enc.put_64(nb_delta);
2228  }
2229  #endif
2230  }
2231  bookm.ptr_ = 0;
2232  }
2233  }
2234 
2235  if (!bookm.ptr_) // start new bookmark
2236  {
2237  // bookmarks use VBR to save offset
2238  bookm.nb_ = nb;
2239  bookm.ptr_ = enc.get_pos() + 1;
2240  switch (bookm.bm_type_)
2241  {
2242  case 0: // 32-bit mark
2243  enc.put_8(bm::set_nb_bookmark32);
2244  enc.put_32(0); // put NULL mark at first
2245  break;
2246  case 1: // 24-bit mark
2247  enc.put_8(bm::set_nb_bookmark24);
2248  enc.put_24(0);
2249  break;
2250  case 2: // 16-bit mark
2251  enc.put_8(bm::set_nb_bookmark16);
2252  enc.put_16(0);
2253  break;
2254  default:
2255  BM_ASSERT(0);
2256  break;
2257  } // switch
2258  }
2259 }
2260 
2261 
2262 template<class BV>
2265  unsigned char* buf, size_t buf_size)
2266 {
2267  BM_ASSERT(temp_block_);
2268 
2269  reset_compression_stats();
2270  const blocks_manager_type& bman = bv.get_blocks_manager();
2271 
2272  bm::encoder enc(buf, buf_size); // create the encoder
2273  encode_header(bv, enc);
2274 
2275  bookmark_state sb_bookmark(sb_range_);
2276 
2277  block_idx_type i, j;
2278  for (i = 0; i < bm::set_total_blocks; ++i)
2279  {
2280  unsigned i0, j0;
2281  bm::get_block_coord(i, i0, j0);
2282 
2283  // ----------------------------------------------------
2284  // bookmark the stream to allow fast skip on decode
2285  //
2286  if (sb_bookmarks_)
2287  {
2288  process_bookmark(i, sb_bookmark, enc);
2289  }
2290 
2291  const bm::word_t* blk = bman.get_block(i0, j0);
2292 
2293  // ----------------------------------------------------
2294  // Empty or ONE block serialization
2295  //
2296  // TODO: make a function to check this in ONE pass
2297  //
2298  bool flag;
2299  flag = bm::check_block_zero(blk, false/*shallow check*/);
2300  if (flag)
2301  {
2302  zero_block:
2303  flag = 1;
2304  block_idx_type next_nb = bman.find_next_nz_block(i+1, false);
2305  if (next_nb == bm::set_total_blocks) // no more blocks
2306  {
2307  enc.put_8(set_block_azero);
2308  return (size_type)enc.size();
2309  }
2310  block_idx_type nb = next_nb - i;
2311 
2312  if (nb > 1 && nb < 128)
2313  {
2314  // special (but frequent) case -- GAP delta fits in 7bits
2315  unsigned char c = (unsigned char)((1u << 7) | nb);
2316  enc.put_8(c);
2317  }
2318  else
2319  {
2321  set_block_8zero,
2325  }
2326  i = next_nb - 1;
2327  continue;
2328  }
2329  else
2330  {
2331  flag = bm::check_block_one(blk, false); // shallow scan
2332  if (flag)
2333  {
2334  full_block:
2335  flag = 1;
2336  // Look ahead for similar blocks
2337  for(j = i+1; j < bm::set_total_blocks; ++j)
2338  {
2339  bm::get_block_coord(j, i0, j0);
2340  if (!j0) // look ahead if the whole superblock is 0xFF
2341  {
2342  bm::word_t*** blk_root = bman.top_blocks_root();
2343  if ((bm::word_t*)blk_root[i0] == FULL_BLOCK_FAKE_ADDR)
2344  {
2345  j += 255;
2346  continue;
2347  }
2348  }
2349  const bm::word_t* blk_next = bman.get_block(i0, j0);
2350  if (flag != bm::check_block_one(blk_next, true)) // deep scan
2351  break;
2352  } // for j
2353 
2354  // TODO: improve this condition, because last block is always
2355  // has 0 at the end, thus this never happen in practice
2356  if (j == bm::set_total_blocks)
2357  {
2358  enc.put_8(set_block_aone);
2359  break;
2360  }
2361  else
2362  {
2363  block_idx_type nb = j - i;
2365  set_block_8one,
2366  set_block_16one,
2369  }
2370  i = j - 1;
2371  continue;
2372  }
2373  }
2374 
2375  // --------------------------------------------------
2376  // GAP serialization
2377  //
2378  if (BM_IS_GAP(blk))
2379  {
2380  if (ref_vect_) // XOR filter
2381  {
2382  bool found = xor_scan_.search_best_xor_gap(blk,
2383  ref_idx_+1, ref_vect_->size(),
2384  i0, j0);
2385  if (found)
2386  {
2387  const bm::gap_word_t* gap_block = BMGAP_PTR(blk);
2388  unsigned glen = bm::gap_length(gap_block)-1;
2389 
2390  const bm::gap_word_t* gap_xor_block =
2391  (const bm::gap_word_t*) xor_scan_.get_found_block();
2392  size_type ridx = xor_scan_.found_ridx();
2393  size_type plain_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2394 
2395  bm::gap_word_t* tmp_buf = (bm::gap_word_t*)xor_block_;
2396 
2397  unsigned res_len;
2398  bm::gap_operation_xor(gap_block, gap_xor_block, tmp_buf, res_len);
2399 
2400  BM_ASSERT(res_len <= glen);
2401  BM_ASSERT(1+res_len == bm::gap_length(tmp_buf));
2402  unsigned delta = glen - res_len;
2403  if (delta > 2)
2404  {
2405  if (plain_idx < 256)
2406  {
2408  enc.put_8((unsigned char) plain_idx);
2409  }
2410  else
2411  {
2412  if (plain_idx < 65536)
2413  {
2415  enc.put_16((unsigned short) plain_idx);
2416  }
2417  else
2418  {
2420  enc.put_32(unsigned(plain_idx));
2421  }
2422  }
2423  encode_gap_block(tmp_buf, enc);
2424 
2425  compression_stat_[bm::set_block_xor_gap_ref32]++;
2426  continue;
2427  }
2428  }
2429  }
2430 
2431  encode_gap_block(BMGAP_PTR(blk), enc);
2432  }
2433  else
2434  {
2435  if (ref_vect_) // XOR filter
2436  {
2437  xor_scan_.compute_x_block_stats(blk);
2438  bool found =
2439  xor_scan_.search_best_xor_mask(blk,
2440  ref_idx_+1, ref_vect_->size(),
2441  i0, j0,
2442  xor_block_);
2443  if (found)
2444  {
2445  size_type ridx = xor_scan_.found_ridx();
2446  if (xor_scan_.is_eq_found()) // golden! (found a copy)
2447  {
2448  size_type row_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2450  enc.put_32(unsigned(row_idx));
2451  compression_stat_[bm::set_block_ref_eq]++;
2452  continue;
2453  }
2454  found = xor_scan_.validate_found(xor_block_, blk);
2455  if (found)
2456  {
2457  bm::id64_t d64 = xor_scan_.get_xor_digest();
2458  BM_ASSERT(d64);
2459  size_type plain_idx = xor_scan_.get_ref_vector().get_row_idx(ridx);
2460 
2461  if (plain_idx < 256)
2462  {
2464  enc.put_8((unsigned char) plain_idx);
2465  }
2466  else
2467  {
2468  if (plain_idx < 65536)
2469  {
2471  enc.put_16((unsigned short) plain_idx);
2472  }
2473  else
2474  {
2476  enc.put_32(unsigned(plain_idx));
2477  }
2478  }
2479  enc.put_64(d64); // xor digest
2480  compression_stat_[bm::set_block_xor_ref32]++;
2481  blk = xor_block_; // substitute block with XOR product
2482  }
2483  } // if xor found
2484 
2485  }
2486 
2487  // ----------------------------------------------
2488  // BIT BLOCK serialization
2489  //
2490 
2491  unsigned char model = find_bit_best_encoding(blk);
2492  switch (model)
2493  {
2494  case bm::set_block_bit:
2496  break;
2498  {
2499  unsigned bit_idx = 0;
2500  bm::bit_block_find(blk, bit_idx, &bit_idx);
2501  BM_ASSERT(bit_idx < 65536);
2502  enc.put_8(bm::set_block_bit_1bit); enc.put_16(bm::short_t(bit_idx));
2503  compression_stat_[bm::set_block_bit_1bit]++;
2504  continue;
2505  }
2506  break;
2507  case bm::set_block_azero: // empty block all of the sudden ?
2508  goto zero_block;
2509  case bm::set_block_aone:
2510  goto full_block;
2511  case bm::set_block_arrbit:
2512  encode_bit_array(blk, enc, false);
2513  break;
2515  encode_bit_array(blk, enc, true);
2516  break;
2518  gamma_gap_bit_block(blk, enc);
2519  break;
2521  encode_bit_interval(blk, enc, 0); // TODO: get rid of param 3 (0)
2522  break;
2524  gamma_arr_bit_block(blk, enc, false);
2525  break;
2527  gamma_arr_bit_block(blk, enc, true);
2528  break;
2530  bienc_arr_bit_block(blk, enc, false);
2531  break;
2533  bienc_arr_bit_block(blk, enc, true);
2534  break;
2536  interpolated_arr_bit_block(blk, enc, false);
2537  break;
2539  interpolated_arr_bit_block(blk, enc, true); // inverted
2540  break;
2542  interpolated_gap_bit_block(blk, enc);
2543  break;
2545  bienc_gap_bit_block(blk, enc);
2546  break;
2548  encode_bit_digest(blk, enc, digest0_);
2549  break;
2550  default:
2551  BM_ASSERT(0); // predictor returned an unknown model
2553  }
2554  } // bit-block processing
2555 
2556  // destructive serialization mode
2557  //
2558  if (free_)
2559  {
2560  // const cast is ok, because it is a requested mode
2561  const_cast<blocks_manager_type&>(bman).zero_block(i);
2562  }
2563 
2564  } // for i
2565 
2566  enc.put_8(set_block_end);
2567  return (size_type)enc.size();
2568 }
2569 
2570 
2571 
2572 /// Bit mask flags for serialization algorithm
2573 /// \ingroup bvserial
2575  BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
2576  BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
2577 };
2578 
2579 /*!
2580  \brief Saves bitvector into memory.
2581 
2582  Function serializes content of the bitvector into memory.
2583  Serialization adaptively uses compression(variation of GAP encoding)
2584  when it is benefitial.
2585 
2586  \param bv - source bvecor
2587  \param buf - pointer on target memory area. No range checking in the
2588  function. It is responsibility of programmer to allocate sufficient
2589  amount of memory using information from calc_stat function.
2590 
2591  \param temp_block - pointer on temporary memory block. Cannot be 0;
2592  If you want to save memory across multiple bvectors
2593  allocate temporary block using allocate_tempblock and pass it to
2594  serialize.
2595  (Serialize does not deallocate temp_block.)
2596 
2597  \param serialization_flags
2598  Flags controlling serilization (bit-mask)
2599  (use OR-ed serialization flags)
2600 
2601  \ingroup bvserial
2602 
2603  \return Size of serialization block.
2604  \sa calc_stat, serialization_flags
2605 
2606 */
2607 /*!
2608  Serialization format:
2609  <pre>
2610 
2611  | HEADER | BLOCKS |
2612 
2613  Header structure:
2614  BYTE : Serialization header (bit mask of BM_HM_*)
2615  BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
2616  INT16: Reserved (0)
2617  INT16: Reserved Flags (0)
2618 
2619  </pre>
2620 */
2621 template<class BV>
2622 size_t serialize(const BV& bv,
2623  unsigned char* buf,
2624  bm::word_t* temp_block = 0,
2625  unsigned serialization_flags = 0)
2626 {
2627  bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
2628 
2630  bv_serial.byte_order_serialization(false);
2631 
2633  bv_serial.gap_length_serialization(false);
2634  else
2635  bv_serial.gap_length_serialization(true);
2636 
2637  return bv_serial.serialize(bv, buf, 0);
2638 }
2639 
2640 /*!
2641  @brief Saves bitvector into memory.
2642  Allocates temporary memory block for bvector.
2643 
2644  \param bv - source bvecor
2645  \param buf - pointer on target memory area. No range checking in the
2646  function. It is responsibility of programmer to allocate sufficient
2647  amount of memory using information from calc_stat function.
2648 
2649  \param serialization_flags
2650  Flags controlling serilization (bit-mask)
2651  (use OR-ed serialization flags)
2652 
2653  \ingroup bvserial
2654 */
2655 template<class BV>
2656 size_t serialize(BV& bv,
2657  unsigned char* buf,
2658  unsigned serialization_flags=0)
2659 {
2660  return bm::serialize(bv, buf, 0, serialization_flags);
2661 }
2662 
2663 
2664 
2665 /*!
2666  @brief Bitvector deserialization from a memory BLOB
2667 
2668  @param bv - target bvector
2669  @param buf - pointer on memory which keeps serialized bvector
2670  @param temp_block - pointer on temporary block,
2671  if NULL bvector allocates own.
2672  @param ref_vect - [in] optional pointer to a list of
2673  reference vectors used for
2674  XOR compression.
2675 
2676  @return Number of bytes consumed by deserializer.
2677 
2678  Function deserializes bitvector from memory block containig results
2679  of previous serialization. Function does not remove bits
2680  which are currently set. Effectively it is OR logical operation
2681  between the target bit-vector and serialized one.
2682 
2683  @sa deserialize_range
2684 
2685  @ingroup bvserial
2686 */
2687 template<class BV>
2688 size_t deserialize(BV& bv,
2689  const unsigned char* buf,
2690  bm::word_t* temp_block = 0,
2691  const bm::bv_ref_vector<BV>* ref_vect = 0)
2692 {
2693  ByteOrder bo_current = globals<true>::byte_order();
2694 
2695  bm::decoder dec(buf);
2696  unsigned char header_flag = dec.get_8();
2697  ByteOrder bo = bo_current;
2698  if (!(header_flag & BM_HM_NO_BO))
2699  {
2700  bo = (bm::ByteOrder) dec.get_8();
2701  }
2702 
2703  if (bo_current == bo)
2704  {
2706  deserial.set_ref_vectors(ref_vect);
2707  return deserial.deserialize(bv, buf, temp_block);
2708  }
2709  switch (bo_current)
2710  {
2711  case BigEndian:
2712  {
2714  deserial.set_ref_vectors(ref_vect);
2715  return deserial.deserialize(bv, buf, temp_block);
2716  }
2717  case LittleEndian:
2718  {
2720  deserial.set_ref_vectors(ref_vect);
2721  return deserial.deserialize(bv, buf, temp_block);
2722  }
2723  default:
2724  BM_ASSERT(0);
2725  };
2726  return 0;
2727 }
2728 
2729 
2730 /*!
2731  @brief Bitvector range deserialization from a memory BLOB
2732 
2733  @param bv - target bvector
2734  @param buf - pointer on memory which keeps serialized bvector
2735  @param from - bit-vector index to deserialize from
2736  @param to - bit-vector index to deserialize to
2737  @param ref_vect - [in] optional pointer to a list of
2738  reference vectors used for
2739  XOR compression.
2740 
2741  Function deserializes bitvector from memory block containig results
2742  of previous serialization. Function does not remove bits
2743  which are currently set. Effectively it is OR logical operation
2744  between the target bit-vector and serialized one.
2745 
2746  @sa deserialize
2747 
2748  @ingroup bvserial
2749 */
2750 template<class BV>
2751 void deserialize_range(BV& bv,
2752  const unsigned char* buf,
2753  typename BV::size_type from,
2754  typename BV::size_type to,
2755  const bm::bv_ref_vector<BV>* ref_vect = 0)
2756 {
2757  ByteOrder bo_current = globals<true>::byte_order();
2758 
2759  bm::decoder dec(buf);
2760  unsigned char header_flag = dec.get_8();
2761  ByteOrder bo = bo_current;
2762  if (!(header_flag & BM_HM_NO_BO))
2763  {
2764  bo = (bm::ByteOrder) dec.get_8();
2765  }
2766 
2767  if (bo_current == bo)
2768  {
2770  deserial.set_ref_vectors(ref_vect);
2771  deserial.set_range(from, to);
2772  deserial.deserialize(bv, buf);
2773  }
2774  else
2775  {
2776  switch (bo_current)
2777  {
2778  case BigEndian:
2779  {
2781  deserial.set_ref_vectors(ref_vect);
2782  deserial.set_range(from, to);
2783  deserial.deserialize(bv, buf);
2784  }
2785  break;
2786  case LittleEndian:
2787  {
2789  deserial.set_ref_vectors(ref_vect);
2790  deserial.set_range(from, to);
2791  deserial.deserialize(bv, buf);
2792  }
2793  break;;
2794  default:
2795  BM_ASSERT(0);
2796  };
2797  }
2798  bv.keep_range(from, to);
2799 }
2800 
2801 
2802 template<typename DEC, typename BLOCK_IDX>
2805  unsigned block_type,
2806  bm::gap_word_t* dst_arr)
2807 {
2808  bm::gap_word_t len = 0;
2809 
2810  switch (block_type)
2811  {
2812  case set_block_bit_1bit:
2813  dst_arr[0] = decoder.get_16();
2814  len = 1;
2815  break;
2816  case set_block_arrgap:
2817  case set_block_arrgap_inv:
2818  len = decoder.get_16();
2819  decoder.get_16(dst_arr, len);
2820  break;
2823  {
2824  bit_in_type bin(decoder);
2825  len = (gap_word_t)bin.gamma();
2826  gap_word_t prev = 0;
2827  for (gap_word_t k = 0; k < len; ++k)
2828  {
2829  gap_word_t bit_idx = (gap_word_t)bin.gamma();
2830  if (k == 0) --bit_idx; // TODO: optimization
2831  bit_idx = (gap_word_t)(bit_idx + prev);
2832  prev = bit_idx;
2833  dst_arr[k] = bit_idx;
2834  } // for
2835  }
2836  break;
2839  {
2840  bm::gap_word_t min_v = decoder.get_16();
2841  bm::gap_word_t max_v = decoder.get_16();
2842 
2843  bit_in_type bin(decoder);
2844  len = bm::gap_word_t(bin.gamma() + 4);
2845  dst_arr[0] = min_v;
2846  dst_arr[len-1] = max_v;
2847  bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
2848  }
2849  break;
2852  {
2853  bm::gap_word_t min_v, max_v;
2854  len = decoder.get_16();
2855  if (len & 1)
2856  min_v = decoder.get_8();
2857  else
2858  min_v = decoder.get_16();
2859 
2860  if (len & (1<<1))
2861  max_v = decoder.get_8();
2862  else
2863  max_v = decoder.get_16();
2864  max_v = bm::gap_word_t(min_v + max_v);
2865 
2866  len = bm::gap_word_t(len >> 2);
2867 
2868  bit_in_type bin(decoder);
2869  dst_arr[0] = min_v;
2870  dst_arr[len-1] = max_v;
2871  bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
2872  }
2873  break;
2874 
2875  default:
2876  BM_ASSERT(0);
2877  #ifndef BM_NO_STL
2878  throw std::logic_error(err_msg());
2879  #else
2880  BM_THROW(BM_ERR_SERIALFORMAT);
2881  #endif
2882  }
2883  return len;
2884 }
2885 
2886 template<typename DEC, typename BLOCK_IDX>
2887 void
2889  bm::word_t* blk) BMNOEXCEPT
2890 {
2891  BM_ASSERT(!BM_IS_GAP(blk));
2892 
2893  bm::gap_word_t min_v = dec.get_16();
2894  bm::gap_word_t max_v = dec.get_16();
2895  unsigned arr_len = dec.get_16();
2896 
2897  bit_in_type bin(dec);
2898 
2899  if (!IS_VALID_ADDR(blk))
2900  {
2901  bin.bic_decode_u16_dry(arr_len-2, min_v, max_v);
2902  return;
2903  }
2904  bm::set_bit(blk, min_v);
2905  bm::set_bit(blk, max_v);
2906  bin.bic_decode_u16_bitset(blk, arr_len-2, min_v, max_v);
2907 }
2908 
2909 template<typename DEC, typename BLOCK_IDX>
2910 void
2912  bm::word_t* blk) BMNOEXCEPT
2913 {
2914  // TODO: optimization
2915  bm::bit_block_set(blk, 0);
2916  this->read_bic_arr(decoder, blk);
2917  bm::bit_invert(blk);
2918 }
2919 
2920 template<typename DEC, typename BLOCK_IDX>
2922  bm::word_t* blk) BMNOEXCEPT
2923 {
2924  BM_ASSERT(!BM_IS_GAP(blk));
2925 
2926  bm::gap_word_t head = dec.get_8();
2927  unsigned arr_len = dec.get_16();
2928  bm::gap_word_t min_v = dec.get_16();
2929 
2930  BM_ASSERT(arr_len <= bie_cut_off);
2931 
2932  id_array_[0] = head;
2933  id_array_[1] = min_v;
2934  id_array_[arr_len] = 65535;
2935 
2936  bit_in_type bin(dec);
2937  bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
2938 
2939  if (!IS_VALID_ADDR(blk))
2940  return;
2941  bm::gap_add_to_bitset(blk, id_array_, arr_len);
2942 }
2943 
2944 template<typename DEC, typename BLOCK_IDX>
2946  decoder_type& dec,
2947  bm::word_t* block) BMNOEXCEPT
2948 {
2949  bm::id64_t d0 = dec.get_64();
2950  while (d0)
2951  {
2952  bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
2953 
2954  unsigned wave = bm::word_bitcount64(t - 1);
2955  unsigned off = wave * bm::set_block_digest_wave_size;
2956  unsigned j = 0;
2957  if (!IS_VALID_ADDR(block))
2958  {
2959  do
2960  {
2961  dec.get_32();
2962  dec.get_32();
2963  dec.get_32();
2964  dec.get_32();
2965  j += 4;
2966  } while (j < bm::set_block_digest_wave_size);
2967  }
2968  else
2969  {
2970  do
2971  {
2972  block[off+j+0] |= dec.get_32();
2973  block[off+j+1] |= dec.get_32();
2974  block[off+j+2] |= dec.get_32();
2975  block[off+j+3] |= dec.get_32();
2976  j += 4;
2977  } while (j < bm::set_block_digest_wave_size);
2978  }
2979 
2980  d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
2981  } // while
2982 }
2983 
2984 template<typename DEC, typename BLOCK_IDX>
2986  decoder_type& dec,
2987  bm::word_t* blk) BMNOEXCEPT
2988 {
2989  //TODO: optimization if block exists and it is OR-ed read
2990  bm::bit_block_set(blk, 0);
2991 
2992  unsigned char run_type = dec.get_8();
2993  for (unsigned j = 0; j < bm::set_block_size; run_type = !run_type)
2994  {
2995  unsigned run_length = dec.get_16();
2996  if (run_type)
2997  {
2998  unsigned run_end = j + run_length;
2999  BM_ASSERT(run_end <= bm::set_block_size);
3000  for (;j < run_end; ++j)
3001  {
3002  unsigned w = dec.get_32();
3003  blk[j] = w;
3004  }
3005  }
3006  else
3007  {
3008  j += run_length;
3009  }
3010  } // for j
3011 }
3012 
3013 
3014 template<typename DEC, typename BLOCK_IDX>
3015 void
3017  unsigned block_type,
3018  bm::gap_word_t* dst_block,
3019  bm::gap_word_t& gap_head)
3020 {
3021 // typedef bit_in<DEC> bit_in_type;
3022  switch (block_type)
3023  {
3024  case set_block_gap:
3025  {
3026  unsigned len = gap_length(&gap_head);
3027  --len;
3028  *dst_block = gap_head;
3029  decoder.get_16(dst_block+1, len - 1);
3030  dst_block[len] = gap_max_bits - 1;
3031  }
3032  break;
3033  case set_block_bit_1bit:
3034  {
3035  gap_set_all(dst_block, bm::gap_max_bits, 0);
3036  gap_word_t bit_idx = decoder.get_16();
3037  gap_add_value(dst_block, bit_idx);
3038  }
3039  break;
3040  case set_block_arrgap:
3041  case set_block_arrgap_inv:
3042  {
3043  gap_set_all(dst_block, bm::gap_max_bits, 0);
3044  gap_word_t len = decoder.get_16();
3045  for (gap_word_t k = 0; k < len; ++k)
3046  {
3047  gap_word_t bit_idx = decoder.get_16();
3048  bm::gap_add_value(dst_block, bit_idx);
3049  } // for
3050  }
3051  break;
3058  {
3059  unsigned arr_len = read_id_list(decoder, block_type, id_array_);
3060  dst_block[0] = 0;
3061  unsigned gap_len =
3062  gap_set_array(dst_block, id_array_, arr_len);
3063  BM_ASSERT(gap_len == bm::gap_length(dst_block));
3064  (void)(gap_len);
3065  }
3066  break;
3067  case set_block_gap_egamma:
3068  {
3069  unsigned len = (gap_head >> 3);
3070  --len;
3071  // read gamma GAP block into a dest block
3072  *dst_block = gap_head;
3073  gap_word_t* gap_data_ptr = dst_block + 1;
3074 
3075  bit_in_type bin(decoder);
3076  gap_word_t v = (gap_word_t)bin.gamma();
3077  gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
3078  for (unsigned i = 1; i < len; ++i)
3079  {
3080  v = (gap_word_t)bin.gamma();
3081  gap_sum = (gap_word_t)(gap_sum + v);
3082  *(++gap_data_ptr) = gap_sum;
3083  }
3084  dst_block[len+1] = bm::gap_max_bits - 1;
3085  }
3086  break;
3087  case set_block_gap_bienc:
3088  {
3089  unsigned len = (gap_head >> 3);
3090  *dst_block = gap_head;
3091  bm::gap_word_t min_v = decoder.get_16();
3092  dst_block[1] = min_v;
3093  bit_in_type bin(decoder);
3094  bin.bic_decode_u16(&dst_block[2], len-2, min_v, 65535);
3095  dst_block[len] = bm::gap_max_bits - 1;
3096  }
3097  break;
3099  {
3100  bm::gap_word_t min_v, max_v;
3101  unsigned len = (gap_head >> 3);
3102  bm::gap_word_t min8 = gap_head & (1 << 1);
3103  bm::gap_word_t tail8 = gap_head & (1 << 2);
3104  gap_head &= bm::gap_word_t(~(3 << 1)); // clear the flags
3105  if (min8)
3106  min_v = decoder.get_8();
3107  else
3108  min_v = decoder.get_16();
3109  if (tail8)
3110  max_v = decoder.get_8();
3111  else
3112  max_v = decoder.get_16();
3113  max_v = bm::gap_word_t(65535 - max_v); // tail correction
3114 
3115  dst_block[0] = gap_head;
3116  dst_block[1] = min_v;
3117  bit_in_type bin(decoder);
3118  bin.bic_decode_u16(&dst_block[2], len-3, min_v, max_v);
3119  dst_block[len-1] = max_v;
3120  dst_block[len] = bm::gap_max_bits - 1;
3121  }
3122  break;
3123  default:
3124  BM_ASSERT(0);
3125  #ifndef BM_NO_STL
3126  throw std::logic_error(err_msg());
3127  #else
3128  BM_THROW(BM_ERR_SERIALFORMAT);
3129  #endif
3130  }
3131 
3132  if (block_type == set_block_arrgap_egamma_inv ||
3133  block_type == set_block_arrgap_inv ||
3134  block_type == set_block_arrgap_bienc_inv ||
3135  block_type == set_block_arrgap_bienc_inv_v2)
3136  {
3137  gap_invert(dst_block);
3138  }
3139 }
3140 
3141 template<typename DEC, typename BLOCK_IDX>
3145  block_idx_type nb,
3146  block_idx_type expect_nb) BMNOEXCEPT
3147 {
3148  if (skip_offset_) // skip bookmark is available
3149  {
3150  const unsigned char* save_pos = decoder.get_pos(); // save position
3151 
3152  if (save_pos > skip_pos_) // checkpoint passed
3153  {
3154  skip_offset_ = 0;
3155  return false;
3156  }
3157  decoder.set_pos(skip_pos_);
3158 
3159  unsigned char sync_mark = decoder.get_8();
3160  block_idx_type nb_sync;
3161  switch (sync_mark)
3162  {
3163  case set_nb_sync_mark8:
3164  nb_sync = decoder.get_8();
3165  break;
3166  case set_nb_sync_mark16:
3167  nb_sync = decoder.get_16();
3168  break;
3169  case set_nb_sync_mark24:
3170  nb_sync = decoder.get_24();
3171  break;
3172  case set_nb_sync_mark32:
3173  nb_sync = decoder.get_32();
3174  break;
3175  case set_nb_sync_mark48:
3176  nb_sync = block_idx_type(decoder.get_48());
3177  #ifndef BM64ADDR
3178  BM_ASSERT(0);
3179  decoder.set_pos(save_pos);
3180  skip_offset_ = 0;
3181  return 0; // invalid bookmark from 64-bit serialization
3182  #endif
3183  break;
3184  case set_nb_sync_mark64:
3185  nb_sync = block_idx_type(decoder.get_64());
3186  #ifndef BM64ADDR
3187  BM_ASSERT(0);
3188  decoder.set_pos(save_pos);
3189  skip_offset_ = 0;
3190  return 0; // invalid bookmark from 64-bit serialization
3191  #endif
3192  break;
3193  default:
3194  BM_ASSERT(0);
3195  decoder.set_pos(save_pos);
3196  skip_offset_ = 0;
3197  return 0;
3198  } // switch
3199 
3200  nb_sync += nb;
3201  if (nb_sync <= expect_nb) // within reach
3202  {
3203  skip_offset_ = 0;
3204  return nb_sync;
3205  }
3206  skip_offset_ = 0;
3207  decoder.set_pos(save_pos);
3208  }
3209  return 0;
3210 }
3211 
3212 
3213 // -------------------------------------------------------------------------
3214 
3215 template<class BV, class DEC>
3217 : ref_vect_(0),
3218  xor_block_(0),
3219  or_block_(0),
3220  is_range_set_(0)
3221 {
3222  temp_block_ = alloc_.alloc_bit_block();
3223  bit_idx_arr_.resize(bm::gap_max_bits);
3224  this->id_array_ = bit_idx_arr_.data();
3225  gap_temp_block_.resize(bm::gap_max_bits);
3226 
3227  alloc_.set_pool(&pool_);
3228 }
3229 
3230 template<class BV, class DEC>
3232 {
3233  alloc_.free_bit_block(temp_block_);
3234  if (xor_block_)
3235  alloc_.free_bit_block(xor_block_);
3236  BM_ASSERT(!or_block_);
3237 }
3238 
3239 template<class BV, class DEC>
3241 {
3242  ref_vect_ = ref_vect;
3243  if (ref_vect_ && !xor_block_)
3244  xor_block_ = alloc_.alloc_bit_block();
3245 }
3246 
3247 template<class BV, class DEC>
3248 void
3250  bvector_type& bv, blocks_manager_type& bman,
3251  block_idx_type nb,
3252  bm::word_t* blk)
3253 {
3254  gap_word_t gap_head;
3255  bm::gap_word_t* gap_temp_block = gap_temp_block_.data();
3256 
3257  bool inv_flag = false;
3258 
3259  switch (btype)
3260  {
3261  case set_block_gap:
3263  case set_block_gapbit:
3264  {
3265  gap_head = (gap_word_t)
3266  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
3267 
3268  unsigned len = gap_length(&gap_head);
3269  int level = gap_calc_level(len, bman.glen());
3270  --len;
3271  if (level == -1) // Too big to be GAP: convert to BIT block
3272  {
3273  *gap_temp_block = gap_head;
3274  dec.get_16(gap_temp_block+1, len - 1);
3275  gap_temp_block[len] = gap_max_bits - 1;
3276 
3277  if (blk == 0) // block does not exist yet
3278  {
3279  blk = bman.get_allocator().alloc_bit_block();
3280  bman.set_block(nb, blk);
3281  gap_convert_to_bitset(blk, gap_temp_block);
3282  }
3283  else // We have some data already here. Apply OR operation.
3284  {
3285  gap_convert_to_bitset(temp_block_,
3286  gap_temp_block);
3287  bv.combine_operation_with_block(nb,
3288  temp_block_,
3289  0,
3290  BM_OR);
3291  }
3292  return;
3293  } // level == -1
3294 
3295  set_gap_level(&gap_head, level);
3296 
3297  if (blk == 0)
3298  {
3299  BM_ASSERT(level >= 0);
3300  gap_word_t* gap_blk =
3301  bman.get_allocator().alloc_gap_block(unsigned(level), bman.glen());
3302  gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
3303  *gap_blk_ptr = gap_head;
3304  bm::set_gap_level(gap_blk_ptr, level);
3305  blk = bman.set_block(nb, (bm::word_t*)BMPTR_SETBIT0(gap_blk));
3306  BM_ASSERT(blk == 0);
3307 
3308  dec.get_16(gap_blk + 1, len - 1);
3309  gap_blk[len] = bm::gap_max_bits - 1;
3310  }
3311  else // target block exists
3312  {
3313  // read GAP block into a temp memory and perform OR
3314  *gap_temp_block = gap_head;
3315  dec.get_16(gap_temp_block + 1, len - 1);
3316  gap_temp_block[len] = bm::gap_max_bits - 1;
3317  break;
3318  }
3319  return;
3320  }
3322  inv_flag = true;
3324  case set_block_arrgap:
3331  {
3332  unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
3333  gap_temp_block[0] = 0; // reset unused bits in gap header
3334  unsigned gap_len =
3335  bm::gap_set_array(gap_temp_block, this->id_array_, arr_len);
3336  if (inv_flag)
3337  bm::gap_invert(gap_temp_block);
3338 
3339  BM_ASSERT(gap_len == bm::gap_length(gap_temp_block));
3340  int level = bm::gap_calc_level(gap_len, bman.glen());
3341  if (level == -1) // Too big to be GAP: convert to BIT block
3342  {
3343  bm::gap_convert_to_bitset(temp_block_, gap_temp_block);
3344  bv.combine_operation_with_block(nb,
3345  temp_block_,
3346  0,
3347  BM_OR);
3348  return;
3349  }
3350  break;
3351  }
3352  case set_block_gap_egamma:
3353  gap_head = dec.get_16();
3357  case set_block_arrgap_inv:
3360  this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3361  break;
3365  gap_head = dec.get_16();
3366  this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3367  break;
3368  default:
3369  BM_ASSERT(0);
3370  #ifndef BM_NO_STL
3371  throw std::logic_error(this->err_msg());
3372  #else
3373  BM_THROW(BM_ERR_SERIALFORMAT);
3374  #endif
3375  }
3376 
3377  bv.combine_operation_with_block(nb,
3378  (bm::word_t*)gap_temp_block,
3379  1,
3380  BM_OR);
3381 }
3382 
3383 template<class BV, class DEC>
3385  decoder_type& dec,
3386  blocks_manager_type& bman,
3387  block_idx_type nb,
3388  bm::word_t* blk)
3389 {
3390  if (!blk)
3391  {
3392  blk = bman.get_allocator().alloc_bit_block();
3393  bman.set_block(nb, blk);
3394  bm::bit_block_set(blk, 0);
3395  }
3396  else
3397  if (BM_IS_GAP(blk))
3398  blk = bman.deoptimize_block(nb);
3399 
3400  BM_ASSERT(blk != temp_block_);
3401 
3402  switch (btype)
3403  {
3404  case set_block_arrbit_inv:
3405  if (IS_FULL_BLOCK(blk))
3406  blk = bman.deoptimize_block(nb);
3407  bm::bit_block_set(temp_block_, ~0u);
3408  {
3409  gap_word_t len = dec.get_16();
3410  for (unsigned k = 0; k < len; ++k)
3411  {
3412  gap_word_t bit_idx = dec.get_16();
3413  bm::clear_bit(temp_block_, bit_idx);
3414  } // for
3415  }
3416  bm::bit_block_or(blk, temp_block_);
3417  break;
3419  this->read_bic_arr(dec, blk);
3420  break;
3422  BM_ASSERT(blk != temp_block_);
3423  if (IS_FULL_BLOCK(blk))
3424  blk = bman.deoptimize_block(nb);
3425  // TODO: optimization
3426  bm::bit_block_set(temp_block_, 0);
3427  this->read_bic_arr(dec, temp_block_);
3428  bm::bit_invert(temp_block_);
3429  bm::bit_block_or(blk, temp_block_);
3430  break;
3432  this->read_bic_gap(dec, blk);
3433  break;
3435  this->read_digest0_block(dec, blk);
3436  break;
3437  default:
3438  BM_ASSERT(0);
3439  #ifndef BM_NO_STL
3440  throw std::logic_error(this->err_msg());
3441  #else
3442  BM_THROW(BM_ERR_SERIALFORMAT);
3443  #endif
3444  } // switch
3445 }
3446 
3447 template<class BV, class DEC>
3449  bvector_type& bv,
3450  block_idx_type nb,
3451  bm::word_t* blk)
3452 {
3453  if (!blk)
3454  {
3455  blocks_manager_type& bman = bv.get_blocks_manager();
3456  blk = bman.get_allocator().alloc_bit_block();
3457  bman.set_block(nb, blk);
3458  dec.get_32(blk, bm::set_block_size);
3459  }
3460  else
3461  {
3462  dec.get_32(temp_block_, bm::set_block_size);
3463  bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
3464  }
3465 }
3466 
3467 template<class BV, class DEC>
3469  bvector_type& bv,
3470  block_idx_type nb,
3471  bm::word_t* blk)
3472 {
3473  unsigned head_idx = dec.get_16();
3474  unsigned tail_idx = dec.get_16();
3475  if (!blk)
3476  {
3477  blocks_manager_type& bman = bv.get_blocks_manager();
3478  blk = bman.get_allocator().alloc_bit_block();
3479  bman.set_block(nb, blk);
3480  for (unsigned k = 0; k < head_idx; ++k)
3481  blk[k] = 0;
3482  dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
3483  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
3484  blk[j] = 0;
3485  }
3486  else
3487  {
3488  bm::bit_block_set(temp_block_, 0);
3489  dec.get_32(temp_block_ + head_idx, tail_idx - head_idx + 1);
3490  bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
3491  }
3492 }
3493 
3494 template<class BV, class DEC>
3496  bvector_type& bv,
3497  block_idx_type nb,
3498  bm::word_t* blk)
3499 {
3500  blocks_manager_type& bman = bv.get_blocks_manager();
3501  bm::gap_word_t len = dec.get_16();
3502  if (BM_IS_GAP(blk))
3503  {
3504  blk = bman.deoptimize_block(nb);
3505  }
3506  else
3507  {
3508  if (!blk) // block does not exists yet
3509  {
3510  blk = bman.get_allocator().alloc_bit_block();
3511  bm::bit_block_set(blk, 0);
3512  bman.set_block(nb, blk);
3513  }
3514  else
3515  if (IS_FULL_BLOCK(blk)) // nothing to do
3516  {
3517  for (unsigned k = 0; k < len; ++k) // dry read
3518  dec.get_16();
3519  return;
3520  }
3521  }
3522  // Get the array one by one and set the bits.
3523  for (unsigned k = 0; k < len; ++k)
3524  {
3525  gap_word_t bit_idx = dec.get_16();
3526  bm::set_bit(blk, bit_idx);
3527  }
3528 }
3529 
3530 template<class BV, class DEC>
3532  const unsigned char* buf,
3533  bm::word_t* /*temp_block*/)
3534 {
3535  blocks_manager_type& bman = bv.get_blocks_manager();
3536  if (!bman.is_init())
3537  bman.init_tree();
3538 
3539  bm::word_t* temp_block = temp_block_;
3540 
3541  bm::strategy strat = bv.get_new_blocks_strat();
3542  bv.set_new_blocks_strat(BM_GAP);
3543  typename bvector_type::mem_pool_guard mp_guard_bv;
3544  mp_guard_bv.assign_if_not_set(pool_, bv);
3545 
3546 
3547  decoder_type dec(buf);
3548 
3549  // Reading th serialization header
3550  //
3551  unsigned char header_flag = dec.get_8();
3552  if (!(header_flag & BM_HM_NO_BO))
3553  {
3554  /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
3555  }
3556  if (header_flag & BM_HM_64_BIT)
3557  {
3558  #ifndef BM64ADDR
3559  BM_ASSERT(0); // 64-bit address vector cannot read on 32
3560  #ifndef BM_NO_STL
3561  throw std::logic_error(this->err_msg());
3562  #else
3563  BM_THROW(BM_ERR_SERIALFORMAT);
3564  #endif
3565  #endif
3566  }
3567 
3568  if (header_flag & BM_HM_ID_LIST)
3569  {
3570  // special case: the next comes plain list of integers
3571  if (header_flag & BM_HM_RESIZE)
3572  {
3573  block_idx_type bv_size;
3574  if (header_flag & BM_HM_64_BIT)
3575  {
3576  BM_ASSERT(sizeof(block_idx_type)==8);
3577  bv_size = (block_idx_type)dec.get_64();
3578  }
3579  else
3580  bv_size = dec.get_32();
3581  if (bv_size > bv.size())
3582  bv.resize(bv_size);
3583  }
3584  for (unsigned cnt = dec.get_32(); cnt; --cnt)
3585  {
3586  bm::id_t idx = dec.get_32();
3587  bv.set(idx);
3588  } // for
3589  // -1 for compatibility with other deserialization branches
3590  return dec.size()-1;
3591  }
3592 
3593  if (!(header_flag & BM_HM_NO_GAPL))
3594  {
3595  for (unsigned k = 0; k < bm::gap_levels; ++k) // read GAP levels
3596  {
3597  /*glevels[i] =*/ dec.get_16();
3598  }
3599  }
3600  if (header_flag & BM_HM_RESIZE)
3601  {
3602  block_idx_type bv_size;
3603  if (header_flag & BM_HM_64_BIT)
3604  {
3605  // 64-bit vector cannot be deserialized into 32-bit
3606  BM_ASSERT(sizeof(block_idx_type)==8);
3607  bv_size = (block_idx_type)dec.get_64();
3608  #ifndef BM64ADDR
3609  #ifndef BM_NO_STL
3610  throw std::logic_error(this->err_msg());
3611  #else
3612  BM_THROW(BM_ERR_SERIALFORMAT);
3613  #endif
3614  #endif
3615  }
3616  else
3617  bv_size = dec.get_32();
3618  if (bv_size > bv.size())
3619  bv.resize(bv_size);
3620  }
3621 
3622  if (header_flag & BM_HM_HXOR) // XOR compression mode
3623  {
3624  if (!xor_block_)
3625  xor_block_ = alloc_.alloc_bit_block();
3626  }
3627 
3628  size_type full_blocks = 0;
3629 
3630  // reference XOR compression FSM fields
3631  //
3632  size_type x_ref_idx = 0;
3633  bm::id64_t x_ref_d64 = 0;
3634  block_idx_type x_nb = 0;
3635  bool x_ref_gap = false;
3636  unsigned row_idx;
3637 
3638  block_idx_type nb_sync;
3639 
3640  unsigned char btype;
3641  block_idx_type nb;
3642  unsigned i0, j0;
3643 
3644  block_idx_type i = 0;
3645  do
3646  {
3647  if (is_range_set_)
3648  {
3649  block_idx_type nb_to = (idx_to_ >> bm::set_block_shift);
3650  if (i > nb_to)
3651  break; // early exit
3652  }
3653 
3654  btype = dec.get_8();
3655  if (btype & (1 << 7)) // check if short zero-run packed here
3656  {
3657  nb = btype & ~(1 << 7);
3658  BM_ASSERT(nb);
3659  i += nb;
3660  continue;
3661  }
3662  bm::get_block_coord(i, i0, j0);
3663  bm::word_t* blk = bman.get_block_ptr(i0, j0);
3664 
3665  switch (btype)
3666  {
3667  case set_block_azero:
3668  case set_block_end:
3670  break;
3671  case set_block_1zero:
3672  break;
3673  case set_block_8zero:
3674  nb = dec.get_8();
3675  BM_ASSERT(nb);
3676  i += nb;
3677  continue; // bypass ++i;
3678  case set_block_16zero:
3679  nb = dec.get_16();
3680  BM_ASSERT(nb);
3681  i += nb;
3682  continue; // bypass ++i;
3683  case set_block_32zero:
3684  nb = dec.get_32();
3685  BM_ASSERT(nb);
3686  i += nb;
3687  continue; // bypass ++i;
3688  case set_block_64zero:
3689  #ifdef BM64ADDR
3690  nb = dec.get_64();
3691  BM_ASSERT(nb);
3692  i += nb;
3693  continue; // bypass ++i;
3694  #else
3695  BM_ASSERT(0); // attempt to read 64-bit vector in 32-bit mode
3696  #ifndef BM_NO_STL
3697  throw std::logic_error(this->err_msg());
3698  #else
3699  BM_THROW(BM_ERR_SERIALFORMAT);
3700  #endif
3701  //i = bm::set_total_blocks;
3702  #endif
3703  break;
3704  case set_block_aone:
3705  bman.set_all_set(i, bm::set_total_blocks-1);
3707  break;
3708  case set_block_1one:
3709  bman.set_block_all_set(i);
3710  break;
3711  case set_block_8one:
3712  full_blocks = dec.get_8();
3713  goto process_full_blocks;
3714  break;
3715  case set_block_16one:
3716  full_blocks = dec.get_16();
3717  goto process_full_blocks;
3718  break;
3719  case set_block_32one:
3720  full_blocks = dec.get_32();
3721  goto process_full_blocks;
3722  break;
3723  case set_block_64one:
3724  #ifdef BM64ADDR
3725  full_blocks = dec.get_64();
3726  goto process_full_blocks;
3727  #else
3728  BM_ASSERT(0); // 32-bit vector cannot read 64-bit
3729  dec.get_64();
3730  #ifndef BM_NO_STL
3731  throw std::logic_error(this->err_msg());
3732  #else
3733  BM_THROW(BM_ERR_SERIALFORMAT);
3734  #endif
3735  #endif
3736  process_full_blocks:
3737  {
3738  BM_ASSERT(full_blocks);
3739  size_type from = i * bm::gap_max_bits;
3740  size_type to = from + full_blocks * bm::gap_max_bits;
3741  bv.set_range(from, to-1);
3742  i += full_blocks-1;
3743  }
3744  break;
3745  case set_block_bit:
3746  decode_block_bit(dec, bv, i, blk);
3747  break;
3748  case set_block_bit_1bit:
3749  {
3750  size_type bit_idx = dec.get_16();
3751  bit_idx += i * bm::bits_in_block;
3752  bv.set_bit_no_check(bit_idx);
3753  break;
3754  }
3755  case set_block_bit_0runs:
3756  {
3757  //TODO: optimization if block exists
3758  this->read_0runs_block(dec, temp_block);
3759  bv.combine_operation_with_block(i, temp_block, 0, BM_OR);
3760  break;
3761  }
3762  case set_block_bit_interval:
3763  decode_block_bit_interval(dec, bv, i, blk);
3764  break;
3765  case set_block_gap:
3766  case set_block_gapbit:
3767  case set_block_arrgap:
3768  case set_block_gap_egamma:
3771  case set_block_arrgap_inv:
3772  case set_block_gap_bienc:
3778  deserialize_gap(btype, dec, bv, bman, i, blk);
3779  break;
3780  case set_block_arrbit:
3781  decode_arrbit(dec, bv, i, blk);
3782  break;
3787  decode_bit_block(btype, dec, bman, i, blk);
3788  break;
3790  decode_bit_block(btype, dec, bman, i, blk);
3791  break;
3792 
3793  // --------------------------------------- bookmarks and skip jumps
3794  //
3795  case set_nb_bookmark32:
3796  this->bookmark_idx_ = i;
3797  this->skip_offset_ = dec.get_32();
3798  goto process_bookmark;
3799  case set_nb_bookmark24:
3800  this->bookmark_idx_ = i;
3801  this->skip_offset_ = dec.get_24();
3802  goto process_bookmark;
3803  case set_nb_bookmark16:
3804  this->bookmark_idx_ = i;
3805  this->skip_offset_ = dec.get_16();
3806  process_bookmark:
3807  if (is_range_set_)
3808  {
3809  this->skip_pos_ = dec.get_pos() + this->skip_offset_;
3810  block_idx_type nb_from = (idx_from_ >> bm::set_block_shift);
3811  nb_from = this->try_skip(dec, i, nb_from);
3812  if (nb_from)
3813  i = nb_from;
3814  }
3815  continue; // bypass ++i;
3816 
3817  case set_nb_sync_mark8:
3818  nb_sync = dec.get_8();
3819  goto process_nb_sync;
3820  case set_nb_sync_mark16:
3821  nb_sync = dec.get_16();
3822  goto process_nb_sync;
3823  case set_nb_sync_mark24:
3824  nb_sync = dec.get_24();
3825  goto process_nb_sync;
3826  case set_nb_sync_mark32:
3827  nb_sync = dec.get_32();
3828  goto process_nb_sync;
3829  case set_nb_sync_mark48:
3830  nb_sync = block_idx_type(dec.get_48());
3831  goto process_nb_sync;
3832  case set_nb_sync_mark64:
3833  nb_sync = block_idx_type(dec.get_64());
3834  process_nb_sync:
3835  BM_ASSERT(i == this->bookmark_idx_ + nb_sync);
3836  if (i != this->bookmark_idx_ + nb_sync)
3837  {
3838  #ifndef BM_NO_STL
3839  throw std::logic_error(this->err_msg());
3840  #else
3841  BM_THROW(BM_ERR_SERIALFORMAT);
3842  #endif
3843  }
3844 
3845  continue; // bypass ++i;
3846 
3847  // ------------------------------------ XOR compression markers
3848  case bm::set_block_ref_eq:
3849  {
3850  BM_ASSERT(ref_vect_); // reference vector has to be attached
3851  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3852  {
3853  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3854  x_ref_d64 = 0; x_ref_gap = false;
3855  }
3856 
3857  row_idx = dec.get_32();
3858  size_type idx = ref_vect_->find(row_idx);
3859  if (idx == ref_vect_->not_found())
3860  {
3861  BM_ASSERT(0);
3862  goto throw_err;
3863  }
3864  const bvector_type* ref_bv = ref_vect_->get_bv(idx);
3865  BM_ASSERT(ref_bv);
3866  const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
3867  const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
3868  if (ref_blk)
3869  bv.combine_operation_with_block(i, ref_blk,
3870  BM_IS_GAP(ref_blk), BM_OR);
3871  break;
3872  }
3874  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3875  {
3876  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3877  x_ref_d64 = 0; x_ref_gap = false;
3878  }
3879  row_idx = dec.get_8();
3880  goto process_xor;
3882  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3883  {
3884  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3885  x_ref_d64 = 0; x_ref_gap = false;
3886  }
3887  row_idx = dec.get_16();
3888  goto process_xor;
3890  {
3891  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3892  {
3893  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3894  x_ref_d64 = 0; x_ref_gap = false;
3895  }
3896  row_idx = dec.get_32();
3897  process_xor:
3898  x_ref_d64 = dec.get_64();
3899  process_xor_ref:
3900  BM_ASSERT(ref_vect_); // reference vector has to be attached
3901  x_ref_idx = ref_vect_->find(row_idx);
3902  x_nb = i;
3903  if (x_ref_idx == ref_vect_->not_found())
3904  {
3905  BM_ASSERT(0);
3906  goto throw_err;
3907  }
3908  if (blk)
3909  {
3910  or_block_ = bman.deoptimize_block(i);
3911  bman.set_block_ptr(i, 0); // borrow OR target before XOR
3912  }
3913  }
3914  continue; // important! cont to avoid inc(i)
3916  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3917  {
3918  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3919  x_ref_d64 = 0; x_ref_gap = false;
3920  }
3921  row_idx = dec.get_8();
3922  x_ref_gap = true;
3923  goto process_xor_ref;
3925  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3926  {
3927  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3928  x_ref_d64 = 0; x_ref_gap = false;
3929  }
3930  row_idx = dec.get_16();
3931  x_ref_gap = true;
3932  goto process_xor_ref;
3934  if (x_ref_d64 || x_ref_gap) // previous delayed XOR post proc.
3935  {
3936  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3937  x_ref_d64 = 0; x_ref_gap = false;
3938  }
3939  row_idx = dec.get_32();
3940  x_ref_gap = true;
3941  goto process_xor_ref;
3942 
3943  default:
3944  BM_ASSERT(0); // unknown block type
3945  throw_err:
3946  #ifndef BM_NO_STL
3947  throw std::logic_error(this->err_msg());
3948  #else
3949  BM_THROW(BM_ERR_SERIALFORMAT);
3950  #endif
3951  } // switch
3952 
3953  ++i;
3954  } while (i < bm::set_total_blocks);
3955 
3956  // process the last delayed XOR ref here
3957  //
3958  if (x_ref_d64 || x_ref_gap) // XOR post proc.
3959  xor_decode(x_ref_idx, x_ref_d64, bman, x_nb);
3960 
3961  bv.set_new_blocks_strat(strat);
3962 
3963  return dec.size();
3964 }
3965 
3966 // ---------------------------------------------------------------------------
3967 
3968 template<class BV, class DEC>
3970  blocks_manager_type& bman,
3971  block_idx_type nb)
3972 {
3973  BM_ASSERT(ref_vect_);
3974 
3975  unsigned i0, j0;
3976 
3977  const bvector_type* ref_bv = ref_vect_->get_bv(x_ref_idx);
3978  BM_ASSERT(ref_bv);
3979  const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
3980  BM_ASSERT(&ref_bman != &bman);
3981  bm::get_block_coord(nb, i0, j0);
3982 
3983  const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
3984  if (!ref_blk)
3985  {
3986  BM_ASSERT(!or_block_);
3987  return;
3988  }
3989 
3990  if (BM_IS_GAP(ref_blk))
3991  {
3992  bm::gap_word_t* gap_block = BMGAP_PTR(ref_blk);
3993  bm::gap_convert_to_bitset(xor_block_, gap_block);
3994  ref_blk = xor_block_;
3995  }
3996  else
3997  {
3998  if (IS_FULL_BLOCK(ref_blk))
3999  ref_blk = FULL_BLOCK_REAL_ADDR;
4000  }
4001 
4002  bm::word_t* blk = bman.deoptimize_block(nb);
4003  if (!blk)
4004  {
4005  blk = bman.check_allocate_block(nb, 0);
4006  }
4007  if (x_ref_d64)
4008  bm::bit_block_xor(blk, blk, ref_blk, x_ref_d64);
4009  else
4010  bm::bit_block_xor(blk, ref_blk);
4011 
4012  if (or_block_)
4013  {
4014  bm::bit_block_or(blk, or_block_);
4015  alloc_.free_bit_block(or_block_);
4016  or_block_ = 0;
4017  bman.optimize_bit_block(i0, j0);
4018  }
4019 }
4020 
4021 
4022 // ---------------------------------------------------------------------------
4023 
4024 template<typename DEC, typename BLOCK_IDX>
4026  : decoder_(buf),
4027  end_of_stream_(false),
4028  bv_size_(0),
4029  state_(e_unknown),
4030  id_cnt_(0),
4031  block_idx_(0),
4032  mono_block_cnt_(0),
4033  block_idx_arr_(0)
4034 {
4035  ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
4036 
4039 
4064 
4065 
4066  // reading stream header
4067  unsigned char header_flag = decoder_.get_8();
4068  if (!(header_flag & BM_HM_NO_BO))
4069  {
4070  /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
4071  }
4072 
4073  // check if bitvector comes as an inverted, sorted list of ints
4074  //
4075  if (header_flag & BM_HM_ID_LIST)
4076  {
4077  // special case: the next comes plain list of unsigned integers
4078  if (header_flag & BM_HM_RESIZE)
4079  {
4080  if (header_flag & BM_HM_64_BIT)
4081  {
4082  BM_ASSERT(sizeof(block_idx_type)==8);
4083  bv_size_ = (block_idx_type)decoder_.get_64();
4084  }
4085  else
4086  bv_size_ = decoder_.get_32();
4087  }
4088 
4089  state_ = e_list_ids;
4090  id_cnt_ = decoder_.get_32();
4091  next(); // read first id
4092  }
4093  else
4094  {
4095  if (!(header_flag & BM_HM_NO_GAPL))
4096  {
4097  for (unsigned i = 0; i < bm::gap_levels; ++i) // load the GAP levels
4098  glevels_[i] = decoder_.get_16();
4099  }
4100  if (header_flag & BM_HM_RESIZE)
4101  {
4102  if (header_flag & BM_HM_64_BIT)
4103  {
4104  BM_ASSERT(sizeof(block_idx_type)==8);
4105  bv_size_ = (block_idx_type)decoder_.get_64();
4106  }
4107  else
4108  bv_size_ = decoder_.get_32();
4109  }
4110  state_ = e_blocks;
4111  }
4112  block_idx_arr_=(gap_word_t*)::malloc(sizeof(gap_word_t) * bm::gap_max_bits);
4113  if (!block_idx_arr_)
4114  {
4115  #ifndef BM_NO_STL
4116  throw std::bad_alloc();
4117  #else
4118  BM_THROW(BM_ERR_BADALLOC);
4119  #endif
4120  }
4121  this->id_array_ = block_idx_arr_;
4122 }
4123 
4124 template<typename DEC, typename BLOCK_IDX>
4126 {
4127  if (block_idx_arr_)
4128  ::free(block_idx_arr_);
4129 }
4130 
4131 
4132 template<typename DEC, typename BLOCK_IDX>
4134 {
4135  if (is_eof())
4136  {
4137  ++block_idx_;
4138  return;
4139  }
4140  block_idx_type nb_sync;
4141 
4142  switch (state_)
4143  {
4144  case e_list_ids:
4145  // read inverted ids one by one
4146  if (id_cnt_ == 0)
4147  {
4148  end_of_stream_ = true;
4149  state_ = e_unknown;
4150  break;
4151  }
4152  last_id_ = decoder_.get_32();
4153  --id_cnt_;
4154  break;
4155 
4156  case e_blocks:
4157  if (block_idx_ == bm::set_total_blocks)
4158  {
4159  end_of_stream_ = true;
4160  state_ = e_unknown;
4161  break;
4162  }
4163 
4164  block_type_ = decoder_.get_8();
4165 
4166  // pre-check for 7-bit zero block
4167  //
4168  if (block_type_ & (1u << 7u))
4169  {
4170  mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
4171  state_ = e_zero_blocks;
4172  break;
4173  }
4174 
4175  switch (block_type_)
4176  {
4177  case set_block_azero:
4178  case set_block_end:
4179  end_of_stream_ = true; state_ = e_unknown;
4180  break;
4181  case set_block_1zero:
4182  state_ = e_zero_blocks;
4183  mono_block_cnt_ = 0;
4184  break;
4185  case set_block_8zero:
4186  state_ = e_zero_blocks;
4187  mono_block_cnt_ = decoder_.get_8()-1;
4188  break;
4189  case set_block_16zero:
4190  state_ = e_zero_blocks;
4191  mono_block_cnt_ = decoder_.get_16()-1;
4192  break;
4193  case set_block_32zero:
4194  state_ = e_zero_blocks;
4195  mono_block_cnt_ = decoder_.get_32()-1;
4196  break;
4197  case set_block_aone:
4198  state_ = e_one_blocks;
4199  mono_block_cnt_ = bm::set_total_blocks - block_idx_;
4200  break;
4201  case set_block_1one:
4202  state_ = e_one_blocks;
4203  mono_block_cnt_ = 0;
4204  break;
4205  case set_block_8one:
4206  state_ = e_one_blocks;
4207  mono_block_cnt_ = decoder_.get_8()-1;
4208  break;
4209  case set_block_16one:
4210  state_ = e_one_blocks;
4211  mono_block_cnt_ = decoder_.get_16()-1;
4212  break;
4213  case set_block_32one:
4214  state_ = e_one_blocks;
4215  mono_block_cnt_ = decoder_.get_32()-1;
4216  break;
4217  case set_block_64one:
4218  state_ = e_one_blocks;
4219  mono_block_cnt_ = block_idx_type(decoder_.get_64()-1);
4220  break;
4221 
4222  case bm::set_block_bit:
4225  case bm::set_block_arrbit:
4231  state_ = e_bit_block;
4232  break;
4233  case set_block_gap:
4235  case set_block_gap_egamma:
4237  case set_block_gap_bienc:
4240  gap_head_ = decoder_.get_16();
4242  case set_block_arrgap:
4248  case set_block_arrgap_inv:
4250  case set_block_bit_1bit:
4259  state_ = e_gap_block;
4260  break;
4261  case set_block_gapbit:
4262  state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
4263  break;
4264 
4265  // --------------------------------------------- bookmarks and syncs
4266  //
4267  case set_nb_bookmark32:
4268  this->bookmark_idx_ = block_idx_;
4269  this->skip_offset_ = decoder_.get_32();
4270  this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4271  break;
4272  case set_nb_bookmark24:
4273  this->bookmark_idx_ = block_idx_;
4274  this->skip_offset_ = decoder_.get_24();
4275  this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4276  break;
4277  case set_nb_bookmark16:
4278  this->bookmark_idx_ = block_idx_;
4279  this->skip_offset_ = decoder_.get_16();
4280  this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4281  break;
4282 
4283  case set_nb_sync_mark8:
4284  nb_sync = decoder_.get_8();
4285  goto process_nb_sync;
4286  case set_nb_sync_mark16:
4287  nb_sync = decoder_.get_16();
4288  goto process_nb_sync;
4289  case set_nb_sync_mark24:
4290  nb_sync = decoder_.get_24();
4291  goto process_nb_sync;
4292  case set_nb_sync_mark32:
4293  nb_sync = decoder_.get_32();
4294  goto process_nb_sync;
4295  case set_nb_sync_mark48:
4296  nb_sync = block_idx_type(decoder_.get_48());
4297  goto process_nb_sync;
4298  case set_nb_sync_mark64:
4299  nb_sync = block_idx_type(decoder_.get_64());
4300  process_nb_sync:
4301  BM_ASSERT(block_idx_ == this->bookmark_idx_ + nb_sync);
4302  if (block_idx_ != this->bookmark_idx_ + nb_sync)
4303  {
4304  #ifndef BM_NO_STL
4305  throw std::logic_error(this->err_msg());
4306  #else
4307  BM_THROW(BM_ERR_SERIALFORMAT);
4308  #endif
4309  }
4310  break;
4311 
4312  // --------------------------------------------------------------
4313  // XOR deserials are incompatible with the operation deserialial
4314  // throw or assert here
4315  //
4316  case set_block_ref_eq:
4317  case set_block_xor_ref8:
4318  case set_block_xor_ref16:
4319  case set_block_xor_ref32:
4321  // fall through
4322  default:
4323  BM_ASSERT(0);
4324  #ifndef BM_NO_STL
4325  throw std::logic_error(this->err_msg());
4326  #else
4327  BM_THROW(BM_ERR_SERIALFORMAT);
4328  #endif
4329  } // switch
4330 
4331  break;
4332 
4333  case e_zero_blocks:
4334  case e_one_blocks:
4335  ++block_idx_;
4336  if (!mono_block_cnt_)
4337  {
4338  state_ = e_blocks; // get new block token
4339  break;
4340  }
4341  --mono_block_cnt_;
4342  break;
4343 
4344  case e_unknown:
4345  default:
4346  BM_ASSERT(0);
4347  #ifndef BM_NO_STL
4348  throw std::logic_error(this->err_msg());
4349  #else
4350  BM_THROW(BM_ERR_SERIALFORMAT);
4351  #endif
4352  } // switch
4353 }
4354 
4355 template<typename DEC, typename BLOCK_IDX>
4358 {
4359  BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
4360  if (!mono_block_cnt_)
4361  ++block_idx_;
4362  else
4363  {
4364  block_idx_ += mono_block_cnt_+1;
4365  mono_block_cnt_ = 0;
4366  }
4367  state_ = e_blocks;
4368  return block_idx_;
4369 }
4370 
4371 template<typename DEC, typename BLOCK_IDX>
4372 void
4374 {
4375  gap_word_t len = decoder_.get_16();
4376  if (block)
4377  {
4378  bm::bit_block_set(block, ~0u);
4379  for (unsigned k = 0; k < len; ++k)
4380  {
4381  bm::gap_word_t bit_idx = decoder_.get_16();
4382  bm::clear_bit(block, bit_idx);
4383  }
4384  }
4385  else // dry read
4386  {
4387  for (unsigned k = 0; k < len; ++k)
4388  decoder_.get_16();
4389  }
4390 }
4391 
4392 
4393 template<typename DEC, typename BLOCK_IDX>
4394 unsigned
4396  bm::word_t* dst_block,
4397  bm::word_t* tmp_block)
4398 {
4399  // ASSIGN should be ready for dry read (dst_block can be NULL)
4400  //
4401  BM_ASSERT(this->state_ == e_bit_block);
4402  unsigned count = 0;
4403 
4404  switch (this->block_type_)
4405  {
4406  case set_block_bit:
4407  decoder_.get_32(dst_block, bm::set_block_size);
4408  break;
4409  case set_block_bit_0runs:
4410  {
4411  if (IS_VALID_ADDR(dst_block))
4412  bm::bit_block_set(dst_block, 0);
4413  unsigned char run_type = decoder_.get_8();
4414  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4415  {
4416  unsigned run_length = decoder_.get_16();
4417  if (run_type)
4418  {
4419  decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
4420  }
4421  j += run_length;
4422  } // for
4423  }
4424  break;
4426  {
4427  unsigned head_idx = decoder_.get_16();
4428  unsigned tail_idx = decoder_.get_16();
4429  if (dst_block)
4430  {
4431  for (unsigned i = 0; i < head_idx; ++i)
4432  dst_block[i] = 0;
4433  decoder_.get_32(dst_block + head_idx,
4434  tail_idx - head_idx + 1);
4435  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
4436  dst_block[j] = 0;
4437  }
4438  else
4439  {
4440  int pos = int(tail_idx - head_idx) + 1;
4441  pos *= 4u;
4442  decoder_.seek(pos);
4443  }
4444  }
4445  break;
4446  case set_block_arrbit:
4447  case set_block_bit_1bit:
4448  get_arr_bit(dst_block, true /*clear target*/);
4449  break;
4450  case set_block_gapbit:
4451  BM_ASSERT(0);
4452  #ifndef BM_NO_STL
4453  throw std::logic_error(this->err_msg());
4454  #else
4455  BM_THROW(BM_ERR_SERIALFORMAT);
4456  #endif
4457  break;
4458  case set_block_arrbit_inv:
4459  get_inv_arr(dst_block);
4460  break;
4462  if (IS_VALID_ADDR(dst_block))
4463  bm::bit_block_set(dst_block, 0);
4464  this->read_bic_arr(decoder_, dst_block);
4465  break;
4467  this->read_bic_arr_inv(decoder_, tmp_block);
4468  if (IS_VALID_ADDR(dst_block))
4469  bm::bit_block_copy(dst_block, tmp_block);
4470  break;
4472  if (IS_VALID_ADDR(dst_block))
4473  bm::bit_block_set(dst_block, 0);
4474  this->read_bic_gap(decoder_, dst_block);
4475  break;
4477  if (IS_VALID_ADDR(dst_block))
4478  bm::bit_block_set(dst_block, 0);
4479  this->read_digest0_block(decoder_, dst_block);
4480  break;
4481  default:
4482  BM_ASSERT(0);
4483  #ifndef BM_NO_STL
4484  throw std::logic_error(this->err_msg());
4485  #else
4486  BM_THROW(BM_ERR_SERIALFORMAT);
4487  #endif
4488  } // switch
4489  return count;
4490 }
4491 
4492 template<typename DEC, typename BLOCK_IDX>
4493 unsigned
4495  bm::word_t* dst_block,
4496  bm::word_t* tmp_block)
4497 {
4498  BM_ASSERT(this->state_ == e_bit_block);
4499  unsigned count = 0;
4500  switch (block_type_)
4501  {
4502  case set_block_bit:
4503  decoder_.get_32_OR(dst_block, bm::set_block_size);
4504  break;
4506  {
4507  unsigned head_idx = decoder_.get_16();
4508  unsigned tail_idx = decoder_.get_16();
4509  for (unsigned i = head_idx; i <= tail_idx; ++i)
4510  dst_block[i] |= decoder_.get_32();
4511  }
4512  break;
4513  case set_block_bit_0runs:
4514  {
4515  unsigned char run_type = decoder_.get_8();
4516  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4517  {
4518  unsigned run_length = decoder_.get_16();
4519  if (run_type)
4520  {
4521  unsigned run_end = j + run_length;
4522  for (;j < run_end; ++j)
4523  {
4525  dst_block[j] |= decoder_.get_32();
4526  }
4527  }
4528  else
4529  {
4530  j += run_length;
4531  }
4532  } // for
4533  }
4534  break;
4535  case set_block_bit_1bit:
4536  case set_block_arrbit:
4537  get_arr_bit(dst_block, false /*don't clear target*/);
4538  break;
4539  case set_block_arrbit_inv:
4540  get_inv_arr(tmp_block);
4541  bm::bit_block_or(dst_block, tmp_block);
4542  break;
4544  this->read_bic_arr(decoder_, dst_block);
4545  break;
4547  this->read_bic_arr_inv(decoder_, tmp_block);
4548  bm::bit_block_or(dst_block, tmp_block);
4549  break;
4551  this->read_bic_gap(decoder_, dst_block);
4552  break;
4554  this->read_digest0_block(decoder_, dst_block);
4555  break;
4556  default:
4557  BM_ASSERT(0);
4558  #ifndef BM_NO_STL
4559  throw std::logic_error(this->err_msg());
4560  #else
4561  BM_THROW(BM_ERR_SERIALFORMAT);
4562  #endif
4563  } // switch
4564  return count;
4565 }
4566 
4567 template<typename DEC, typename BLOCK_IDX>
4568 unsigned
4570  bm::word_t* BMRESTRICT dst_block,
4571  bm::word_t* BMRESTRICT tmp_block)
4572 {
4573  BM_ASSERT(this->state_ == e_bit_block);
4574  BM_ASSERT(dst_block != tmp_block);
4575  unsigned count = 0;
4576  switch (block_type_)
4577  {
4578  case set_block_bit:
4579  decoder_.get_32_AND(dst_block, bm::set_block_size);
4580  break;
4581  case set_block_bit_0runs:
4582  {
4583  unsigned char run_type = decoder_.get_8();
4584  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4585  {
4586  unsigned run_length = decoder_.get_16();
4587 
4588  unsigned run_end = j + run_length;
4589  if (run_type)
4590  {
4591  for (;j < run_end; ++j)
4592  {
4594  dst_block[j] &= decoder_.get_32();
4595  }
4596  }
4597  else
4598  {
4599  for (;j < run_end; ++j)
4600  {
4602  dst_block[j] = 0;
4603  }
4604  }
4605  } // for
4606  }
4607  break;
4609  {
4610  unsigned head_idx = decoder_.get_16();
4611  unsigned tail_idx = decoder_.get_16();
4612  unsigned i;
4613  for ( i = 0; i < head_idx; ++i)
4614  dst_block[i] = 0;
4615  for ( i = head_idx; i <= tail_idx; ++i)
4616  dst_block[i] &= decoder_.get_32();
4617  for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
4618  dst_block[i] = 0;
4619  }
4620  break;
4621  case set_block_bit_1bit:
4622  case set_block_arrbit:
4623  get_arr_bit(tmp_block, true /*clear target*/);
4624  if (dst_block)
4625  bm::bit_block_and(dst_block, tmp_block);
4626  break;
4627  case set_block_arrbit_inv:
4628  get_inv_arr(tmp_block);
4629  if (dst_block)
4630  bm::bit_block_and(dst_block, tmp_block);
4631  break;
4632  case set_block_arr_bienc:
4633  if (dst_block)
4634  {
4635  bm::bit_block_set(tmp_block, 0);
4636  this->read_bic_arr(decoder_, tmp_block);
4637  bm::bit_block_and(dst_block, tmp_block);
4638  }
4639  else
4640  this->read_bic_arr(decoder_, 0); // dry read
4641  break;
4643  this->read_bic_arr_inv(decoder_, tmp_block);
4644  if (dst_block)
4645  bm::bit_block_and(dst_block, tmp_block);
4646  break;
4648  if (dst_block)
4649  {
4650  BM_ASSERT(IS_VALID_ADDR(dst_block));
4651  bm::bit_block_set(tmp_block, 0);
4652  this->read_bic_gap(decoder_, tmp_block);
4653  bm::bit_block_and(dst_block, tmp_block);
4654  }
4655  else
4656  this->read_bic_gap(decoder_, 0); // dry read
4657  break;
4659  if (dst_block)
4660  {
4661  BM_ASSERT(IS_VALID_ADDR(dst_block));
4662  bm::bit_block_set(tmp_block, 0);
4663  this->read_digest0_block(decoder_, tmp_block);
4664  bm::bit_block_and(dst_block, tmp_block);
4665  }
4666  else
4667  this->read_digest0_block(decoder_, 0); // dry read
4668  break;
4669  default:
4670  BM_ASSERT(0);
4671  #ifndef BM_NO_STL
4672  throw std::logic_error(this->err_msg());
4673  #else
4674  BM_THROW(BM_ERR_SERIALFORMAT);
4675  #endif
4676  } // switch
4677  return count;
4678 }
4679 
4680 template<typename DEC, typename BLOCK_IDX>
4681 unsigned
4683  bm::word_t* dst_block,
4684  bm::word_t* tmp_block)
4685 {
4686  BM_ASSERT(this->state_ == e_bit_block);
4687  BM_ASSERT(dst_block != tmp_block);
4688 
4689  unsigned count = 0;
4690  switch (block_type_)
4691  {
4692  case set_block_bit:
4693  for (unsigned i = 0; i < bm::set_block_size; ++i)
4694  dst_block[i] ^= decoder_.get_32();
4695  break;
4696  case set_block_bit_0runs:
4697  {
4698  unsigned char run_type = decoder_.get_8();
4699  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4700  {
4701  unsigned run_length = decoder_.get_16();
4702  if (run_type)
4703  {
4704  unsigned run_end = j + run_length;
4705  for (;j < run_end; ++j)
4706  {
4708  dst_block[j] ^= decoder_.get_32();
4709  }
4710  }
4711  else
4712  {
4713  j += run_length;
4714  }
4715  } // for
4716  }
4717  break;
4719  {
4720  unsigned head_idx = decoder_.get_16();
4721  unsigned tail_idx = decoder_.get_16();
4722  for (unsigned i = head_idx; i <= tail_idx; ++i)
4723  dst_block[i] ^= decoder_.get_32();
4724  }
4725  break;
4726  case set_block_bit_1bit:
4727  // TODO: optimization
4728  case set_block_arrbit:
4729  get_arr_bit(tmp_block, true /*clear target*/);
4730  if (dst_block)
4731  bm::bit_block_xor(dst_block, tmp_block);
4732  break;
4733  case set_block_arrbit_inv:
4734  get_inv_arr(tmp_block);
4735  if (dst_block)
4736  bm::bit_block_xor(dst_block, tmp_block);
4737  break;
4738  case set_block_arr_bienc:
4739  bm::bit_block_set(tmp_block, 0);
4740  this->read_bic_arr(decoder_, tmp_block);
4741  if (dst_block)
4742  bm::bit_block_xor(dst_block, tmp_block);
4743  break;
4745  this->read_bic_arr_inv(decoder_, tmp_block);
4746  if (dst_block)
4747  {
4748  BM_ASSERT(IS_VALID_ADDR(dst_block));
4749  bm::bit_block_xor(dst_block, tmp_block);
4750  }
4751  break;
4753  if (dst_block)
4754  {
4755  BM_ASSERT(IS_VALID_ADDR(dst_block));
4756  bm::bit_block_set(tmp_block, 0);
4757  this->read_bic_gap(decoder_, tmp_block);
4758  bm::bit_block_xor(dst_block, tmp_block);
4759  }
4760  else
4761  this->read_bic_gap(decoder_, 0); // dry read
4762  break;
4764  if (dst_block)
4765  {
4766  BM_ASSERT(IS_VALID_ADDR(dst_block));
4767  bm::bit_block_set(tmp_block, 0);
4768  this->read_digest0_block(decoder_, tmp_block);
4769  bm::bit_block_xor(dst_block, tmp_block);
4770  }
4771  else
4772  this->read_digest0_block(decoder_, 0); // dry read
4773  break;
4774  default:
4775  BM_ASSERT(0);
4776  #ifndef BM_NO_STL
4777  throw std::logic_error(this->err_msg());
4778  #else
4779  BM_THROW(BM_ERR_SERIALFORMAT);
4780  #endif
4781  } // switch
4782  return count;
4783 }
4784 
4785 template<typename DEC, typename BLOCK_IDX>
4786 unsigned
4788  bm::word_t* dst_block,
4789  bm::word_t* tmp_block)
4790 {
4791  BM_ASSERT(this->state_ == e_bit_block);
4792  BM_ASSERT(dst_block != tmp_block);
4793 
4794  unsigned count = 0;
4795  switch (block_type_)
4796  {
4797  case set_block_bit:
4798  for (unsigned i = 0; i < bm::set_block_size; ++i)
4799  dst_block[i] &= ~decoder_.get_32();
4800  break;
4801  case set_block_bit_0runs:
4802  {
4803  unsigned char run_type = decoder_.get_8();
4804  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4805  {
4806  unsigned run_length = decoder_.get_16();
4807  if (run_type)
4808  {
4809  unsigned run_end = j + run_length;
4810  for (;j < run_end; ++j)
4811  {
4813  dst_block[j] &= ~decoder_.get_32();
4814  }
4815  }
4816  else
4817  {
4818  j += run_length;
4819  }
4820  } // for
4821  }
4822  break;
4824  {
4825  unsigned head_idx = decoder_.get_16();
4826  unsigned tail_idx = decoder_.get_16();
4827  for (unsigned i = head_idx; i <= tail_idx; ++i)
4828  dst_block[i] &= ~decoder_.get_32();
4829  }
4830  break;
4831  case set_block_bit_1bit:
4832  // TODO: optimization
4833  case set_block_arrbit:
4834  get_arr_bit(tmp_block, true /*clear target*/);
4835  if (dst_block)
4836  bm::bit_block_sub(dst_block, tmp_block);
4837  break;
4838  case set_block_arrbit_inv:
4839  get_inv_arr(tmp_block);
4840  if (dst_block)
4841  bm::bit_block_sub(dst_block, tmp_block);
4842  break;
4843  case set_block_arr_bienc:
4844  bm::bit_block_set(tmp_block, 0);
4845  this->read_bic_arr(decoder_, tmp_block);
4846  if (dst_block)
4847  bm::bit_block_sub(dst_block, tmp_block);
4848  break;
4850  this->read_bic_arr_inv(decoder_, tmp_block);
4851  if (dst_block)
4852  bm::bit_block_sub(dst_block, tmp_block);
4853  break;
4855  if (dst_block)
4856  {
4857  BM_ASSERT(IS_VALID_ADDR(dst_block));
4858  bm::bit_block_set(tmp_block, 0);
4859  this->read_bic_gap(decoder_, tmp_block);
4860  bm::bit_block_sub(dst_block, tmp_block);
4861  }
4862  else
4863  this->read_bic_gap(decoder_, 0); // dry read
4864  break;
4866  if (dst_block)
4867  {
4868  BM_ASSERT(IS_VALID_ADDR(dst_block));
4869  bm::bit_block_set(tmp_block, 0);
4870  this->read_digest0_block(decoder_, tmp_block);
4871  bm::bit_block_sub(dst_block, tmp_block);
4872  }
4873  else
4874  this->read_digest0_block(decoder_, 0); // dry read
4875  break;
4876  default:
4877  BM_ASSERT(0);
4878  #ifndef BM_NO_STL
4879  throw std::logic_error(this->err_msg());
4880  #else
4881  BM_THROW(BM_ERR_SERIALFORMAT);
4882  #endif
4883  } // switch
4884  return count;
4885 }
4886 
4887 
4888 template<typename DEC, typename BLOCK_IDX>
4889 unsigned
4891  bm::word_t* /*dst_block*/,
4892  bm::word_t* tmp_block)
4893 {
4894  BM_ASSERT(this->state_ == e_bit_block);
4895 
4896  unsigned count = 0;
4897  switch (block_type_)
4898  {
4899  case set_block_bit:
4900  for (unsigned i = 0; i < bm::set_block_size; ++i)
4901  count += bm::word_bitcount(decoder_.get_32());
4902  break;
4903  case set_block_bit_0runs:
4904  {
4905  //count = 0;
4906  unsigned char run_type = decoder_.get_8();
4907  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4908  {
4909  unsigned run_length = decoder_.get_16();
4910  if (run_type)
4911  {
4912  unsigned run_end = j + run_length;
4913  for (;j < run_end; ++j)
4914  {
4915  count += word_bitcount(decoder_.get_32());
4916  }
4917  }
4918  else
4919  {
4920  j += run_length;
4921  }
4922  } // for
4923  return count;
4924  }
4926  {
4927  unsigned head_idx = decoder_.get_16();
4928  unsigned tail_idx = decoder_.get_16();
4929  for (unsigned i = head_idx; i <= tail_idx; ++i)
4930  count += bm::word_bitcount(decoder_.get_32());
4931  }
4932  break;
4933  case set_block_arrbit:
4934  count += get_arr_bit(0);
4935  break;
4936  case set_block_bit_1bit:
4937  ++count;
4938  decoder_.get_16();
4939  break;
4940  case set_block_arrbit_inv:
4941  get_inv_arr(tmp_block);
4942  goto count_tmp;
4943  case set_block_arr_bienc:
4944  bm::bit_block_set(tmp_block, 0); // TODO: just add a counted read
4945  this->read_bic_arr(decoder_, tmp_block);
4946  goto count_tmp;
4948  this->read_bic_arr_inv(decoder_, tmp_block);
4949  goto count_tmp;
4951  bm::bit_block_set(tmp_block, 0);
4952  this->read_digest0_block(decoder_, tmp_block);
4953  goto count_tmp;
4955  bm::bit_block_set(tmp_block, 0);
4956  this->read_bic_gap(decoder_, tmp_block);
4957  count_tmp:
4958  count += bm::bit_block_count(tmp_block);
4959  break;
4960  default:
4961  BM_ASSERT(0);
4962  #ifndef BM_NO_STL
4963  throw std::logic_error(this->err_msg());
4964  #else
4965  BM_THROW(BM_ERR_SERIALFORMAT);
4966  #endif
4967 
4968  } // switch
4969  return count;
4970 }
4971 
4972 template<typename DEC, typename BLOCK_IDX>
4973 unsigned
4975  bm::word_t* dst_block,
4976  bm::word_t* tmp_block)
4977 {
4978  BM_ASSERT(this->state_ == e_bit_block);
4979  unsigned count = 0;
4980  if (dst_block)
4981  {
4982  // count the block bitcount
4983  count = bm::bit_block_count(dst_block);
4984  }
4985 
4986  switch (block_type_)
4987  {
4988  case set_block_bit:
4989  decoder_.get_32(0, bm::set_block_size);
4990  break;
4991  case set_block_bit_0runs:
4992  {
4993  unsigned char run_type = decoder_.get_8();
4994  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4995  {
4996  unsigned run_length = decoder_.get_16();
4997  if (run_type)
4998  {
4999  unsigned run_end = j + run_length;
5000  for (;j < run_end; ++j)
5001  {
5002  decoder_.get_32();
5003  }
5004  }
5005  else
5006  {
5007  j += run_length;
5008  }
5009  } // for
5010  }
5011  break;
5012 
5014  {
5015  unsigned head_idx = decoder_.get_16();
5016  unsigned tail_idx = decoder_.get_16();
5017  for (unsigned i = head_idx; i <= tail_idx; ++i)
5018  decoder_.get_32();
5019  }
5020  break;
5021  case set_block_arrbit:
5022  get_arr_bit(0);
5023  break;
5024  case set_block_bit_1bit:
5025  decoder_.get_16();
5026  break;
5027  case set_block_arrbit_inv:
5028  get_inv_arr(tmp_block);
5029  break;
5030  case set_block_arr_bienc:
5031  this->read_bic_arr(decoder_, tmp_block); // TODO: add dry read
5032  break;
5034  this->read_bic_arr_inv(decoder_, tmp_block); // TODO: add dry read
5035  break;
5037  this->read_bic_gap(decoder_, tmp_block);
5038  break;
5040  this->read_digest0_block(decoder_, 0); // dry read
5041  break;
5042  default:
5043  BM_ASSERT(0);
5044  #ifndef BM_NO_STL
5045  throw std::logic_error(this->err_msg());
5046  #else
5047  BM_THROW(BM_ERR_SERIALFORMAT);
5048  #endif
5049 
5050  } // switch
5051  return count;
5052 }
5053 
5054 
5055 template<typename DEC, typename BLOCK_IDX>
5056 unsigned
5058  bm::word_t* dst_block,
5059  bm::word_t* tmp_block)
5060 {
5061  BM_ASSERT(this->state_ == e_bit_block);
5062  BM_ASSERT(dst_block);
5063 
5064  unsigned count = 0;
5065  switch (block_type_)
5066  {
5067  case set_block_bit:
5068  for (unsigned i = 0; i < bm::set_block_size; ++i)
5069  count += word_bitcount(dst_block[i] & decoder_.get_32());
5070  break;
5071  case set_block_bit_0runs:
5072  {
5073  //count = 0;
5074  unsigned char run_type = decoder_.get_8();
5075  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5076  {
5077  unsigned run_length = decoder_.get_16();
5078  if (run_type)
5079  {
5080  unsigned run_end = j + run_length;
5081  for (;j < run_end; ++j)
5082  {
5083  count += word_bitcount(dst_block[j] & decoder_.get_32());
5084  }
5085  }
5086  else
5087  {
5088  j += run_length;
5089  }
5090  } // for
5091  return count;
5092  }
5094  {
5095  unsigned head_idx = decoder_.get_16();
5096  unsigned tail_idx = decoder_.get_16();
5097  for (unsigned i = head_idx; i <= tail_idx; ++i)
5098  count += word_bitcount(dst_block[i] & decoder_.get_32());
5099  }
5100  break;
5101  case set_block_bit_1bit:
5102  // TODO: optimization
5103  case set_block_arrbit:
5104  get_arr_bit(tmp_block, true /*clear target*/);
5105  goto count_tmp;
5106  break;
5107  case set_block_arrbit_inv:
5108  get_inv_arr(tmp_block);
5109  goto count_tmp;
5110  break;
5111  case set_block_arr_bienc:
5112  bm::bit_block_set(tmp_block, 0);
5113  this->read_bic_arr(decoder_, tmp_block);
5114  goto count_tmp;
5116  this->read_bic_arr_inv(decoder_, tmp_block);
5117  goto count_tmp;
5119  bm::bit_block_set(tmp_block, 0);
5120  this->read_digest0_block(decoder_, tmp_block);
5121  goto count_tmp;
5123  bm::bit_block_set(tmp_block, 0);
5124  this->read_bic_gap(decoder_, tmp_block);
5125  count_tmp:
5126  count += bm::bit_operation_and_count(dst_block, tmp_block);
5127  break;
5128  default:
5129  BM_ASSERT(0);
5130  #ifndef BM_NO_STL
5131  throw std::logic_error(this->err_msg());
5132  #else
5133  BM_THROW(BM_ERR_SERIALFORMAT);
5134  #endif
5135 
5136  } // switch
5137  return count;
5138 }
5139 
5140 template<typename DEC,typename BLOCK_IDX>
5141 unsigned
5143  bm::word_t* dst_block,
5144  bm::word_t* tmp_block)
5145 {
5146  BM_ASSERT(this->state_ == e_bit_block);
5147  BM_ASSERT(dst_block);
5148 
5149  bitblock_sum_adapter count_adapter;
5150  switch (block_type_)
5151  {
5152  case set_block_bit:
5153  {
5154  bitblock_get_adapter ga(dst_block);
5156 
5157  bit_recomb(ga,
5158  decoder_,
5159  func,
5160  count_adapter
5161  );
5162  }
5163  break;
5164  case set_block_bit_0runs:
5165  {
5166  unsigned count = 0;
5167  unsigned char run_type = decoder_.get_8();
5168  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5169  {
5170  unsigned run_length = decoder_.get_16();
5171  unsigned run_end = j + run_length;
5172  if (run_type)
5173  {
5174  for (;j < run_end; ++j)
5175  {
5177  count += word_bitcount(dst_block[j] | decoder_.get_32());
5178  }
5179  }
5180  else
5181  {
5182  for (;j < run_end; ++j)
5183  {
5185  count += word_bitcount(dst_block[j]);
5186  }
5187  }
5188  } // for
5189  return count;
5190  }
5192  {
5193  unsigned head_idx = decoder_.get_16();
5194  unsigned tail_idx = decoder_.get_16();
5195  unsigned count = 0;
5196  unsigned i;
5197  for (i = 0; i < head_idx; ++i)
5198  count += bm::word_bitcount(dst_block[i]);
5199  for (i = head_idx; i <= tail_idx; ++i)
5200  count += bm::word_bitcount(dst_block[i] | decoder_.get_32());
5201  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5202  count += bm::word_bitcount(dst_block[i]);
5203  return count;
5204  }
5205  case set_block_bit_1bit:
5206  // TODO: optimization
5207  case set_block_arrbit:
5208  get_arr_bit(tmp_block, true /* clear target*/);
5209  return bit_operation_or_count(dst_block, tmp_block);
5210  case set_block_arrbit_inv:
5211  get_inv_arr(tmp_block);
5212  goto count_tmp;
5213  case set_block_arr_bienc:
5214  bm::bit_block_set(tmp_block, 0);
5215  this->read_bic_arr(decoder_, tmp_block);
5216  goto count_tmp;
5218  this->read_bic_arr_inv(decoder_, tmp_block);
5219  goto count_tmp;
5221  bm::bit_block_set(tmp_block, 0);
5222  this->read_digest0_block(decoder_, tmp_block);
5223  goto count_tmp;
5225  bm::bit_block_set(tmp_block, 0);
5226  this->read_bic_gap(decoder_, tmp_block);
5227  count_tmp:
5228  return bm::bit_operation_or_count(dst_block, tmp_block);
5229  default:
5230  BM_ASSERT(0);
5231  #ifndef BM_NO_STL
5232  throw std::logic_error(this->err_msg());
5233  #else
5234  BM_THROW(BM_ERR_SERIALFORMAT);
5235  #endif
5236 
5237  } // switch
5238  return count_adapter.sum();
5239 }
5240 
5241 template<typename DEC, typename BLOCK_IDX>
5242 unsigned
5244  bm::word_t* dst_block,
5245  bm::word_t* tmp_block)
5246 {
5247  BM_ASSERT(this->state_ == e_bit_block);
5248  BM_ASSERT(dst_block);
5249 
5250  bitblock_sum_adapter count_adapter;
5251  switch (block_type_)
5252  {
5253  case set_block_bit:
5254  {
5255  bitblock_get_adapter ga(dst_block);
5257 
5258  bit_recomb(ga,
5259  decoder_,
5260  func,
5261  count_adapter
5262  );
5263  }
5264  break;
5265  case set_block_bit_0runs:
5266  {
5267  unsigned count = 0;
5268  unsigned char run_type = decoder_.get_8();
5269  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5270  {
5271  unsigned run_length = decoder_.get_16();
5272  unsigned run_end = j + run_length;
5273  if (run_type)
5274  {
5275  for (;j < run_end; ++j)
5276  {
5278  count += bm::word_bitcount(dst_block[j] ^ decoder_.get_32());
5279  }
5280  }
5281  else
5282  {
5283  for (;j < run_end; ++j)
5284  {
5286  count += bm::word_bitcount(dst_block[j]);
5287  }
5288  }
5289  } // for
5290  return count;
5291  }
5293  {
5294  unsigned head_idx = decoder_.get_16();
5295  unsigned tail_idx = decoder_.get_16();
5296  unsigned count = 0;
5297  unsigned i;
5298  for (i = 0; i < head_idx; ++i)
5299  count += bm::word_bitcount(dst_block[i]);
5300  for (i = head_idx; i <= tail_idx; ++i)
5301  count += bm::word_bitcount(dst_block[i] ^ decoder_.get_32());
5302  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5303  count += bm::word_bitcount(dst_block[i]);
5304  return count;
5305  }
5306  case set_block_bit_1bit:
5307  // TODO: optimization
5308  case set_block_arrbit:
5309  get_arr_bit(tmp_block, true /* clear target*/);
5310  goto count_tmp;
5311  case set_block_arrbit_inv:
5312  get_inv_arr(tmp_block);
5313  goto count_tmp;
5314  case set_block_arr_bienc:
5315  bm::bit_block_set(tmp_block, 0);
5316  this->read_bic_arr(decoder_, tmp_block);
5317  goto count_tmp;
5319  this->read_bic_arr_inv(decoder_, tmp_block);
5320  goto count_tmp;
5321  break;
5323  bm::bit_block_set(tmp_block, 0);
5324  this->read_digest0_block(decoder_, tmp_block);
5325  goto count_tmp;
5327  bm::bit_block_set(tmp_block, 0);
5328  this->read_bic_gap(decoder_, tmp_block);
5329  count_tmp:
5330  return bm::bit_operation_xor_count(dst_block, tmp_block);
5331  default:
5332  BM_ASSERT(0);
5333  #ifndef BM_NO_STL
5334  throw std::logic_error(this->err_msg());
5335  #else
5336  BM_THROW(BM_ERR_SERIALFORMAT);
5337  #endif
5338 
5339  } // switch
5340  return count_adapter.sum();
5341 }
5342 
5343 template<typename DEC, typename BLOCK_IDX>
5344 unsigned
5346  bm::word_t* dst_block,
5347  bm::word_t* tmp_block)
5348 {
5349  BM_ASSERT(this->state_ == e_bit_block);
5350  BM_ASSERT(dst_block);
5351 
5352  bitblock_sum_adapter count_adapter;
5353  switch (block_type_)
5354  {
5355  case set_block_bit:
5356  {
5357  bitblock_get_adapter ga(dst_block);
5359 
5360  bit_recomb(ga,
5361  decoder_,
5362  func,
5363  count_adapter
5364  );
5365  }
5366  break;
5367  case set_block_bit_0runs:
5368  {
5369  unsigned count = 0;
5370  unsigned char run_type = decoder_.get_8();
5371  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5372  {
5373  unsigned run_length = decoder_.get_16();
5374  unsigned run_end = j + run_length;
5375  if (run_type)
5376  {
5377  for (;j < run_end; ++j)
5378  {
5380  count += word_bitcount(dst_block[j] & ~decoder_.get_32());
5381  }
5382  }
5383  else
5384  {
5385  for (;j < run_end; ++j)
5386  {
5388  count += bm::word_bitcount(dst_block[j]);
5389  }
5390  }
5391  } // for
5392  return count;
5393  }
5395  {
5396  unsigned head_idx = decoder_.get_16();
5397  unsigned tail_idx = decoder_.get_16();
5398  unsigned count = 0;
5399  unsigned i;
5400  for (i = 0; i < head_idx; ++i)
5401  count += bm::word_bitcount(dst_block[i]);
5402  for (i = head_idx; i <= tail_idx; ++i)
5403  count += bm::word_bitcount(dst_block[i] & (~decoder_.get_32()));
5404  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5405  count += bm::word_bitcount(dst_block[i]);
5406  return count;
5407  }
5408  break;
5409  case set_block_bit_1bit:
5410  //TODO: optimization
5411  case set_block_arrbit:
5412  get_arr_bit(tmp_block, true /* clear target*/);
5413  goto count_tmp;
5414  case set_block_arrbit_inv:
5415  get_inv_arr(tmp_block);
5416  goto count_tmp;
5417  case set_block_arr_bienc:
5418  bm::bit_block_set(tmp_block, 0);
5419  this->read_bic_arr(decoder_, tmp_block);
5420  goto count_tmp;
5422  this->read_bic_arr_inv(decoder_, tmp_block);
5423  goto count_tmp;
5425  bm::bit_block_set(tmp_block, 0);
5426  this->read_digest0_block(decoder_, tmp_block);
5427  goto count_tmp;
5429  bm::bit_block_set(tmp_block, 0);
5430  this->read_bic_gap(decoder_, tmp_block);
5431  count_tmp:
5432  return bm::bit_operation_sub_count(dst_block, tmp_block);
5433  default:
5434  BM_ASSERT(0);
5435  #ifndef BM_NO_STL
5436  throw std::logic_error(this->err_msg());
5437  #else
5438  BM_THROW(BM_ERR_SERIALFORMAT);
5439  #endif
5440 
5441  } // switch
5442  return count_adapter.sum();
5443 }
5444 
5445 template<typename DEC, typename BLOCK_IDX>
5446 unsigned
5448  bm::word_t* dst_block,
5449  bm::word_t* tmp_block)
5450 {
5451  BM_ASSERT(this->state_ == e_bit_block);
5452  BM_ASSERT(dst_block);
5453 
5454  bitblock_sum_adapter count_adapter;
5455  switch (block_type_)
5456  {
5457  case set_block_bit:
5458  {
5459  bitblock_get_adapter ga(dst_block);
5461 
5462  bit_recomb(ga,
5463  decoder_,
5464  func,
5465  count_adapter
5466  );
5467  }
5468  break;
5469  case set_block_bit_0runs:
5470  {
5471  unsigned count = 0;
5472  unsigned char run_type = decoder_.get_8();
5473  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5474  {
5475  unsigned run_length = decoder_.get_16();
5476  unsigned run_end = j + run_length;
5477  if (run_type)
5478  {
5479  for (;j < run_end; ++j)
5480  {
5482  count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
5483  }
5484  }
5485  else
5486  {
5487  j += run_length;
5488  }
5489  } // for
5490  return count;
5491  }
5493  {
5494  unsigned head_idx = decoder_.get_16();
5495  unsigned tail_idx = decoder_.get_16();
5496  unsigned count = 0;
5497  unsigned i;
5498  for (i = head_idx; i <= tail_idx; ++i)
5499  count += bm::word_bitcount(decoder_.get_32() & (~dst_block[i]));
5500  return count;
5501  }
5502  break;
5503  case set_block_bit_1bit:
5504  // TODO: optimization
5505  case set_block_arrbit:
5506  get_arr_bit(tmp_block, true /* clear target*/);
5507  goto count_tmp;
5508  case set_block_arrbit_inv:
5509  get_inv_arr(tmp_block);
5510  goto count_tmp;
5511  case set_block_arr_bienc:
5512  bm::bit_block_set(tmp_block, 0);
5513  this->read_bic_arr(decoder_, tmp_block);
5514  goto count_tmp;
5516  this->read_bic_arr_inv(decoder_, tmp_block);
5517  goto count_tmp;
5519  bm::bit_block_set(tmp_block, 0);
5520  this->read_digest0_block(decoder_, tmp_block);
5521  goto count_tmp;
5523  bm::bit_block_set(tmp_block, 0);
5524  this->read_bic_gap(decoder_, tmp_block);
5525  count_tmp:
5526  return bm::bit_operation_sub_count(tmp_block, dst_block);
5527  default:
5528  BM_ASSERT(0);
5529  #ifndef BM_NO_STL
5530  throw std::logic_error(this->err_msg());
5531  #else
5532  BM_THROW(BM_ERR_SERIALFORMAT);
5533  #endif
5534  } // switch
5535  return count_adapter.sum();
5536 }
5537 
5538 
5539 
5540 template<typename DEC, typename BLOCK_IDX>
5542  bm::word_t* dst_block,
5543  bool clear_target) BMNOEXCEPT
5544 {
5545  BM_ASSERT(this->block_type_ == set_block_arrbit ||
5546  this->block_type_ == set_block_bit_1bit);
5547 
5548  gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
5549  if (dst_block)
5550  {
5551  if (clear_target)
5552  bm::bit_block_set(dst_block, 0);
5553 
5554  if (this->block_type_ == set_block_bit_1bit)
5555  {
5556  // len contains idx of 1 bit set
5557  set_bit(dst_block, len);
5558  return 1;
5559  }
5560 
5561  for (unsigned k = 0; k < len; ++k)
5562  {
5563  gap_word_t bit_idx = decoder_.get_16();
5564  bm::set_bit(dst_block, bit_idx);
5565  }
5566  }
5567  else
5568  {
5569  if (this->block_type_ == set_block_bit_1bit)
5570  return 1; // nothing to do: len var already consumed 16 bits
5571 
5572  // fwd the decode stream
5573  decoder_.seek(len * 2);
5574  }
5575  return len;
5576 }
5577 
5578 template<typename DEC, typename BLOCK_IDX>
5580 {
5581  BM_ASSERT(this->block_type_ == set_block_bit_1bit);
5582  ++(this->block_idx_);
5583  this->state_ = e_blocks;
5584 
5585  return decoder_.get_16(); // 1bit_idx
5586 }
5587 
5588 template<typename DEC, typename BLOCK_IDX>
5589 void
5591 {
5592  BM_ASSERT(this->state_ == e_gap_block ||
5593  this->block_type_ == set_block_bit_1bit);
5594  BM_ASSERT(dst_block);
5595 
5596  this->read_gap_block(this->decoder_,
5597  this->block_type_,
5598  dst_block,
5599  this->gap_head_);
5600 
5601  ++(this->block_idx_);
5602  this->state_ = e_blocks;
5603 }
5604 
5605 
5606 template<typename DEC, typename BLOCK_IDX>
5607 unsigned
5609  bm::word_t* dst_block,
5610  bm::word_t* tmp_block,
5611  set_operation op)
5612 {
5613  BM_ASSERT(this->state_ == e_bit_block);
5614 
5615  get_bit_func_type bit_func = bit_func_table_[op];
5616  BM_ASSERT(bit_func);
5617  unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
5618  this->state_ = e_blocks;
5619  ++(this->block_idx_);
5620  return cnt;
5621 }
5622 
5623 // ----------------------------------------------------------------------
5624 //
5625 // ----------------------------------------------------------------------
5626 
5627 template<class BV>
5629 : temp_block_(0), ref_vect_(0)
5630 {
5631  temp_block_ = alloc_.alloc_bit_block();
5632 }
5633 
5634 
5635 template<class BV>
5637 {
5638  if (temp_block_)
5639  alloc_.free_bit_block(temp_block_);
5640 }
5641 
5642 template<class BV>
5645  const unsigned char* buf,
5646  set_operation op,
5647  bool /*exit_on_one*/)
5648 {
5649  if (op == set_OR)
5650  {
5651  bm::deserialize(bv, buf, temp_block_, ref_vect_);
5652  return 0;
5653  }
5654  bvector_type bv_tmp;
5655  bm::deserialize(bv_tmp, buf, temp_block_, ref_vect_);
5656  return deserialize_xor(bv, bv_tmp, op);
5657 }
5658 
5659 template<class BV>
5660 void operation_deserializer<BV>::deserialize_xor_range(
5661  bvector_type& bv,
5662  const unsigned char* buf,
5663  size_type idx_from,
5664  size_type idx_to)
5665 {
5666  bv.clear();
5667 
5668  ByteOrder bo_current = globals<true>::byte_order();
5669 
5670  bm::decoder dec(buf);
5671  unsigned char header_flag = dec.get_8();
5672  ByteOrder bo = bo_current;
5673  if (!(header_flag & BM_HM_NO_BO))
5674  {
5675  bo = (bm::ByteOrder) dec.get_8();
5676  }
5677 
5678  if (bo_current == bo)
5679  {
5680  deserializer<BV, bm::decoder> deserial;
5681  deserial.set_ref_vectors(ref_vect_);
5682  deserial.set_range(idx_from, idx_to);
5683  deserial.deserialize(bv, buf);
5684  }
5685  else
5686  {
5687  switch (bo_current)
5688  {
5689  case BigEndian:
5690  {
5691  deserializer<BV, bm::decoder_big_endian> deserial;
5692  deserial.set_ref_vectors(ref_vect_);
5693  deserial.set_range(idx_from, idx_to);
5694  deserial.deserialize(bv, buf);
5695  }
5696  break;
5697  case LittleEndian:
5698  {
5699  deserializer<BV, bm::decoder_little_endian> deserial;
5700  deserial.set_ref_vectors(ref_vect_);
5701  deserial.set_range(idx_from, idx_to);
5702  deserial.deserialize(bv, buf);
5703  }
5704  break;
5705  default:
5706  BM_ASSERT(0);
5707  };
5708  }
5709  bv.keep_range_no_check(idx_from, idx_to);
5710 }
5711 
5712 
5713 
5714 template<class BV>
5715 typename operation_deserializer<BV>::size_type
5716 operation_deserializer<BV>::deserialize_xor(bvector_type& bv,
5717  bvector_type& bv_tmp,
5718  set_operation op)
5719 {
5720  size_type count = 0;
5721  switch (op)
5722  {
5723  case set_AND:
5724  bv.bit_and(bv_tmp, bvector_type::opt_compress);
5725  break;
5726  case set_ASSIGN:
5727  bv.swap(bv_tmp);
5728  break;
5729  case set_SUB:
5730  bv -= bv_tmp;
5731  break;
5732  case set_XOR:
5733  bv ^= bv_tmp;
5734  break;
5735  case set_COUNT: case set_COUNT_B:
5736  count = bv_tmp.count();
5737  break;
5738  case set_COUNT_A:
5739  return bv.count();
5740  case set_COUNT_AND:
5741  count += bm::count_and(bv, bv_tmp);
5742  break;
5743  case set_COUNT_XOR:
5744  count += bm::count_xor(bv, bv_tmp);
5745  break;
5746  case set_COUNT_OR:
5747  count += bm::count_or(bv, bv_tmp);
5748  break;
5749  case set_COUNT_SUB_AB:
5750  count += bm::count_sub(bv, bv_tmp);
5751  break;
5752  case set_COUNT_SUB_BA:
5753  count += bm::count_sub(bv_tmp, bv);
5754  break;
5755  case set_END:
5756  default:
5757  BM_ASSERT(0);
5758  #ifndef BM_NO_STL
5759  throw std::logic_error("BM: serialization error");
5760  #else
5761  BM_THROW(BM_ERR_SERIALFORMAT);
5762  #endif
5763  } // switch
5764 
5765  return count;
5766 }
5767 
5768 
5769 
5770 template<class BV>
5771 typename operation_deserializer<BV>::size_type
5773  const unsigned char* buf,
5774  set_operation op,
5775  bool exit_on_one)
5776 {
5777  ByteOrder bo_current = globals<true>::byte_order();
5778  bm::decoder dec(buf);
5779  unsigned char header_flag = dec.get_8();
5780 
5781  if (header_flag & BM_HM_HXOR) // XOR compression
5782  {
5783  BM_ASSERT(ref_vect_); // reference vector must be set
5784  return deserialize_xor(bv, buf, op, exit_on_one);
5785  }
5786 
5787  ByteOrder bo = bo_current;
5788  if (!(header_flag & BM_HM_NO_BO))
5789  bo = (bm::ByteOrder) dec.get_8();
5790 
5791  if (op == bm::set_ASSIGN)
5792  {
5793  bv.clear(true);
5794  op = bm::set_OR;
5795  }
5796 
5797  if (bo_current == bo)
5798  {
5799  serial_stream_current ss(buf);
5800  return it_d_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5801  }
5802  switch (bo_current)
5803  {
5804  case BigEndian:
5805  {
5806  serial_stream_be ss(buf);
5807  return it_d_be_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5808  }
5809  case LittleEndian:
5810  {
5811  serial_stream_le ss(buf);
5812  return it_d_le_.deserialize(bv, ss, temp_block_, op, exit_on_one);
5813  }
5814  default:
5815  BM_ASSERT(0);
5816  #ifndef BM_NO_STL
5817  throw std::logic_error("BM::platform error: unknown endianness");
5818  #else
5819  BM_THROW(BM_ERR_SERIALFORMAT);
5820  #endif
5821  };
5822 }
5823 
5824 template<class BV>
5826  bvector_type& bv,
5827  const unsigned char* buf,
5828  size_type idx_from,
5829  size_type idx_to)
5830 {
5831  ByteOrder bo_current = globals<true>::byte_order();
5832  bm::decoder dec(buf);
5833  unsigned char header_flag = dec.get_8();
5834 
5835  // check if it is empty fresh vector, set the range then
5836  //
5837  blocks_manager_type& bman = bv.get_blocks_manager();
5838  if (!bman.is_init())
5839  bv.set_range(idx_from, idx_to);
5840  else
5841  {} // assume that the target range set outside the call
5842 
5843 
5844  if (header_flag & BM_HM_HXOR) // XOR compression
5845  {
5846  BM_ASSERT(ref_vect_);
5847  bvector_type bv_tmp;
5848  deserialize_xor_range(bv_tmp, buf, idx_from, idx_to);
5849  if (bv.any())
5850  {
5851  bv.bit_and(bv_tmp, bvector_type::opt_compress);
5852  }
5853  else
5854  {
5855  bv.swap(bv_tmp);
5856  }
5857  return;
5858  }
5859 
5860  ByteOrder bo = bo_current;
5861  if (!(header_flag & BM_HM_NO_BO))
5862  bo = (bm::ByteOrder) dec.get_8();
5863 
5864  const bm::set_operation op = bm::set_AND;
5865 
5866  if (bo_current == bo)
5867  {
5868  serial_stream_current ss(buf);
5869  it_d_.set_range(idx_from, idx_to);
5870  it_d_.deserialize(bv, ss, temp_block_, op, false);
5871  it_d_.unset_range();
5872  return;
5873  }
5874  switch (bo_current)
5875  {
5876  case BigEndian:
5877  {
5878  serial_stream_be ss(buf);
5879  it_d_be_.set_range(idx_from, idx_to);
5880  it_d_be_.deserialize(bv, ss, temp_block_, op, false);
5881  it_d_be_.unset_range();
5882  return;
5883  }
5884  case LittleEndian:
5885  {
5886  serial_stream_le ss(buf);
5887  it_d_le_.set_range(idx_from, idx_to);
5888  it_d_le_.deserialize(bv, ss, temp_block_, op, false);
5889  it_d_le_.unset_range();
5890  return;
5891  }
5892  default:
5893  BM_ASSERT(0);
5894  #ifndef BM_NO_STL
5895  throw std::logic_error("BM::platform error: unknown endianness");
5896  #else
5897  BM_THROW(BM_ERR_SERIALFORMAT);
5898  #endif
5899  };
5900  return;
5901 }
5902 
5903 
5904 
5905 // ------------------------------------------------------------------
5906 
5907 template<class BV, class SerialIterator>
5909  size_type from, size_type to)
5910 {
5911  is_range_set_ = true;
5912  nb_range_from_ = (from >> bm::set_block_shift);
5913  nb_range_to_ = (to >> bm::set_block_shift);
5914 }
5915 
5916 
5917 template<class BV, class SerialIterator>
5919  bvector_type& bv,
5920  serial_iterator_type& sit,
5921  unsigned id_count,
5922  bool set_clear)
5923 {
5924  const unsigned win_size = 64;
5925  bm::id_t id_buffer[win_size+1];
5926 
5927  if (set_clear) // set bits
5928  {
5929  for (unsigned i = 0; i <= id_count;)
5930  {
5931  unsigned j;
5932  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
5933  {
5934  id_buffer[j] = sit.get_id();
5935  sit.next();
5936  } // for j
5937  bm::combine_or(bv, id_buffer, id_buffer + j);
5938  } // for i
5939  }
5940  else // clear bits
5941  {
5942  for (unsigned i = 0; i <= id_count;)
5943  {
5944  unsigned j;
5945  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
5946  {
5947  id_buffer[j] = sit.get_id();
5948  sit.next();
5949  } // for j
5950  bm::combine_sub(bv, id_buffer, id_buffer + j);
5951  } // for i
5952  }
5953 }
5954 
5955 template<class BV, class SerialIterator>
5956 typename iterator_deserializer<BV, SerialIterator>::size_type
5957 iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
5958  blocks_manager_type& bman,
5959  set_operation op,
5960  size_type bv_block_idx)
5961 {
5962  size_type count = 0;
5963  switch (op)
5964  {
5965  case set_OR: case set_SUB: case set_XOR:
5966  case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
5967  case set_COUNT_SUB_BA:
5968  // nothing to do
5969  break;
5970  case set_ASSIGN: case set_AND:
5971  {
5972  block_idx_type nblock_last = (bm::id_max >> bm::set_block_shift);
5973  if (bv_block_idx <= nblock_last)
5974  bman.set_all_zero(bv_block_idx, nblock_last); // clear the tail
5975  }
5976  break;
5977  case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
5978  case set_COUNT_SUB_AB:
5979  // count bits in the target vector
5980  {
5981  unsigned i, j;
5982  bm::get_block_coord(bv_block_idx, i,