BitMagic-C++
bmserial.h
Go to the documentation of this file.
1 #ifndef BMSERIAL__H__INCLUDED__
2 #define BMSERIAL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2019 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmserial.h
22  \brief Serialization / compression of bvector<>.
23  Set theoretical operations on compressed BLOBs.
24 */
25 
26 /*!
27  \defgroup bvserial bvector<> serialization
28  Serialization for bvector<> container
29 
30  \ingroup bvector
31 */
32 
33 #ifndef BM__H__INCLUDED__
34 // BitMagic utility headers do not include main "bm.h" declaration
35 // #include "bm.h" or "bm64.h" explicitly
36 # error missing include (bm.h or bm64.h)
37 #endif
38 
39 
40 #ifdef _MSC_VER
41 #pragma warning( push )
42 #pragma warning( disable : 4311 4312 4127)
43 #endif
44 
45 
46 
47 #include "encoding.h"
48 #include "bmfunc.h"
49 #include "bmtrans.h"
50 #include "bmalgo_impl.h"
51 #include "bmutil.h"
52 #include "bmbuffer.h"
53 #include "bmdef.h"
54 
55 //#include "bmgamma.h"
56 
57 
58 namespace bm
59 {
60 
61 const unsigned set_compression_max = 5; ///< Maximum supported compression level
62 const unsigned set_compression_default = 5; ///< Default compression level
63 
64 /**
65  Bit-vector serialization class.
66 
67  Class designed to convert sparse bit-vectors into a single block of memory
68  ready for file or database storage or network transfer.
69 
70  Reuse of this class for multiple serializations (but not across threads).
71  Class resue offers some performance advantage (helps with temp memory
72  reallocations).
73 
74  @ingroup bvserial
75 */
76 template<class BV>
78 {
79 public:
80  typedef BV bvector_type;
86 
87  typedef byte_buffer<allocator_type> buffer;
88 public:
89  /**
90  Constructor
91 
92  \param alloc - memory allocator
93  \param temp_block - temporary block for various operations
94  (if NULL it will be allocated and managed by serializer class)
95  Temp block is used as a scratch memory during serialization,
96  use of external temp block allows to avoid unnecessary re-allocations.
97 
98  Temp block attached is not owned by the class and NOT deallocated on
99  destruction.
100  */
101  serializer(const allocator_type& alloc = allocator_type(),
102  bm::word_t* temp_block = 0);
103 
104  serializer(bm::word_t* temp_block);
105 
106  ~serializer();
107 
108  /**
109  Set compression level. Higher compression takes more time to process.
110  @param clevel - compression level (0-4)
111  */
112  void set_compression_level(unsigned clevel);
113 
114  /**
115  Get compression level (0-5), Default 5 (recommended)
116  0 - take as is
117  1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical
118  compression, intervals encoding
119  3 - variant of 2 with different cut-offs
120  4 - delta transforms plus Elias Gamma encoding where possible legacy)
121  5 - binary interpolated encoding (Moffat, et al)
122 
123  Recommended: use 3 or 5
124  */
125  unsigned get_compression_level() const { return compression_level_; }
126 
127  // --------------------------------------------------------------------
128  /*! @name Serialization Methods */
129  //@{
130 
131  /**
132  Bitvector serialization into memory block
133 
134  @param bv - input bitvector
135  @param buf - out buffer (pre-allocated)
136  No range checking is done in this method.
137  It is responsibility of caller to allocate sufficient
138  amount of memory using information from calc_stat() function.
139 
140  @param buf_size - size of the output buffer
141 
142  @return Size of serialization block.
143  @sa calc_stat
144  */
145  size_type serialize(const BV& bv,
146  unsigned char* buf, size_t buf_size);
147 
148  /**
149  Bitvector serialization into buffer object (resized automatically)
150 
151  @param bv - input bitvector
152  @param buf - output buffer object
153  @param bv_stat - input (optional) bit-vector statistics object
154  if NULL, serialize will compute the statistics
155  */
156  void serialize(const BV& bv,
157  typename serializer<BV>::buffer& buf,
158  const statistics_type* bv_stat = 0);
159 
160  /**
161  Bitvector serialization into buffer object (resized automatically)
162  Input bit-vector gets optimized and then destroyed, content is
163  NOT guaranteed after this operation.
164  Effectively it moves data into the buffer.
165 
166  The reason this operation exsists is because it is faster to do
167  all three operations in one single pass.
168  This is a destructive serialization!
169 
170  @param bv - input/output bitvector
171  @param buf - output buffer object
172  */
173  void optimize_serialize_destroy(BV& bv,
174  typename serializer<BV>::buffer& buf);
175 
176  //@}
177  // --------------------------------------------------------------------
178 
179  /**
180  Return serialization counter vector
181  @internal
182  */
183  const size_type* get_compression_stat() const { return compression_stat_; }
184 
185  /**
186  Set GAP length serialization (serializes GAP levels of the original vector)
187 
188  @param value - when TRUE serialized vector includes GAP levels parameters
189  */
190  void gap_length_serialization(bool value);
191 
192  /**
193  Set byte-order serialization (for cross platform compatibility)
194  @param value - TRUE serialization format includes byte-order marker
195  */
196  void byte_order_serialization(bool value);
197 
198 protected:
199  /**
200  Encode serialization header information
201  */
202  void encode_header(const BV& bv, bm::encoder& enc);
203 
204  /*! Encode GAP block */
205  void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
206 
207  /*! Encode GAP block with Elias Gamma coder */
208  void gamma_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
209 
210  /**
211  Encode GAP block as delta-array with Elias Gamma coder
212  */
213  void gamma_gap_array(const bm::gap_word_t* gap_block,
214  unsigned arr_len,
215  bm::encoder& enc,
216  bool inverted = false);
217 
218  /// Encode bit-block as an array of bits
219  void encode_bit_array(const bm::word_t* block,
220  bm::encoder& enc, bool inverted);
221 
222  void gamma_gap_bit_block(const bm::word_t* block,
223  bm::encoder& enc);
224 
225  void gamma_arr_bit_block(const bm::word_t* block,
226  bm::encoder& enc, bool inverted);
227 
228  void bienc_arr_bit_block(const bm::word_t* block,
229  bm::encoder& enc, bool inverted);
230 
231  /// encode bit-block as interpolated bit block of gaps
232  void bienc_gap_bit_block(const bm::word_t* block, bm::encoder& enc);
233 
234  void interpolated_arr_bit_block(const bm::word_t* block,
235  bm::encoder& enc, bool inverted);
236  /// encode bit-block as interpolated gap block
237  void interpolated_gap_bit_block(const bm::word_t* block,
238  bm::encoder& enc);
239 
240  /**
241  Encode GAP block as an array with binary interpolated coder
242  */
243  void interpolated_gap_array(const bm::gap_word_t* gap_block,
244  unsigned arr_len,
245  bm::encoder& enc,
246  bool inverted);
247 
248 
249  /*! Encode GAP block with using binary interpolated encoder */
251  const bm::gap_word_t* gap_block, bm::encoder& enc);
252 
253  /**
254  Encode BIT block with repeatable runs of zeroes
255  */
256  void encode_bit_interval(const bm::word_t* blk,
257  bm::encoder& enc,
258  unsigned size_control);
259  /**
260  Encode bit-block using digest (hierarchical compression)
261  */
262  void encode_bit_digest(const bm::word_t* blk,
263  bm::encoder& enc,
264  bm::id64_t d0);
265 
266  /**
267  Determine best representation for GAP block based
268  on current set compression level
269 
270  @return set_block_gap, set_block_bit_1bit, set_block_arrgap
271  set_block_arrgap_egamma, set_block_arrgap_bienc
272  set_block_arrgap_inv, set_block_arrgap_egamma_inv
273  set_block_arrgap_bienc_inv, set_block_gap_egamma
274  set_block_gap_bienc
275 
276  @internal
277  */
278  unsigned char find_gap_best_encoding(const bm::gap_word_t* gap_block);
279 
280  /**
281  Determine best representation for a bit-block
282  */
283  unsigned char find_bit_best_encoding(const bm::word_t* block);
284 
285  /// Reset all accumulated compression statistics
287 
288  void reset_models() { mod_size_ = 0; }
289  void add_model(unsigned char mod, unsigned score);
290 
291 private:
292  serializer(const serializer&);
293  serializer& operator=(const serializer&);
294 
295 private:
298  typedef bm::heap_vector<bm::gap_word_t, allocator_type> block_arridx_type;
299  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
300 
301 private:
302  bm::id64_t digest0_;
303  unsigned bit_model_d0_size_; ///< memory (bytes) by d0 method (bytes)
304  unsigned bit_model_0run_size_; ///< memory (bytes) by run-0 method (bytes)
305  block_arridx_type bit_idx_arr_;
306  unsigned scores_[64];
307  unsigned char models_[64];
308  unsigned mod_size_;
309 
310  allocator_type alloc_;
311  size_type* compression_stat_;
312  bool gap_serial_;
313  bool byte_order_serial_;
314  bm::word_t* temp_block_;
315  unsigned compression_level_;
316  bool own_temp_block_;
317 
318  bool optimize_; ///< flag to optimize the input vector
319  bool free_; ///< flag to free the input vector
320  allocator_pool_type pool_;
321 
322 };
323 
324 /**
325  Base deserialization class
326  \ingroup bvserial
327 */
328 template<class DEC> class deseriaizer_base
329 {
330 protected:
331  typedef DEC decoder_type;
332 
333 protected:
334  deseriaizer_base() : id_array_(0) {}
335 
336  /// Read GAP block from the stream
337  void read_gap_block(decoder_type& decoder,
338  unsigned block_type,
339  bm::gap_word_t* dst_block,
340  bm::gap_word_t& gap_head);
341 
342  /// Read list of bit ids
343  ///
344  /// @return number of ids
345  unsigned read_id_list(decoder_type& decoder,
346  unsigned block_type,
347  bm::gap_word_t* dst_arr);
348 
349  /// Read binary interpolated list into a bit-set
350  void read_bic_arr(decoder_type& decoder, bm::word_t* blk);
351 
352  /// Read binary interpolated gap blocks into a bitset
353  void read_bic_gap(decoder_type& decoder, bm::word_t* blk);
354 
355  /// Read inverted binary interpolated list into a bit-set
356  void read_bic_arr_inv(decoder_type& decoder, bm::word_t* blk);
357 
358  /// Read digest0-type bit-block
359  void read_digest0_block(decoder_type& decoder, bm::word_t* blk);
360 
361 
362  /// read bit-block encoded as runs
363  static
364  void read_0runs_block(decoder_type& decoder, bm::word_t* blk);
365 
366  static
367  const char* err_msg() { return "BM::Invalid serialization format"; }
368 
369 
370 protected:
371  bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use
372 };
373 
374 /**
375  Deserializer for bit-vector
376  \ingroup bvserial
377 */
378 template<class BV, class DEC>
379 class deserializer : protected deseriaizer_base<DEC>
380 {
381 public:
383  typedef BV bvector_type;
385  typedef typename BV::size_type size_type;
388 
389 public:
390  deserializer();
391  ~deserializer();
392 
393  size_t deserialize(bvector_type& bv,
394  const unsigned char* buf,
395  bm::word_t* temp_block);
396 protected:
397  typedef typename BV::blocks_manager_type blocks_manager_type;
398 
399 protected:
400  void deserialize_gap(unsigned char btype, decoder_type& dec,
401  bvector_type& bv, blocks_manager_type& bman,
402  block_idx_type nb,
403  bm::word_t* blk);
404  void decode_bit_block(unsigned char btype, decoder_type& dec,
405  blocks_manager_type& bman,
406  block_idx_type nb,
407  bm::word_t* blk);
408 protected:
409  typedef bm::heap_vector<bm::gap_word_t, allocator_type> block_arridx_type;
410 
411 protected:
412  block_arridx_type bit_idx_arr_;
413  block_arridx_type gap_temp_block_;
415  allocator_type alloc_;
416 };
417 
418 
419 /**
420  Iterator to walk forward the serialized stream.
421 
422  \internal
423  \ingroup bvserial
424 */
425 template<class BV, class SerialIterator>
427 {
428 public:
429  typedef BV bvector_type;
431  typedef SerialIterator serial_iterator_type;
432 public:
433  static
434  size_type deserialize(bvector_type& bv,
435  serial_iterator_type& sit,
436  bm::word_t* temp_block,
438  bool exit_on_one = false);
439 
440 private:
441  typedef typename BV::blocks_manager_type blocks_manager_type;
443 
444  /// load data from the iterator of type "id list"
445  static
446  void load_id_list(bvector_type& bv,
447  serial_iterator_type& sit,
448  unsigned id_count,
449  bool set_clear);
450 
451  /// Finalize the deserialization (zero target vector tail or bit-count tail)
452  static
453  size_type finalize_target_vector(blocks_manager_type& bman,
454  set_operation op,
455  size_type bv_block_idx);
456 
457  /// Process (obsolete) id-list serialization format
458  static
459  size_type process_id_list(bvector_type& bv,
460  serial_iterator_type& sit,
461  set_operation op);
462  static
463  const char* err_msg() { return "BM::de-serialization format error"; }
464 
465 };
466 
467 /*!
468  @brief Serialization stream iterator
469 
470  Iterates blocks and control tokens of serialized bit-stream
471  \ingroup bvserial
472  @internal
473 */
474 template<class DEC>
476 {
477 public:
479  #ifdef BM64ADDR
480  typedef bm::id64_t block_idx_type;
481  #else
483  #endif
484 
485 public:
486  serial_stream_iterator(const unsigned char* buf);
488 
489  /// serialized bitvector size
490  block_idx_type bv_size() const { return bv_size_; }
491 
492  /// Returns true if end of bit-stream reached
493  bool is_eof() const { return end_of_stream_; }
494 
495  /// get next block
496  void next();
497 
498  /// skip all zero or all-one blocks
499  block_idx_type skip_mono_blocks();
500 
501  /// read bit block, using logical operation
502  unsigned get_bit_block(bm::word_t* dst_block,
503  bm::word_t* tmp_block,
504  set_operation op);
505 
506 
507  /// Read gap block data (with head)
508  void get_gap_block(bm::gap_word_t* dst_block);
509 
510  /// Return current decoder size
511  unsigned dec_size() const { return decoder_.size(); }
512 
513  /// Get low level access to the decoder (use carefully)
514  decoder_type& decoder() { return decoder_; }
515 
516  /// iterator is a state machine, this enum encodes
517  /// its key value
518  ///
520  {
521  e_unknown = 0,
522  e_list_ids, ///< plain int array
523  e_blocks, ///< stream of blocks
524  e_zero_blocks, ///< one or more zero bit blocks
525  e_one_blocks, ///< one or more all-1 bit blocks
526  e_bit_block, ///< one bit block
527  e_gap_block ///< one gap block
528 
529  };
530 
531  /// Returns iterator internal state
532  iterator_state state() const { return this->state_; }
533 
534  iterator_state get_state() const { return this->state_; }
535  /// Number of ids in the inverted list (valid for e_list_ids)
536  unsigned get_id_count() const { return this->id_cnt_; }
537 
538  /// Get last id from the id list
539  bm::id_t get_id() const { return this->last_id_; }
540 
541  /// Get current block index
542  block_idx_type block_idx() const { return this->block_idx_; }
543 
544 public:
545  /// member function pointer for bitset-bitset get operations
546  ///
547  typedef
548  unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
550 
551  unsigned
552  get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
553  unsigned
554  get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
555  unsigned
556  get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
557  unsigned
558  get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
559  unsigned
560  get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
561  unsigned
562  get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
563  unsigned
564  get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
565  unsigned
566  get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
567  unsigned
568  get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
569  unsigned
570  get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
571  unsigned
572  get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
573  unsigned
574  get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
575  unsigned
577  {
578  return get_bit_block_COUNT(dst_block, tmp_block);
579  }
580 
581  /// Get array of bits out of the decoder into bit block
582  /// (Converts inverted list into bits)
583  /// Returns number of words (bits) being read
584  unsigned get_arr_bit(bm::word_t* dst_block,
585  bool clear_target=true);
586 
587  /// Get current block type
588  unsigned get_block_type() const { return block_type_; }
589 
590  unsigned get_bit();
591 
592  void get_inv_arr(bm::word_t* block);
593 
594 protected:
595  get_bit_func_type bit_func_table_[bm::set_END];
596 
599  block_idx_type bv_size_;
601  unsigned id_cnt_; ///< Id counter for id list
602  bm::id_t last_id_; ///< Last id from the id list
603  gap_word_t glevels_[bm::gap_levels]; ///< GAP levels
604 
605  unsigned block_type_; ///< current block type
606  block_idx_type block_idx_; ///< current block index
607  block_idx_type mono_block_cnt_; ///< number of 0 or 1 blocks
608 
611 };
612 
613 /**
614  Deserializer, performs logical operations between bit-vector and
615  serialized bit-vector. This utility class potentially provides faster
616  and/or more memory efficient operation than more conventional deserialization
617  into memory bvector and then logical operation
618 
619  \ingroup bvserial
620 */
621 template<class BV>
623 {
624 public:
625  typedef BV bvector_type;
627 public:
628  /**
629  \brief Deserialize bvector using buffer as set operation argument
630 
631  \param bv - target bvector
632  \param buf - serialized buffer as a logical argument
633  \param temp_block - temporary block to avoid re-allocations
634  \param op - set algebra operation (default: OR)
635  \param exit_on_one - quick exit if set operation found some result
636 
637  \return bitcount
638  */
639  static
640  size_type deserialize(bvector_type& bv,
641  const unsigned char* buf,
642  bm::word_t* temp_block,
644  bool exit_on_one = false ///<! exit early if any one are found
645  );
646 private:
647  typedef
648  typename BV::blocks_manager_type blocks_manager_type;
649  typedef
651  typedef
653  typedef
656 
657 };
658 
659 
660 
661 
662 //----------------------------------------------------------------------------
663 //
664 //----------------------------------------------------------------------------
665 
666 
667 /// \internal
668 /// \ingroup bvserial
671  BM_HM_RESIZE = (1 << 1), ///< resized vector
672  BM_HM_ID_LIST = (1 << 2), ///< id list stored
673  BM_HM_NO_BO = (1 << 3), ///< no byte-order
674  BM_HM_NO_GAPL = (1 << 4), ///< no GAP levels
675  BM_HM_64_BIT = (1 << 5) ///< 64-bit vector
676 };
677 
678 const unsigned bie_cut_off = 16384; // binary interpolative encode size cut-off
679 
680 
681 // Serialization stream block type constants
682 //
683 
684 const unsigned char set_block_end = 0; //!< End of serialization
685 const unsigned char set_block_1zero = 1; //!< One all-zero block
686 const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
687 const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
688 const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
689 const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
690 const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
691 const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
692 const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
693 const unsigned char set_block_azero = 9; //!< All other blocks zero
694 const unsigned char set_block_aone = 10; //!< All other blocks one
695 const unsigned char set_block_bit = 11; //!< Plain bit block
696 const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
697 const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
698 const unsigned char set_block_gap = 14; //!< Plain GAP block
699 const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
700 const unsigned char set_block_arrbit = 16; //!< List of bits ON
701 const unsigned char set_block_bit_interval = 17; //!< Interval block
702 const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
703 const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
704 const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
705 const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
706 const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
707 const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
708 const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
709 const unsigned char set_block_64zero = 25; //!< lots of zero blocks
710 const unsigned char set_block_64one = 26; //!< lots of all-set blocks
711 
712 const unsigned char set_block_gap_bienc = 27; //!< Interpolated GAP block
713 const unsigned char set_block_arrgap_bienc = 28; //!< Interpolated GAP array
714 const unsigned char set_block_arrgap_bienc_inv = 29; //!< Interpolated GAP array (inverted)
715 const unsigned char set_block_arrbit_inv = 30; //!< List of bits OFF
716 const unsigned char set_block_arr_bienc = 31; //!< Interpolated block as int array
717 const unsigned char set_block_arr_bienc_inv = 32; //!< Interpolated inverted block int array
718 const unsigned char set_block_bitgap_bienc = 33; //!< Interpolated bit-block as GAPs
719 const unsigned char set_block_bit_digest0 = 34; //!< H-compression with digest mask
720 
721 
722 
723 
724 template<class BV>
726  bm::word_t* temp_block)
727 : alloc_(alloc),
728  compression_stat_(0),
729  gap_serial_(false),
730  byte_order_serial_(true),
731  compression_level_(bm::set_compression_default)
732 {
733  bit_idx_arr_.resize(65536);
734  if (temp_block == 0)
735  {
736  temp_block_ = alloc_.alloc_bit_block();
737  own_temp_block_ = true;
738  }
739  else
740  {
741  temp_block_ = temp_block;
742  own_temp_block_ = false;
743  }
744  compression_stat_ = (size_type*) alloc_.alloc_bit_block();
745  optimize_ = free_ = false;
746 }
747 
748 template<class BV>
750 : alloc_(allocator_type()),
751  compression_stat_(0),
752  gap_serial_(false),
753  byte_order_serial_(true),
754  compression_level_(bm::set_compression_default)
755 {
756  bit_idx_arr_.resize(bm::gap_max_bits);
757  if (temp_block == 0)
758  {
759  temp_block_ = alloc_.alloc_bit_block();
760  own_temp_block_ = true;
761  }
762  else
763  {
764  temp_block_ = temp_block;
765  own_temp_block_ = false;
766  }
767  compression_stat_ = (size_type*) alloc_.alloc_bit_block();
768  optimize_ = free_ = false;
769 }
770 
771 template<class BV>
773 {
774  if (own_temp_block_)
775  alloc_.free_bit_block(temp_block_);
776  if (compression_stat_)
777  alloc_.free_bit_block((bm::word_t*)compression_stat_);
778 }
779 
780 
781 template<class BV>
783 {
784  for (unsigned i = 0; i < 256; ++i)
785  compression_stat_[i] = 0;
786 }
787 
788 
789 template<class BV>
791 {
792  if (clevel <= bm::set_compression_max)
793  compression_level_ = clevel;
794 }
795 
796 template<class BV>
798 {
799  gap_serial_ = value;
800 }
801 
802 template<class BV>
804 {
805  byte_order_serial_ = value;
806 }
807 
808 template<class BV>
810 {
811  const blocks_manager_type& bman = bv.get_blocks_manager();
812 
813  unsigned char header_flag = 0;
814  if (bv.size() == bm::id_max) // no dynamic resize
815  header_flag |= BM_HM_DEFAULT;
816  else
817  header_flag |= BM_HM_RESIZE;
818 
819  if (!byte_order_serial_)
820  header_flag |= BM_HM_NO_BO;
821 
822  if (!gap_serial_)
823  header_flag |= BM_HM_NO_GAPL;
824 
825  #ifdef BM64ADDR
826  header_flag |= BM_HM_64_BIT;
827  #endif
828 
829  enc.put_8(header_flag);
830 
831  if (byte_order_serial_)
832  {
834  enc.put_8((unsigned char)bo);
835  }
836  // keep GAP levels information
837  if (gap_serial_)
838  {
839  enc.put_16(bman.glen(), bm::gap_levels);
840  }
841 
842  // save size (only if bvector has been down-sized)
843  if (header_flag & BM_HM_RESIZE)
844  {
845  #ifdef BM64ADDR
846  enc.put_64(bv.size());
847  #else
848  enc.put_32(bv.size());
849  #endif
850  }
851 
852 }
853 
854 template<class BV>
856  const bm::gap_word_t* gap_block, bm::encoder& enc)
857 {
858  unsigned len = bm::gap_length(gap_block);
859  if (len > 3) // Use Elias Gamma encoding
860  {
861  encoder::position_type enc_pos0 = enc.get_pos();
862 
863  bm::gap_word_t min_v = gap_block[1];
864 
866  enc.put_16(gap_block[0]); // gap header word
867  enc.put_16(min_v); // first word
868 
869  bit_out_type bout(enc);
870  BM_ASSERT(gap_block[len-1] == 65535);
871  bout.bic_encode_u16(&gap_block[2], len-3, min_v, 65535);
872  bout.flush();
873 
874  // re-evaluate coding efficiency
875  //
876  encoder::position_type enc_pos1 = enc.get_pos();
877  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
878  if (gamma_size > (len-1)*sizeof(gap_word_t))
879  {
880  enc.set_pos(enc_pos0);
881  }
882  else
883  {
884  compression_stat_[bm::set_block_gap_bienc]++;
885  return;
886  }
887  }
888  // save as plain GAP block
890  enc.put_16(gap_block, len-1);
891 
892  compression_stat_[bm::set_block_gap]++;
893 }
894 
895 
896 template<class BV>
898 {
899  unsigned len = gap_length(gap_block);
900  if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
901  {
902  encoder::position_type enc_pos0 = enc.get_pos();
903  {
904  bit_out_type bout(enc);
905  gamma_encoder_func gamma(bout);
906 
908  enc.put_16(gap_block[0]);
909 
910  for_each_dgap(gap_block, gamma);
911  }
912  // re-evaluate coding efficiency
913  //
914  encoder::position_type enc_pos1 = enc.get_pos();
915  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
916  if (gamma_size > (len-1)*sizeof(gap_word_t))
917  {
918  enc.set_pos(enc_pos0);
919  }
920  else
921  {
922  compression_stat_[bm::set_block_gap_egamma]++;
923  return;
924  }
925  }
926 
927  // save as plain GAP block
929  enc.put_16(gap_block, len-1);
930 
931  compression_stat_[bm::set_block_gap]++;
932 }
933 
934 template<class BV>
936  unsigned arr_len,
937  bm::encoder& enc,
938  bool inverted)
939 {
940  unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
942  if (compression_level_ > 3 && arr_len > 1)
943  {
944  encoder::position_type enc_pos0 = enc.get_pos();
945  {
946  bit_out_type bout(enc);
947  enc.put_8(scode);
948  bout.gamma(arr_len);
949  gap_word_t prev = gap_array[0];
950  bout.gamma(prev + 1);
951 
952  for (unsigned i = 1; i < arr_len; ++i)
953  {
954  gap_word_t curr = gap_array[i];
955  bout.gamma(curr - prev);
956  prev = curr;
957  }
958  }
959  encoder::position_type enc_pos1 = enc.get_pos();
960  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
961  unsigned plain_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
962  if (gamma_size >= plain_size)
963  {
964  enc.set_pos(enc_pos0); // rollback the bit stream
965  }
966  else
967  {
968  compression_stat_[scode]++;
969  return;
970  }
971  }
972  // save as a plain array
973  scode = inverted ? bm::set_block_arrgap_inv : bm::set_block_arrgap;
974  enc.put_prefixed_array_16(scode, gap_array, arr_len, true);
975  compression_stat_[scode]++;
976 }
977 
978 template<class BV>
980  unsigned arr_len,
981  bm::encoder& enc,
982  bool inverted)
983 {
984  BM_ASSERT(arr_len <= 65535);
985  unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
987  if (arr_len > 4)
988  {
989  encoder::position_type enc_pos0 = enc.get_pos();
990  {
991  bit_out_type bout(enc);
992 
993  bm::gap_word_t min_v = gap_block[0];
994  bm::gap_word_t max_v = gap_block[arr_len-1];
995  BM_ASSERT(max_v > min_v);
996 
997  enc.put_8(scode);
998  enc.put_16(min_v);
999  enc.put_16(max_v);
1000 
1001  bout.gamma(arr_len-4);
1002  bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1003  bout.flush();
1004  }
1005  encoder::position_type enc_pos1 = enc.get_pos();
1006  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1007  unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1008  if (enc_size >= raw_size)
1009  {
1010  enc.set_pos(enc_pos0); // rollback the bit stream
1011  }
1012  else
1013  {
1014  compression_stat_[scode]++;
1015  return;
1016  }
1017  }
1018  // save as a plain array
1019  scode = inverted ? bm::set_block_arrgap_inv : bm::set_block_arrgap;
1020  enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1021  compression_stat_[scode]++;
1022 }
1023 
1024 template<class BV>
1025 void serializer<BV>::add_model(unsigned char mod, unsigned score)
1026 {
1027  BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
1028  scores_[mod_size_] = score; models_[mod_size_] = mod;
1029  ++mod_size_;
1030 }
1031 
1032 template<class BV>
1034 {
1035  reset_models();
1036 
1037  // heuristics and hard-coded rules to determine
1038  // the best representation for bit-block
1039  //
1040  add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1041 
1042  if (compression_level_ <= 1)
1043  return bm::set_block_bit;
1044 
1045  // check if it is a very sparse block with some areas of dense areas
1046  bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1047  if (compression_level_ <= 5)
1048  add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1049 
1050  if (compression_level_ >= 2)
1051  {
1052  bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1053  if (!d0)
1054  {
1055  add_model(bm::set_block_azero, 0);
1056  return bm::set_block_azero;
1057  }
1058  unsigned d0_bc = word_bitcount64(d0);
1059  bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1060  if (d0 != ~0ull)
1061  add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1062 
1063  unsigned bc = bm::bit_block_count(block);
1064  BM_ASSERT(bc);
1065 
1066  if (bc == 1)
1067  {
1068  add_model(bm::set_block_bit_1bit, 16);
1069  return bm::set_block_bit_1bit;
1070  }
1071  unsigned inverted_bc = bm::gap_max_bits - bc;
1072  if (!inverted_bc)
1073  {
1074  add_model(bm::set_block_aone, 0);
1075  return bm::set_block_aone;
1076  }
1077 
1078  if (compression_level_ >= 3)
1079  {
1080  unsigned arr_size =
1081  unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1082  unsigned arr_size_inv =
1083  unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1084 
1085  add_model(bm::set_block_arrbit, arr_size*8);
1086  add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1087 
1088  if (compression_level_ >= 4)
1089  {
1090  const unsigned gamma_bits_per_int = 6;
1091  unsigned bit_gaps = bm::bit_block_calc_change(block);
1092 
1093  if (compression_level_ == 4)
1094  {
1095  if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1096  add_model(bm::set_block_gap_egamma,
1097  16 + (bit_gaps-1) * gamma_bits_per_int);
1098  if (bc < bit_gaps && bc < bm::gap_equiv_len)
1099  add_model(bm::set_block_arrgap_egamma,
1100  16 + bc * gamma_bits_per_int);
1101  if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1103  16 + inverted_bc * gamma_bits_per_int);
1104  }
1105 
1106  if (compression_level_ >= 5)
1107  {
1108  const unsigned bie_bits_per_int = 4;
1109 
1110  if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1111  add_model(bm::set_block_gap_bienc,
1112  32 + (bit_gaps-1) * bie_bits_per_int);
1113  if (bc < bit_gaps && bc < bm::gap_equiv_len)
1114  add_model(bm::set_block_arrgap_bienc, 16*3 + bc*bie_bits_per_int);
1115  else
1116  if (inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1117  add_model(bm::set_block_arrgap_bienc_inv, 16*3 + inverted_bc*bie_bits_per_int);
1118  else
1119  if (bc >= bm::gap_equiv_len && bc < bie_cut_off)
1120  add_model(bm::set_block_arr_bienc, 16*3 + bc * bie_bits_per_int);
1121  else
1122  if (inverted_bc > 3 && inverted_bc >= bm::gap_equiv_len && inverted_bc < bie_cut_off)
1123  add_model(bm::set_block_arr_bienc_inv, 16*3 + inverted_bc * bie_bits_per_int);
1124 
1125  if (bit_gaps >= bm::gap_max_buff_len && bit_gaps < bie_cut_off)
1126  add_model(bm::set_block_bitgap_bienc, 16*4 + (bit_gaps-2) * bie_bits_per_int);
1127 
1128 
1129  } // level >= 5
1130 
1131  } // level >= 3
1132  } // level >= 3
1133  } // level >= 2
1134 
1135  // find the best representation based on computed approx.models
1136  //
1137  unsigned min_score = bm::gap_max_bits;
1138  unsigned char model = bm::set_block_bit;
1139  for (unsigned i = 0; i < mod_size_; ++i)
1140  {
1141  if (scores_[i] < min_score)
1142  {
1143  min_score = scores_[i];
1144  model = models_[i];
1145  }
1146  }
1147  return model;
1148 }
1149 
1150 template<class BV>
1151 unsigned char
1153 {
1154  // heuristics and hard-coded rules to determine
1155  // the best representation for d-GAP block
1156  //
1157  if (compression_level_ <= 2)
1158  return bm::set_block_gap;
1159  unsigned len = bm::gap_length(gap_block);
1160  unsigned bc = bm::gap_bit_count_unr(gap_block);
1161  if (bc == 1)
1162  return bm::set_block_bit_1bit;
1163  if (bc < len)
1164  {
1165  if (compression_level_ < 4)
1166  return bm::set_block_arrgap;
1167  if (compression_level_ == 4)
1170  }
1171  unsigned inverted_bc = bm::gap_max_bits - bc;
1172  if (inverted_bc < len)
1173  {
1174  if (compression_level_ < 4)
1175  return bm::set_block_arrgap_inv;
1176  if (compression_level_ == 4)
1179  }
1180  if (len < 6)
1181  {
1182  return bm::set_block_gap;
1183  }
1184 
1185  if (compression_level_ == 4)
1186  return bm::set_block_gap_egamma;
1187  return bm::set_block_gap_bienc;
1188 }
1189 
1190 
1191 
1192 
1193 template<class BV>
1195 {
1196  gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
1197 
1198  gap_word_t arr_len;
1199  bool invert = false;
1200 
1201  unsigned char enc_choice = find_gap_best_encoding(gap_block);
1202  switch (enc_choice)
1203  {
1204  case bm::set_block_gap:
1205  gamma_gap_block(gap_block, enc); // TODO: use plain encode (non-gamma)
1206  break;
1207 
1209  arr_len = gap_convert_to_arr(gap_temp_block,
1210  gap_block,
1211  bm::gap_equiv_len-10);
1212  BM_ASSERT(arr_len == 1);
1214  enc.put_16(gap_temp_block[0]);
1215  compression_stat_[bm::set_block_bit_1bit]++;
1216  break;
1219  invert = true;
1220  // fall through is intentional here
1221  case bm::set_block_arrgap:
1223  arr_len = gap_convert_to_arr(gap_temp_block,
1224  gap_block,
1225  bm::gap_equiv_len-10,
1226  invert);
1227  BM_ASSERT(arr_len);
1228  gamma_gap_array(gap_temp_block, arr_len, enc, invert);
1229  break;
1231  interpolated_encode_gap_block(gap_block, enc);
1232  break;
1234  invert = true;
1235  // fall through is intentional here
1237  arr_len = gap_convert_to_arr(gap_temp_block,
1238  gap_block,
1239  bm::gap_equiv_len-64,
1240  invert);
1241  BM_ASSERT(arr_len);
1242  interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
1243  break;
1244  default:
1245  gamma_gap_block(gap_block, enc);
1246  } // switch
1247 }
1248 
1249 template<class BV>
1251  bm::encoder& enc,
1252  unsigned //size_control
1253  )
1254 {
1256  enc.put_8((blk[0]==0) ? 0 : 1); // encode start
1257 
1258  unsigned i, j;
1259  for (i = 0; i < bm::set_block_size; ++i)
1260  {
1261  if (blk[i] == 0)
1262  {
1263  // scan fwd to find 0 island length
1264  for (j = i+1; j < bm::set_block_size; ++j)
1265  {
1266  if (blk[j] != 0)
1267  break;
1268  }
1269  BM_ASSERT(j-i);
1270  enc.put_16((gap_word_t)(j-i));
1271  i = j - 1;
1272  }
1273  else
1274  {
1275  // scan fwd to find non-0 island length
1276  for (j = i+1; j < bm::set_block_size; ++j)
1277  {
1278  if (blk[j] == 0)
1279  {
1280  // look ahead to identify and ignore short 0-run
1281  if (((j+1 < bm::set_block_size) && blk[j+1]) ||
1282  ((j+2 < bm::set_block_size) && blk[j+2]))
1283  {
1284  ++j; // skip zero word
1285  continue;
1286  }
1287  break;
1288  }
1289  }
1290  BM_ASSERT(j-i);
1291  enc.put_16((gap_word_t)(j-i));
1292  enc.put_32(blk + i, j - i); // stream all bit-words now
1293 
1294  i = j - 1;
1295  }
1296  }
1297  compression_stat_[bm::set_block_bit_0runs]++;
1298 }
1299 
1300 
1301 template<class BV>
1303  bm::encoder& enc,
1304  bm::id64_t d0)
1305 {
1306  // evaluate a few "sure" models here and pick the best
1307  //
1308  if (d0 != ~0ull)
1309  {
1310  if (bit_model_0run_size_ < bit_model_d0_size_)
1311  {
1312  encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
1313  return;
1314  }
1315 
1316  // encode using digest0 method
1317  //
1319  enc.put_64(d0);
1320 
1321  while (d0)
1322  {
1323  bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
1324 
1325  unsigned wave = bm::word_bitcount64(t - 1);
1326  unsigned off = wave * bm::set_block_digest_wave_size;
1327 
1328  unsigned j = 0;
1329  do
1330  {
1331  enc.put_32(block[off+j+0]);
1332  enc.put_32(block[off+j+1]);
1333  enc.put_32(block[off+j+2]);
1334  enc.put_32(block[off+j+3]);
1335  j += 4;
1336  } while (j < bm::set_block_digest_wave_size);
1337 
1338  d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
1339  } // while
1340 
1341  compression_stat_[bm::set_block_bit_digest0]++;
1342  }
1343  else
1344  {
1345  if (bit_model_0run_size_ < unsigned(bm::set_block_size*sizeof(bm::word_t)))
1346  {
1347  encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
1348  return;
1349  }
1350 
1352  compression_stat_[bm::set_block_bit]++;
1353  }
1354 }
1355 
1356 
1357 
1358 template<class BV>
1359 void serializer<BV>::serialize(const BV& bv,
1360  typename serializer<BV>::buffer& buf,
1361  const statistics_type* bv_stat)
1362 {
1363  statistics_type stat;
1364  if (!bv_stat)
1365  {
1366  bv.calc_stat(&stat);
1367  bv_stat = &stat;
1368  }
1369 
1370  buf.resize(bv_stat->max_serialize_mem, false); // no-copy resize
1371  optimize_ = free_ = false;
1372 
1373  size_type slen = this->serialize(bv, buf.data(), buf.size());
1374  BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
1375  BM_ASSERT(slen);
1376 
1377  buf.resize(slen);
1378 }
1379 
1380 template<class BV>
1382  typename serializer<BV>::buffer& buf)
1383 {
1384  statistics_type st;
1385  optimize_ = free_ = true; // set the destructive mode
1386 
1387  typename bvector_type::mem_pool_guard mp_g_z;
1388  mp_g_z.assign_if_not_set(pool_, bv);
1389 
1390  bv.optimize(temp_block_, BV::opt_compress, &st);
1391  serialize(bv, buf, &st);
1392 
1393  optimize_ = free_ = false; // restore the default mode
1394 }
1395 
1396 template<class BV>
1398  bm::encoder& enc,
1399  bool inverted)
1400 {
1401  unsigned arr_len;
1402  unsigned mask = inverted ? ~0u : 0u;
1403  // TODO: get rid of max bits
1404  arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
1405  block,
1408  mask);
1409  if (arr_len)
1410  {
1411  unsigned char scode =
1413  enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len, true);
1414  compression_stat_[scode]++;
1415  return;
1416  }
1417  encode_bit_digest(block, enc, digest0_);
1418 }
1419 
1420 template<class BV>
1422  bm::encoder& enc)
1423 {
1424  unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
1425  BM_ASSERT(len); (void)len;
1426  gamma_gap_block(bit_idx_arr_.data(), enc);
1427 }
1428 
1429 template<class BV>
1431  bm::encoder& enc, bool inverted)
1432 {
1433  unsigned mask = inverted ? ~0u : 0u;
1434  unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
1435  block,
1438  mask);
1439  if (arr_len)
1440  {
1441  gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
1442  return;
1443  }
1445  compression_stat_[bm::set_block_bit]++;
1446 }
1447 
1448 template<class BV>
1450  bm::encoder& enc, bool inverted)
1451 {
1452  unsigned mask = inverted ? ~0u : 0u;
1453  unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
1454  block,
1457  mask);
1458  if (arr_len)
1459  {
1460  interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
1461  return;
1462  }
1463  encode_bit_digest(block, enc, digest0_);
1464 }
1465 
1466 template<class BV>
1468  bm::encoder& enc)
1469 {
1470  unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
1471  BM_ASSERT(len); (void)len;
1472  interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
1473 }
1474 
1475 template<class BV>
1477  bm::encoder& enc)
1478 {
1479  unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
1480  BM_ASSERT(len); (void)len;
1481  BM_ASSERT(len <= bie_cut_off);
1482 
1483  const unsigned char scode = bm::set_block_bitgap_bienc;
1484 
1485  encoder::position_type enc_pos0 = enc.get_pos();
1486  {
1487  bit_out_type bout(enc);
1488 
1489  bm::gap_word_t head = (bit_idx_arr_[0] & 1); // isolate start flag
1490  bm::gap_word_t min_v = bit_idx_arr_[1];
1491 
1492  BM_ASSERT(bit_idx_arr_[len] == 65535);
1493  BM_ASSERT(bit_idx_arr_[len] > min_v);
1494 
1495  enc.put_8(scode);
1496 
1497  enc.put_8((unsigned char)head);
1498  enc.put_16(bm::gap_word_t(len));
1499  enc.put_16(min_v);
1500  bout.bic_encode_u16(&bit_idx_arr_[2], len-2, min_v, 65535);
1501  bout.flush();
1502  }
1503  encoder::position_type enc_pos1 = enc.get_pos();
1504  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1505  unsigned raw_size = sizeof(word_t) * bm::set_block_size;
1506  if (enc_size >= raw_size)
1507  {
1508  enc.set_pos(enc_pos0); // rollback the bit stream
1509  }
1510  else
1511  {
1512  compression_stat_[scode]++;
1513  return;
1514  }
1515  encode_bit_digest(block, enc, digest0_);
1516 }
1517 
1518 template<class BV>
1520  bm::encoder& enc, bool inverted)
1521 {
1522  unsigned mask = inverted ? ~0u : 0u;
1523  unsigned arr_len = bit_convert_to_arr(bit_idx_arr_.data(),
1524  block,
1527  mask);
1528  if (arr_len)
1529  {
1530  unsigned char scode =
1532 
1533  encoder::position_type enc_pos0 = enc.get_pos();
1534  {
1535  bit_out_type bout(enc);
1536 
1537  bm::gap_word_t min_v = bit_idx_arr_[0];
1538  bm::gap_word_t max_v = bit_idx_arr_[arr_len-1];
1539  BM_ASSERT(max_v > min_v);
1540 
1541  enc.put_8(scode);
1542  enc.put_16(min_v);
1543  enc.put_16(max_v);
1544  enc.put_16(bm::gap_word_t(arr_len));
1545  bout.bic_encode_u16(&bit_idx_arr_[1], arr_len-2, min_v, max_v);
1546  bout.flush();
1547  }
1548  encoder::position_type enc_pos1 = enc.get_pos();
1549  unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1550  unsigned raw_size = sizeof(word_t) * bm::set_block_size;
1551  if (enc_size >= raw_size)
1552  {
1553  enc.set_pos(enc_pos0); // rollback the bit stream
1554  }
1555  else
1556  {
1557  if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
1558  {
1559  enc.set_pos(enc_pos0); // rollback the bit stream
1560  }
1561  else
1562  {
1563  compression_stat_[scode]++;
1564  return;
1565  }
1566  }
1567  }
1568  encode_bit_digest(block, enc, digest0_);
1569 }
1570 
1571 
1572 
1573 #define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \
1574  if (nb == 1u) \
1575  enc.put_8(B_1ZERO); \
1576  else if (nb < 256u) \
1577  { \
1578  enc.put_8(B_8ZERO); \
1579  enc.put_8((unsigned char)nb); \
1580  } \
1581  else if (nb < 65536u) \
1582  { \
1583  enc.put_8(B_16ZERO); \
1584  enc.put_16((unsigned short)nb); \
1585  } \
1586  else if (nb < bm::id_max32) \
1587  { \
1588  enc.put_8(B_32ZERO); \
1589  enc.put_32(unsigned(nb)); \
1590  } \
1591  else \
1592  {\
1593  enc.put_8(B_64ZERO); \
1594  enc.put_64(nb); \
1595  }
1596 
1597 #define BM_SET_ONE_BLOCKS(x) \
1598  {\
1599  block_idx_type end_block = i + x; \
1600  for (;i < end_block; ++i) \
1601  bman.set_block_all_set(i); \
1602  } \
1603  --i
1604 
1605 
1606 template<class BV>
1607 typename serializer<BV>::size_type
1609  unsigned char* buf, size_t buf_size)
1610 {
1611  BM_ASSERT(temp_block_);
1612 
1613  reset_compression_stats();
1614  const blocks_manager_type& bman = bv.get_blocks_manager();
1615 
1616  bm::encoder enc(buf, buf_size); // create the encoder
1617  encode_header(bv, enc);
1618 
1619  block_idx_type i, j;
1620  for (i = 0; i < bm::set_total_blocks; ++i)
1621  {
1622  unsigned i0, j0;
1623  bman.get_block_coord(i, i0, j0);
1624 
1625  const bm::word_t* blk = bman.get_block(i0, j0);
1626 
1627  // ----------------------------------------------------
1628  // Empty or ONE block serialization
1629  //
1630  // TODO: make a function to check this in ONE pass
1631  //
1632  bool flag;
1633  flag = bm::check_block_zero(blk, false/*shallow check*/);
1634  if (flag)
1635  {
1636  zero_block:
1637  flag = 1;
1638  block_idx_type next_nb = bman.find_next_nz_block(i+1, false);
1639  if (next_nb == bm::set_total_blocks) // no more blocks
1640  {
1641  enc.put_8(set_block_azero);
1642  return (size_type)enc.size();
1643  }
1644  block_idx_type nb = next_nb - i;
1645 
1646  if (nb > 1 && nb < 128)
1647  {
1648  // special (but frequent) case -- GAP delta fits in 7bits
1649  unsigned char c = (unsigned char)((1u << 7) | nb);
1650  enc.put_8(c);
1651  }
1652  else
1653  {
1654  BM_SER_NEXT_GRP(enc, nb, set_block_1zero,
1655  set_block_8zero,
1656  set_block_16zero,
1657  set_block_32zero,
1658  set_block_64zero)
1659  }
1660  i = next_nb - 1;
1661  continue;
1662  }
1663  else
1664  {
1665  flag = bm::check_block_one(blk, false); // shallow scan
1666  if (flag)
1667  {
1668  full_block:
1669  flag = 1;
1670  // Look ahead for similar blocks
1671  // TODO: optimize search for next 0xFFFF block
1672  for(j = i+1; j < bm::set_total_blocks; ++j)
1673  {
1674  bman.get_block_coord(j, i0, j0);
1675  const bm::word_t* blk_next = bman.get_block(i0, j0);
1676  if (flag != bm::check_block_one(blk_next, true)) // deep scan
1677  break;
1678  }
1679  if (j == bm::set_total_blocks)
1680  {
1681  enc.put_8(set_block_aone);
1682  break;
1683  }
1684  else
1685  {
1686  block_idx_type nb = j - i;
1687  BM_SER_NEXT_GRP(enc, nb, set_block_1one,
1688  set_block_8one,
1689  set_block_16one,
1690  set_block_32one,
1691  set_block_64one)
1692  }
1693  i = j - 1;
1694  continue;
1695  }
1696  }
1697 
1698  // ---------------------------------------------
1699  // GAP serialization
1700  //
1701  if (BM_IS_GAP(blk))
1702  {
1703  encode_gap_block(BMGAP_PTR(blk), enc);
1704  }
1705  else
1706  {
1707  // ----------------------------------------------
1708  // BIT BLOCK serialization
1709  //
1710  unsigned char model = find_bit_best_encoding(blk);
1711  switch (model)
1712  {
1713  case bm::set_block_bit:
1714  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1715  break;
1717  {
1718  unsigned bit_idx = 0;
1719  bm::bit_block_find(blk, bit_idx, &bit_idx);
1720  BM_ASSERT(bit_idx < 65536);
1721  enc.put_8(bm::set_block_bit_1bit); enc.put_16(bm::short_t(bit_idx));
1722  compression_stat_[bm::set_block_bit_1bit]++;
1723  continue;
1724  }
1725  break;
1726  case bm::set_block_azero: // empty block all of the sudden ?
1727  goto zero_block;
1728  case bm::set_block_aone:
1729  goto full_block;
1730  case bm::set_block_arrbit:
1731  encode_bit_array(blk, enc, false);
1732  break;
1734  encode_bit_array(blk, enc, true);
1735  break;
1737  gamma_gap_bit_block(blk, enc);
1738  break;
1740  encode_bit_interval(blk, enc, 0); // TODO: get rid of param 3 (0)
1741  break;
1743  gamma_arr_bit_block(blk, enc, false);
1744  break;
1746  gamma_arr_bit_block(blk, enc, true);
1747  break;
1749  bienc_arr_bit_block(blk, enc, false);
1750  break;
1752  bienc_arr_bit_block(blk, enc, true);
1753  break;
1755  interpolated_arr_bit_block(blk, enc, false);
1756  break;
1758  interpolated_arr_bit_block(blk, enc, true); // inverted
1759  break;
1761  interpolated_gap_bit_block(blk, enc);
1762  break;
1764  bienc_gap_bit_block(blk, enc);
1765  break;
1767  encode_bit_digest(blk, enc, digest0_);
1768  break;
1769  default:
1770  BM_ASSERT(0); // predictor returned an unknown model
1771  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1772  }
1773  } // bit-block processing
1774 
1775  // destructive serialization mode
1776  //
1777  if (free_)
1778  {
1779  // const cast is ok, because it is a requested mode
1780  const_cast<blocks_manager_type&>(bman).zero_block(i);
1781  }
1782 
1783  } // for i
1784  enc.put_8(set_block_end);
1785  return (size_type)enc.size();
1786 }
1787 
1788 
1789 
1790 /// Bit mask flags for serialization algorithm
1791 /// \ingroup bvserial
1793  BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
1794  BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
1795 };
1796 
1797 /*!
1798  \brief Saves bitvector into memory.
1799 
1800  Function serializes content of the bitvector into memory.
1801  Serialization adaptively uses compression(variation of GAP encoding)
1802  when it is benefitial.
1803 
1804  \param bv - source bvecor
1805  \param buf - pointer on target memory area. No range checking in the
1806  function. It is responsibility of programmer to allocate sufficient
1807  amount of memory using information from calc_stat function.
1808 
1809  \param temp_block - pointer on temporary memory block. Cannot be 0;
1810  If you want to save memory across multiple bvectors
1811  allocate temporary block using allocate_tempblock and pass it to
1812  serialize.
1813  (Serialize does not deallocate temp_block.)
1814 
1815  \param serialization_flags
1816  Flags controlling serilization (bit-mask)
1817  (use OR-ed serialization flags)
1818 
1819  \ingroup bvserial
1820 
1821  \return Size of serialization block.
1822  \sa calc_stat, serialization_flags
1823 
1824 */
1825 /*!
1826  Serialization format:
1827  <pre>
1828 
1829  | HEADER | BLOCKS |
1830 
1831  Header structure:
1832  BYTE : Serialization header (bit mask of BM_HM_*)
1833  BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
1834  INT16: Reserved (0)
1835  INT16: Reserved Flags (0)
1836 
1837  </pre>
1838 */
1839 template<class BV>
1840 size_t serialize(const BV& bv,
1841  unsigned char* buf,
1842  bm::word_t* temp_block = 0,
1843  unsigned serialization_flags = 0)
1844 {
1845  bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
1846 
1848  bv_serial.byte_order_serialization(false);
1849 
1851  bv_serial.gap_length_serialization(false);
1852  else
1853  bv_serial.gap_length_serialization(true);
1854 
1855  return bv_serial.serialize(bv, buf, 0);
1856 }
1857 
1858 /*!
1859  @brief Saves bitvector into memory.
1860  Allocates temporary memory block for bvector.
1861 
1862  \param bv - source bvecor
1863  \param buf - pointer on target memory area. No range checking in the
1864  function. It is responsibility of programmer to allocate sufficient
1865  amount of memory using information from calc_stat function.
1866 
1867  \param serialization_flags
1868  Flags controlling serilization (bit-mask)
1869  (use OR-ed serialization flags)
1870 
1871  \ingroup bvserial
1872 */
1873 template<class BV>
1874 size_t serialize(BV& bv,
1875  unsigned char* buf,
1876  unsigned serialization_flags=0)
1877 {
1878  return bm::serialize(bv, buf, 0, serialization_flags);
1879 }
1880 
1881 
1882 
1883 /*!
1884  @brief Bitvector deserialization from memory.
1885 
1886  @param bv - target bvector
1887  @param buf - pointer on memory which keeps serialized bvector
1888  @param temp_block - pointer on temporary block,
1889  if NULL bvector allocates own.
1890  @return Number of bytes consumed by deserializer.
1891 
1892  Function deserializes bitvector from memory block containig results
1893  of previous serialization. Function does not remove bits
1894  which are currently set. Effectively it means OR logical operation
1895  between current bitset and previously serialized one.
1896 
1897  @ingroup bvserial
1898 */
1899 template<class BV>
1900 size_t deserialize(BV& bv,
1901  const unsigned char* buf,
1902  bm::word_t* temp_block=0)
1903 {
1904  ByteOrder bo_current = globals<true>::byte_order();
1905 
1906  bm::decoder dec(buf);
1907  unsigned char header_flag = dec.get_8();
1908  ByteOrder bo = bo_current;
1909  if (!(header_flag & BM_HM_NO_BO))
1910  {
1911  bo = (bm::ByteOrder) dec.get_8();
1912  }
1913 
1914  if (bo_current == bo)
1915  {
1917  return deserial.deserialize(bv, buf, temp_block);
1918  }
1919  switch (bo_current)
1920  {
1921  case BigEndian:
1922  {
1924  return deserial.deserialize(bv, buf, temp_block);
1925  }
1926  case LittleEndian:
1927  {
1929  return deserial.deserialize(bv, buf, temp_block);
1930  }
1931  default:
1932  BM_ASSERT(0);
1933  };
1934  return 0;
1935 }
1936 
1937 template<class DEC>
1939  unsigned block_type,
1940  bm::gap_word_t* dst_arr)
1941 {
1942  typedef bit_in<DEC> bit_in_type;
1943 
1944  gap_word_t len = 0;
1945 
1946  switch (block_type)
1947  {
1948  case set_block_bit_1bit:
1949  dst_arr[0] = decoder.get_16();
1950  len = 1;
1951  break;
1952  case set_block_arrgap:
1953  case set_block_arrgap_inv:
1954  len = decoder.get_16();
1955  decoder.get_16(dst_arr, len);
1956  break;
1959  {
1960  bit_in_type bin(decoder);
1961  len = (gap_word_t)bin.gamma();
1962  gap_word_t prev = 0;
1963  for (gap_word_t k = 0; k < len; ++k)
1964  {
1965  gap_word_t bit_idx = (gap_word_t)bin.gamma();
1966  if (k == 0) --bit_idx; // TODO: optimization
1967  bit_idx = (gap_word_t)(bit_idx + prev);
1968  prev = bit_idx;
1969  dst_arr[k] = bit_idx;
1970  } // for
1971  }
1972  break;
1975  {
1976  bm::gap_word_t min_v = decoder.get_16();
1977  bm::gap_word_t max_v = decoder.get_16();
1978 
1979  bit_in_type bin(decoder);
1980  len = bm::gap_word_t(bin.gamma() + 4);
1981  dst_arr[0] = min_v;
1982  dst_arr[len-1] = max_v;
1983  bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
1984  }
1985  break;
1986  default:
1987  BM_ASSERT(0);
1988  #ifndef BM_NO_STL
1989  throw std::logic_error(err_msg());
1990  #else
1991  BM_THROW(BM_ERR_SERIALFORMAT);
1992  #endif
1993  }
1994  return len;
1995 }
1996 
1997 template<class DEC>
1999 {
2000  BM_ASSERT(!BM_IS_GAP(blk));
2001 
2002  typedef bit_in<DEC> bit_in_type;
2003  bm::gap_word_t min_v = dec.get_16();
2004  bm::gap_word_t max_v = dec.get_16();
2005  unsigned arr_len = dec.get_16();
2006 
2007  bit_in_type bin(dec);
2008 
2009  if (!IS_VALID_ADDR(blk))
2010  {
2011  bin.bic_decode_u16_dry(arr_len-2, min_v, max_v);
2012  return;
2013  }
2014  bm::set_bit(blk, min_v);
2015  bm::set_bit(blk, max_v);
2016  bin.bic_decode_u16_bitset(blk, arr_len-2, min_v, max_v);
2017 }
2018 
2019 template<class DEC>
2021 {
2022  // TODO: optimization
2023  bm::bit_block_set(blk, 0);
2024  this->read_bic_arr(decoder, blk);
2025  bm::bit_invert(blk);
2026 }
2027 
2028 template<class DEC>
2030 {
2031  BM_ASSERT(!BM_IS_GAP(blk));
2032 
2033  typedef bit_in<DEC> bit_in_type;
2034 
2035  bm::gap_word_t head = dec.get_8();
2036  unsigned arr_len = dec.get_16();
2037  bm::gap_word_t min_v = dec.get_16();
2038 
2039  BM_ASSERT(arr_len <= bie_cut_off);
2040 
2041 
2042  id_array_[0] = head;
2043  id_array_[1] = min_v;
2044  id_array_[arr_len] = 65535;
2045 
2046  bit_in_type bin(dec);
2047  bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
2048 
2049  if (!IS_VALID_ADDR(blk))
2050  {
2051  return;
2052  }
2053  bm::gap_add_to_bitset(blk, id_array_, arr_len);
2054 }
2055 
2056 template<class DEC>
2058  bm::word_t* block)
2059 {
2060  bm::id64_t d0 = dec.get_64();
2061  while (d0)
2062  {
2063  bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
2064 
2065  unsigned wave = bm::word_bitcount64(t - 1);
2066  unsigned off = wave * bm::set_block_digest_wave_size;
2067  unsigned j = 0;
2068  if (!IS_VALID_ADDR(block))
2069  {
2070  do
2071  {
2072  dec.get_32();
2073  dec.get_32();
2074  dec.get_32();
2075  dec.get_32();
2076  j += 4;
2077  } while (j < bm::set_block_digest_wave_size);
2078  }
2079  else
2080  {
2081  do
2082  {
2083  block[off+j+0] |= dec.get_32();
2084  block[off+j+1] |= dec.get_32();
2085  block[off+j+2] |= dec.get_32();
2086  block[off+j+3] |= dec.get_32();
2087  j += 4;
2088  } while (j < bm::set_block_digest_wave_size);
2089  }
2090 
2091  d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
2092  } // while
2093 }
2094 
2095 template<class DEC>
2097 {
2098  //TODO: optimization if block exists and it is OR-ed read
2099  bm::bit_block_set(blk, 0);
2100 
2101  unsigned char run_type = dec.get_8();
2102  for (unsigned j = 0; j < bm::set_block_size; run_type = !run_type)
2103  {
2104  unsigned run_length = dec.get_16();
2105  if (run_type)
2106  {
2107  unsigned run_end = j + run_length;
2108  BM_ASSERT(run_end <= bm::set_block_size);
2109  for (;j < run_end; ++j)
2110  {
2111  unsigned w = dec.get_32();
2112  blk[j] = w;
2113  }
2114  }
2115  else
2116  {
2117  j += run_length;
2118  }
2119  } // for j
2120 }
2121 
2122 
2123 template<class DEC>
2125  unsigned block_type,
2126  bm::gap_word_t* dst_block,
2127  bm::gap_word_t& gap_head)
2128 {
2129  typedef bit_in<DEC> bit_in_type;
2130 
2131  switch (block_type)
2132  {
2133  case set_block_gap:
2134  {
2135  unsigned len = gap_length(&gap_head);
2136  --len;
2137  *dst_block = gap_head;
2138  decoder.get_16(dst_block+1, len - 1);
2139  dst_block[len] = gap_max_bits - 1;
2140  }
2141  break;
2142  case set_block_bit_1bit:
2143  {
2144  gap_set_all(dst_block, bm::gap_max_bits, 0);
2145  gap_word_t bit_idx = decoder.get_16();
2146  gap_add_value(dst_block, bit_idx);
2147  }
2148  break;
2149  case set_block_arrgap:
2150  case set_block_arrgap_inv:
2151  {
2152  gap_set_all(dst_block, bm::gap_max_bits, 0);
2153  gap_word_t len = decoder.get_16();
2154  for (gap_word_t k = 0; k < len; ++k)
2155  {
2156  gap_word_t bit_idx = decoder.get_16();
2157  gap_add_value(dst_block, bit_idx);
2158  } // for
2159  }
2160  break;
2165  {
2166  unsigned arr_len = read_id_list(decoder, block_type, id_array_);
2167  dst_block[0] = 0;
2168  unsigned gap_len =
2169  gap_set_array(dst_block, id_array_, arr_len);
2170  BM_ASSERT(gap_len == bm::gap_length(dst_block));
2171  (void)(gap_len);
2172  }
2173  break;
2174  case set_block_gap_egamma:
2175  {
2176  unsigned len = (gap_head >> 3);
2177  --len;
2178  // read gamma GAP block into a dest block
2179  {
2180  *dst_block = gap_head;
2181  gap_word_t* gap_data_ptr = dst_block + 1;
2182 
2183  bit_in_type bin(decoder);
2184  {
2185  gap_word_t v = (gap_word_t)bin.gamma();
2186  gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
2187  for (unsigned i = 1; i < len; ++i)
2188  {
2189  v = (gap_word_t)bin.gamma();
2190  gap_sum = (gap_word_t)(gap_sum + v);
2191  *(++gap_data_ptr) = gap_sum;
2192  }
2193  dst_block[len+1] = bm::gap_max_bits - 1;
2194  }
2195  }
2196 
2197  }
2198  break;
2199  case set_block_gap_bienc:
2200  {
2201  unsigned len = (gap_head >> 3);
2202  *dst_block = gap_head;
2203  bm::gap_word_t min_v = decoder.get_16();
2204  dst_block[1] = min_v;
2205  bit_in_type bin(decoder);
2206  bin.bic_decode_u16(&dst_block[2], len-2, min_v, 65535);
2207  dst_block[len] = bm::gap_max_bits - 1;
2208  }
2209  break;
2210  default:
2211  BM_ASSERT(0);
2212  #ifndef BM_NO_STL
2213  throw std::logic_error(err_msg());
2214  #else
2215  BM_THROW(BM_ERR_SERIALFORMAT);
2216  #endif
2217  }
2218 
2219  if (block_type == set_block_arrgap_egamma_inv ||
2220  block_type == set_block_arrgap_inv ||
2221  block_type == set_block_arrgap_bienc_inv)
2222  {
2223  gap_invert(dst_block);
2224  }
2225 }
2226 
2227 // -------------------------------------------------------------------------
2228 
2229 template<class BV, class DEC>
2231 {
2232  temp_block_ = alloc_.alloc_bit_block();
2233  bit_idx_arr_.resize(bm::gap_max_bits);
2234  this->id_array_ = bit_idx_arr_.data();
2235  gap_temp_block_.resize(bm::gap_max_bits);
2236 }
2237 
2238 template<class BV, class DEC>
2240 {
2241  alloc_.free_bit_block(temp_block_);
2242 }
2243 
2244 
2245 template<class BV, class DEC>
2246 void
2248  bvector_type& bv, blocks_manager_type& bman,
2249  block_idx_type nb,
2250  bm::word_t* blk)
2251 {
2252  gap_word_t gap_head;
2253  bm::gap_word_t* gap_temp_block = gap_temp_block_.data();
2254 
2255  switch (btype)
2256  {
2257  case set_block_gap:
2258  case set_block_gapbit:
2259  {
2260  gap_head = (gap_word_t)
2261  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
2262 
2263  unsigned len = gap_length(&gap_head);
2264  int level = gap_calc_level(len, bman.glen());
2265  --len;
2266  if (level == -1) // Too big to be GAP: convert to BIT block
2267  {
2268  *gap_temp_block = gap_head;
2269  dec.get_16(gap_temp_block+1, len - 1);
2270  gap_temp_block[len] = gap_max_bits - 1;
2271 
2272  if (blk == 0) // block does not exist yet
2273  {
2274  blk = bman.get_allocator().alloc_bit_block();
2275  bman.set_block(nb, blk);
2276  gap_convert_to_bitset(blk, gap_temp_block);
2277  }
2278  else // We have some data already here. Apply OR operation.
2279  {
2280  gap_convert_to_bitset(temp_block_,
2281  gap_temp_block);
2282  bv.combine_operation_with_block(nb,
2283  temp_block_,
2284  0,
2285  BM_OR);
2286  }
2287  return;
2288  } // level == -1
2289 
2290  set_gap_level(&gap_head, level);
2291 
2292  if (blk == 0)
2293  {
2294  BM_ASSERT(level >= 0);
2295  gap_word_t* gap_blk =
2296  bman.get_allocator().alloc_gap_block(unsigned(level), bman.glen());
2297  gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
2298  *gap_blk_ptr = gap_head;
2299  bm::set_gap_level(gap_blk_ptr, level);
2300  blk = bman.set_block(nb, (bm::word_t*)BMPTR_SETBIT0(gap_blk));
2301  BM_ASSERT(blk == 0);
2302 
2303  dec.get_16(gap_blk + 1, len - 1);
2304  gap_blk[len] = bm::gap_max_bits - 1;
2305  }
2306  else // target block exists
2307  {
2308  // read GAP block into a temp memory and perform OR
2309  *gap_temp_block = gap_head;
2310  dec.get_16(gap_temp_block + 1, len - 1);
2311  gap_temp_block[len] = bm::gap_max_bits - 1;
2312  break;
2313  }
2314  return;
2315  }
2316  case set_block_arrgap:
2319  {
2320  unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
2321  gap_temp_block[0] = 0; // reset unused bits in gap header
2322  unsigned gap_len =
2323  gap_set_array(gap_temp_block, this->id_array_, arr_len);
2324 
2325  BM_ASSERT(gap_len == bm::gap_length(gap_temp_block));
2326  int level = gap_calc_level(gap_len, bman.glen());
2327  if (level == -1) // Too big to be GAP: convert to BIT block
2328  {
2329  gap_convert_to_bitset(temp_block_, gap_temp_block);
2330  bv.combine_operation_with_block(nb,
2331  temp_block_,
2332  0,
2333  BM_OR);
2334  return;
2335  }
2336 
2337  break;
2338  }
2339  case set_block_gap_egamma:
2340  gap_head = dec.get_16();
2342  case set_block_arrgap_inv:
2344  this->read_gap_block(dec, btype, gap_temp_block, gap_head);
2345  break;
2347  gap_head = dec.get_16();
2348  this->read_gap_block(dec, btype, gap_temp_block, gap_head);
2349  break;
2350  default:
2351  BM_ASSERT(0);
2352  #ifndef BM_NO_STL
2353  throw std::logic_error(this->err_msg());
2354  #else
2355  BM_THROW(BM_ERR_SERIALFORMAT);
2356  #endif
2357  }
2358 
2359  bv.combine_operation_with_block(nb,
2360  (bm::word_t*)gap_temp_block,
2361  1,
2362  BM_OR);
2363 }
2364 
2365 template<class BV, class DEC>
2367  decoder_type& dec,
2368  blocks_manager_type& bman,
2369  block_idx_type nb,
2370  bm::word_t* blk)
2371 {
2372  if (!blk)
2373  {
2374  blk = bman.get_allocator().alloc_bit_block();
2375  bman.set_block(nb, blk);
2376  bm::bit_block_set(blk, 0);
2377  }
2378  else
2379  if (BM_IS_GAP(blk))
2380  blk = bman.deoptimize_block(nb);
2381 
2382  BM_ASSERT(blk != temp_block_);
2383 
2384  switch (btype)
2385  {
2386  case set_block_arrbit_inv:
2387  if (IS_FULL_BLOCK(blk))
2388  blk = bman.deoptimize_block(nb);
2389  bm::bit_block_set(temp_block_, ~0u);
2390  {
2391  gap_word_t len = dec.get_16();
2392  for (unsigned k = 0; k < len; ++k)
2393  {
2394  gap_word_t bit_idx = dec.get_16();
2395  bm::clear_bit(temp_block_, bit_idx);
2396  } // for
2397  }
2398  bm::bit_block_or(blk, temp_block_);
2399  break;
2401  this->read_bic_arr(dec, blk);
2402  break;
2404  BM_ASSERT(blk != temp_block_);
2405  if (IS_FULL_BLOCK(blk))
2406  blk = bman.deoptimize_block(nb);
2407  // TODO: optimization
2408  bm::bit_block_set(temp_block_, 0);
2409  this->read_bic_arr(dec, temp_block_);
2410  bm::bit_invert(temp_block_);
2411  bm::bit_block_or(blk, temp_block_);
2412  break;
2414  this->read_bic_gap(dec, blk);
2415  break;
2417  this->read_digest0_block(dec, blk);
2418  break;
2419  default:
2420  BM_ASSERT(0);
2421  #ifndef BM_NO_STL
2422  throw std::logic_error(this->err_msg());
2423  #else
2424  BM_THROW(BM_ERR_SERIALFORMAT);
2425  #endif
2426  } // switch
2427 }
2428 
2429 
2430 
2431 
2432 template<class BV, class DEC>
2434  const unsigned char* buf,
2435  bm::word_t* /*temp_block*/)
2436 {
2437  blocks_manager_type& bman = bv.get_blocks_manager();
2438  if (!bman.is_init())
2439  bman.init_tree();
2440 
2441  bm::word_t* temp_block = temp_block_;
2442 
2443  bm::strategy strat = bv.get_new_blocks_strat();
2444  bv.set_new_blocks_strat(BM_GAP);
2445 
2446  decoder_type dec(buf);
2447 
2448  // Reading th serialization header
2449  //
2450  unsigned char header_flag = dec.get_8();
2451  if (!(header_flag & BM_HM_NO_BO))
2452  {
2453  /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
2454  }
2455  if (header_flag & BM_HM_64_BIT)
2456  {
2457  #ifndef BM64ADDR
2458  BM_ASSERT(0); // 64-bit address vector cannot read on 32
2459  #ifndef BM_NO_STL
2460  throw std::logic_error(this->err_msg());
2461  #else
2462  BM_THROW(BM_ERR_SERIALFORMAT);
2463  #endif
2464  #endif
2465  }
2466 
2467  if (header_flag & BM_HM_ID_LIST)
2468  {
2469  // special case: the next comes plain list of integers
2470  if (header_flag & BM_HM_RESIZE)
2471  {
2472  block_idx_type bv_size;
2473  if (header_flag & BM_HM_64_BIT)
2474  {
2475  BM_ASSERT(sizeof(block_idx_type)==8);
2476  bv_size = (block_idx_type)dec.get_64();
2477  }
2478  else
2479  bv_size = dec.get_32();
2480  if (bv_size > bv.size())
2481  bv.resize(bv_size);
2482  }
2483  for (unsigned cnt = dec.get_32(); cnt; --cnt)
2484  {
2485  bm::id_t idx = dec.get_32();
2486  bv.set(idx);
2487  } // for
2488  // -1 for compatibility with other deserialization branches
2489  return dec.size()-1;
2490  }
2491 
2492  block_idx_type i;
2493 
2494  if (!(header_flag & BM_HM_NO_GAPL))
2495  {
2496  //gap_word_t glevels[bm::gap_levels];
2497  // read GAP levels information
2498  for (unsigned k = 0; k < bm::gap_levels; ++k)
2499  {
2500  /*glevels[i] =*/ dec.get_16();
2501  }
2502  }
2503  if (header_flag & BM_HM_RESIZE)
2504  {
2505  block_idx_type bv_size;
2506  if (header_flag & BM_HM_64_BIT)
2507  {
2508  // 64-bit vector cannot be deserialized into 32-bit
2509  BM_ASSERT(sizeof(block_idx_type)==8);
2510  #ifndef BM64ADDR
2511  #ifndef BM_NO_STL
2512  throw std::logic_error(this->err_msg());
2513  #else
2514  BM_THROW(BM_ERR_SERIALFORMAT);
2515  #endif
2516  #endif
2517  bv_size = (block_idx_type)dec.get_64();
2518  }
2519  else
2520  bv_size = dec.get_32();
2521  if (bv_size > bv.size())
2522  bv.resize(bv_size);
2523  }
2524 
2525  unsigned char btype;
2526  block_idx_type nb;
2527  unsigned i0, j0;
2528 
2529  for (i = 0; i < bm::set_total_blocks; ++i)
2530  {
2531  btype = dec.get_8();
2532 
2533  bman.get_block_coord(i, i0, j0);
2534  bm::word_t* blk = bman.get_block_ptr(i0, j0);
2535  // pre-check if we have short zero-run packaging here
2536  //
2537  if (btype & (1 << 7))
2538  {
2539  nb = btype & ~(1 << 7);
2540  i += nb-1;
2541  continue;
2542  }
2543 
2544  switch (btype)
2545  {
2546  case set_block_azero:
2547  case set_block_end:
2549  break;
2550  case set_block_1zero:
2551  continue;
2552  case set_block_8zero:
2553  nb = dec.get_8();
2554  i += nb-1;
2555  continue;
2556  case set_block_16zero:
2557  nb = dec.get_16();
2558  i += nb-1;
2559  continue;
2560  case set_block_32zero:
2561  nb = dec.get_32();
2562  i += nb-1;
2563  continue;
2564  case set_block_64zero:
2565  #ifdef BM64ADDR
2566  nb = dec.get_64();
2567  i += nb-1;
2568  #else
2569  BM_ASSERT(0); // attempt to read 64-bit vector in 32-bit mode
2570  #ifndef BM_NO_STL
2571  throw std::logic_error(this->err_msg());
2572  #else
2573  BM_THROW(BM_ERR_SERIALFORMAT);
2574  #endif
2576  #endif
2577  continue;
2578  case set_block_aone:
2579  bman.set_all_set(i, bm::set_total_blocks-1);
2581  break;
2582  case set_block_1one:
2583  bman.set_block_all_set(i);
2584  continue;
2585  case set_block_8one:
2586  BM_SET_ONE_BLOCKS(dec.get_8());
2587  continue;
2588  case set_block_16one:
2589  BM_SET_ONE_BLOCKS(dec.get_16());
2590  continue;
2591  case set_block_32one:
2592  BM_SET_ONE_BLOCKS(dec.get_32());
2593  continue;
2594  case set_block_64one:
2595  #ifdef BM64ADDR
2596  BM_SET_ONE_BLOCKS(dec.get_64());
2597  #else
2598  BM_ASSERT(0); // 32-bit vector cannot read 64-bit
2599  #ifndef BM_NO_STL
2600  throw std::logic_error(this->err_msg());
2601  #else
2602  BM_THROW(BM_ERR_SERIALFORMAT);
2603  #endif
2604  dec.get_64();
2605  #endif
2606  continue;
2607  case set_block_bit:
2608  {
2609  if (blk == 0)
2610  {
2611  blk = bman.get_allocator().alloc_bit_block();
2612  bman.set_block(i, blk);
2613  dec.get_32(blk, bm::set_block_size);
2614  continue;
2615  }
2616 
2617  dec.get_32(temp_block_, bm::set_block_size);
2618  bv.combine_operation_with_block(i,
2619  temp_block,
2620  0, BM_OR);
2621 
2622  continue;
2623  }
2624  case set_block_bit_1bit:
2625  {
2626  size_type bit_idx = dec.get_16();
2627  bit_idx += i * bm::bits_in_block;
2628  bv.set_bit_no_check(bit_idx);
2629  continue;
2630  }
2631  case set_block_bit_0runs:
2632  {
2633  //TODO: optimization if block exists
2634  this->read_0runs_block(dec, temp_block);
2635  bv.combine_operation_with_block(i,
2636  temp_block,
2637  0, BM_OR);
2638  continue;
2639  }
2640  case set_block_bit_interval:
2641  {
2642  unsigned head_idx, tail_idx;
2643  head_idx = dec.get_16();
2644  tail_idx = dec.get_16();
2645 
2646  if (blk == 0)
2647  {
2648  blk = bman.get_allocator().alloc_bit_block();
2649  bman.set_block(i, blk);
2650  for (unsigned k = 0; k < head_idx; ++k)
2651  {
2652  blk[k] = 0;
2653  }
2654  dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
2655  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
2656  {
2657  blk[j] = 0;
2658  }
2659  continue;
2660  }
2661  bm::bit_block_set(temp_block, 0);
2662  dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
2663 
2664  bv.combine_operation_with_block(i,
2665  temp_block,
2666  0, BM_OR);
2667  continue;
2668  }
2669  case set_block_gap:
2670  case set_block_gapbit:
2671  case set_block_arrgap:
2672  case set_block_gap_egamma:
2675  case set_block_arrgap_inv:
2676  case set_block_gap_bienc:
2679  deserialize_gap(btype, dec, bv, bman, i, blk);
2680  continue;
2681  case set_block_arrbit:
2682  {
2683  gap_word_t len = dec.get_16();
2684  if (BM_IS_GAP(blk))
2685  {
2686  // convert from GAP cause generic bitblock is faster
2687  blk = bman.deoptimize_block(i);
2688  }
2689  else
2690  {
2691  if (!blk) // block does not exists yet
2692  {
2693  blk = bman.get_allocator().alloc_bit_block();
2694  bman.set_block(i, blk);
2695  bm::bit_block_set(blk, 0);
2696  }
2697  else
2698  if (IS_FULL_BLOCK(blk)) // nothing to do
2699  {
2700  for (unsigned k = 0; k < len; ++k) // dry read
2701  {
2702  dec.get_16();
2703  }
2704  continue;
2705  }
2706  }
2707 
2708  // Get the array one by one and set the bits.
2709  for (unsigned k = 0; k < len; ++k)
2710  {
2711  gap_word_t bit_idx = dec.get_16();
2712  bm::set_bit(blk, bit_idx);
2713  }
2714  continue;
2715  }
2720  decode_bit_block(btype, dec, bman, i, blk);
2721  continue;
2723  decode_bit_block(btype, dec, bman, i, blk);
2724  continue;
2725  default:
2726  BM_ASSERT(0); // unknown block type
2727  #ifndef BM_NO_STL
2728  throw std::logic_error(this->err_msg());
2729  #else
2730  BM_THROW(BM_ERR_SERIALFORMAT);
2731  #endif
2732  } // switch
2733  } // for i
2734 
2735  bv.set_new_blocks_strat(strat);
2736 
2737  return dec.size();
2738 }
2739 
2740 // ---------------------------------------------------------------------------
2741 
2742 template<class DEC>
2744  : decoder_(buf),
2745  end_of_stream_(false),
2746  bv_size_(0),
2747  state_(e_unknown),
2748  id_cnt_(0),
2749  block_idx_(0),
2750  mono_block_cnt_(0),
2751  block_idx_arr_(0)
2752 {
2753  ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
2754 
2757 
2782 
2783 
2784  // reading stream header
2785  unsigned char header_flag = decoder_.get_8();
2786  if (!(header_flag & BM_HM_NO_BO))
2787  {
2788  /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
2789  }
2790 
2791  // check if bitvector comes as an inverted, sorted list of ints
2792  //
2793  if (header_flag & BM_HM_ID_LIST)
2794  {
2795  // special case: the next comes plain list of unsigned integers
2796  if (header_flag & BM_HM_RESIZE)
2797  {
2798  if (header_flag & BM_HM_64_BIT)
2799  {
2800  BM_ASSERT(sizeof(block_idx_type)==8);
2801  bv_size_ = (block_idx_type)decoder_.get_64();
2802  }
2803  else
2804  bv_size_ = decoder_.get_32();
2805  }
2806 
2807  state_ = e_list_ids;
2808  id_cnt_ = decoder_.get_32();
2809  next(); // read first id
2810  }
2811  else
2812  {
2813  if (!(header_flag & BM_HM_NO_GAPL))
2814  {
2815  for (unsigned i = 0; i < bm::gap_levels; ++i) // load the GAP levels
2816  glevels_[i] = decoder_.get_16();
2817  }
2818  if (header_flag & BM_HM_RESIZE)
2819  {
2820  if (header_flag & BM_HM_64_BIT)
2821  {
2822  BM_ASSERT(sizeof(block_idx_type)==8);
2823  bv_size_ = (block_idx_type)decoder_.get_64();
2824  }
2825  else
2826  bv_size_ = decoder_.get_32();
2827  }
2828  state_ = e_blocks;
2829  }
2830  block_idx_arr_ = (gap_word_t*) ::malloc(sizeof(gap_word_t) * bm::gap_max_bits);
2831  this->id_array_ = block_idx_arr_;
2832 }
2833 
2834 template<class DEC>
2836 {
2837  if (block_idx_arr_)
2838  ::free(block_idx_arr_);
2839 }
2840 
2841 
2842 template<class DEC>
2844 {
2845  if (is_eof())
2846  {
2847  ++block_idx_;
2848  return;
2849  }
2850 
2851  switch (state_)
2852  {
2853  case e_list_ids:
2854  // read inverted ids one by one
2855  if (id_cnt_ == 0)
2856  {
2857  end_of_stream_ = true;
2858  state_ = e_unknown;
2859  break;
2860  }
2861  last_id_ = decoder_.get_32();
2862  --id_cnt_;
2863  break;
2864 
2865  case e_blocks:
2867  {
2868  end_of_stream_ = true;
2869  state_ = e_unknown;
2870  break;
2871  }
2872 
2873  block_type_ = decoder_.get_8();
2874 
2875  // pre-check for 7-bit zero block
2876  //
2877  if (block_type_ & (1u << 7u))
2878  {
2879  mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
2881  break;
2882  }
2883 
2884  switch (block_type_)
2885  {
2886  case set_block_azero:
2887  case set_block_end:
2888  end_of_stream_ = true; state_ = e_unknown;
2889  break;
2890  case set_block_1zero:
2892  mono_block_cnt_ = 0;
2893  break;
2894  case set_block_8zero:
2896  mono_block_cnt_ = decoder_.get_8()-1;
2897  break;
2898  case set_block_16zero:
2900  mono_block_cnt_ = decoder_.get_16()-1;
2901  break;
2902  case set_block_32zero:
2904  mono_block_cnt_ = decoder_.get_32()-1;
2905  break;
2906  case set_block_aone:
2907  state_ = e_one_blocks;
2909  break;
2910  case set_block_1one:
2911  state_ = e_one_blocks;
2912  mono_block_cnt_ = 0;
2913  break;
2914  case set_block_8one:
2915  state_ = e_one_blocks;
2916  mono_block_cnt_ = decoder_.get_8()-1;
2917  break;
2918  case set_block_16one:
2919  state_ = e_one_blocks;
2920  mono_block_cnt_ = decoder_.get_16()-1;
2921  break;
2922  case set_block_32one:
2923  state_ = e_one_blocks;
2924  mono_block_cnt_ = decoder_.get_32()-1;
2925  break;
2926 
2927  case bm::set_block_bit:
2930  case bm::set_block_arrbit:
2936  state_ = e_bit_block;
2937  break;
2938 
2939  case set_block_gap:
2940  case set_block_gap_egamma:
2941  case set_block_gap_bienc:
2942  gap_head_ = decoder_.get_16();
2943  case set_block_arrgap:
2946  case set_block_arrgap_inv:
2947  case set_block_bit_1bit:
2950  state_ = e_gap_block;
2951  break;
2952  case set_block_gapbit:
2953  state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
2954  break;
2955  default:
2956  BM_ASSERT(0);
2957  #ifndef BM_NO_STL
2958  throw std::logic_error(this->err_msg());
2959  #else
2960  BM_THROW(BM_ERR_SERIALFORMAT);
2961  #endif
2962  } // switch
2963 
2964  break;
2965 
2966  case e_zero_blocks:
2967  case e_one_blocks:
2968  ++block_idx_;
2969  if (!mono_block_cnt_)
2970  {
2971  state_ = e_blocks; // get new block token
2972  break;
2973  }
2974  --mono_block_cnt_;
2975  break;
2976 
2977  case e_unknown:
2978  default:
2979  BM_ASSERT(0);
2980  #ifndef BM_NO_STL
2981  throw std::logic_error(this->err_msg());
2982  #else
2983  BM_THROW(BM_ERR_SERIALFORMAT);
2984  #endif
2985  } // switch
2986 }
2987 
2988 template<class DEC>
2991 {
2993  if (!mono_block_cnt_)
2994  ++block_idx_;
2995  else
2996  {
2998  mono_block_cnt_ = 0;
2999  }
3000  state_ = e_blocks;
3001  return block_idx_;
3002 }
3003 
3004 template<class DEC>
3006 {
3007  gap_word_t len = decoder_.get_16();
3008  if (block)
3009  {
3010  bm::bit_block_set(block, ~0u);
3011  for (unsigned k = 0; k < len; ++k)
3012  {
3013  gap_word_t bit_idx = decoder_.get_16();
3014  bm::clear_bit(block, bit_idx);
3015  }
3016  }
3017  else // dry read
3018  {
3019  for (unsigned k = 0; k < len; ++k)
3020  decoder_.get_16();
3021  }
3022 }
3023 
3024 
3025 template<class DEC>
3026 unsigned
3028  bm::word_t* dst_block,
3029  bm::word_t* tmp_block)
3030 {
3031  // ASSIGN should be ready for dry read (dst_block can be NULL)
3032  //
3033  BM_ASSERT(this->state_ == e_bit_block);
3034  unsigned count = 0;
3035 
3036  switch (this->block_type_)
3037  {
3038  case set_block_bit:
3039  decoder_.get_32(dst_block, bm::set_block_size);
3040  break;
3041  case set_block_bit_0runs:
3042  {
3043  if (IS_VALID_ADDR(dst_block))
3044  bm::bit_block_set(dst_block, 0);
3045  unsigned char run_type = decoder_.get_8();
3046  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3047  {
3048  unsigned run_length = decoder_.get_16();
3049  if (run_type)
3050  {
3051  decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
3052  }
3053  j += run_length;
3054  } // for
3055  }
3056  break;
3058  {
3059  unsigned head_idx = decoder_.get_16();
3060  unsigned tail_idx = decoder_.get_16();
3061  if (dst_block)
3062  {
3063  for (unsigned i = 0; i < head_idx; ++i)
3064  dst_block[i] = 0;
3065  decoder_.get_32(dst_block + head_idx,
3066  tail_idx - head_idx + 1);
3067  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
3068  dst_block[j] = 0;
3069  }
3070  else
3071  {
3072  int pos = int(tail_idx - head_idx) + 1;
3073  pos *= 4u;
3074  decoder_.seek(pos);
3075  }
3076  }
3077  break;
3078  case set_block_arrbit:
3079  case set_block_bit_1bit:
3080  get_arr_bit(dst_block, true /*clear target*/);
3081  break;
3082  case set_block_gapbit:
3083  BM_ASSERT(0);
3084  #ifndef BM_NO_STL
3085  throw std::logic_error(this->err_msg());
3086  #else
3087  BM_THROW(BM_ERR_SERIALFORMAT);
3088  #endif
3089  break;
3090  case set_block_arrbit_inv:
3091  get_inv_arr(dst_block);
3092  break;
3094  if (IS_VALID_ADDR(dst_block))
3095  bm::bit_block_set(dst_block, 0);
3096  this->read_bic_arr(decoder_, dst_block);
3097  break;
3099  this->read_bic_arr_inv(decoder_, tmp_block);
3100  if (IS_VALID_ADDR(dst_block))
3101  bm::bit_block_copy(dst_block, tmp_block);
3102  break;
3104  if (IS_VALID_ADDR(dst_block))
3105  bm::bit_block_set(dst_block, 0);
3106  this->read_bic_gap(decoder_, dst_block);
3107  break;
3109  if (IS_VALID_ADDR(dst_block))
3110  bm::bit_block_set(dst_block, 0);
3111  this->read_digest0_block(decoder_, dst_block);
3112  break;
3113  default:
3114  BM_ASSERT(0);
3115  #ifndef BM_NO_STL
3116  throw std::logic_error(this->err_msg());
3117  #else
3118  BM_THROW(BM_ERR_SERIALFORMAT);
3119  #endif
3120  } // switch
3121  return count;
3122 }
3123 
3124 template<class DEC>
3125 unsigned
3127  bm::word_t* tmp_block)
3128 {
3129  BM_ASSERT(this->state_ == e_bit_block);
3130  unsigned count = 0;
3131  switch (block_type_)
3132  {
3133  case set_block_bit:
3134  decoder_.get_32_OR(dst_block, bm::set_block_size);
3135  break;
3137  {
3138  unsigned head_idx = decoder_.get_16();
3139  unsigned tail_idx = decoder_.get_16();
3140  for (unsigned i = head_idx; i <= tail_idx; ++i)
3141  dst_block[i] |= decoder_.get_32();
3142  }
3143  break;
3144  case set_block_bit_0runs:
3145  {
3146  unsigned char run_type = decoder_.get_8();
3147  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3148  {
3149  unsigned run_length = decoder_.get_16();
3150  if (run_type)
3151  {
3152  unsigned run_end = j + run_length;
3153  for (;j < run_end; ++j)
3154  {
3155  BM_ASSERT(j < bm::set_block_size);
3156  dst_block[j] |= decoder_.get_32();
3157  }
3158  }
3159  else
3160  {
3161  j += run_length;
3162  }
3163  } // for
3164  }
3165  break;
3166  case set_block_bit_1bit:
3167  case set_block_arrbit:
3168  get_arr_bit(dst_block, false /*don't clear target*/);
3169  break;
3170  case set_block_arrbit_inv:
3171  get_inv_arr(tmp_block);
3172  bm::bit_block_or(dst_block, tmp_block);
3173  break;
3175  this->read_bic_arr(decoder_, dst_block);
3176  break;
3178  this->read_bic_arr_inv(decoder_, tmp_block);
3179  bm::bit_block_or(dst_block, tmp_block);
3180  break;
3182  this->read_bic_gap(decoder_, dst_block);
3183  break;
3185  this->read_digest0_block(decoder_, dst_block);
3186  break;
3187  default:
3188  BM_ASSERT(0);
3189  #ifndef BM_NO_STL
3190  throw std::logic_error(this->err_msg());
3191  #else
3192  BM_THROW(BM_ERR_SERIALFORMAT);
3193  #endif
3194  } // switch
3195  return count;
3196 }
3197 
3198 template<class DEC>
3199 unsigned
3201  bm::word_t* BMRESTRICT tmp_block)
3202 {
3203  BM_ASSERT(this->state_ == e_bit_block);
3204  BM_ASSERT(dst_block != tmp_block);
3205  unsigned count = 0;
3206  switch (block_type_)
3207  {
3208  case set_block_bit:
3209  decoder_.get_32_AND(dst_block, bm::set_block_size);
3210  break;
3211  case set_block_bit_0runs:
3212  {
3213  unsigned char run_type = decoder_.get_8();
3214  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3215  {
3216  unsigned run_length = decoder_.get_16();
3217 
3218  unsigned run_end = j + run_length;
3219  if (run_type)
3220  {
3221  for (;j < run_end; ++j)
3222  {
3223  BM_ASSERT(j < bm::set_block_size);
3224  dst_block[j] &= decoder_.get_32();
3225  }
3226  }
3227  else
3228  {
3229  for (;j < run_end; ++j)
3230  {
3231  BM_ASSERT(j < bm::set_block_size);
3232  dst_block[j] = 0;
3233  }
3234  }
3235  } // for
3236  }
3237  break;
3239  {
3240  unsigned head_idx = decoder_.get_16();
3241  unsigned tail_idx = decoder_.get_16();
3242  unsigned i;
3243  for ( i = 0; i < head_idx; ++i)
3244  dst_block[i] = 0;
3245  for ( i = head_idx; i <= tail_idx; ++i)
3246  dst_block[i] &= decoder_.get_32();
3247  for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
3248  dst_block[i] = 0;
3249  }
3250  break;
3251  case set_block_bit_1bit:
3252  case set_block_arrbit:
3253  get_arr_bit(tmp_block, true /*clear target*/);
3254  if (dst_block)
3255  bm::bit_block_and(dst_block, tmp_block);
3256  break;
3257  case set_block_arrbit_inv:
3258  get_inv_arr(tmp_block);
3259  if (dst_block)
3260  bm::bit_block_and(dst_block, tmp_block);
3261  break;
3262  case set_block_arr_bienc:
3263  if (dst_block)
3264  {
3265  bm::bit_block_set(tmp_block, 0);
3266  this->read_bic_arr(decoder_, tmp_block);
3267  bm::bit_block_and(dst_block, tmp_block);
3268  }
3269  else
3270  this->read_bic_arr(decoder_, 0); // dry read
3271  break;
3273  this->read_bic_arr_inv(decoder_, tmp_block);
3274  if (dst_block)
3275  bm::bit_block_and(dst_block, tmp_block);
3276  break;
3278  if (dst_block)
3279  {
3280  BM_ASSERT(IS_VALID_ADDR(dst_block));
3281  bm::bit_block_set(tmp_block, 0);
3282  this->read_bic_gap(decoder_, tmp_block);
3283  bm::bit_block_and(dst_block, tmp_block);
3284  }
3285  else
3286  this->read_bic_gap(decoder_, 0); // dry read
3287  break;
3289  if (dst_block)
3290  {
3291  BM_ASSERT(IS_VALID_ADDR(dst_block));
3292  bm::bit_block_set(tmp_block, 0);
3293  this->read_digest0_block(decoder_, tmp_block);
3294  bm::bit_block_and(dst_block, tmp_block);
3295  }
3296  else
3297  this->read_digest0_block(decoder_, 0); // dry read
3298  break;
3299  default:
3300  BM_ASSERT(0);
3301  #ifndef BM_NO_STL
3302  throw std::logic_error(this->err_msg());
3303  #else
3304  BM_THROW(BM_ERR_SERIALFORMAT);
3305  #endif
3306  } // switch
3307  return count;
3308 }
3309 
3310 template<class DEC>
3311 unsigned
3313  bm::word_t* tmp_block)
3314 {
3315  BM_ASSERT(this->state_ == e_bit_block);
3316  BM_ASSERT(dst_block != tmp_block);
3317 
3318  unsigned count = 0;
3319  switch (block_type_)
3320  {
3321  case set_block_bit:
3322  for (unsigned i = 0; i < bm::set_block_size; ++i)
3323  dst_block[i] ^= decoder_.get_32();
3324  break;
3325  case set_block_bit_0runs:
3326  {
3327  unsigned char run_type = decoder_.get_8();
3328  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3329  {
3330  unsigned run_length = decoder_.get_16();
3331  if (run_type)
3332  {
3333  unsigned run_end = j + run_length;
3334  for (;j < run_end; ++j)
3335  {
3336  BM_ASSERT(j < bm::set_block_size);
3337  dst_block[j] ^= decoder_.get_32();
3338  }
3339  }
3340  else
3341  {
3342  j += run_length;
3343  }
3344  } // for
3345  }
3346  break;
3348  {
3349  unsigned head_idx = decoder_.get_16();
3350  unsigned tail_idx = decoder_.get_16();
3351  for (unsigned i = head_idx; i <= tail_idx; ++i)
3352  dst_block[i] ^= decoder_.get_32();
3353  }
3354  break;
3355  case set_block_bit_1bit:
3356  // TODO: optimization
3357  case set_block_arrbit:
3358  get_arr_bit(tmp_block, true /*clear target*/);
3359  if (dst_block)
3360  bm::bit_block_xor(dst_block, tmp_block);
3361  break;
3362  case set_block_arrbit_inv:
3363  get_inv_arr(tmp_block);
3364  if (dst_block)
3365  bm::bit_block_xor(dst_block, tmp_block);
3366  break;
3367  case set_block_arr_bienc:
3368  bm::bit_block_set(tmp_block, 0);
3369  this->read_bic_arr(decoder_, tmp_block);
3370  if (dst_block)
3371  bm::bit_block_xor(dst_block, tmp_block);
3372  break;
3374  this->read_bic_arr_inv(decoder_, tmp_block);
3375  if (dst_block)
3376  {
3377  BM_ASSERT(IS_VALID_ADDR(dst_block));
3378  bm::bit_block_xor(dst_block, tmp_block);
3379  }
3380  break;
3382  if (dst_block)
3383  {
3384  BM_ASSERT(IS_VALID_ADDR(dst_block));
3385  bm::bit_block_set(tmp_block, 0);
3386  this->read_bic_gap(decoder_, tmp_block);
3387  bm::bit_block_xor(dst_block, tmp_block);
3388  }
3389  else
3390  this->read_bic_gap(decoder_, 0); // dry read
3391  break;
3393  if (dst_block)
3394  {
3395  BM_ASSERT(IS_VALID_ADDR(dst_block));
3396  bm::bit_block_set(tmp_block, 0);
3397  this->read_digest0_block(decoder_, tmp_block);
3398  bm::bit_block_xor(dst_block, tmp_block);
3399  }
3400  else
3401  this->read_digest0_block(decoder_, 0); // dry read
3402  break;
3403  default:
3404  BM_ASSERT(0);
3405  #ifndef BM_NO_STL
3406  throw std::logic_error(this->err_msg());
3407  #else
3408  BM_THROW(BM_ERR_SERIALFORMAT);
3409  #endif
3410  } // switch
3411  return count;
3412 }
3413 
3414 template<class DEC>
3415 unsigned
3417  bm::word_t* tmp_block)
3418 {
3419  BM_ASSERT(this->state_ == e_bit_block);
3420  BM_ASSERT(dst_block != tmp_block);
3421 
3422  unsigned count = 0;
3423  switch (block_type_)
3424  {
3425  case set_block_bit:
3426  for (unsigned i = 0; i < bm::set_block_size; ++i)
3427  dst_block[i] &= ~decoder_.get_32();
3428  break;
3429  case set_block_bit_0runs:
3430  {
3431  unsigned char run_type = decoder_.get_8();
3432  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3433  {
3434  unsigned run_length = decoder_.get_16();
3435  if (run_type)
3436  {
3437  unsigned run_end = j + run_length;
3438  for (;j < run_end; ++j)
3439  {
3440  BM_ASSERT(j < bm::set_block_size);
3441  dst_block[j] &= ~decoder_.get_32();
3442  }
3443  }
3444  else
3445  {
3446  j += run_length;
3447  }
3448  } // for
3449  }
3450  break;
3452  {
3453  unsigned head_idx = decoder_.get_16();
3454  unsigned tail_idx = decoder_.get_16();
3455  for (unsigned i = head_idx; i <= tail_idx; ++i)
3456  dst_block[i] &= ~decoder_.get_32();
3457  }
3458  break;
3459  case set_block_bit_1bit:
3460  // TODO: optimization
3461  case set_block_arrbit:
3462  get_arr_bit(tmp_block, true /*clear target*/);
3463  if (dst_block)
3464  bm::bit_block_sub(dst_block, tmp_block);
3465  break;
3466  case set_block_arrbit_inv:
3467  get_inv_arr(tmp_block);
3468  if (dst_block)
3469  bm::bit_block_sub(dst_block, tmp_block);
3470  break;
3471  case set_block_arr_bienc:
3472  bm::bit_block_set(tmp_block, 0);
3473  this->read_bic_arr(decoder_, tmp_block);
3474  if (dst_block)
3475  bm::bit_block_sub(dst_block, tmp_block);
3476  break;
3478  this->read_bic_arr_inv(decoder_, tmp_block);
3479  if (dst_block)
3480  bm::bit_block_sub(dst_block, tmp_block);
3481  break;
3483  if (dst_block)
3484  {
3485  BM_ASSERT(IS_VALID_ADDR(dst_block));
3486  bm::bit_block_set(tmp_block, 0);
3487  this->read_bic_gap(decoder_, tmp_block);
3488  bm::bit_block_sub(dst_block, tmp_block);
3489  }
3490  else
3491  this->read_bic_gap(decoder_, 0); // dry read
3492  break;
3494  if (dst_block)
3495  {
3496  BM_ASSERT(IS_VALID_ADDR(dst_block));
3497  bm::bit_block_set(tmp_block, 0);
3498  this->read_digest0_block(decoder_, tmp_block);
3499  bm::bit_block_sub(dst_block, tmp_block);
3500  }
3501  else
3502  this->read_digest0_block(decoder_, 0); // dry read
3503  break;
3504  default:
3505  BM_ASSERT(0);
3506  #ifndef BM_NO_STL
3507  throw std::logic_error(this->err_msg());
3508  #else
3509  BM_THROW(BM_ERR_SERIALFORMAT);
3510  #endif
3511  } // switch
3512  return count;
3513 }
3514 
3515 
3516 template<class DEC>
3517 unsigned
3519  bm::word_t* tmp_block)
3520 {
3521  BM_ASSERT(this->state_ == e_bit_block);
3522 
3523  unsigned count = 0;
3524  switch (block_type_)
3525  {
3526  case set_block_bit:
3527  for (unsigned i = 0; i < bm::set_block_size; ++i)
3528  count += bm::word_bitcount(decoder_.get_32());
3529  break;
3530  case set_block_bit_0runs:
3531  {
3532  //count = 0;
3533  unsigned char run_type = decoder_.get_8();
3534  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3535  {
3536  unsigned run_length = decoder_.get_16();
3537  if (run_type)
3538  {
3539  unsigned run_end = j + run_length;
3540  for (;j < run_end; ++j)
3541  {
3542  count += word_bitcount(decoder_.get_32());
3543  }
3544  }
3545  else
3546  {
3547  j += run_length;
3548  }
3549  } // for
3550  return count;
3551  }
3553  {
3554  unsigned head_idx = decoder_.get_16();
3555  unsigned tail_idx = decoder_.get_16();
3556  for (unsigned i = head_idx; i <= tail_idx; ++i)
3557  count += bm::word_bitcount(decoder_.get_32());
3558  }
3559  break;
3560  case set_block_arrbit:
3561  count += get_arr_bit(0);
3562  break;
3563  case set_block_bit_1bit:
3564  ++count;
3565  decoder_.get_16();
3566  break;
3567  case set_block_arrbit_inv:
3568  get_inv_arr(tmp_block);
3569  goto count_tmp;
3570  case set_block_arr_bienc:
3571  bm::bit_block_set(tmp_block, 0); // TODO: just add a counted read
3572  this->read_bic_arr(decoder_, tmp_block);
3573  goto count_tmp;
3575  this->read_bic_arr_inv(decoder_, tmp_block);
3576  goto count_tmp;
3578  bm::bit_block_set(tmp_block, 0);
3579  this->read_digest0_block(decoder_, tmp_block);
3580  goto count_tmp;
3582  bm::bit_block_set(tmp_block, 0);
3583  this->read_bic_gap(decoder_, tmp_block);
3584  count_tmp:
3585  count += bm::bit_block_count(tmp_block);
3586  break;
3587  default:
3588  BM_ASSERT(0);
3589  #ifndef BM_NO_STL
3590  throw std::logic_error(this->err_msg());
3591  #else
3592  BM_THROW(BM_ERR_SERIALFORMAT);
3593  #endif
3594 
3595  } // switch
3596  return count;
3597 }
3598 
3599 template<class DEC>
3600 unsigned
3602  bm::word_t* tmp_block)
3603 {
3604  BM_ASSERT(this->state_ == e_bit_block);
3605  unsigned count = 0;
3606  if (dst_block)
3607  {
3608  // count the block bitcount
3609  count = bm::bit_block_count(dst_block);
3610  }
3611 
3612  switch (block_type_)
3613  {
3614  case set_block_bit:
3615  decoder_.get_32(0, bm::set_block_size);
3616  break;
3617  case set_block_bit_0runs:
3618  {
3619  unsigned char run_type = decoder_.get_8();
3620  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3621  {
3622  unsigned run_length = decoder_.get_16();
3623  if (run_type)
3624  {
3625  unsigned run_end = j + run_length;
3626  for (;j < run_end; ++j)
3627  {
3628  decoder_.get_32();
3629  }
3630  }
3631  else
3632  {
3633  j += run_length;
3634  }
3635  } // for
3636  }
3637  break;
3638 
3640  {
3641  unsigned head_idx = decoder_.get_16();
3642  unsigned tail_idx = decoder_.get_16();
3643  for (unsigned i = head_idx; i <= tail_idx; ++i)
3644  decoder_.get_32();
3645  }
3646  break;
3647  case set_block_arrbit:
3648  get_arr_bit(0);
3649  break;
3650  case set_block_bit_1bit:
3651  decoder_.get_16();
3652  break;
3653  case set_block_arrbit_inv:
3654  get_inv_arr(tmp_block);
3655  break;
3656  case set_block_arr_bienc:
3657  this->read_bic_arr(decoder_, tmp_block); // TODO: add dry read
3658  break;
3660  this->read_bic_arr_inv(decoder_, tmp_block); // TODO: add dry read
3661  break;
3663  this->read_bic_gap(decoder_, tmp_block);
3664  break;
3666  this->read_digest0_block(decoder_, 0); // dry read
3667  break;
3668  default:
3669  BM_ASSERT(0);
3670  #ifndef BM_NO_STL
3671  throw std::logic_error(this->err_msg());
3672  #else
3673  BM_THROW(BM_ERR_SERIALFORMAT);
3674  #endif
3675 
3676  } // switch
3677  return count;
3678 }
3679 
3680 
3681 template<class DEC>
3682 unsigned
3684  bm::word_t* tmp_block)
3685 {
3686  BM_ASSERT(this->state_ == e_bit_block);
3687  BM_ASSERT(dst_block);
3688 
3689  unsigned count = 0;
3690  switch (block_type_)
3691  {
3692  case set_block_bit:
3693  for (unsigned i = 0; i < bm::set_block_size; ++i)
3694  count += word_bitcount(dst_block[i] & decoder_.get_32());
3695  break;
3696  case set_block_bit_0runs:
3697  {
3698  //count = 0;
3699  unsigned char run_type = decoder_.get_8();
3700  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3701  {
3702  unsigned run_length = decoder_.get_16();
3703  if (run_type)
3704  {
3705  unsigned run_end = j + run_length;
3706  for (;j < run_end; ++j)
3707  {
3708  count += word_bitcount(dst_block[j] & decoder_.get_32());
3709  }
3710  }
3711  else
3712  {
3713  j += run_length;
3714  }
3715  } // for
3716  return count;
3717  }
3719  {
3720  unsigned head_idx = decoder_.get_16();
3721  unsigned tail_idx = decoder_.get_16();
3722  for (unsigned i = head_idx; i <= tail_idx; ++i)
3723  count += word_bitcount(dst_block[i] & decoder_.get_32());
3724  }
3725  break;
3726  case set_block_bit_1bit:
3727  // TODO: optimization
3728  case set_block_arrbit:
3729  get_arr_bit(tmp_block, true /*clear target*/);
3730  goto count_tmp;
3731  break;
3732  case set_block_arrbit_inv:
3733  get_inv_arr(tmp_block);
3734  goto count_tmp;
3735  break;
3736  case set_block_arr_bienc:
3737  bm::bit_block_set(tmp_block, 0);
3738  this->read_bic_arr(decoder_, tmp_block);
3739  goto count_tmp;
3741  this->read_bic_arr_inv(decoder_, tmp_block);
3742  goto count_tmp;
3744  bm::bit_block_set(tmp_block, 0);
3745  this->read_digest0_block(decoder_, tmp_block);
3746  goto count_tmp;
3748  bm::bit_block_set(tmp_block, 0);
3749  this->read_bic_gap(decoder_, tmp_block);
3750  count_tmp:
3751  count += bm::bit_operation_and_count(dst_block, tmp_block);
3752  break;
3753  default:
3754  BM_ASSERT(0);
3755  #ifndef BM_NO_STL
3756  throw std::logic_error(this->err_msg());
3757  #else
3758  BM_THROW(BM_ERR_SERIALFORMAT);
3759  #endif
3760 
3761  } // switch
3762  return count;
3763 }
3764 
3765 template<class DEC>
3766 unsigned
3768  bm::word_t* tmp_block)
3769 {
3770  BM_ASSERT(this->state_ == e_bit_block);
3771  BM_ASSERT(dst_block);
3772 
3773  bitblock_sum_adapter count_adapter;
3774  switch (block_type_)
3775  {
3776  case set_block_bit:
3777  {
3778  bitblock_get_adapter ga(dst_block);
3780 
3781  bit_recomb(ga,
3782  decoder_,
3783  func,
3784  count_adapter
3785  );
3786  }
3787  break;
3788  case set_block_bit_0runs:
3789  {
3790  unsigned count = 0;
3791  unsigned char run_type = decoder_.get_8();
3792  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3793  {
3794  unsigned run_length = decoder_.get_16();
3795  unsigned run_end = j + run_length;
3796  if (run_type)
3797  {
3798  for (;j < run_end; ++j)
3799  {
3800  BM_ASSERT(j < bm::set_block_size);
3801  count += word_bitcount(dst_block[j] | decoder_.get_32());
3802  }
3803  }
3804  else
3805  {
3806  for (;j < run_end; ++j)
3807  {
3808  BM_ASSERT(j < bm::set_block_size);
3809  count += word_bitcount(dst_block[j]);
3810  }
3811  }
3812  } // for
3813  return count;
3814  }
3816  {
3817  unsigned head_idx = decoder_.get_16();
3818  unsigned tail_idx = decoder_.get_16();
3819  unsigned count = 0;
3820  unsigned i;
3821  for (i = 0; i < head_idx; ++i)
3822  count += bm::word_bitcount(dst_block[i]);
3823  for (i = head_idx; i <= tail_idx; ++i)
3824  count += bm::word_bitcount(dst_block[i] | decoder_.get_32());
3825  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
3826  count += bm::word_bitcount(dst_block[i]);
3827  return count;
3828  }
3829  case set_block_bit_1bit:
3830  // TODO: optimization
3831  case set_block_arrbit:
3832  get_arr_bit(tmp_block, true /* clear target*/);
3833  return bit_operation_or_count(dst_block, tmp_block);
3834  case set_block_arrbit_inv:
3835  get_inv_arr(tmp_block);
3836  goto count_tmp;
3837  case set_block_arr_bienc:
3838  bm::bit_block_set(tmp_block, 0);
3839  this->read_bic_arr(decoder_, tmp_block);
3840  goto count_tmp;
3842  this->read_bic_arr_inv(decoder_, tmp_block);
3843  goto count_tmp;
3845  bm::bit_block_set(tmp_block, 0);
3846  this->read_digest0_block(decoder_, tmp_block);
3847  goto count_tmp;
3849  bm::bit_block_set(tmp_block, 0);
3850  this->read_bic_gap(decoder_, tmp_block);
3851  count_tmp:
3852  return bm::bit_operation_or_count(dst_block, tmp_block);
3853  default:
3854  BM_ASSERT(0);
3855  #ifndef BM_NO_STL
3856  throw std::logic_error(this->err_msg());
3857  #else
3858  BM_THROW(BM_ERR_SERIALFORMAT);
3859  #endif
3860 
3861  } // switch
3862  return count_adapter.sum();
3863 }
3864 
3865 template<class DEC>
3866 unsigned
3868  bm::word_t* tmp_block)
3869 {
3870  BM_ASSERT(this->state_ == e_bit_block);
3871  BM_ASSERT(dst_block);
3872 
3873  bitblock_sum_adapter count_adapter;
3874  switch (block_type_)
3875  {
3876  case set_block_bit:
3877  {
3878  bitblock_get_adapter ga(dst_block);
3880 
3881  bit_recomb(ga,
3882  decoder_,
3883  func,
3884  count_adapter
3885  );
3886  }
3887  break;
3888  case set_block_bit_0runs:
3889  {
3890  unsigned count = 0;
3891  unsigned char run_type = decoder_.get_8();
3892  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3893  {
3894  unsigned run_length = decoder_.get_16();
3895  unsigned run_end = j + run_length;
3896  if (run_type)
3897  {
3898  for (;j < run_end; ++j)
3899  {
3900  BM_ASSERT(j < bm::set_block_size);
3901  count += bm::word_bitcount(dst_block[j] ^ decoder_.get_32());
3902  }
3903  }
3904  else
3905  {
3906  for (;j < run_end; ++j)
3907  {
3908  BM_ASSERT(j < bm::set_block_size);
3909  count += bm::word_bitcount(dst_block[j]);
3910  }
3911  }
3912  } // for
3913  return count;
3914  }
3916  {
3917  unsigned head_idx = decoder_.get_16();
3918  unsigned tail_idx = decoder_.get_16();
3919  unsigned count = 0;
3920  unsigned i;
3921  for (i = 0; i < head_idx; ++i)
3922  count += bm::word_bitcount(dst_block[i]);
3923  for (i = head_idx; i <= tail_idx; ++i)
3924  count += bm::word_bitcount(dst_block[i] ^ decoder_.get_32());
3925  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
3926  count += bm::word_bitcount(dst_block[i]);
3927  return count;
3928  }
3929  case set_block_bit_1bit:
3930  // TODO: optimization
3931  case set_block_arrbit:
3932  get_arr_bit(tmp_block, true /* clear target*/);
3933  goto count_tmp;
3934  case set_block_arrbit_inv:
3935  get_inv_arr(tmp_block);
3936  goto count_tmp;
3937  case set_block_arr_bienc:
3938  bm::bit_block_set(tmp_block, 0);
3939  this->read_bic_arr(decoder_, tmp_block);
3940  goto count_tmp;
3942  this->read_bic_arr_inv(decoder_, tmp_block);
3943  goto count_tmp;
3944  break;
3946  bm::bit_block_set(tmp_block, 0);
3947  this->read_digest0_block(decoder_, tmp_block);
3948  goto count_tmp;
3950  bm::bit_block_set(tmp_block, 0);
3951  this->read_bic_gap(decoder_, tmp_block);
3952  count_tmp:
3953  return bm::bit_operation_xor_count(dst_block, tmp_block);
3954  default:
3955  BM_ASSERT(0);
3956  #ifndef BM_NO_STL
3957  throw std::logic_error(this->err_msg());
3958  #else
3959  BM_THROW(BM_ERR_SERIALFORMAT);
3960  #endif
3961 
3962  } // switch
3963  return count_adapter.sum();
3964 }
3965 
3966 template<class DEC>
3967 unsigned
3969  bm::word_t* tmp_block)
3970 {
3971  BM_ASSERT(this->state_ == e_bit_block);
3972  BM_ASSERT(dst_block);
3973 
3974  bitblock_sum_adapter count_adapter;
3975  switch (block_type_)
3976  {
3977  case set_block_bit:
3978  {
3979  bitblock_get_adapter ga(dst_block);
3981 
3982  bit_recomb(ga,
3983  decoder_,
3984  func,
3985  count_adapter
3986  );
3987  }
3988  break;
3989  case set_block_bit_0runs:
3990  {
3991  unsigned count = 0;
3992  unsigned char run_type = decoder_.get_8();
3993  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
3994  {
3995  unsigned run_length = decoder_.get_16();
3996  unsigned run_end = j + run_length;
3997  if (run_type)
3998  {
3999  for (;j < run_end; ++j)
4000  {
4001  BM_ASSERT(j < bm::set_block_size);
4002  count += word_bitcount(dst_block[j] & ~decoder_.get_32());
4003  }
4004  }
4005  else
4006  {
4007  for (;j < run_end; ++j)
4008  {
4009  BM_ASSERT(j < bm::set_block_size);
4010  count += bm::word_bitcount(dst_block[j]);
4011  }
4012  }
4013  } // for
4014  return count;
4015  }
4017  {
4018  unsigned head_idx = decoder_.get_16();
4019  unsigned tail_idx = decoder_.get_16();
4020  unsigned count = 0;
4021  unsigned i;
4022  for (i = 0; i < head_idx; ++i)
4023  count += bm::word_bitcount(dst_block[i]);
4024  for (i = head_idx; i <= tail_idx; ++i)
4025  count += bm::word_bitcount(dst_block[i] & (~decoder_.get_32()));
4026  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
4027  count += bm::word_bitcount(dst_block[i]);
4028  return count;
4029  }
4030  break;
4031  case set_block_bit_1bit:
4032  //TODO: optimization
4033  case set_block_arrbit:
4034  get_arr_bit(tmp_block, true /* clear target*/);
4035  goto count_tmp;
4036  case set_block_arrbit_inv:
4037  get_inv_arr(tmp_block);
4038  goto count_tmp;
4039  case set_block_arr_bienc:
4040  bm::bit_block_set(tmp_block, 0);
4041  this->read_bic_arr(decoder_, tmp_block);
4042  goto count_tmp;
4044  this->read_bic_arr_inv(decoder_, tmp_block);
4045  goto count_tmp;
4047  bm::bit_block_set(tmp_block, 0);
4048  this->read_digest0_block(decoder_, tmp_block);
4049  goto count_tmp;
4051  bm::bit_block_set(tmp_block, 0);
4052  this->read_bic_gap(decoder_, tmp_block);
4053  count_tmp:
4054  return bm::bit_operation_sub_count(dst_block, tmp_block);
4055  default:
4056  BM_ASSERT(0);
4057  #ifndef BM_NO_STL
4058  throw std::logic_error(this->err_msg());
4059  #else
4060  BM_THROW(BM_ERR_SERIALFORMAT);
4061  #endif
4062 
4063  } // switch
4064  return count_adapter.sum();
4065 }
4066 
4067 template<class DEC>
4068 unsigned
4070  bm::word_t* tmp_block)
4071 {
4072  BM_ASSERT(this->state_ == e_bit_block);
4073  BM_ASSERT(dst_block);
4074 
4075  bitblock_sum_adapter count_adapter;
4076  switch (block_type_)
4077  {
4078  case set_block_bit:
4079  {
4080  bitblock_get_adapter ga(dst_block);
4082 
4083  bit_recomb(ga,
4084  decoder_,
4085  func,
4086  count_adapter
4087  );
4088  }
4089  break;
4090  case set_block_bit_0runs:
4091  {
4092  unsigned count = 0;
4093  unsigned char run_type = decoder_.get_8();
4094  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
4095  {
4096  unsigned run_length = decoder_.get_16();
4097  unsigned run_end = j + run_length;
4098  if (run_type)
4099  {
4100  for (;j < run_end; ++j)
4101  {
4102  BM_ASSERT(j < bm::set_block_size);
4103  count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
4104  }
4105  }
4106  else
4107  {
4108  j += run_length;
4109  }
4110  } // for
4111  return count;
4112  }
4113 
4115  {
4116  unsigned head_idx = decoder_.get_16();
4117  unsigned tail_idx = decoder_.get_16();
4118  unsigned count = 0;
4119  unsigned i;
4120  for (i = head_idx; i <= tail_idx; ++i)
4121  count += bm::word_bitcount(decoder_.get_32() & (~dst_block[i]));
4122  return count;
4123  }
4124  break;
4125  case set_block_bit_1bit:
4126  // TODO: optimization
4127  case set_block_arrbit:
4128  get_arr_bit(tmp_block, true /* clear target*/);
4129  goto count_tmp;
4130  case set_block_arrbit_inv:
4131  get_inv_arr(tmp_block);
4132  goto count_tmp;
4133  case set_block_arr_bienc:
4134  bm::bit_block_set(tmp_block, 0);
4135  this->read_bic_arr(decoder_, tmp_block);
4136  goto count_tmp;
4138  this->read_bic_arr_inv(decoder_, tmp_block);
4139  goto count_tmp;
4141  bm::bit_block_set(tmp_block, 0);
4142  this->read_digest0_block(decoder_, tmp_block);
4143  goto count_tmp;
4145  bm::bit_block_set(tmp_block, 0);
4146  this->read_bic_gap(decoder_, tmp_block);
4147  count_tmp:
4148  return bm::bit_operation_sub_count(tmp_block, dst_block);
4149  default:
4150  BM_ASSERT(0);
4151  #ifndef BM_NO_STL
4152  throw std::logic_error(this->err_msg());
4153  #else
4154  BM_THROW(BM_ERR_SERIALFORMAT);
4155  #endif
4156  } // switch
4157  return count_adapter.sum();
4158 }
4159 
4160 
4161 
4162 template<class DEC>
4164  bool clear_target)
4165 {
4166  BM_ASSERT(this->block_type_ == set_block_arrbit ||
4167  this->block_type_ == set_block_bit_1bit);
4168 
4169  gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
4170  if (dst_block)
4171  {
4172  if (clear_target)
4173  bm::bit_block_set(dst_block, 0);
4174 
4175  if (this->block_type_ == set_block_bit_1bit)
4176  {
4177  // len contains idx of 1 bit set
4178  set_bit(dst_block, len);
4179  return 1;
4180  }
4181 
4182  for (unsigned k = 0; k < len; ++k)
4183  {
4184  gap_word_t bit_idx = decoder_.get_16();
4185  bm::set_bit(dst_block, bit_idx);
4186  }
4187  }
4188  else
4189  {
4190  if (this->block_type_ == set_block_bit_1bit)
4191  {
4192  return 1; // nothing to do: len var already consumed 16bits
4193  }
4194  // fwd the decocing stream
4195  decoder_.seek(len * 2);
4196  }
4197  return len;
4198 }
4199 
4200 template<class DEC>
4202 {
4203  BM_ASSERT(this->block_type_ == set_block_bit_1bit);
4204  ++(this->block_idx_);
4205  this->state_ = e_blocks;
4206 
4207  return decoder_.get_16(); // 1bit_idx
4208 }
4209 
4210 template<class DEC>
4211 void
4213 {
4214  BM_ASSERT(this->state_ == e_gap_block ||
4215  this->block_type_ == set_block_bit_1bit);
4216  BM_ASSERT(dst_block);
4217 
4218  this->read_gap_block(this->decoder_,
4219  this->block_type_,
4220  dst_block,
4221  this->gap_head_);
4222 
4223  ++(this->block_idx_);
4224  this->state_ = e_blocks;
4225 }
4226 
4227 
4228 template<class DEC>
4229 unsigned
4231  bm::word_t* tmp_block,
4232  set_operation op)
4233 {
4234  BM_ASSERT(this->state_ == e_bit_block);
4235 
4236  get_bit_func_type bit_func = bit_func_table_[op];
4237  BM_ASSERT(bit_func);
4238  unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
4239  this->state_ = e_blocks;
4240  ++(this->block_idx_);
4241  return cnt;
4242 }
4243 
4244 
4245 
4246 template<class BV>
4249  const unsigned char* buf,
4250  bm::word_t* temp_block,
4251  set_operation op,
4252  bool exit_on_one)
4253 {
4254  ByteOrder bo_current = globals<true>::byte_order();
4255  bm::decoder dec(buf);
4256  unsigned char header_flag = dec.get_8();
4257  ByteOrder bo = bo_current;
4258  if (!(header_flag & BM_HM_NO_BO))
4259  {
4260  bo = (bm::ByteOrder) dec.get_8();
4261  }
4262  blocks_manager_type& bman = bv.get_blocks_manager();
4263  bit_block_guard<blocks_manager_type> bg(bman);
4264  if (temp_block == 0)
4265  {
4266  temp_block = bg.allocate();
4267  }
4268 
4269  if (op == bm::set_ASSIGN)
4270  {
4271  bv.clear(true);
4272  op = bm::set_OR;
4273  }
4274 
4275  if (bo_current == bo)
4276  {
4277  serial_stream_current ss(buf);
4278  return
4280  deserialize(bv, ss, temp_block, op, exit_on_one);
4281  }
4282  switch (bo_current)
4283  {
4284  case BigEndian:
4285  {
4286  serial_stream_be ss(buf);
4287  return
4289  deserialize(bv, ss, temp_block, op, exit_on_one);
4290  }
4291  case LittleEndian:
4292  {
4293  serial_stream_le ss(buf);
4294  return
4296  deserialize(bv, ss, temp_block, op, exit_on_one);
4297  }
4298  default:
4299  BM_ASSERT(0);
4300  #ifndef BM_NO_STL
4301  throw std::logic_error("BM::Platform error unknown endian");
4302  #else
4303  BM_THROW(BM_ERR_SERIALFORMAT);
4304  #endif
4305  };
4306  return 0;
4307 }
4308 
4309 template<class BV, class SerialIterator>
4311  bvector_type& bv,
4312  serial_iterator_type& sit,
4313  unsigned id_count,
4314  bool set_clear)
4315 {
4316  const unsigned win_size = 64;
4317  bm::id_t id_buffer[win_size+1];
4318 
4319  if (set_clear) // set bits
4320  {
4321  for (unsigned i = 0; i <= id_count;)
4322  {
4323  unsigned j;
4324  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
4325  {
4326  id_buffer[j] = sit.get_id();
4327  sit.next();
4328  } // for j
4329  bm::combine_or(bv, id_buffer, id_buffer + j);
4330  } // for i
4331  }
4332  else // clear bits
4333  {
4334  for (unsigned i = 0; i <= id_count;)
4335  {
4336  unsigned j;
4337  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
4338  {
4339  id_buffer[j] = sit.get_id();
4340  sit.next();
4341  } // for j
4342  bm::combine_sub(bv, id_buffer, id_buffer + j);
4343  } // for i
4344  }
4345 }
4346 
4347 template<class BV, class SerialIterator>
4350  blocks_manager_type& bman,
4351  set_operation op,
4352  size_type bv_block_idx)
4353 {
4354  size_type count = 0;
4355  switch (op)
4356  {
4357  case set_OR: case set_SUB: case set_XOR:
4358  case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
4359  case set_COUNT_SUB_BA:
4360  // nothing to do
4361  break;
4362  case set_ASSIGN: case set_AND:
4363  {
4364  block_idx_type nblock_last = (bm::id_max >> bm::set_block_shift);
4365  if (bv_block_idx <= nblock_last)
4366  bman.set_all_zero(bv_block_idx, nblock_last); // clear the tail
4367  }
4368  break;
4369  case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
4370  case set_COUNT_SUB_AB:
4371  // count bits in the target vector
4372  {
4373  unsigned i, j;
4374  bman.get_block_coord(bv_block_idx, i, j);
4375  bm::word_t*** blk_root = bman.top_blocks_root();
4376  unsigned top_size = bman.top_block_size();
4377  for (;i < top_size; ++i)
4378  {
4379  bm::word_t** blk_blk = blk_root[i];
4380  if (blk_blk == 0)
4381  {
4382  bv_block_idx+=bm::set_sub_array_size-j;
4383  j = 0;
4384  continue;
4385  }
4386  // TODO: optimize for FULL top level
4387  for (; j < bm::set_sub_array_size; ++j, ++bv_block_idx)
4388  {
4389  if (blk_blk[j])
4390  count += bman.block_bitcount(blk_blk[j]);
4391  } // for j
4392  j = 0;
4393  } // for i
4394  }
4395  break;
4396  case set_END:
4397  default:
4398  BM_ASSERT(0);
4399  #ifndef BM_NO_STL
4400  throw std::logic_error(err_msg());
4401  #else
4402  BM_THROW(BM_ERR_SERIALFORMAT);
4403  #endif
4404  }
4405  return count;
4406 }
4407 
4408 template<class BV, class SerialIterator>
4411  bvector_type& bv,
4412  serial_iterator_type& sit,
4413  set_operation op)
4414 {
4415  size_type count = 0;
4416  unsigned id_count = sit.get_id_count();
4417  bool set_clear = true;
4418  switch (op)
4419  {
4420  case set_AND:
4421  {
4422  // TODO: use some more optimal algorithm without temp vector
4423  BV bv_tmp(BM_GAP);
4424  load_id_list(bv_tmp, sit, id_count, true);
4425  bv &= bv_tmp;
4426  }
4427  break;
4428  case set_ASSIGN:
4429  BM_ASSERT(0);
4430  //bv.clear(true);
4431  // intentional case fall through here (not a bug)
4432  case set_OR:
4433  set_clear = true;
4434  // fall to SUB
4435  case set_SUB:
4436  load_id_list(bv, sit, id_count, set_clear);
4437  break;
4438  case set_XOR:
4439  for (unsigned i = 0; i < id_count; ++i)
4440  {
4441  bm::id_t id = sit.get_id();
4442  bv[id] ^= true;
4443  sit.next();
4444  } // for
4445  break;
4446  case set_COUNT: case set_COUNT_B:
4447  for (unsigned i = 0; i < id_count; ++i)
4448  {
4449  /* bm::id_t id = */ sit.get_id();
4450  ++count;
4451  sit.next();
4452  } // for
4453  break;
4454  case set_COUNT_A:
4455  return bv.count();
4456  case set_COUNT_AND:
4457  for (size_type i = 0; i < id_count; ++i)
4458  {
4459  bm::id_t id = sit.get_id();
4460  count += bv.get_bit(id);
4461  sit.next();
4462  } // for
4463  break;
4464  case set_COUNT_XOR:
4465  {
4466  // TODO: get rid of the temp vector
4467  BV bv_tmp(BM_GAP);
4468  load_id_list(bv_tmp, sit, id_count, true);
4469  count += bm::count_xor(bv, bv_tmp);
4470  }
4471  break;
4472  case set_COUNT_OR:
4473  {
4474  // TODO: get rid of the temp. vector
4475  BV bv_tmp(BM_GAP);
4476  load_id_list(bv_tmp, sit, id_count, true);
4477  count += bm::count_or(bv, bv_tmp);
4478  }
4479  break;
4480  case set_COUNT_SUB_AB:
4481  {
4482  // TODO: get rid of the temp. vector
4483  BV bv_tmp(bv);
4484  load_id_list(bv_tmp, sit, id_count, false);
4485  count += bv_tmp.count();
4486  }
4487  break;
4488  case set_COUNT_SUB_BA:
4489  {
4490  BV bv_tmp(BM_GAP);
4491  load_id_list(bv_tmp, sit, id_count, true);
4492  count += bm::count_sub(bv_tmp, bv);
4493  }
4494  break;
4495  case set_END:
4496  default:
4497  BM_ASSERT(0);
4498  #ifndef BM_NO_STL
4499  throw std::logic_error(err_msg());
4500  #else
4501  BM_THROW(BM_ERR_SERIALFORMAT);
4502  #endif
4503  } // switch
4504 
4505  return count;
4506 }
4507 
4508 
4509 template<class BV, class SerialIterator>
4512  bvector_type& bv,
4513  serial_iterator_type& sit,
4514  bm::word_t* temp_block,
4515  set_operation op,
4516  bool exit_on_one)
4517 {
4518  BM_ASSERT(temp_block);
4519 
4520  size_type count = 0;
4521  gap_word_t gap_temp_block[bm::gap_equiv_len * 4];
4522  gap_temp_block[0] = 0;
4523 
4524  blocks_manager_type& bman = bv.get_blocks_manager();
4525  if (!bman.is_init())
4526  bman.init_tree();
4527 
4528  if (sit.bv_size() && (sit.bv_size() > bv.size()))
4529  bv.resize(sit.bv_size());
4530 
4531  typename serial_iterator_type::iterator_state state;
4532  state = sit.get_state();
4533  if (state == serial_iterator_type::e_list_ids)
4534  {
4535  count = process_id_list(bv, sit, op);
4536  return count;
4537  }
4538 
4539  size_type bv_block_idx = 0;
4540 
4541  for (;1;)
4542  {
4543  bm::set_operation sop = op;
4544  if (sit.is_eof()) // argument stream ended
4545  {
4546  count += finalize_target_vector(bman, op, bv_block_idx);
4547  return count;
4548  }
4549 
4550  state = sit.state();
4551  switch (state)
4552  {
4553  case serial_iterator_type::e_blocks:
4554  sit.next();
4555  continue;
4556  case serial_iterator_type::e_bit_block:
4557  {
4558  BM_ASSERT(sit.block_idx() == bv_block_idx);
4559  unsigned i0, j0;
4560  bman.get_block_coord(bv_block_idx, i0, j0);
4561  bm::word_t* blk = bman.get_block_ptr(i0, j0);
4562  if (!blk)
4563  {
4564  switch (op)
4565  {
4566  case set_AND: case set_SUB: case set_COUNT_AND:
4567  case set_COUNT_SUB_AB: case set_COUNT_A:
4568  // one arg is 0, so the operation gives us zero
4569  // all we need to do is to seek the input stream
4570  sop = set_ASSIGN;
4571  break;
4572 
4573  case set_OR: case set_XOR: case set_ASSIGN:
4574  blk = bman.make_bit_block(bv_block_idx);
4575  break;
4576 
4577  case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
4578  case set_COUNT_SUB_BA: case set_COUNT_B:
4579  // first arg is not required (should work as is)
4580  sop = set_COUNT;
4581  break;
4582 
4583  case set_END:
4584  default:
4585  BM_ASSERT(0);
4586  #ifndef BM_NO_STL
4587  throw std::logic_error(err_msg());
4588  #else
4589  BM_THROW(BM_ERR_SERIALFORMAT);
4590  #endif
4591  }
4592  }
4593  else // block exists
4594  {
4595  int is_gap = BM_IS_GAP(blk);
4596  if (is_gap || IS_FULL_BLOCK(blk))
4597  {
4598  if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
4599  {
4600  blk = FULL_BLOCK_REAL_ADDR;
4601  }
4602  else
4603  {
4604  // TODO: make sure const operations do not
4605  // deoptimize GAP blocks
4606  blk = bman.deoptimize_block(bv_block_idx);
4607  }
4608  }
4609  }
4610 
4611  // 2 bit-blocks recombination
4612  unsigned c = sit.get_bit_block(blk, temp_block, sop);
4613  count += c;
4614  if (exit_on_one && count) // early exit
4615  return count;
4616 
4617  }
4618  break;
4619 
4620  case serial_iterator_type::e_zero_blocks:
4621  {
4622  BM_ASSERT(bv_block_idx == sit.block_idx());
4623 
4624  switch (op)
4625  {
4626  case set_ASSIGN: // nothing to do to rewind fwd
4627  case set_SUB: case set_COUNT_AND: case set_OR:
4628  case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
4629  bv_block_idx = sit.skip_mono_blocks();
4630  continue;
4631 
4632  case set_AND: // clear the range
4633  {
4634  size_type nb_start = bv_block_idx;
4635  bv_block_idx = sit.skip_mono_blocks();
4636  bman.set_all_zero(nb_start, bv_block_idx-1);
4637  }
4638  continue;
4639  case set_END:
4640  default:
4641  break;
4642  } // switch op
4643 
4644 
4645  unsigned i0, j0;
4646  bman.get_block_coord(bv_block_idx, i0, j0);
4647  bm::word_t* blk = bman.get_block_ptr(i0, j0);
4648 
4649  sit.next();
4650 
4651  if (blk)
4652  {
4653  switch (op)
4654  {
4655  case set_AND: case set_ASSIGN:
4656  // the result is 0
4657  //blk =
4658  bman.zero_block(bv_block_idx);
4659  break;
4660 
4661  case set_SUB: case set_COUNT_AND: case set_OR:
4662  case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
4663  // nothing to do
4664  break;
4665 
4666  case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
4667  case set_COUNT: case set_COUNT_XOR:
4668  // valid bit block recombined with 0 block
4669  // results with same block data
4670  // all we need is to bitcount bv block
4671  {
4672  count += blk ? bman.block_bitcount(blk) : 0;
4673  if (exit_on_one && count) // early exit
4674  return count;
4675  }
4676  break;
4677  case set_END:
4678  default:
4679  BM_ASSERT(0);
4680  } // switch op
4681  } // if blk
4682  }
4683  break;
4684 
4685  case serial_iterator_type::e_one_blocks:
4686  {
4687  BM_ASSERT(bv_block_idx == sit.block_idx());
4688  unsigned i0, j0;
4689  bman.get_block_coord(bv_block_idx, i0, j0);
4690  bm::word_t* blk = bman.get_block_ptr(i0, j0);
4691 
4692  sit.next();
4693 
4694  switch (op)
4695  {
4696  case set_OR: case set_ASSIGN:
4697  bman.set_block_all_set(bv_block_idx);
4698  break;
4699  case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
4700  // block becomes all set
4701  count += bm::bits_in_block;
4702  break;
4703  case set_SUB:
4704  //blk =
4705  bman.zero_block(bv_block_idx);
4706  break;
4707  case set_COUNT_SUB_AB: case set_AND:
4708  // nothing to do
4709  break;
4710  case set_COUNT_AND: case set_COUNT_A:
4711  count += blk ? bman.block_bitcount(blk) : 0;
4712  break;
4713  default:
4714  if (blk)
4715  {
4716  switch (op)
4717  {
4718  case set_XOR:
4719  blk = bman.deoptimize_block(bv_block_idx);
4721  break;
4722  case set_COUNT_XOR:
4723  {
4724  count +=
4726  blk,
4728  bm::COUNT_XOR);
4729  }
4730  break;
4731  case set_COUNT_SUB_BA:
4732  {
4733  count +=
4735  blk,
4738  }
4739  break;
4740  default:
4741  BM_ASSERT(0);
4742  } // switch
4743  }
4744  else // blk == 0
4745  {
4746  switch (op)
4747  {
4748  case set_XOR:
4749  // 0 XOR 1 = 1
4750  bman.set_block_all_set(bv_block_idx);
4751  break;
4752  case set_COUNT_XOR:
4753  count += bm::bits_in_block;
4754  break;
4755  case set_COUNT_SUB_BA:
4756  // 1 - 0 = 1
4757  count += bm::bits_in_block;
4758  break;
4759  default:
4760  break;
4761  } // switch
4762  } // else
4763  } // switch
4764  if (exit_on_one && count) // early exit
4765  return count;
4766  }
4767  break;
4768 
4769  case serial_iterator_type::e_gap_block:
4770  {
4771  BM_ASSERT(bv_block_idx == sit.block_idx());
4772  unsigned i0, j0;
4773  bman.get_block_coord(bv_block_idx, i0, j0);
4774  const bm::word_t* blk = bman.get_block(i0, j0);
4775 
4776  sit.get_gap_block(gap_temp_block);
4777 
4778  unsigned len = gap_length(gap_temp_block);
4779  int level = gap_calc_level(len, bman.glen());
4780  --len;
4781 
4782  bool const_op = is_const_set_operation(op);
4783  if (const_op)
4784  {
4785  distance_metric metric = operation2metric(op);
4786  bm::word_t* gptr = (bm::word_t*)gap_temp_block;
4787  BMSET_PTRGAP(gptr);
4788  unsigned c =
4790  blk,
4791  gptr,
4792  metric);
4793  count += c;
4794  if (exit_on_one && count) // early exit
4795  return count;
4796 
4797  }
4798  else // non-const set operation
4799  {
4800  if ((sop == set_ASSIGN) && blk) // target block override
4801  {
4802  bman.zero_block(bv_block_idx);
4803  sop = set_OR;
4804  }
4805  if (blk == 0) // target block not found
4806  {
4807  switch (sop)
4808  {
4809  case set_AND: case set_SUB:
4810  break;
4811  case set_OR: case set_XOR: case set_ASSIGN:
4812  bman.set_gap_block(
4813  bv_block_idx, gap_temp_block, level);
4814  break;
4815 
4816  default:
4817  BM_ASSERT(0);
4818  } // switch
4819  }
4820  else // target block exists
4821  {
4822  bm::operation bop = bm::setop2op(op);
4823  if (level == -1) // too big to GAP
4824  {
4825  gap_convert_to_bitset(temp_block, gap_temp_block);
4826  bv.combine_operation_with_block(bv_block_idx,
4827  temp_block,
4828  0, // BIT
4829  bop);
4830  }
4831  else // GAP fits
4832  {
4833  set_gap_level(gap_temp_block, level);
4834  bv.combine_operation_with_block(
4835  bv_block_idx,
4836  (bm::word_t*)gap_temp_block,
4837  1, // GAP
4838  bop);
4839  }
4840  }
4841  if (exit_on_one)
4842  {
4843  bman.get_block_coord(bv_block_idx, i0, j0);
4844  blk = bman.get_block_ptr(i0, j0);
4845  if (blk)
4846  {
4847  bool z = bm::check_block_zero(blk, true/*deep check*/);
4848  if (!z)
4849  return 1;
4850  }
4851  } // if exit_on_one
4852 
4853  } // if else non-const op
4854  }
4855  break;
4856 
4857  default:
4858  BM_ASSERT(0);
4859  #ifndef BM_NO_STL
4860  throw std::logic_error(err_msg());
4861  #else
4862  BM_THROW(BM_ERR_SERIALFORMAT);
4863  #endif
4864  } // switch
4865 
4866  ++bv_block_idx;
4867  BM_ASSERT(bv_block_idx);
4868 
4869  } // for (deserialization)
4870 
4871  return count;
4872 }
4873 
4874 
4875 
4876 
4877 } // namespace bm
4878 
4879 #include "bmundef.h"
4880 
4881 #ifdef _MSC_VER
4882 #pragma warning( pop )
4883 #endif
4884 
4885 
4886 #endif
bm::id_t bit_block_count(const bm::word_t *block)
Bitcount for bit block.
Definition: bmfunc.h:3972
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Bitblock copy operation.
Definition: bmfunc.h:5067
void encode_header(const BV &bv, bm::encoder &enc)
Encode serialization header information.
Definition: bmserial.h:809
void gap_add_to_bitset(unsigned *dest, const T *pcurr, unsigned len)
Adds(OR) GAP block to bitblock.
Definition: bmfunc.h:3098
void bit_invert(T *start)
Definition: bmfunc.h:4736
block_arridx_type gap_temp_block_
Definition: bmserial.h:413
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control)
Encode BIT block with repeatable runs of zeroes.
Definition: bmserial.h:1250
void read_bic_arr(decoder_type &decoder, bm::word_t *blk)
Read binary interpolated list into a bit-set.
Definition: bmserial.h:1998
bm::heap_vector< bm::gap_word_t, allocator_type > block_arridx_type
Definition: bmserial.h:409
void put_64(bm::id64_t w)
Puts 64 bits word into encoding buffer.
Definition: encoding.h:517
void put_8(unsigned char c)
Puts one character into the encoding buffer.
Definition: encoding.h:407
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
Definition: bmfunc.h:195
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3126
Bit COUNT SUB AB functor.
Definition: bmfunc.h:7353
unsigned id_cnt_
Id counter for id list.
Definition: bmserial.h:601
bvector_type::block_idx_type block_idx_type
Definition: bmserial.h:84
one or more all-1 bit blocks
Definition: bmserial.h:525
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:6403
static size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Definition: bmserial.h:4511
const unsigned char set_block_gap
Plain GAP block.
Definition: bmserial.h:698
const unsigned set_block_size
Definition: bmconst.h:54
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Bitvector deserialization from memory.
Definition: bmserial.h:1900
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3683
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
Definition: bmserial.h:1519
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc)
encode bit-block as interpolated gap block
Definition: bmserial.h:1467
Base deserialization class.
Definition: bmserial.h:328
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted)
Definition: bmserial.h:714
const unsigned set_sub_array_size
Definition: bmconst.h:91
serialization_flags
Bit mask flags for serialization algorithm.
Definition: bmserial.h:1792
const unsigned char set_block_32zero
Up to 4G zero blocks.
Definition: bmserial.h:691
const unsigned char set_block_32one
UP to 4G all-set blocks.
Definition: bmserial.h:692
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized an...
Definition: bmserial.h:1381
unsigned gap_add_value(T *buf, unsigned pos)
Add new value to the end of GAP buffer.
Definition: bmfunc.h:2484
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
Definition: bmserial.h:2124
void next()
get next block
Definition: bmserial.h:2843
unsigned char * position_type
Definition: encoding.h:52
id list stored
Definition: bmserial.h:672
#define IS_VALID_ADDR(addr)
Definition: bmdef.h:139
deseriaizer_base< DEC >::decoder_type decoder_type
Definition: bmserial.h:478
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:4069
operation
Bit operations.
Definition: bmconst.h:176
unsigned get_compression_level() const
Get compression level (0-5), Default 5 (recommended) 0 - take as is 1, 2 - apply light weight RLE/GAP...
Definition: bmserial.h:125
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false)
Convert gap block into array of ints corresponding to 1 bits.
Definition: bmfunc.h:3913
void gap_set_all(T *buf, unsigned set_max, unsigned value)
Sets all bits to 0 or 1 (GAP)
Definition: bmfunc.h:3583
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc)
encode bit-block as interpolated bit block of gaps
Definition: bmserial.h:1476
SerialIterator serial_iterator_type
Definition: bmserial.h:431
unsigned long long int id64_t
Definition: bmconst.h:34
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv)
Definition: bm.h:1215
bvector_type::blocks_manager_type blocks_manager_type
Definition: bmserial.h:82
void reset_models()
Definition: bmserial.h:288
bm::id_t size_type
Definition: bm.h:117
const unsigned char set_block_arr_bienc
Interpolated block as int array.
Definition: bmserial.h:716
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
Definition: bmserial.h:725
save no byte-order info (save some space)
Definition: bmserial.h:1793
save no GAP info (save some space)
Definition: bmserial.h:1794
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block)
Determine best representation for GAP block based on current set compression level.
Definition: bmserial.h:1152
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3312
void put_32(bm::word_t w)
Puts 32 bits word into encoding buffer.
Definition: encoding.h:499
const unsigned char set_block_bit
Plain bit block.
Definition: bmserial.h:695
Definition: bm.h:76
unsigned bit_block_calc_change(const bm::word_t *block)
Definition: bmfunc.h:4173
bm::id_t last_id_
Last id from the id list.
Definition: bmserial.h:602
bvector_type::statistics statistics_type
Definition: bmserial.h:83
const unsigned gap_equiv_len
Definition: bmconst.h:80
Deserializer for bit-vector.
Definition: bmserial.h:379
unsigned bit_block_find(const bm::word_t *block, unsigned nbit, unsigned *pos)
Searches for the next 1 bit in the BIT block.
Definition: bmfunc.h:6631
size_t max_serialize_mem
estimated maximum memory for serialization
Definition: bmfunc.h:60
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
Definition: bmfunc.h:3510
pre-processor un-defines to avoid global space pollution (internal)
const unsigned bie_cut_off
Definition: bmserial.h:678
const unsigned char set_block_bit_digest0
H-compression with digest mask.
Definition: bmserial.h:719
Bit COUNT OR functor.
Definition: bmfunc.h:7341
void for_each_dgap(const T *gap_buf, Func &func)
Definition: bmfunc.h:1988
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Definition: bmfunc.h:7183
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
Definition: bmserial.h:897
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted)
Encode GAP block as an array with binary interpolated coder.
Definition: bmserial.h:979
bm::id_t get_id() const
Get last id from the id list.
Definition: bmserial.h:539
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
Definition: bmserial.h:855
const unsigned char set_block_bit_interval
Interval block.
Definition: bmserial.h:701
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *buf)
Returs GAP block length.
Definition: bmfunc.h:1067
Bit-vector serialization class.
Definition: bmserial.h:77
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:5915
GAP compression is ON.
Definition: bmconst.h:145
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
Definition: bmserial.h:697
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation and calculates bitcount of the result.
Definition: bmfunc.h:5845
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1316
void set_pos(unsigned char *buf_pos)
Set current memory stream position.
Definition: encoding.h:488
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
Definition: bmserial.h:718
#define BM_SET_ONE_BLOCKS(x)
Definition: bmserial.h:1597
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3518
(A ^ B).count()
Definition: bmalgo_impl.h:60
bool check_block_one(const bm::word_t *blk, bool deep_scan)
Checks if block has only 1 bits.
Definition: bmfunc.h:7037
bm::word_t sum() const
Get accumulated sum.
Definition: bmfunc.h:7220
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition: bmutil.h:323
one or more zero bit blocks
Definition: bmserial.h:524
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:239
const unsigned id_max
Definition: bmconst.h:105
Statistical information about bitset&#39;s memory allocation details.
Definition: bm.h:121
(B - A).count()
Definition: bmalgo_impl.h:63
void flush()
Flush the incomplete 32-bit accumulator word.
Definition: encoding.h:213
Class for decoding data from memory buffer.
Definition: encoding.h:112
block_idx_type block_idx() const
Get current block index.
Definition: bmserial.h:542
#define BMSET_PTRGAP(ptr)
Definition: bmdef.h:167
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size)
Inspects block for full zero words.
Definition: bmfunc.h:6571
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition: bmserial.h:1840
unsigned get_id_count() const
Number of ids in the inverted list (valid for e_list_ids)
Definition: bmserial.h:536
get_bit_func_type bit_func_table_[bm::set_END]
Definition: bmserial.h:595
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
Definition: bmserial.h:707
const unsigned char set_block_gap_bienc
Interpolated GAP block.
Definition: bmserial.h:712
blocks_manager_type::block_idx_type block_idx_type
Definition: bm.h:113
deseriaizer_base< DEC > parent_type
Definition: bmserial.h:382
iterator_state state_
Definition: bmserial.h:600
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted)
Encode bit-block as an array of bits.
Definition: bmserial.h:1397
Alloc allocator_type
Definition: bm.h:110
block_idx_type bv_size() const
serialized bitvector size
Definition: bmserial.h:490
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3027
void gap_invert(T *buf)
Inverts all bits in the GAP buffer.
Definition: bmfunc.h:3654
block_idx_type skip_mono_blocks()
skip all zero or all-one blocks
Definition: bmserial.h:2990
unsigned int word_t
Definition: bmconst.h:38
const unsigned char set_block_arrbit
List of bits ON.
Definition: bmserial.h:700
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
Definition: bmfunc.h:2790
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition: bmserial.h:1608
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:140
const unsigned char set_block_azero
All other blocks zero.
Definition: bmserial.h:693
void get_inv_arr(bm::word_t *block)
Definition: bmserial.h:3005
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
Definition: bmserial.h:708
bm::short_t get_16()
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:594
unsigned char find_bit_best_encoding(const bm::word_t *block)
Determine best representation for a bit-block.
Definition: bmserial.h:1033
Algorithms for bvector<>
gap_word_t glevels_[bm::gap_levels]
GAP levels.
Definition: bmserial.h:603
void byte_order_serialization(bool value)
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:803
unsigned get_block_type() const
Get current block type.
Definition: bmserial.h:588
bvector_type::allocator_type allocator_type
Definition: bmserial.h:384
allocator_type alloc_
Definition: bmserial.h:415
block_idx_type bv_size_
Definition: bmserial.h:599
iterator_state
iterator is a state machine, this enum encodes its key value
Definition: bmserial.h:519
block_idx_type mono_block_cnt_
number of 0 or 1 blocks
Definition: bmserial.h:607
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w)
Definition: bmutil.h:313
const unsigned gap_max_buff_len
Definition: bmconst.h:78
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3767
Bit manipulation primitives (internal)
unsigned short short_t
Definition: bmconst.h:39
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Definition: bmserial.h:706
Encoding utilities for serialization (internal)
void reset_compression_stats()
Reset all accumulated compression statistics.
Definition: bmserial.h:782
Bit-block sum adapter, takes values and sums it /internal.
Definition: bmfunc.h:7213
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value)
Bitblock memset operation.
Definition: bmfunc.h:3492
static ByteOrder byte_order()
Definition: bmconst.h:465
const unsigned gap_levels
Definition: bmconst.h:83
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x)
Definition: bmfunc.h:224
bvector_type::size_type size_type
Definition: bmserial.h:430
size_t serialize(BV &bv, unsigned char *buf, unsigned serialization_flags=0)
Saves bitvector into memory. Allocates temporary memory block for bvector.
Definition: bmserial.h:1874
Internal structure.
Definition: bmconst.h:439
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
Definition: bmserial.h:1430
const unsigned set_compression_default
Default compression level.
Definition: bmserial.h:62
unsigned short gap_word_t
Definition: bmconst.h:76
Iterator to walk forward the serialized stream.
Definition: bmserial.h:426
unsigned gap_set_array(T *buf, const T *arr, unsigned len)
Convert array to GAP buffer.
Definition: bmfunc.h:2670
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3200
blocks_manager< Alloc > blocks_manager_type
Definition: bm.h:112
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3601
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:165
int gap_calc_level(unsigned len, const T *glevel_len)
Calculates GAP block capacity level.
Definition: bmfunc.h:3695
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo_impl.h:1018
#define BMPTR_SETBIT0(ptr)
Definition: bmdef.h:154
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0)
Encode bit-block using digest (hierarchical compression)
Definition: bmserial.h:1302
deseriaizer_base< DEC >::decoder_type decoder_type
Definition: bmserial.h:387
bm::id64_t get_64()
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:628
const unsigned char set_block_1zero
One all-zero block.
Definition: bmserial.h:685
static size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:4248
BV::blocks_manager_type blocks_manager_type
Definition: bmserial.h:397
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true)
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
Definition: bmserial.h:4163
Bit SUB BA functor.
Definition: bmfunc.h:7364
void set_gap_level(T *buf, int level)
Sets GAP block capacity level.
Definition: bmfunc.h:3673
const unsigned char set_block_8zero
Up to 256 zero blocks.
Definition: bmserial.h:687
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
Definition: bmserial.h:705
void put_16(bm::short_t s)
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:417
void gap_length_serialization(bool value)
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:797
#define FULL_BLOCK_REAL_ADDR
Definition: bmdef.h:135
Definitions(internal)
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk)
Read inverted binary interpolated list into a bit-set.
Definition: bmserial.h:2020
const unsigned char set_block_arrbit_inv
List of bits OFF.
Definition: bmserial.h:715
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len)
Definition: bmfunc.h:3841
set_operation
Codes of set operations.
Definition: bmconst.h:153
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation and calculates bitcount of the result.
Definition: bmfunc.h:6513
bm::gap_word_t * id_array_
ptr to idx array for temp decode use
Definition: bmserial.h:371
BV::size_type size_type
Definition: bmserial.h:385
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition: bmfunc.h:6182
void add_model(unsigned char mod, unsigned score)
Definition: bmserial.h:1025
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:5706
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count)
Encode 8-bit prefix + an array.
Definition: encoding.h:390
const unsigned bits_in_block
Definition: bmconst.h:110
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false)
Encode GAP block as delta-array with Elias Gamma coder.
Definition: bmserial.h:935
const unsigned set_compression_max
Maximum supported compression level.
Definition: bmserial.h:61
void read_digest0_block(decoder_type &decoder, bm::word_t *blk)
Read digest0-type bit-block.
Definition: bmserial.h:2057
bvector_type::size_type size_type
Definition: bmserial.h:626
unsigned char get_8()
Reads character from the decoding buffer.
Definition: encoding.h:87
const unsigned gap_max_bits
Definition: bmconst.h:79
#define BM_IS_GAP(ptr)
Definition: bmdef.h:168
bvector_type::size_type size_type
Definition: bmserial.h:85
Utilities for bit transposition (internal) (experimental!)
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count)
Encode 8-bit prefix + an array.
Definition: encoding.h:379
bvector_type::block_idx_type block_idx_type
Definition: bmserial.h:386
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:73
bool is_eof() const
Returns true if end of bit-stream reached.
Definition: bmserial.h:493
gap_word_t * block_idx_arr_
Definition: bmserial.h:610
static const char * err_msg()
Definition: bmserial.h:367
bm::operation setop2op(bm::set_operation op)
Convert set operation to operation.
Definition: bmfunc.h:793
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size)
Definition: bmfunc.h:7263
unsigned char * get_pos() const
Get current memory stream position.
Definition: encoding.h:480
serialization_header_mask
Definition: bmserial.h:669
iterator_state state() const
Returns iterator internal state.
Definition: bmserial.h:532
BV::size_type count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo_impl.h:1086
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
Definition: bmserial.h:1449
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Definition: bmserial.h:696
#define BMGAP_PTR(ptr)
Definition: bmdef.h:166
unsigned(serial_stream_iterator< DEC >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
Definition: bmserial.h:549
size_t size() const
Returns size of the current encoding stream.
Definition: encoding.h:472
const unsigned char set_block_16one
UP to 65536 all-set blocks.
Definition: bmserial.h:690
const unsigned char set_block_end
End of serialization.
Definition: bmserial.h:684
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:1786
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:576
void read_bic_gap(decoder_type &decoder, bm::word_t *blk)
Read binary interpolated gap blocks into a bitset.
Definition: bmserial.h:2029
const unsigned char set_block_8one
Up to 256 all-set blocks.
Definition: bmserial.h:688
bvector_type::allocator_type allocator_type
Definition: bmserial.h:81
unsigned int id_t
Definition: bmconst.h:37
BV::size_type count_sub(const BV &bv1, const BV &bv2)
Computes bitcount of SUB operation of two bitsets.
Definition: bmalgo_impl.h:1052
resized vector
Definition: bmserial.h:671
Functor for Elias Gamma encoding.
Definition: encoding.h:313
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
Definition: bmserial.h:717
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:5107
const unsigned char set_block_16zero
Up to 65536 zero blocks.
Definition: bmserial.h:689
bm::word_t * temp_block_
Definition: bmserial.h:414
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:125
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0)
Convert bit block into an array of ints corresponding to 1 bits.
Definition: bmfunc.h:6978
#define BM_ASSERT
Definition: bmdef.h:117
no byte-order
Definition: bmserial.h:673
iterator_state get_state() const
Definition: bmserial.h:534
const unsigned set_total_blocks
Definition: bmconst.h:107
const unsigned char set_block_1one
One block all-set (1111...)
Definition: bmserial.h:686
void gamma(unsigned value)
Elias Gamma encode the specified value.
Definition: encoding.h:1006
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3416
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:57
Serialization stream iterator.
Definition: bmserial.h:475
unsigned block_type_
current block type
Definition: bmserial.h:605
bm::id64_t calc_block_digest0(const bm::word_t *const block)
Compute digest for 64 non-zero areas.
Definition: bmfunc.h:702
const unsigned char set_block_aone
All other blocks one.
Definition: bmserial.h:694
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block)
Definition: bmserial.h:2433
decoder_type & decoder()
Get low level access to the decoder (use carefully)
Definition: bmserial.h:514
const unsigned char set_block_gapbit
GAP compressed bitblock.
Definition: bmserial.h:699
block_arridx_type bit_idx_arr_
Definition: bmserial.h:412
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3867
const unsigned set_block_shift
Definition: bmconst.h:55
strategy
Block allocation strategies.
Definition: bmconst.h:142
unsigned dec_size() const
Return current decoder size.
Definition: bmserial.h:511
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:784
const unsigned char set_block_64one
lots of all-set blocks
Definition: bmserial.h:710
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:3968
size_t size() const
Returns size of the current decoding stream.
Definition: encoding.h:90
Memory encoding.
Definition: encoding.h:49
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc)
Definition: bmserial.h:1421
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:5755
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi)
Binary Interpolative array decode.
Definition: encoding.h:190
64-bit vector
Definition: bmserial.h:675
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
Definition: bmserial.h:4212
void set_compression_level(unsigned clevel)
Set compression level.
Definition: bmserial.h:790
block_idx_type block_idx_
current block index
Definition: bmserial.h:606
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
Definition: bmserial.h:1573
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
Definition: bmserial.h:713
const size_type * get_compression_stat() const
Return serialization counter vector.
Definition: bmserial.h:183
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
Definition: bmserial.h:704
ByteOrder
Byte orders recognized by the library.
Definition: bmconst.h:429
byte_buffer< allocator_type > buffer
Definition: bmserial.h:87
Bit manipulation primitives (internal)
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Definition: bmserial.h:4230
bool check_block_zero(const bm::word_t *blk, bool deep_scan)
Checks all conditions and returns true if block consists of only 0 bits.
Definition: bmfunc.h:7013
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:622
const unsigned char set_block_arrgap
List of bits ON (GAP block)
Definition: bmserial.h:702
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
Definition: bmserial.h:703
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
Definition: bmserial.h:1194
const unsigned char set_block_64zero
lots of zero blocks
Definition: bmserial.h:709
#define BMRESTRICT
Definition: bmdef.h:180
no GAP levels
Definition: bmserial.h:674
const unsigned gap_max_bits_cmrz
Definition: bmconst.h:82
bm::word_t get_32()
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:610
const unsigned set_block_digest_wave_size
Definition: bmconst.h:65
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
Definition: bmfunc.h:2803
Bit COUNT XOR functor.
Definition: bmfunc.h:7330