BitMagic-C++
bmserial.h
Go to the documentation of this file.
1 #ifndef BMSERIAL__H__INCLUDED__
2 #define BMSERIAL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmserial.h
22  \brief Serialization / compression of bvector<>. Set operations on compressed BLOBs.
23 */
24 
25 /*!
26  \defgroup bvserial bvector<> serialization
27  Serialization for bvector<> container
28  \ingroup bvector
29 */
30 
31 #ifndef BM__H__INCLUDED__
32 #define BM__H__INCLUDED__
33 
34 #include "bm.h"
35 
36 #endif
37 
38 #ifdef _MSC_VER
39 #pragma warning( push )
40 #pragma warning( disable : 4311 4312 4127)
41 #endif
42 
43 
44 
45 #include "encoding.h"
46 #include "bmfunc.h"
47 #include "bmtrans.h"
48 #include "bmalgo_impl.h"
49 #include "bmutil.h"
50 #include "bmbuffer.h"
51 #include "bmdef.h"
52 
53 //#include "bmgamma.h"
54 
55 
56 namespace bm
57 {
58 
59 
60 // Serialization stream markup constants
61 
62 
63 const unsigned char set_block_end = 0; //!< End of serialization
64 const unsigned char set_block_1zero = 1; //!< One all-zero block
65 const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
66 const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
67 const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
68 const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
69 const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
70 const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
71 const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
72 const unsigned char set_block_azero = 9; //!< All other blocks zero
73 const unsigned char set_block_aone = 10; //!< All other blocks one
74 const unsigned char set_block_bit = 11; //!< Plain bit block
75 const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
76 const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
77 const unsigned char set_block_gap = 14; //!< Plain GAP block
78 const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
79 const unsigned char set_block_arrbit = 16; //!< List of bits ON
80 const unsigned char set_block_bit_interval = 17; //!< Interval block
81 const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
82 const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
83 const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
84 const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
85 const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
86 const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
87 const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
88 
89 
90 /// \internal
91 /// \ingroup bvserial
94  BM_HM_RESIZE = (1 << 1), ///< resized vector
95  BM_HM_ID_LIST = (1 << 2), ///< id list stored
96  BM_HM_NO_BO = (1 << 3), ///< no byte-order
97  BM_HM_NO_GAPL = (1 << 4) ///< no GAP levels
98 };
99 
100 
101 
102 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \
103  if (nb == 1) \
104  enc.put_8(B_1ZERO); \
105  else if (nb < 256) \
106  { \
107  enc.put_8(B_8ZERO); \
108  enc.put_8((unsigned char)nb); \
109  } \
110  else if (nb < 65536) \
111  { \
112  enc.put_8(B_16ZERO); \
113  enc.put_16((unsigned short)nb); \
114  } \
115  else \
116  {\
117  enc.put_8(B_32ZERO); \
118  enc.put_32(nb); \
119  }
120 
121 
122 #define BM_SET_ONE_BLOCKS(x) \
123  {\
124  unsigned end_block = i + x; \
125  for (;i < end_block; ++i) \
126  bman.set_block_all_set(i); \
127  } \
128  --i
129 
130 
131 
132 
133 /**
134  Bit-vector serialization class.
135 
136  Class designed to convert sparse bit-vectors into a single block of memory
137  ready for file or database storage or network transfer.
138 
139  Reuse of this class for multiple serializations may offer some performance
140  advantage.
141 
142  @ingroup bvserial
143 */
144 template<class BV>
146 {
147 public:
148  typedef BV bvector_type;
152 
153  typedef byte_buffer<allocator_type> buffer;
154 public:
155  /**
156  Construct serializer
157 
158  \param alloc - memory allocator
159  \param temp_block - temporary block for various operations
160  (if NULL it will be allocated and managed by serializer class)
161  Temp block is used as a scratch memory during serialization,
162  use of external temp block allows to avoid unnecessary re-allocations.
163 
164  Temp block attached is not owned by the class and NOT deallocated on
165  destruction.
166  */
167  serializer(const allocator_type& alloc = allocator_type(),
168  bm::word_t* temp_block = 0);
169 
170  serializer(bm::word_t* temp_block);
171 
172  ~serializer();
173 
174  /**
175  Set compression level. Higher compression takes more time to process.
176  @param clevel - compression level (0-4)
177  */
178  void set_compression_level(unsigned clevel);
179 
180  /**
181  Get compression level
182  */
183  unsigned get_compression_level() const;
184 
185  /**
186  Bitvector serilization into memory block
187 
188  @param bv - input bitvector
189  @param buf - out buffer (pre-allocated)
190  No range checking is done in this method.
191  It is responsibility of caller to allocate sufficient
192  amount of memory using information from calc_stat() function.
193 
194  @param buf_size - size of the output buffer
195 
196  @return Size of serialization block.
197  @sa calc_stat
198  */
199  unsigned serialize(const BV& bv,
200  unsigned char* buf, size_t buf_size);
201 
202  /**
203  Bitvector serilization into buffer object (it gets resized automatically)
204 
205  @param bv - input bitvector
206  @param buf - output buffer object
207  @param bv_stat - input (optional) bit-vector statistics object
208  if NULL, serizlize will compute statistics
209  */
210  void serialize(const BV& bv, typename serializer<BV>::buffer& buf, const statistics_type* bv_stat);
211 
212 
213  /**
214  Set GAP length serialization (serializes GAP levels of the original vector)
215 
216  @param value - when TRUE serialized vector includes GAP levels parameters
217  */
218  void gap_length_serialization(bool value);
219  /**
220  Set byte-order serialization (for cross platform compatibility)
221 
222  @param value - TRUE serialization format includes byte-order marker
223  */
224  void byte_order_serialization(bool value);
225 
226 protected:
227  /**
228  Encode serialization header information
229  */
230  void encode_header(const BV& bv, bm::encoder& enc);
231 
232  /**
233  Encode GAP block
234  */
235  void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
236 
237  /**
238  Encode GAP block with Elias Gamma coder
239  */
240  void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
241 
242  /**
243  Encode GAP block as delta-array with Elias Gamma coder
244  */
245  void gamma_gap_array(const bm::gap_word_t* gap_block,
246  unsigned arr_len,
247  bm::encoder& enc,
248  bool inverted = false);
249 
250  /**
251  Encode BIT block with repeatable runs of zeroes
252  */
253  void encode_bit_interval(const bm::word_t* blk,
254  bm::encoder& enc,
255  unsigned size_control);
256 
257 private:
258  serializer(const serializer&);
259  serializer& operator=(const serializer&);
260 
261 private:
262 
265 
266 private:
267  allocator_type alloc_;
268  bool gap_serial_;
269  bool byte_order_serial_;
270  bm::word_t* temp_block_;
271  unsigned compression_level_;
272  bool own_temp_block_;
273 };
274 
275 /**
276  Base deserialization class
277  \ingroup bvserial
278 */
279 template<class DEC>
281 {
282 public:
283  typedef DEC decoder_type;
284 protected:
286 
287  /// Read GAP block from the stream
288  void read_gap_block(decoder_type& decoder,
289  unsigned block_type,
290  bm::gap_word_t* dst_block,
291  bm::gap_word_t& gap_head);
292 
293  /// Read list of bit ids
294  ///
295  /// @return number of ids
296  unsigned read_id_list(decoder_type& decoder,
297  unsigned block_type,
298  bm::gap_word_t* dst_arr);
299 
300 protected:
302 };
303 
304 /**
305  Deserializer for bit-vector
306  \ingroup bvserial
307 */
308 template<class BV, class DEC>
309 class deserializer : protected deseriaizer_base<DEC>
310 {
311 public:
312  typedef BV bvector_type;
314 public:
315  deserializer() : temp_block_(0) {}
316 
317  unsigned deserialize(bvector_type& bv,
318  const unsigned char* buf,
319  bm::word_t* temp_block);
320 protected:
321  typedef typename BV::blocks_manager_type blocks_manager_type;
322  typedef typename BV::allocator_type allocator_type;
323 
324 protected:
325  void deserialize_gap(unsigned char btype, decoder_type& dec,
326  bvector_type& bv, blocks_manager_type& bman,
327  unsigned i,
328  bm::word_t* blk);
329 protected:
330  bm::gap_word_t gap_temp_block_[bm::gap_equiv_len * 4];
332 };
333 
334 
335 /**
336  Iterator to walk forward the serialized stream.
337 
338  \internal
339  \ingroup bvserial
340 */
341 template<class BV, class SerialIterator>
343 {
344 public:
345  typedef BV bvector_type;
346  typedef SerialIterator serial_iterator_type;
347 public:
348  static
349  unsigned deserialize(bvector_type& bv,
350  serial_iterator_type& sit,
351  bm::word_t* temp_block,
353  bool exit_on_one = false);
354 
355  /// experimental 3 way deserialization
356  /// target = mask %OPERATION% BLOB
357  ///
358  static
359  void deserialize(bvector_type& bv_target,
360  const bvector_type& bv_mask,
361  serial_iterator_type& sit,
362  bm::word_t* temp_block,
363  set_operation op);
364 
365 private:
366  typedef typename BV::blocks_manager_type blocks_manager_type;
367 
368  /// load data from the iterator of type "id list"
369  static
370  void load_id_list(bvector_type& bv,
371  serial_iterator_type& sit,
372  unsigned id_count,
373  bool set_clear);
374 
375  /// Finalize the deserialization (zero target vector tail or bit-count tail)
376  static
377  unsigned finalize_target_vector(blocks_manager_type& bman,
378  set_operation op,
379  unsigned bv_block_idx);
380 
381  /// Process (obsolete) id-list serialization format
382  static
383  unsigned process_id_list(bvector_type& bv,
384  serial_iterator_type& sit,
385  set_operation op);
386 
387 
388 };
389 
390 /*!
391  @brief Serialization stream iterator
392 
393  Iterates blocks and control tokens of serialized bit-stream
394  \ingroup bvserial
395 */
396 template<class DEC>
398 {
399 public:
401 public:
402  serial_stream_iterator(const unsigned char* buf);
403 
404  /// serialized bitvector size
405  unsigned bv_size() const { return bv_size_; }
406 
407  /// Returns true if end of bit-stream reached
408  bool is_eof() const { return end_of_stream_; }
409 
410  /// get next block
411  void next();
412 
413  /// skip all zero or all-one blocks
414  void skip_mono_blocks();
415 
416  /// read bit block, using logical operation
417  unsigned get_bit_block(bm::word_t* dst_block,
418  bm::word_t* tmp_block,
419  set_operation op);
420 
421 
422  /// Read gap block data (with head)
423  void get_gap_block(bm::gap_word_t* dst_block);
424 
425  /// Return current decoder size
426  unsigned dec_size() const { return decoder_.size(); }
427 
428  /// Get low level access to the decoder (use carefully)
429  decoder_type& decoder() { return decoder_; }
430 
431  /// iterator is a state machine, this enum encodes
432  /// its key value
433  ///
435  {
436  e_unknown = 0,
437  e_list_ids, ///< plain int array
438  e_blocks, ///< stream of blocks
439  e_zero_blocks, ///< one or more zero bit blocks
440  e_one_blocks, ///< one or more all-1 bit blocks
441  e_bit_block, ///< one bit block
442  e_gap_block ///< one gap block
443 
444  };
445 
446  /// Returns iterator internal state
447  iterator_state state() const { return this->state_; }
448 
449  iterator_state get_state() const { return this->state_; }
450  /// Number of ids in the inverted list (valid for e_list_ids)
451  unsigned get_id_count() const { return this->id_cnt_; }
452 
453  /// Get last id from the id list
454  bm::id_t get_id() const { return this->last_id_; }
455 
456  /// Get current block index
457  unsigned block_idx() const { return this->block_idx_; }
458 
459 public:
460  /// member function pointer for bitset-bitset get operations
461  ///
462  typedef
463  unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
465 
466  unsigned
467  get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
468  unsigned
469  get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
470  unsigned
471  get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
472  unsigned
473  get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
474  unsigned
475  get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
476  unsigned
477  get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
478  unsigned
479  get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
480  unsigned
481  get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
482  unsigned
483  get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
484  unsigned
485  get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
486  unsigned
487  get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
488  unsigned
489  get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
490  unsigned
492  {
493  return get_bit_block_COUNT(dst_block, tmp_block);
494  }
495 
496  /// Get array of bits out of the decoder into bit block
497  /// (Converts inverted list into bits)
498  /// Returns number of words (bits) being read
499  unsigned get_arr_bit(bm::word_t* dst_block,
500  bool clear_target=true);
501 
502  /// Get current block type
503  unsigned get_block_type() const { return block_type_; }
504 
505  unsigned get_bit();
506 
507 protected:
508  get_bit_func_type bit_func_table_[bm::set_END];
509 
512  unsigned bv_size_;
514  unsigned id_cnt_; ///< Id counter for id list
515  bm::id_t last_id_; ///< Last id from the id list
516  gap_word_t glevels_[bm::gap_levels]; ///< GAP levels
517 
518  unsigned block_type_; ///< current block type
519  unsigned block_idx_; ///< current block index
520  unsigned mono_block_cnt_; ///< number of 0 or 1 blocks
521 
523 };
524 
525 /**
526  Deserializer, performs logical operations between bit-vector and
527  serialized bit-vector. This utility class potentially provides faster
528  and/or more memory efficient operation than more conventional deserialization
529  into memory bvector and then logical operation
530 
531  \ingroup bvserial
532 */
533 template<class BV>
535 {
536 public:
537  typedef BV bvector_type;
538 public:
539  /**
540  \brief Deserialize bvector using buffer as set operation argument
541 
542  \param bv - target bvector
543  \param buf - serialized buffer as a logical argument
544  \param temp_block - temporary block to avoid re-allocations
545  \param op - set algebra operation (default: OR)
546  \param exit_on_one - quick exit if set operation found some result
547 
548  \return bitcount
549  */
550  static
551  unsigned deserialize(bvector_type& bv,
552  const unsigned char* buf,
553  bm::word_t* temp_block,
555  bool exit_on_one = false ///<! exit early if any one are found
556  );
557 private:
558  /** experimental 3-way deserializator TARGET = MASK (OR/AND/XOR) BUF
559  \param bv_target - target bvector
560  \param bv_mask - mask bvector (MASK)
561  \param buf - buffer argument
562  \param temp_block - operational temporary block to avoid re-allocations
563  \param op - logical operation
564  */
565  static
566  void deserialize(bvector_type& bv_target,
567  const bvector_type& bv_mask,
568  const unsigned char* buf,
569  bm::word_t* temp_block,
570  set_operation op);
571 
572 private:
573  typedef
574  typename BV::blocks_manager_type blocks_manager_type;
575  typedef
577  typedef
579  typedef
581 
582 };
583 
584 
585 
586 
587 
588 //---------------------------------------------------------------------
589 
590 template<class BV>
592  bm::word_t* temp_block)
593 : alloc_(alloc),
594  gap_serial_(false),
595  byte_order_serial_(true),
596  compression_level_(4)
597 {
598  if (temp_block == 0)
599  {
600  temp_block_ = alloc_.alloc_bit_block();
601  own_temp_block_ = true;
602  }
603  else
604  {
605  temp_block_ = temp_block;
606  own_temp_block_ = false;
607  }
608 }
609 
610 template<class BV>
612 : alloc_(allocator_type()),
613  gap_serial_(false),
614  byte_order_serial_(true),
615  compression_level_(4)
616 {
617  if (temp_block == 0)
618  {
619  temp_block_ = alloc_.alloc_bit_block();
620  own_temp_block_ = true;
621  }
622  else
623  {
624  temp_block_ = temp_block;
625  own_temp_block_ = false;
626  }
627 }
628 
629 
630 template<class BV>
632 {
633  compression_level_ = clevel;
634 }
635 
636 template<class BV>
638 {
639  return compression_level_;
640 }
641 
642 template<class BV>
644 {
645  if (own_temp_block_)
646  alloc_.free_bit_block(temp_block_);
647 }
648 
649 
650 template<class BV>
652 {
653  gap_serial_ = value;
654 }
655 
656 template<class BV>
658 {
659  byte_order_serial_ = value;
660 }
661 
662 template<class BV>
664 {
665  const blocks_manager_type& bman = bv.get_blocks_manager();
666 
667  unsigned char header_flag = 0;
668  if (bv.size() == bm::id_max) // no dynamic resize
669  header_flag |= BM_HM_DEFAULT;
670  else
671  header_flag |= BM_HM_RESIZE;
672 
673  if (!byte_order_serial_)
674  header_flag |= BM_HM_NO_BO;
675 
676  if (!gap_serial_)
677  header_flag |= BM_HM_NO_GAPL;
678 
679  enc.put_8(header_flag);
680 
681  if (byte_order_serial_)
682  {
684  enc.put_8((unsigned char)bo);
685  }
686 
687  // keep GAP levels information
688  if (gap_serial_)
689  {
690  enc.put_16(bman.glen(), bm::gap_levels);
691  }
692 
693  // save size (only if bvector has been down-sized)
694  if (header_flag & BM_HM_RESIZE)
695  {
696  enc.put_32(bv.size());
697  }
698 
699 }
700 
701 template<class BV>
703 {
704  unsigned len = gap_length(gap_block);
705 
706  // Use Elias Gamma encoding
707  if (len > 6 && (compression_level_ > 3))
708  {
709  encoder::position_type enc_pos0 = enc.get_pos();
710  {
711  bit_out_type bout(enc);
712  gamma_encoder_func gamma(bout);
713 
714  enc.put_8(set_block_gap_egamma);
715  enc.put_16(gap_block[0]);
716 
717  for_each_dgap(gap_block, gamma);
718  }
719 
720  // evaluate gamma coding efficiency
721  encoder::position_type enc_pos1 = enc.get_pos();
722  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
723  if (gamma_size > (len-1)*sizeof(gap_word_t))
724  {
725  enc.set_pos(enc_pos0);
726  }
727  else
728  {
729  return;
730  }
731  }
732 
733  // save as plain GAP block
734  enc.put_8(set_block_gap);
735  enc.put_16(gap_block, len-1);
736 }
737 
738 template<class BV>
740  unsigned arr_len,
741  bm::encoder& enc,
742  bool inverted)
743 {
744  if (compression_level_ > 3 && arr_len > 25)
745  {
746  encoder::position_type enc_pos0 = enc.get_pos();
747  {
748  bit_out_type bout(enc);
749 
750  enc.put_8(
751  inverted ? set_block_arrgap_egamma_inv
752  : set_block_arrgap_egamma);
753 
754  bout.gamma(arr_len);
755 
756  gap_word_t prev = gap_array[0];
757  bout.gamma(prev + 1);
758 
759  for (unsigned i = 1; i < arr_len; ++i)
760  {
761  gap_word_t curr = gap_array[i];
762  bout.gamma(curr - prev);
763  prev = curr;
764  }
765  }
766 
767  encoder::position_type enc_pos1 = enc.get_pos();
768  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
769  if (gamma_size > (arr_len)*sizeof(gap_word_t))
770  {
771  enc.set_pos(enc_pos0); // re-wind the bit stream
772  }
773  else
774  {
775  return;
776  }
777  }
778 
779  // save as an plain array
780  enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap,
781  gap_array, arr_len, true);
782 }
783 
784 
785 template<class BV>
787 {
788  if (compression_level_ > 2)
789  {
790  gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
791  gap_word_t arr_len;
792 
793  unsigned bc = gap_bit_count_unr(gap_block);
794  if (bc == 1)
795  {
796  arr_len = gap_convert_to_arr(gap_temp_block,
797  gap_block,
798  bm::gap_equiv_len-10);
799  BM_ASSERT(arr_len == 1);
800  enc.put_8(set_block_bit_1bit);
801  enc.put_16(gap_temp_block[0]);
802  return;
803  }
804 
805  unsigned len = gap_length(gap_block);
806  bool invert, use_array;
807  invert = use_array = false;
808 
809  if (bc < len-1)
810  {
811  use_array = true;
812  }
813  else // inverted array is a better alternative?
814  {
815  unsigned inverted_bc = bm::gap_max_bits - bc;
816  if (inverted_bc < len-1)
817  {
818  use_array = invert = true;
819  }
820  }
821  if (use_array)
822  {
823  arr_len = gap_convert_to_arr(gap_temp_block,
824  gap_block,
826  invert);
827  if (arr_len)
828  {
829  gamma_gap_array(gap_temp_block, arr_len, enc, invert);
830  return;
831  }
832  }
833  }
834 
835  gamma_gap_block(gap_block, enc);
836 }
837 
838 template<class BV>
840  bm::encoder& enc,
841  unsigned //size_control
842  )
843 {
844  enc.put_8(set_block_bit_0runs);
845  enc.put_8((blk[0]==0) ? 0 : 1); // encode start
846 
847  unsigned i,j;//,k;
848 
849  for (i = 0; i < bm::set_block_size; ++i)
850  {
851  if (blk[i] == 0)
852  {
853  // scan fwd to find 0 island length
854  for (j = i+1; j < bm::set_block_size; ++j)
855  {
856  if (blk[j] != 0)
857  break;
858  }
859  enc.put_16((gap_word_t)(j-i));
860  i = j - 1;
861  }
862  else
863  {
864  // scan fwd to find non-0 island length
865  for (j = i+1; j < bm::set_block_size; ++j)
866  {
867  if (blk[j] == 0)
868  {
869  // look ahead to identify and ignore short 0-run
870  if (((j+1 < bm::set_block_size) && blk[j+1]) ||
871  ((j+2 < bm::set_block_size) && blk[j+2])
872  )
873  {
874  ++j; // skip zero word
875  continue;
876  }
877  break;
878  }
879  }
880  enc.put_16((gap_word_t)(j-i));
881  // stream all bit-words now
882  BM_ASSERT(i < j);
883  enc.put_32(blk + i, j - i);
884  i = j - 1;
885  }
886  }
887 }
888 
889 template<class BV>
890 void serializer<BV>::serialize(const BV& bv,
891  typename serializer<BV>::buffer& buf,
892  const statistics_type* bv_stat)
893 {
894  statistics_type stat;
895  if (!bv_stat)
896  {
897  bv.calc_stat(&stat);
898  bv_stat = &stat;
899  }
900 
901  buf.resize(bv_stat->max_serialize_mem);
902 
903  unsigned slen = this->serialize(bv, buf.data(), buf.size());
904  BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
905  BM_ASSERT(slen);
906 
907  buf.resize(slen);
908 }
909 
910 
911 template<class BV>
912 unsigned serializer<BV>::serialize(const BV& bv,
913  unsigned char* buf, size_t buf_size)
914 {
915  BM_ASSERT(temp_block_);
916 
917  const blocks_manager_type& bman = bv.get_blocks_manager();
918 
919  gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
920 
921  bm::encoder enc(buf, buf_size); // create the encoder
922  encode_header(bv, enc);
923 
924  unsigned i,j;
925 
926 
927  // save blocks.
928  for (i = 0; i < bm::set_total_blocks; ++i)
929  {
930  bm::word_t* blk = bman.get_block(i);
931  // -----------------------------------------
932  // Empty or ONE block serialization
933 
934  bool flag;
935  flag = bm::check_block_zero(blk, false/*shallow check*/);
936  if (flag)
937  {
938  zero_block:
939  unsigned next_nb = bman.find_next_nz_block(i+1, false);
940  if (next_nb == bm::set_total_blocks) // no more blocks
941  {
942  enc.put_8(set_block_azero);
943  return enc.size();
944  }
945  unsigned nb = next_nb - i;
946 
947  if (nb > 1 && nb < 128)
948  {
949  // special (but frequent) case -- GAP delta fits in 7bits
950  unsigned char c = (unsigned char)((1u << 7) | nb);
951  enc.put_8(c);
952  }
953  else
954  {
955  SER_NEXT_GRP(enc, nb, set_block_1zero,
956  set_block_8zero,
957  set_block_16zero,
958  set_block_32zero)
959  }
960  i = next_nb - 1;
961  continue;
962  }
963  else
964  {
965  flag = bm::check_block_one(blk, false);
966  if (flag)
967  {
968  // Look ahead for similar blocks
969  for(j = i+1; j < bm::set_total_blocks; ++j)
970  {
971  bm::word_t* blk_next = bman.get_block(j);
972  if (flag != bm::check_block_one(blk_next, false))
973  break;
974  }
975  if (j == bm::set_total_blocks)
976  {
977  enc.put_8(set_block_aone);
978  break;
979  }
980  else
981  {
982  unsigned nb = j - i;
983  SER_NEXT_GRP(enc, nb, set_block_1one,
984  set_block_8one,
985  set_block_16one,
986  set_block_32one)
987  }
988  i = j - 1;
989  continue;
990  }
991  }
992 
993  // ------------------------------
994  // GAP serialization
995 
996  if (BM_IS_GAP(blk))
997  {
998  gap_word_t* gblk = BMGAP_PTR(blk);
999  encode_gap_block(gblk, enc);
1000  continue;
1001  }
1002 
1003  // ----------------------------------------------
1004  // BIT BLOCK serialization
1005 
1006  {
1007  if (compression_level_ <= 1)
1008  {
1009  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1010  continue;
1011  }
1012 
1013  // compute bit-block statistics: bit-count and number of GAPS
1014  unsigned block_bc = bm::bit_block_count(blk);
1015  unsigned bit_gaps = bm::bit_block_calc_change(blk);
1016 
1017  unsigned block_bc_inv = bm::gap_max_bits - block_bc;
1018  switch (block_bc)
1019  {
1020  case 1: // corner case: only 1 bit on
1021  {
1022  bm::id_t bit_idx = 0;
1023  bm::bit_find_in_block(blk, bit_idx, &bit_idx);
1024  enc.put_8(set_block_bit_1bit); enc.put_16(bm::short_t(bit_idx));
1025  continue;
1026  }
1027  case 0: goto zero_block; // empty block
1028  default:
1029  break;
1030  }
1031 
1032 
1033  // compute alternative representation sizes
1034  //
1035  unsigned arr_block_size = unsigned(sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t)));
1036  unsigned arr_block_size_inv = unsigned(sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t)));
1037  unsigned gap_block_size = unsigned(sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t)));
1038  unsigned interval_block_size;
1039  interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size);
1040 
1041  bool inverted = false;
1042 
1043  if (arr_block_size_inv < arr_block_size &&
1044  arr_block_size_inv < gap_block_size &&
1045  arr_block_size_inv < interval_block_size)
1046  {
1047  inverted = true;
1048  goto bit_as_array;
1049  }
1050 
1051  // if interval representation is not a good alternative
1052  if ((interval_block_size > arr_block_size) ||
1053  (interval_block_size > gap_block_size))
1054  {
1055  if (gap_block_size < (bm::gap_equiv_len-64) &&
1056  gap_block_size < arr_block_size)
1057  {
1058  unsigned len = bm::bit_to_gap(gap_temp_block,
1059  blk,
1060  bm::gap_equiv_len-64);
1061  if (len) // save as GAP
1062  {
1063  gamma_gap_block(gap_temp_block, enc);
1064  continue;
1065  }
1066  }
1067 
1068  if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t)))
1069  {
1070  bit_as_array:
1071  gap_word_t arr_len;
1072  unsigned mask = inverted ? ~0u : 0u;
1073  arr_len = bit_convert_to_arr(gap_temp_block,
1074  blk,
1076  bm::gap_equiv_len-64,
1077  mask);
1078  if (arr_len)
1079  {
1080  gamma_gap_array(gap_temp_block, arr_len, enc, inverted);
1081  continue;
1082  }
1083 
1084  }
1085  // full bit-block
1086  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1087  continue;
1088  }
1089 
1090  // if interval block is a winner
1091  // it needs to have a compelling advantage of 25% over bit block
1092  //
1093  unsigned threashold_block_size =
1094  bm::set_block_size * sizeof(bm::word_t);
1095  threashold_block_size -= threashold_block_size / 4;
1096 
1097  if (interval_block_size < arr_block_size &&
1098  interval_block_size < gap_block_size &&
1099  interval_block_size < (bm::set_block_size * sizeof(bm::word_t))
1100  )
1101  {
1102  encode_bit_interval(blk, enc, interval_block_size);
1103  continue;
1104  }
1105 
1106  if (gap_block_size < bm::gap_equiv_len &&
1107  gap_block_size < arr_block_size)
1108  {
1109  unsigned len =
1110  bm::bit_to_gap(gap_temp_block, blk, bm::gap_equiv_len-64);
1111  if (len) // save as GAP
1112  {
1113  gamma_gap_block(gap_temp_block, enc);
1114  continue;
1115  }
1116  }
1117 
1118  // if array is best
1119  if (arr_block_size < bm::gap_equiv_len-64)
1120  {
1121  goto bit_as_array;
1122  }
1123  // full bit-block
1124  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1125  continue;
1126  }
1127  }
1128 
1129  enc.put_8(set_block_end);
1130 
1131  unsigned encoded_size = enc.size();
1132  return encoded_size;
1133 
1134 }
1135 
1136 
1137 
1138 /// Bit mask flags for serialization algorithm
1139 /// \ingroup bvserial
1141  BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
1142  BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
1143 };
1144 
1145 /*!
1146  \brief Saves bitvector into memory.
1147 
1148  Function serializes content of the bitvector into memory.
1149  Serialization adaptively uses compression(variation of GAP encoding)
1150  when it is benefitial.
1151 
1152  \param bv - source bvecor
1153  \param buf - pointer on target memory area. No range checking in the
1154  function. It is responsibility of programmer to allocate sufficient
1155  amount of memory using information from calc_stat function.
1156 
1157  \param temp_block - pointer on temporary memory block. Cannot be 0;
1158  If you want to save memory across multiple bvectors
1159  allocate temporary block using allocate_tempblock and pass it to
1160  serialize.
1161  (Serialize does not deallocate temp_block.)
1162 
1163  \param serialization_flags
1164  Flags controlling serilization (bit-mask)
1165  (use OR-ed serialization flags)
1166 
1167  \ingroup bvserial
1168 
1169  \return Size of serialization block.
1170  \sa calc_stat, serialization_flags
1171 
1172 */
1173 /*!
1174  Serialization format:
1175  <pre>
1176 
1177  | HEADER | BLOCKS |
1178 
1179  Header structure:
1180  BYTE : Serialization header (bit mask of BM_HM_*)
1181  BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
1182  INT16: Reserved (0)
1183  INT16: Reserved Flags (0)
1184 
1185  </pre>
1186 */
1187 template<class BV>
1188 unsigned serialize(const BV& bv,
1189  unsigned char* buf,
1190  bm::word_t* temp_block = 0,
1191  unsigned serialization_flags = 0)
1192 {
1193  bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
1195  bv_serial.byte_order_serialization(false);
1196 
1198  bv_serial.gap_length_serialization(false);
1199  else
1200  bv_serial.gap_length_serialization(true);
1201 
1202  bv_serial.set_compression_level(4);
1203 
1204  return bv_serial.serialize(bv, buf, 0);
1205 }
1206 
1207 /*!
1208  @brief Saves bitvector into memory.
1209  Allocates temporary memory block for bvector.
1210 
1211  \param bv - source bvecor
1212  \param buf - pointer on target memory area. No range checking in the
1213  function. It is responsibility of programmer to allocate sufficient
1214  amount of memory using information from calc_stat function.
1215 
1216  \param serialization_flags
1217  Flags controlling serilization (bit-mask)
1218  (use OR-ed serialization flags)
1219 
1220  \ingroup bvserial
1221 */
1222 template<class BV>
1223 unsigned serialize(BV& bv,
1224  unsigned char* buf,
1225  unsigned serialization_flags=0)
1226 {
1227  return bm::serialize(bv, buf, 0, serialization_flags);
1228 }
1229 
1230 
1231 
1232 /*!
1233  @brief Bitvector deserialization from memory.
1234 
1235  @param bv - target bvector
1236  @param buf - pointer on memory which keeps serialized bvector
1237  @param temp_block - pointer on temporary block,
1238  if NULL bvector allocates own.
1239  @return Number of bytes consumed by deserializer.
1240 
1241  Function desrializes bitvector from memory block containig results
1242  of previous serialization. Function does not remove bits
1243  which are currently set. Effectively it means OR logical operation
1244  between current bitset and previously serialized one.
1245 
1246  @ingroup bvserial
1247 */
1248 template<class BV>
1249 unsigned deserialize(BV& bv,
1250  const unsigned char* buf,
1251  bm::word_t* temp_block=0)
1252 {
1253  ByteOrder bo_current = globals<true>::byte_order();
1254 
1255  bm::decoder dec(buf);
1256  unsigned char header_flag = dec.get_8();
1257  ByteOrder bo = bo_current;
1258  if (!(header_flag & BM_HM_NO_BO))
1259  {
1260  bo = (bm::ByteOrder) dec.get_8();
1261  }
1262 
1263  if (bo_current == bo)
1264  {
1266  return deserial.deserialize(bv, buf, temp_block);
1267  }
1268  switch (bo_current)
1269  {
1270  case BigEndian:
1271  {
1273  return deserial.deserialize(bv, buf, temp_block);
1274  }
1275  case LittleEndian:
1276  {
1278  return deserial.deserialize(bv, buf, temp_block);
1279  }
1280  default:
1281  BM_ASSERT(0);
1282  };
1283  return 0;
1284 }
1285 
1286 template<class DEC>
1288  unsigned block_type,
1289  bm::gap_word_t* dst_arr)
1290 {
1291  typedef bit_in<DEC> bit_in_type;
1292 
1293  gap_word_t len = 0;
1294 
1295  switch (block_type)
1296  {
1297  case set_block_bit_1bit:
1298  dst_arr[0] = decoder.get_16();
1299  len = 1;
1300  break;
1301  case set_block_arrgap:
1302  case set_block_arrgap_inv:
1303  len = decoder.get_16();
1304  decoder.get_16(dst_arr, len);
1305  break;
1308  {
1309  bit_in_type bin(decoder);
1310  len = (gap_word_t)bin.gamma();
1311  gap_word_t prev = 0;
1312  for (gap_word_t k = 0; k < len; ++k)
1313  {
1314  gap_word_t bit_idx = (gap_word_t)bin.gamma();
1315  if (k == 0) --bit_idx; // TODO: optimization
1316  bit_idx = (gap_word_t)(bit_idx + prev);
1317  prev = bit_idx;
1318  dst_arr[k] = bit_idx;
1319  } // for
1320  }
1321  break;
1322  default:
1323  BM_ASSERT(0);
1324  }
1325  return len;
1326 }
1327 
1328 
1329 template<class DEC>
1331  unsigned block_type,
1332  bm::gap_word_t* dst_block,
1333  bm::gap_word_t& gap_head)
1334 {
1335  typedef bit_in<DEC> bit_in_type;
1336 
1337  switch (block_type)
1338  {
1339  case set_block_gap:
1340  {
1341  unsigned len = gap_length(&gap_head);
1342  --len;
1343  *dst_block = gap_head;
1344  decoder.get_16(dst_block+1, len - 1);
1345  dst_block[len] = gap_max_bits - 1;
1346  }
1347  break;
1348  case set_block_bit_1bit:
1349  {
1350  gap_set_all(dst_block, bm::gap_max_bits, 0);
1351  gap_word_t bit_idx = decoder.get_16();
1352  gap_add_value(dst_block, bit_idx);
1353  }
1354  break;
1355  case set_block_arrgap:
1356  case set_block_arrgap_inv:
1357  {
1358  gap_set_all(dst_block, bm::gap_max_bits, 0);
1359  gap_word_t len = decoder.get_16();
1360 
1361  for (gap_word_t k = 0; k < len; ++k)
1362  {
1363  gap_word_t bit_idx = decoder.get_16();
1364  gap_add_value(dst_block, bit_idx);
1365  } // for
1366  }
1367  break;
1370  {
1371  unsigned arr_len = read_id_list(decoder, block_type, id_array_);
1372  dst_block[0] = 0;
1373  //unsigned gap_len =
1374  gap_set_array(dst_block, id_array_, arr_len);
1375  }
1376  break;
1377  case set_block_gap_egamma:
1378  {
1379  unsigned len = (gap_head >> 3);
1380  --len;
1381  // read gamma GAP block into a dest block
1382  {
1383  *dst_block = gap_head;
1384  gap_word_t* gap_data_ptr = dst_block + 1;
1385 
1386  bit_in_type bin(decoder);
1387  {
1388  gap_word_t v = (gap_word_t)bin.gamma();
1389  gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
1390  for (unsigned i = 1; i < len; ++i)
1391  {
1392  v = (gap_word_t)bin.gamma();
1393  gap_sum = (gap_word_t)(gap_sum + v);
1394  *(++gap_data_ptr) = gap_sum;
1395  }
1396  dst_block[len+1] = bm::gap_max_bits - 1;
1397  }
1398  }
1399 
1400  }
1401  break;
1402  default:
1403  BM_ASSERT(0);
1404  }
1405 
1406  if (block_type == set_block_arrgap_egamma_inv ||
1407  block_type == set_block_arrgap_inv)
1408  {
1409  gap_invert(dst_block);
1410  }
1411 }
1412 
1413 
1414 template<class BV, class DEC>
1415 void
1417  bvector_type& bv, blocks_manager_type& bman,
1418  unsigned i,
1419  bm::word_t* blk)
1420 {
1421  //typedef bit_in<DEC> bit_in_type;
1422  gap_word_t gap_head;
1423 
1424  switch (btype)
1425  {
1426  case set_block_gap:
1427  case set_block_gapbit:
1428  {
1429  gap_head = (gap_word_t)
1430  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
1431 
1432  unsigned len = gap_length(&gap_head);
1433  int level = gap_calc_level(len, bman.glen());
1434  --len;
1435  if (level == -1) // Too big to be GAP: convert to BIT block
1436  {
1437  *gap_temp_block_ = gap_head;
1438  dec.get_16(gap_temp_block_+1, len - 1);
1439  gap_temp_block_[len] = gap_max_bits - 1;
1440 
1441  if (blk == 0) // block does not exist yet
1442  {
1443  blk = bman.get_allocator().alloc_bit_block();
1444  bman.set_block(i, blk);
1445  gap_convert_to_bitset(blk, gap_temp_block_);
1446  }
1447  else // We have some data already here. Apply OR operation.
1448  {
1449  gap_convert_to_bitset(temp_block_,
1450  gap_temp_block_);
1451  bv.combine_operation_with_block(i,
1452  temp_block_,
1453  0,
1454  BM_OR);
1455  }
1456  return;
1457  } // level == -1
1458 
1459  set_gap_level(&gap_head, level);
1460 
1461  if (blk == 0)
1462  {
1463  BM_ASSERT(level >= 0);
1464  gap_word_t* gap_blk =
1465  bman.get_allocator().alloc_gap_block(unsigned(level), bman.glen());
1466  gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
1467  *gap_blk_ptr = gap_head;
1468  bm::set_gap_level(gap_blk_ptr, level);
1469  blk = bman.set_block(i, (bm::word_t*)BMPTR_SETBIT0(gap_blk));
1470  BM_ASSERT(blk == 0);
1471 
1472  dec.get_16(gap_blk + 1, len - 1);
1473  gap_blk[len] = bm::gap_max_bits - 1;
1474  }
1475  else // target block exists
1476  {
1477  // read GAP block into a temp memory and perform OR
1478  *gap_temp_block_ = gap_head;
1479  dec.get_16(gap_temp_block_ + 1, len - 1);
1480  gap_temp_block_[len] = bm::gap_max_bits - 1;
1481  break;
1482  }
1483  return;
1484  }
1485  case set_block_arrgap:
1487  {
1488  unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
1489  gap_temp_block_[0] = 0; // reset unused bits in gap header
1490  unsigned gap_len =
1491  gap_set_array(gap_temp_block_, this->id_array_, arr_len);
1492 
1493  int level = gap_calc_level(gap_len, bman.glen());
1494  if (level == -1) // Too big to be GAP: convert to BIT block
1495  {
1496  gap_convert_to_bitset(temp_block_, gap_temp_block_);
1497  bv.combine_operation_with_block(i,
1498  temp_block_,
1499  0,
1500  BM_OR);
1501  return;
1502  }
1503 
1504  break;
1505  }
1506  case set_block_gap_egamma:
1507  gap_head = (gap_word_t)
1508  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
1510  case set_block_arrgap_inv:
1511  this->read_gap_block(dec, btype, gap_temp_block_, gap_head);
1512  break;
1513  default:
1514  BM_ASSERT(0);
1515  }
1516 
1517  bv.combine_operation_with_block(i,
1518  (bm::word_t*)gap_temp_block_,
1519  1,
1520  BM_OR);
1521 
1522 }
1523 
1524 
1525 template<class BV, class DEC>
1527  const unsigned char* buf,
1528  bm::word_t* temp_block)
1529 {
1530  blocks_manager_type& bman = bv.get_blocks_manager();
1531  if (!bman.is_init())
1532  {
1533  bman.init_tree();
1534  }
1535 
1536  bm::wordop_t* tmp_buf =
1537  temp_block ? (bm::wordop_t*) temp_block
1538  : (bm::wordop_t*)bman.check_allocate_tempblock();
1539 
1540  temp_block_ = temp_block = (word_t*)tmp_buf;
1541  bm::strategy strat = bv.get_new_blocks_strat();
1542  bv.set_new_blocks_strat(BM_GAP);
1543 
1544  decoder_type dec(buf);
1545 
1547 
1548  // Reading header
1549 
1550  unsigned char header_flag = dec.get_8();
1551  if (!(header_flag & BM_HM_NO_BO))
1552  {
1553  /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
1554  }
1555 
1556  if (header_flag & BM_HM_ID_LIST)
1557  {
1558  // special case: the next comes plain list of integers
1559  if (header_flag & BM_HM_RESIZE)
1560  {
1561  unsigned bv_size = dec.get_32();
1562  if (bv_size > bv.size())
1563  {
1564  bv.resize(bv_size);
1565  }
1566  }
1567 
1568 
1569  for (unsigned cnt = dec.get_32(); cnt; --cnt) {
1570  bm::id_t id = dec.get_32();
1571  bv.set(id);
1572  } // for
1573  // -1 for compatibility with other deserialization branches
1574  return dec.size()-1;
1575  }
1576 
1577  unsigned i;
1578 
1579  if (!(header_flag & BM_HM_NO_GAPL))
1580  {
1581  //gap_word_t glevels[bm::gap_levels];
1582  // read GAP levels information
1583  for (i = 0; i < bm::gap_levels; ++i)
1584  {
1585  /*glevels[i] =*/ dec.get_16();
1586  }
1587  }
1588 
1589  if (header_flag & (1 << 1))
1590  {
1591  unsigned bv_size = dec.get_32();
1592  if (bv_size > bv.size())
1593  {
1594  bv.resize(bv_size);
1595  }
1596  }
1597 
1598  unsigned char btype;
1599  unsigned nb;
1600 
1601  for (i = 0; i < bm::set_total_blocks; ++i)
1602  {
1603  btype = dec.get_8();
1604  bm::word_t* blk = bman.get_block(i);
1605 
1606  // pre-check if we have short zero-run packaging here
1607  //
1608  if (btype & (1 << 7))
1609  {
1610  nb = btype & ~(1 << 7);
1611  i += nb-1;
1612  continue;
1613  }
1614 
1615  switch (btype)
1616  {
1617  case set_block_azero:
1618  case set_block_end:
1620  break;
1621  case set_block_1zero:
1622  continue;
1623  case set_block_8zero:
1624  nb = dec.get_8();
1625  i += nb-1;
1626  continue;
1627  case set_block_16zero:
1628  nb = dec.get_16();
1629  i += nb-1;
1630  continue;
1631  case set_block_32zero:
1632  nb = dec.get_32();
1633  i += nb-1;
1634  continue;
1635  case set_block_aone:
1636  for (;i < bm::set_total_blocks; ++i)
1637  {
1638  bman.set_block_all_set(i);
1639  }
1640  break;
1641  case set_block_1one:
1642  bman.set_block_all_set(i);
1643  continue;
1644  case set_block_8one:
1645  BM_SET_ONE_BLOCKS(dec.get_8());
1646  continue;
1647  case set_block_16one:
1648  BM_SET_ONE_BLOCKS(dec.get_16());
1649  continue;
1650  case set_block_32one:
1651  BM_SET_ONE_BLOCKS(dec.get_32());
1652  continue;
1653  case set_block_bit:
1654  {
1655  if (blk == 0)
1656  {
1657  blk = bman.get_allocator().alloc_bit_block();
1658  bman.set_block(i, blk);
1659  dec.get_32(blk, bm::set_block_size);
1660  continue;
1661  }
1662 
1663  dec.get_32(temp_block, bm::set_block_size);
1664  bv.combine_operation_with_block(i,
1665  temp_block,
1666  0, BM_OR);
1667 
1668  continue;
1669  }
1670  case set_block_bit_1bit:
1671  {
1672  unsigned bit_idx = dec.get_16();
1673  bit_idx += i * bm::bits_in_block;
1674  bv.set_bit(bit_idx);
1675  continue;
1676  }
1677  case set_block_bit_0runs:
1678  {
1679  //TODO: optimization if block exists
1680  bit_block_set(temp_block, 0);
1681 
1682  unsigned char run_type = dec.get_8();
1683  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
1684  {
1685  unsigned run_length = dec.get_16();
1686  if (run_type)
1687  {
1688  unsigned run_end = j + run_length;
1689  for (;j < run_end; ++j)
1690  {
1691  BM_ASSERT(j < bm::set_block_size);
1692  temp_block[j] = dec.get_32();
1693  }
1694  }
1695  else
1696  {
1697  j += run_length;
1698  }
1699  } // for
1700 
1701  bv.combine_operation_with_block(i,
1702  temp_block,
1703  0, BM_OR);
1704  continue;
1705  }
1706  case set_block_bit_interval:
1707  {
1708  unsigned head_idx, tail_idx;
1709  head_idx = dec.get_16();
1710  tail_idx = dec.get_16();
1711 
1712  if (blk == 0)
1713  {
1714  blk = bman.get_allocator().alloc_bit_block();
1715  bman.set_block(i, blk);
1716  for (unsigned k = 0; k < head_idx; ++k)
1717  {
1718  blk[k] = 0;
1719  }
1720  dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
1721  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
1722  {
1723  blk[j] = 0;
1724  }
1725  continue;
1726  }
1727  bit_block_set(temp_block, 0);
1728  dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
1729 
1730  bv.combine_operation_with_block(i,
1731  temp_block,
1732  0, BM_OR);
1733  continue;
1734  }
1735  case set_block_gap:
1736  case set_block_gapbit:
1737  case set_block_arrgap:
1738  case set_block_gap_egamma:
1741  case set_block_arrgap_inv:
1742  deserialize_gap(btype, dec, bv, bman, i, blk);
1743  continue;
1744  case set_block_arrbit:
1745  {
1746  gap_word_t len = (gap_word_t)
1747  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
1748 
1749  if (BM_IS_GAP(blk))
1750  {
1751  // convert from GAP cause generic bitblock is faster
1752  blk = bman.deoptimize_block(i);
1753  }
1754  else
1755  {
1756  if (blk == 0) // block does not exists yet
1757  {
1758  blk = bman.get_allocator().alloc_bit_block();
1759  bm::bit_block_set(blk, 0);
1760  bman.set_block(i, blk);
1761  }
1762  }
1763 
1764  // Get the array one by one and set the bits.
1765  for (unsigned k = 0; k < len; ++k)
1766  {
1767  gap_word_t bit_idx = dec.get_16();
1768  bm::set_bit(blk, bit_idx);
1769  }
1770  continue;
1771  }
1772  default:
1773  BM_ASSERT(0); // unknown block type
1774  } // switch
1775  } // for i
1776 
1777  bv.forget_count();
1778  bv.set_new_blocks_strat(strat);
1779 
1780  return dec.size();
1781 }
1782 
1783 
1784 
1785 template<class DEC>
1787  : decoder_(buf),
1788  end_of_stream_(false),
1789  bv_size_(0),
1790  state_(e_unknown),
1791  id_cnt_(0),
1792  block_idx_(0),
1793  mono_block_cnt_(0)
1794 {
1795  ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
1796 
1823 
1824 
1825  // reading stream header
1826  unsigned char header_flag = decoder_.get_8();
1827  if (!(header_flag & BM_HM_NO_BO))
1828  {
1829  /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
1830  }
1831 
1832  // check if bitvector comes as an inverted, sorted list of ints
1833  //
1834  if (header_flag & BM_HM_ID_LIST)
1835  {
1836  // special case: the next comes plain list of unsigned integers
1837  if (header_flag & BM_HM_RESIZE)
1838  {
1839  bv_size_ = decoder_.get_32();
1840  }
1841 
1842  state_ = e_list_ids;
1843  id_cnt_ = decoder_.get_32();
1844  next(); // read first id
1845  }
1846  else
1847  {
1848  if (!(header_flag & BM_HM_NO_GAPL))
1849  {
1850  unsigned i;
1851  // keep GAP levels info
1852  for (i = 0; i < bm::gap_levels; ++i)
1853  {
1854  glevels_[i] = decoder_.get_16();
1855  }
1856  }
1857 
1858  if (header_flag & (1 << 1))
1859  {
1860  bv_size_ = decoder_.get_32();
1861  }
1862  state_ = e_blocks;
1863  }
1864 }
1865 
1866 template<class DEC>
1868 {
1869  if (is_eof())
1870  {
1871  ++block_idx_;
1872  return;
1873  }
1874 
1875  switch (state_)
1876  {
1877  case e_list_ids:
1878  // read inverted ids one by one
1879  if (id_cnt_ == 0)
1880  {
1881  end_of_stream_ = true;
1882  state_ = e_unknown;
1883  break;
1884  }
1885  last_id_ = decoder_.get_32();
1886  --id_cnt_;
1887  break;
1888 
1889  case e_blocks:
1891  {
1892  end_of_stream_ = true;
1893  state_ = e_unknown;
1894  break;
1895  }
1896 
1897  block_type_ = decoder_.get_8();
1898 
1899  // pre-check for 7-bit zero block
1900  //
1901  if (block_type_ & (1u << 7u))
1902  {
1903  mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
1905  break;
1906  }
1907 
1908  switch (block_type_)
1909  {
1910  case set_block_azero:
1911  case set_block_end:
1912  end_of_stream_ = true; state_ = e_unknown;
1913  break;
1914  case set_block_1zero:
1916  mono_block_cnt_ = 0;
1917  break;
1918  case set_block_8zero:
1920  mono_block_cnt_ = decoder_.get_8()-1;
1921  break;
1922  case set_block_16zero:
1924  mono_block_cnt_ = decoder_.get_16()-1;
1925  break;
1926  case set_block_32zero:
1928  mono_block_cnt_ = decoder_.get_32()-1;
1929  break;
1930  case set_block_aone:
1931  state_ = e_one_blocks;
1933  break;
1934  case set_block_1one:
1935  state_ = e_one_blocks;
1936  mono_block_cnt_ = 0;
1937  break;
1938  case set_block_8one:
1939  state_ = e_one_blocks;
1940  mono_block_cnt_ = decoder_.get_8()-1;
1941  break;
1942  case set_block_16one:
1943  state_ = e_one_blocks;
1944  mono_block_cnt_ = decoder_.get_16()-1;
1945  break;
1946  case set_block_32one:
1947  state_ = e_one_blocks;
1948  mono_block_cnt_ = decoder_.get_32()-1;
1949  break;
1950 
1951  case set_block_bit:
1953  case set_block_bit_0runs:
1954  case set_block_arrbit:
1955  state_ = e_bit_block;
1956  break;
1957 
1958  case set_block_gap:
1959  case set_block_gap_egamma:
1960  gap_head_ = (gap_word_t)
1961  (sizeof(gap_word_t) == 2 ?
1962  decoder_.get_16() : decoder_.get_32());
1963  case set_block_arrgap:
1966  case set_block_arrgap_inv:
1967  case set_block_bit_1bit:
1968  state_ = e_gap_block;
1969  break;
1970  case set_block_gapbit:
1971  state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
1972  break;
1973 
1974  default:
1975  BM_ASSERT(0);
1976  }// switch
1977 
1978  break;
1979 
1980  case e_zero_blocks:
1981  case e_one_blocks:
1982  ++block_idx_;
1983  if (!mono_block_cnt_)
1984  {
1985  state_ = e_blocks; // get new block token
1986  break;
1987  }
1988  --mono_block_cnt_;
1989  break;
1990 
1991  case e_unknown:
1992  default:
1993  BM_ASSERT(0);
1994  } // switch
1995 }
1996 
1997 template<class DEC>
1999 {
2001  if (!mono_block_cnt_)
2002  {
2003  ++block_idx_;
2004  }
2005  else
2006  {
2008  mono_block_cnt_ = 0;
2009  }
2010  state_ = e_blocks;
2011 }
2012 
2013 template<class DEC>
2014 unsigned
2016  bm::word_t* dst_block,
2017  bm::word_t* /*tmp_block*/)
2018 {
2019  BM_ASSERT(this->state_ == e_bit_block);
2020  unsigned count = 0;
2021 
2022  switch (this->block_type_)
2023  {
2024  case set_block_bit:
2025  decoder_.get_32(dst_block, bm::set_block_size);
2026  break;
2027  case set_block_bit_0runs:
2028  {
2029  if (dst_block)
2030  bit_block_set(dst_block, 0);
2031  unsigned char run_type = decoder_.get_8();
2032  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2033  {
2034  unsigned run_length = decoder_.get_16();
2035  if (run_type)
2036  {
2037  decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
2038  }
2039  j += run_length;
2040  } // for
2041  }
2042  break;
2044  {
2045  unsigned head_idx = decoder_.get_16();
2046  unsigned tail_idx = decoder_.get_16();
2047  if (dst_block)
2048  {
2049  for (unsigned i = 0; i < head_idx; ++i)
2050  dst_block[i] = 0;
2051  decoder_.get_32(dst_block + head_idx,
2052  tail_idx - head_idx + 1);
2053  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
2054  dst_block[j] = 0;
2055  }
2056  else
2057  {
2058  int pos = int(tail_idx - head_idx) + 1;
2059  pos *= 4u;
2060  decoder_.seek(pos);
2061  }
2062  }
2063  break;
2064  case set_block_arrbit:
2065  case set_block_bit_1bit:
2066  get_arr_bit(dst_block, true /*clear target*/);
2067  break;
2068  case set_block_gapbit:
2069  BM_ASSERT(0);
2070  break;
2071  default:
2072  BM_ASSERT(0);
2073  } // switch
2074  return count;
2075 }
2076 
2077 template<class DEC>
2078 unsigned
2080  bm::word_t* /*tmp_block*/)
2081 {
2082  BM_ASSERT(this->state_ == e_bit_block);
2083  unsigned count = 0;
2084  switch (block_type_)
2085  {
2086  case set_block_bit:
2087  decoder_.get_32_OR(dst_block, bm::set_block_size);
2088  break;
2090  {
2091  unsigned head_idx = decoder_.get_16();
2092  unsigned tail_idx = decoder_.get_16();
2093  for (unsigned i = head_idx; i <= tail_idx; ++i)
2094  dst_block[i] |= decoder_.get_32();
2095  }
2096  break;
2097  case set_block_bit_0runs:
2098  {
2099  unsigned char run_type = decoder_.get_8();
2100  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2101  {
2102  unsigned run_length = decoder_.get_16();
2103  if (run_type)
2104  {
2105  unsigned run_end = j + run_length;
2106  for (;j < run_end; ++j)
2107  {
2108  BM_ASSERT(j < bm::set_block_size);
2109  dst_block[j] |= decoder_.get_32();
2110  }
2111  }
2112  else
2113  {
2114  j += run_length;
2115  }
2116  } // for
2117  }
2118  break;
2119  case set_block_bit_1bit:
2120  case set_block_arrbit:
2121  get_arr_bit(dst_block, false /*don't clear target*/);
2122  break;
2123  default:
2124  BM_ASSERT(0);
2125  } // switch
2126  return count;
2127 }
2128 
2129 template<class DEC>
2130 unsigned
2132  bm::word_t* BMRESTRICT tmp_block)
2133 {
2134  BM_ASSERT(this->state_ == e_bit_block);
2135  BM_ASSERT(dst_block != tmp_block);
2136  unsigned count = 0;
2137  switch (block_type_)
2138  {
2139  case set_block_bit:
2140  decoder_.get_32_AND(dst_block, bm::set_block_size);
2141  break;
2142  case set_block_bit_0runs:
2143  {
2144  unsigned char run_type = decoder_.get_8();
2145  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2146  {
2147  unsigned run_length = decoder_.get_16();
2148 
2149  unsigned run_end = j + run_length;
2150  if (run_type)
2151  {
2152  for (;j < run_end; ++j)
2153  {
2154  BM_ASSERT(j < bm::set_block_size);
2155  dst_block[j] &= decoder_.get_32();
2156  }
2157  }
2158  else
2159  {
2160  for (;j < run_end; ++j)
2161  {
2162  BM_ASSERT(j < bm::set_block_size);
2163  dst_block[j] = 0;
2164  }
2165  }
2166  } // for
2167  }
2168  break;
2170  {
2171  unsigned head_idx = decoder_.get_16();
2172  unsigned tail_idx = decoder_.get_16();
2173  unsigned i;
2174  for ( i = 0; i < head_idx; ++i)
2175  dst_block[i] = 0;
2176  for ( i = head_idx; i <= tail_idx; ++i)
2177  dst_block[i] &= decoder_.get_32();
2178  for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
2179  dst_block[i] = 0;
2180  }
2181  break;
2182  case set_block_bit_1bit:
2183  case set_block_arrbit:
2184  get_arr_bit(tmp_block, true /*clear target*/);
2185  if (dst_block)
2186  bit_block_and(dst_block, tmp_block);
2187  break;
2188  default:
2189  BM_ASSERT(0);
2190  } // switch
2191  return count;
2192 }
2193 
2194 template<class DEC>
2195 unsigned
2197  bm::word_t* tmp_block)
2198 {
2199  BM_ASSERT(this->state_ == e_bit_block);
2200  BM_ASSERT(dst_block != tmp_block);
2201 
2202  unsigned count = 0;
2203  switch (block_type_)
2204  {
2205  case set_block_bit:
2206  for (unsigned i = 0; i < bm::set_block_size; ++i)
2207  dst_block[i] ^= decoder_.get_32();
2208  break;
2209  case set_block_bit_0runs:
2210  {
2211  unsigned char run_type = decoder_.get_8();
2212  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2213  {
2214  unsigned run_length = decoder_.get_16();
2215  if (run_type)
2216  {
2217  unsigned run_end = j + run_length;
2218  for (;j < run_end; ++j)
2219  {
2220  BM_ASSERT(j < bm::set_block_size);
2221  dst_block[j] ^= decoder_.get_32();
2222  }
2223  }
2224  else
2225  {
2226  j += run_length;
2227  }
2228  } // for
2229  }
2230  break;
2232  {
2233  unsigned head_idx = decoder_.get_16();
2234  unsigned tail_idx = decoder_.get_16();
2235  for (unsigned i = head_idx; i <= tail_idx; ++i)
2236  dst_block[i] ^= decoder_.get_32();
2237  }
2238  break;
2239  case set_block_bit_1bit:
2240  // TODO: optimization
2241  case set_block_arrbit:
2242  get_arr_bit(tmp_block, true /*clear target*/);
2243  if (dst_block)
2244  {
2245  bit_block_xor(dst_block, tmp_block);
2246  }
2247  break;
2248  default:
2249  BM_ASSERT(0);
2250  } // switch
2251  return count;
2252 }
2253 
2254 template<class DEC>
2255 unsigned
2257  bm::word_t* tmp_block)
2258 {
2259  BM_ASSERT(this->state_ == e_bit_block);
2260  BM_ASSERT(dst_block != tmp_block);
2261 
2262  unsigned count = 0;
2263  switch (block_type_)
2264  {
2265  case set_block_bit:
2266  for (unsigned i = 0; i < bm::set_block_size; ++i)
2267  dst_block[i] &= ~decoder_.get_32();
2268  break;
2269  case set_block_bit_0runs:
2270  {
2271  unsigned char run_type = decoder_.get_8();
2272  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2273  {
2274  unsigned run_length = decoder_.get_16();
2275  if (run_type)
2276  {
2277  unsigned run_end = j + run_length;
2278  for (;j < run_end; ++j)
2279  {
2280  BM_ASSERT(j < bm::set_block_size);
2281  dst_block[j] &= ~decoder_.get_32();
2282  }
2283  }
2284  else
2285  {
2286  j += run_length;
2287  }
2288  } // for
2289  }
2290  break;
2292  {
2293  unsigned head_idx = decoder_.get_16();
2294  unsigned tail_idx = decoder_.get_16();
2295  for (unsigned i = head_idx; i <= tail_idx; ++i)
2296  dst_block[i] &= ~decoder_.get_32();
2297  }
2298  break;
2299  case set_block_bit_1bit:
2300  // TODO: optimization
2301  case set_block_arrbit:
2302  get_arr_bit(tmp_block, true /*clear target*/);
2303  if (dst_block)
2304  bit_block_sub(dst_block, tmp_block);
2305  break;
2306  default:
2307  BM_ASSERT(0);
2308  } // switch
2309  return count;
2310 }
2311 
2312 
2313 template<class DEC>
2314 unsigned
2316  bm::word_t* /*tmp_block*/)
2317 {
2318  BM_ASSERT(this->state_ == e_bit_block);
2319 
2320  unsigned count = 0;
2321  switch (block_type_)
2322  {
2323  case set_block_bit:
2324  for (unsigned i = 0; i < bm::set_block_size; ++i)
2325  count += word_bitcount(decoder_.get_32());
2326  break;
2327  case set_block_bit_0runs:
2328  {
2329  //count = 0;
2330  unsigned char run_type = decoder_.get_8();
2331  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2332  {
2333  unsigned run_length = decoder_.get_16();
2334  if (run_type)
2335  {
2336  unsigned run_end = j + run_length;
2337  for (;j < run_end; ++j)
2338  {
2339  count += word_bitcount(decoder_.get_32());
2340  }
2341  }
2342  else
2343  {
2344  j += run_length;
2345  }
2346  } // for
2347  return count;
2348  }
2350  {
2351  unsigned head_idx = decoder_.get_16();
2352  unsigned tail_idx = decoder_.get_16();
2353  for (unsigned i = head_idx; i <= tail_idx; ++i)
2354  count += word_bitcount(decoder_.get_32());
2355  }
2356  break;
2357  case set_block_arrbit:
2358  count += get_arr_bit(0);
2359  break;
2360  case set_block_bit_1bit:
2361  ++count;
2362  decoder_.get_16();
2363  break;
2364  default:
2365  BM_ASSERT(0);
2366  } // switch
2367  return count;
2368 }
2369 
2370 template<class DEC>
2371 unsigned
2373  bm::word_t* /*tmp_block*/)
2374 {
2375  BM_ASSERT(this->state_ == e_bit_block);
2376  unsigned count = 0;
2377  if (dst_block)
2378  {
2379  // count the block bitcount
2380  count = bm::bit_block_count(dst_block);
2381  }
2382 
2383  switch (block_type_)
2384  {
2385  case set_block_bit:
2386  decoder_.get_32(0, bm::set_block_size);
2387  break;
2388  case set_block_bit_0runs:
2389  {
2390  unsigned char run_type = decoder_.get_8();
2391  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2392  {
2393  unsigned run_length = decoder_.get_16();
2394  if (run_type)
2395  {
2396  unsigned run_end = j + run_length;
2397  for (;j < run_end; ++j)
2398  {
2399  decoder_.get_32();
2400  }
2401  }
2402  else
2403  {
2404  j += run_length;
2405  }
2406  } // for
2407  }
2408  break;
2409 
2411  {
2412  unsigned head_idx = decoder_.get_16();
2413  unsigned tail_idx = decoder_.get_16();
2414  for (unsigned i = head_idx; i <= tail_idx; ++i)
2415  decoder_.get_32();
2416  }
2417  break;
2418  case set_block_arrbit:
2419  get_arr_bit(0);
2420  break;
2421  case set_block_bit_1bit:
2422  decoder_.get_16();
2423  break;
2424  default:
2425  BM_ASSERT(0);
2426  } // switch
2427  return count;
2428 }
2429 
2430 
2431 template<class DEC>
2432 unsigned
2434  bm::word_t* tmp_block)
2435 {
2436  BM_ASSERT(this->state_ == e_bit_block);
2437  BM_ASSERT(dst_block);
2438 
2439  unsigned count = 0;
2440  switch (block_type_)
2441  {
2442  case set_block_bit:
2443  for (unsigned i = 0; i < bm::set_block_size; ++i)
2444  count += word_bitcount(dst_block[i] & decoder_.get_32());
2445  break;
2446  case set_block_bit_0runs:
2447  {
2448  //count = 0;
2449  unsigned char run_type = decoder_.get_8();
2450  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2451  {
2452  unsigned run_length = decoder_.get_16();
2453  if (run_type)
2454  {
2455  unsigned run_end = j + run_length;
2456  for (;j < run_end; ++j)
2457  {
2458  count += word_bitcount(dst_block[j] & decoder_.get_32());
2459  }
2460  }
2461  else
2462  {
2463  j += run_length;
2464  }
2465  } // for
2466  return count;
2467  }
2469  {
2470  unsigned head_idx = decoder_.get_16();
2471  unsigned tail_idx = decoder_.get_16();
2472  for (unsigned i = head_idx; i <= tail_idx; ++i)
2473  count += word_bitcount(dst_block[i] & decoder_.get_32());
2474  }
2475  break;
2476  case set_block_bit_1bit:
2477  // TODO: optimization
2478  case set_block_arrbit:
2479  get_arr_bit(tmp_block, true /*clear target*/);
2480  count +=
2481  bit_operation_and_count(dst_block, tmp_block);
2482  break;
2483  default:
2484  BM_ASSERT(0);
2485  } // switch
2486  return count;
2487 }
2488 
2489 template<class DEC>
2490 unsigned
2492  bm::word_t* tmp_block)
2493 {
2494  BM_ASSERT(this->state_ == e_bit_block);
2495  BM_ASSERT(dst_block);
2496 
2497  bitblock_sum_adapter count_adapter;
2498  switch (block_type_)
2499  {
2500  case set_block_bit:
2501  {
2502  bitblock_get_adapter ga(dst_block);
2504 
2505  bit_recomb(ga,
2506  decoder_,
2507  func,
2508  count_adapter
2509  );
2510  }
2511  break;
2512  case set_block_bit_0runs:
2513  {
2514  unsigned count = 0;
2515  unsigned char run_type = decoder_.get_8();
2516  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2517  {
2518  unsigned run_length = decoder_.get_16();
2519  unsigned run_end = j + run_length;
2520  if (run_type)
2521  {
2522  for (;j < run_end; ++j)
2523  {
2524  BM_ASSERT(j < bm::set_block_size);
2525  count += word_bitcount(dst_block[j] | decoder_.get_32());
2526  }
2527  }
2528  else
2529  {
2530  for (;j < run_end; ++j)
2531  {
2532  BM_ASSERT(j < bm::set_block_size);
2533  count += word_bitcount(dst_block[j]);
2534  }
2535  }
2536  } // for
2537  return count;
2538  }
2540  {
2541  unsigned head_idx = decoder_.get_16();
2542  unsigned tail_idx = decoder_.get_16();
2543  unsigned count = 0;
2544  unsigned i;
2545  for (i = 0; i < head_idx; ++i)
2546  count += word_bitcount(dst_block[i]);
2547  for (i = head_idx; i <= tail_idx; ++i)
2548  count += word_bitcount(dst_block[i] | decoder_.get_32());
2549  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
2550  count += word_bitcount(dst_block[i]);
2551  return count;
2552  }
2553  case set_block_bit_1bit:
2554  // TODO: optimization
2555  case set_block_arrbit:
2556  get_arr_bit(tmp_block, true /* clear target*/);
2557  return
2558  bit_operation_or_count(dst_block, tmp_block);
2559  default:
2560  BM_ASSERT(0);
2561  } // switch
2562  return count_adapter.sum();
2563 }
2564 
2565 template<class DEC>
2566 unsigned
2568  bm::word_t* tmp_block)
2569 {
2570  BM_ASSERT(this->state_ == e_bit_block);
2571  BM_ASSERT(dst_block);
2572 
2573  bitblock_sum_adapter count_adapter;
2574  switch (block_type_)
2575  {
2576  case set_block_bit:
2577  {
2578  bitblock_get_adapter ga(dst_block);
2580 
2581  bit_recomb(ga,
2582  decoder_,
2583  func,
2584  count_adapter
2585  );
2586  }
2587  break;
2588  case set_block_bit_0runs:
2589  {
2590  unsigned count = 0;
2591  unsigned char run_type = decoder_.get_8();
2592  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2593  {
2594  unsigned run_length = decoder_.get_16();
2595  unsigned run_end = j + run_length;
2596  if (run_type)
2597  {
2598  for (;j < run_end; ++j)
2599  {
2600  BM_ASSERT(j < bm::set_block_size);
2601  count += word_bitcount(dst_block[j] ^ decoder_.get_32());
2602  }
2603  }
2604  else
2605  {
2606  for (;j < run_end; ++j)
2607  {
2608  BM_ASSERT(j < bm::set_block_size);
2609  count += word_bitcount(dst_block[j]);
2610  }
2611  }
2612  } // for
2613  return count;
2614  }
2616  {
2617  unsigned head_idx = decoder_.get_16();
2618  unsigned tail_idx = decoder_.get_16();
2619  unsigned count = 0;
2620  unsigned i;
2621  for (i = 0; i < head_idx; ++i)
2622  count += word_bitcount(dst_block[i]);
2623  for (i = head_idx; i <= tail_idx; ++i)
2624  count += word_bitcount(dst_block[i] ^ decoder_.get_32());
2625  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
2626  count += word_bitcount(dst_block[i]);
2627  return count;
2628  }
2629  case set_block_bit_1bit:
2630  // TODO: optimization
2631  case set_block_arrbit:
2632  get_arr_bit(tmp_block, true /* clear target*/);
2633  return
2634  bit_operation_xor_count(dst_block, tmp_block);
2635  default:
2636  BM_ASSERT(0);
2637  } // switch
2638  return count_adapter.sum();
2639 }
2640 
2641 template<class DEC>
2642 unsigned
2644  bm::word_t* tmp_block)
2645 {
2646  BM_ASSERT(this->state_ == e_bit_block);
2647  BM_ASSERT(dst_block);
2648 
2649  bitblock_sum_adapter count_adapter;
2650  switch (block_type_)
2651  {
2652  case set_block_bit:
2653  {
2654  bitblock_get_adapter ga(dst_block);
2656 
2657  bit_recomb(ga,
2658  decoder_,
2659  func,
2660  count_adapter
2661  );
2662  }
2663  break;
2664  case set_block_bit_0runs:
2665  {
2666  unsigned count = 0;
2667  unsigned char run_type = decoder_.get_8();
2668  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2669  {
2670  unsigned run_length = decoder_.get_16();
2671  unsigned run_end = j + run_length;
2672  if (run_type)
2673  {
2674  for (;j < run_end; ++j)
2675  {
2676  BM_ASSERT(j < bm::set_block_size);
2677  count += word_bitcount(dst_block[j] & ~decoder_.get_32());
2678  }
2679  }
2680  else
2681  {
2682  for (;j < run_end; ++j)
2683  {
2684  BM_ASSERT(j < bm::set_block_size);
2685  count += word_bitcount(dst_block[j]);
2686  }
2687  }
2688  } // for
2689  return count;
2690  }
2692  {
2693  unsigned head_idx = decoder_.get_16();
2694  unsigned tail_idx = decoder_.get_16();
2695  unsigned count = 0;
2696  unsigned i;
2697  for (i = 0; i < head_idx; ++i)
2698  count += word_bitcount(dst_block[i]);
2699  for (i = head_idx; i <= tail_idx; ++i)
2700  count += word_bitcount(dst_block[i] & (~decoder_.get_32()));
2701  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
2702  count += word_bitcount(dst_block[i]);
2703  return count;
2704  }
2705  break;
2706  case set_block_bit_1bit:
2707  //TODO: optimization
2708  case set_block_arrbit:
2709  get_arr_bit(tmp_block, true /* clear target*/);
2710  return
2711  bit_operation_sub_count(dst_block, tmp_block);
2712  default:
2713  BM_ASSERT(0);
2714  } // switch
2715  return count_adapter.sum();
2716 }
2717 
2718 template<class DEC>
2719 unsigned
2721  bm::word_t* tmp_block)
2722 {
2723  BM_ASSERT(this->state_ == e_bit_block);
2724  BM_ASSERT(dst_block);
2725 
2726  bitblock_sum_adapter count_adapter;
2727  switch (block_type_)
2728  {
2729  case set_block_bit:
2730  {
2731  bitblock_get_adapter ga(dst_block);
2733 
2734  bit_recomb(ga,
2735  decoder_,
2736  func,
2737  count_adapter
2738  );
2739  }
2740  break;
2741  case set_block_bit_0runs:
2742  {
2743  unsigned count = 0;
2744  unsigned char run_type = decoder_.get_8();
2745  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2746  {
2747  unsigned run_length = decoder_.get_16();
2748  unsigned run_end = j + run_length;
2749  if (run_type)
2750  {
2751  for (;j < run_end; ++j)
2752  {
2753  BM_ASSERT(j < bm::set_block_size);
2754  count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
2755  }
2756  }
2757  else
2758  {
2759  j += run_length;
2760  }
2761  } // for
2762  return count;
2763  }
2764 
2766  {
2767  unsigned head_idx = decoder_.get_16();
2768  unsigned tail_idx = decoder_.get_16();
2769  unsigned count = 0;
2770  unsigned i;
2771  for (i = head_idx; i <= tail_idx; ++i)
2772  count += word_bitcount(decoder_.get_32() & (~dst_block[i]));
2773  return count;
2774  }
2775  break;
2776  case set_block_bit_1bit:
2777  // TODO: optimization
2778  case set_block_arrbit:
2779  get_arr_bit(tmp_block, true /* clear target*/);
2780  return
2781  bit_operation_sub_count(tmp_block, dst_block);
2782  default:
2783  BM_ASSERT(0);
2784  } // switch
2785  return count_adapter.sum();
2786 }
2787 
2788 
2789 
2790 template<class DEC>
2792  bool clear_target)
2793 {
2794  BM_ASSERT(this->block_type_ == set_block_arrbit ||
2795  this->block_type_ == set_block_bit_1bit);
2796 
2797  gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
2798  if (dst_block)
2799  {
2800  if (clear_target)
2801  bit_block_set(dst_block, 0);
2802 
2803  if (this->block_type_ == set_block_bit_1bit)
2804  {
2805  // len contains idx of 1 bit set
2806  set_bit(dst_block, len);
2807  return 1;
2808  }
2809 
2810  for (unsigned k = 0; k < len; ++k)
2811  {
2812  gap_word_t bit_idx = decoder_.get_16();
2813  set_bit(dst_block, bit_idx);
2814  }
2815  }
2816  else
2817  {
2818  if (this->block_type_ == set_block_bit_1bit)
2819  {
2820  return 1; // nothing to do: len var already consumed 16bits
2821  }
2822  // fwd the decocing stream
2823  decoder_.seek(len * 2);
2824  }
2825  return len;
2826 }
2827 
2828 template<class DEC>
2830 {
2831  BM_ASSERT(this->block_type_ == set_block_bit_1bit);
2832  ++(this->block_idx_);
2833  this->state_ = e_blocks;
2834 
2835  return decoder_.get_16(); // 1bit_idx
2836 }
2837 
2838 template<class DEC>
2839 void
2841 {
2842  BM_ASSERT(this->state_ == e_gap_block ||
2843  this->block_type_ == set_block_bit_1bit);
2844  BM_ASSERT(dst_block);
2845 
2846  this->read_gap_block(this->decoder_,
2847  this->block_type_,
2848  dst_block,
2849  this->gap_head_);
2850 
2851  ++(this->block_idx_);
2852  this->state_ = e_blocks;
2853 }
2854 
2855 
2856 template<class DEC>
2857 unsigned
2859  bm::word_t* tmp_block,
2860  set_operation op)
2861 {
2862  BM_ASSERT(this->state_ == e_bit_block);
2863  get_bit_func_type bit_func = bit_func_table_[op];
2864  BM_ASSERT(bit_func);
2865  unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
2866  this->state_ = e_blocks;
2867  ++(this->block_idx_);
2868  return cnt;
2869 }
2870 
2871 
2872 
2873 template<class BV>
2875  bvector_type& bv,
2876  const unsigned char* buf,
2877  bm::word_t* temp_block,
2878  set_operation op,
2879  bool exit_on_one
2880  )
2881 {
2882  ByteOrder bo_current = globals<true>::byte_order();
2883 
2884  bm::decoder dec(buf);
2885  unsigned char header_flag = dec.get_8();
2886  ByteOrder bo = bo_current;
2887  if (!(header_flag & BM_HM_NO_BO))
2888  {
2889  bo = (bm::ByteOrder) dec.get_8();
2890  }
2891 
2892  blocks_manager_type& bman = bv.get_blocks_manager();
2893  bit_block_guard<blocks_manager_type> bg(bman);
2894  if (temp_block == 0)
2895  {
2896  temp_block = bg.allocate();
2897  }
2898 
2899  if (bo_current == bo)
2900  {
2901  serial_stream_current ss(buf);
2902  return
2904  deserialize(bv, ss, temp_block, op, exit_on_one);
2905  }
2906  switch (bo_current)
2907  {
2908  case BigEndian:
2909  {
2910  serial_stream_be ss(buf);
2911  return
2913  deserialize(bv, ss, temp_block, op, exit_on_one);
2914  }
2915  case LittleEndian:
2916  {
2917  serial_stream_le ss(buf);
2918  return
2920  deserialize(bv, ss, temp_block, op, exit_on_one);
2921  }
2922  default:
2923  BM_ASSERT(0);
2924  };
2925  return 0;
2926 }
2927 
2928 
2929 template<class BV>
2931  bvector_type& bv_target,
2932  const bvector_type& bv_mask,
2933  const unsigned char* buf,
2934  bm::word_t* temp_block,
2935  set_operation op)
2936 {
2937  ByteOrder bo_current = globals<true>::byte_order();
2938 
2939  bm::decoder dec(buf);
2940  unsigned char header_flag = dec.get_8();
2941  ByteOrder bo = bo_current;
2942  if (!(header_flag & BM_HM_NO_BO))
2943  {
2944  bo = (bm::ByteOrder) dec.get_8();
2945  }
2946 
2947  blocks_manager_type& bman = bv_target.get_blocks_manager();
2948  if (!bman.is_init())
2949  {
2950  bman.init_tree();
2951  }
2952  bit_block_guard<blocks_manager_type> bg(bman);
2953  if (temp_block == 0)
2954  {
2955  temp_block = bg.allocate();
2956  }
2957 
2958  if (bo_current == bo)
2959  {
2960  serial_stream_current ss(buf);
2962  deserialize(bv_target, bv_mask, ss, temp_block, op);
2963  return;
2964  }
2965  switch (bo_current)
2966  {
2967  case BigEndian:
2968  {
2969  serial_stream_be ss(buf);
2971  deserialize(bv_target, bv_mask, ss, temp_block, op);
2972  }
2973  case LittleEndian:
2974  {
2975  serial_stream_le ss(buf);
2977  deserialize(bv_target, bv_mask, ss, temp_block, op);
2978  }
2979  default:
2980  BM_ASSERT(0);
2981  };
2982 }
2983 
2984 
2985 template<class BV, class SerialIterator>
2987  bvector_type& bv,
2988  serial_iterator_type& sit,
2989  unsigned id_count,
2990  bool set_clear)
2991 {
2992  const unsigned win_size = 64;
2993  bm::id_t id_buffer[win_size+1];
2994 
2995  if (set_clear) // set bits
2996  {
2997  for (unsigned i = 0; i <= id_count;)
2998  {
2999  unsigned j;
3000  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
3001  {
3002  id_buffer[j] = sit.get_id();
3003  sit.next();
3004  } // for j
3005  bm::combine_or(bv, id_buffer, id_buffer + j);
3006  } // for i
3007  }
3008  else // clear bits
3009  {
3010  for (unsigned i = 0; i <= id_count;)
3011  {
3012  unsigned j;
3013  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
3014  {
3015  id_buffer[j] = sit.get_id();
3016  sit.next();
3017  } // for j
3018  bm::combine_sub(bv, id_buffer, id_buffer + j);
3019  } // for i
3020  }
3021 }
3022 
3023 template<class BV, class SerialIterator>
3024 unsigned
3026  blocks_manager_type& bman,
3027  set_operation op,
3028  unsigned bv_block_idx)
3029 {
3030  unsigned count = 0;
3031  switch (op)
3032  {
3033  case set_OR: case set_SUB: case set_XOR:
3034  case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
3035  case set_COUNT_SUB_BA:
3036  // nothing to do
3037  break;
3038  case set_AND: case set_ASSIGN:
3039  // clear the rest of the target vector
3040  {
3041  unsigned i, j;
3042  bman.get_block_coord(bv_block_idx, i, j);
3043  bm::word_t*** blk_root = bman.top_blocks_root();
3044  unsigned top_size = bman.top_block_size();
3045  for (; i < top_size; ++i)
3046  {
3047  bm::word_t** blk_blk = blk_root[i];
3048  if (blk_blk == 0)
3049  {
3050  bv_block_idx+=bm::set_array_size-j;
3051  j = 0;
3052  continue;
3053  }
3054  for (;j < bm::set_array_size; ++j, ++bv_block_idx)
3055  {
3056  //if (blk_blk[j])
3057  bman.zero_block(bv_block_idx);
3058  } // for j
3059  j = 0;
3060  } // for i
3061 
3062  }
3063  break;
3064  case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
3065  case set_COUNT_SUB_AB:
3066  // count bits in the target vector
3067  {
3068  unsigned i, j;
3069  bman.get_block_coord(bv_block_idx, i, j);
3070  bm::word_t*** blk_root = bman.top_blocks_root();
3071  unsigned top_size = bman.top_block_size();
3072  for (;i < top_size; ++i)
3073  {
3074  bm::word_t** blk_blk = blk_root[i];
3075  if (blk_blk == 0)
3076  {
3077  bv_block_idx+=bm::set_array_size-j;
3078  j = 0;
3079  continue;
3080  }
3081  for (;j < bm::set_array_size; ++j, ++bv_block_idx)
3082  {
3083  if (blk_blk[j])
3084  count += bman.block_bitcount(blk_blk[j]);
3085  } // for j
3086  j = 0;
3087  } // for i
3088 
3089  }
3090  break;
3091  case set_END:
3092  default:
3093  BM_ASSERT(0);
3094  }
3095  return count;
3096 }
3097 
3098 template<class BV, class SerialIterator>
3099 unsigned
3101  bvector_type& bv,
3102  serial_iterator_type& sit,
3103  set_operation op)
3104 {
3105  unsigned count = 0;
3106  unsigned id_count = sit.get_id_count();
3107  bool set_clear = true;
3108  switch (op)
3109  {
3110  case set_AND:
3111  {
3112  // TODO: use some more optimal algorithm without temp vector
3113  BV bv_tmp(BM_GAP);
3114  load_id_list(bv_tmp, sit, id_count, true);
3115  bv &= bv_tmp;
3116  }
3117  break;
3118  case set_ASSIGN:
3119  bv.clear(true);
3120  // intentional case fall through here (not a bug)
3121  case set_OR:
3122  set_clear = true;
3123  // fall to SUB
3124  case set_SUB:
3125  load_id_list(bv, sit, id_count, set_clear);
3126  break;
3127  case set_XOR:
3128  for (unsigned i = 0; i < id_count; ++i)
3129  {
3130  bm::id_t id = sit.get_id();
3131  bv[id] ^= true;
3132  sit.next();
3133  } // for
3134  break;
3135  case set_COUNT: case set_COUNT_B:
3136  for (unsigned i = 0; i < id_count; ++i)
3137  {
3138  /* bm::id_t id = */ sit.get_id();
3139  ++count;
3140  sit.next();
3141  } // for
3142  break;
3143  case set_COUNT_A:
3144  return bv.count();
3145  case set_COUNT_AND:
3146  for (unsigned i = 0; i < id_count; ++i)
3147  {
3148  bm::id_t id = sit.get_id();
3149  count += bv.get_bit(id);
3150  sit.next();
3151  } // for
3152  break;
3153  case set_COUNT_XOR:
3154  {
3155  // TODO: get rid of the temp vector
3156  BV bv_tmp(BM_GAP);
3157  load_id_list(bv_tmp, sit, id_count, true);
3158  count += bm::count_xor(bv, bv_tmp);
3159  }
3160  break;
3161  case set_COUNT_OR:
3162  {
3163  // TODO: get rid of the temp. vector
3164  BV bv_tmp(BM_GAP);
3165  load_id_list(bv_tmp, sit, id_count, true);
3166  count += bm::count_or(bv, bv_tmp);
3167  }
3168  break;
3169  case set_COUNT_SUB_AB:
3170  {
3171  // TODO: get rid of the temp. vector
3172  BV bv_tmp(bv);
3173  load_id_list(bv_tmp, sit, id_count, false);
3174  count += bv_tmp.count();
3175  }
3176  break;
3177  case set_COUNT_SUB_BA:
3178  {
3179  BV bv_tmp(BM_GAP);
3180  load_id_list(bv_tmp, sit, id_count, true);
3181  count += bm::count_sub(bv_tmp, bv);
3182  }
3183  break;
3184  case set_END:
3185  default:
3186  BM_ASSERT(0);
3187  } // switch
3188 
3189  return count;
3190 }
3191 
3192 
3193 template<class BV, class SerialIterator>
3195  bvector_type& bv_target,
3196  const bvector_type& bv_mask,
3197  serial_iterator_type& sit,
3198  bm::word_t* temp_block,
3199  set_operation op)
3200 {
3201  BM_ASSERT(temp_block);
3202  BM_ASSERT(op == bm::set_AND ||
3203  op == bm::set_OR || op == bm::set_XOR || op == bm::set_SUB);
3204 
3205  gap_word_t gap_temp_block[bm::gap_equiv_len*3];
3206  gap_temp_block[0] = 0;
3207 
3208  bv_target.clear(true); // clear and free the memory
3209 
3210  const blocks_manager_type& bman_mask = bv_mask.get_blocks_manager();
3211  blocks_manager_type& bman_target = bv_target.get_blocks_manager();
3212 
3213  if (!bman_target.is_init())
3214  {
3215  bman_target.init_tree();
3216  }
3217 
3218  unsigned bv_size = sit.bv_size();
3219  if (bv_mask.size() > bv_size)
3220  {
3221  bv_size = bv_mask.size();
3222  }
3223  if (bv_target.size() < bv_size)
3224  {
3225  bv_target.resize(bv_size);
3226  }
3227 
3228  unsigned top_blocks = bman_mask.effective_top_block_size();
3229 
3231 
3232  typename serial_iterator_type::iterator_state state;
3233  state = sit.get_state();
3234  if (state == serial_iterator_type::e_list_ids)
3235  {
3236  bv_target = bv_mask;
3237  process_id_list(bv_target, sit, op);
3238  return;
3239  }
3240 
3241  unsigned bv_block_idx = 0;
3242  for (;1;)
3243  {
3244  bv_block_idx = sit.block_idx();
3245  // early exit check to avoid over-scan
3246  {
3247  unsigned tb_idx = bv_block_idx >> bm::set_array_shift; // current top block
3248  if (tb_idx > top_blocks)
3249  {
3250  if (op == bm::set_AND)
3251  break;
3252  if (sit.is_eof())
3253  break;
3254  }
3255  }
3256 
3257  if (sit.is_eof()) // argument stream ended
3258  {
3259  if (op == bm::set_AND)
3260  break;
3261  // (OR, SUB, XOR) need to scan fwd until mask vector ends
3262  state = serial_iterator_type::e_zero_blocks;
3263  }
3264  else
3265  {
3266  state = sit.state();
3267  }
3268 
3269  switch (state)
3270  {
3271  case serial_iterator_type::e_blocks:
3272  sit.next();
3273  continue;
3274  case serial_iterator_type::e_bit_block:
3275  {
3276  bm::set_operation sop = op;
3277  const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
3278  bm::word_t* blk_target = 0;
3279  if (!blk_mask)
3280  {
3281  switch (op)
3282  {
3283  case set_AND: case set_SUB:
3284  // first arg is 0, so the operation gives us zero
3285  // all we need to do is to seek the input stream
3286  sop = set_ASSIGN;
3287  break;
3288  case set_OR: case set_XOR:
3289  blk_target = bman_target.make_bit_block(bv_block_idx);
3290  break;
3291  case set_ASSIGN:
3292  case set_COUNT:
3293  case set_COUNT_AND:
3294  case set_COUNT_XOR:
3295  case set_COUNT_OR:
3296  case set_COUNT_SUB_AB:
3297  case set_COUNT_SUB_BA:
3298  case set_COUNT_A:
3299  case set_COUNT_B:
3300  case set_END:
3301  default:
3302  BM_ASSERT(0);
3303  }
3304  }
3305  else // block exists
3306  {
3307  int is_gap = BM_IS_GAP(blk_mask);
3308  blk_target = bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
3309  }
3310 
3311  // 2 bit blocks recombination
3312  sit.get_bit_block(blk_target, temp_block, sop);
3313  }
3314  break;
3315 
3316  case serial_iterator_type::e_zero_blocks:
3317  {
3318  if (op == set_AND)
3319  {
3320  sit.skip_mono_blocks();
3321  break;
3322  }
3323  sit.next();
3324  // set_SUB: set_OR: set_XOR:
3325  bman_target.copy_block(bv_block_idx, bman_mask);
3326  }
3327  break;
3328 
3329  case serial_iterator_type::e_one_blocks:
3330  {
3331  BM_ASSERT(bv_block_idx == sit.block_idx());
3332  const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
3333  sit.next();
3334 
3335  switch (op)
3336  {
3337  case set_OR:
3338  bman_target.set_block_all_set(bv_block_idx);
3339  break;
3340  case set_SUB:
3341  break;
3342  case set_AND:
3343  bman_target.copy_block(bv_block_idx, bman_mask);
3344  break;
3345  case set_XOR:
3346  if (blk_mask)
3347  {
3348  int is_gap = BM_IS_GAP(blk_mask);
3349  bm::word_t* blk_target =
3350  bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
3351  bit_block_xor(blk_target, FULL_BLOCK_REAL_ADDR);
3352  }
3353  else
3354  {
3355  // 0 XOR 1 = 1
3356  bman_target.set_block_all_set(bv_block_idx);
3357  }
3358  break;
3359  default:
3360  BM_ASSERT(0);
3361  } // switch
3362 
3363  }
3364  break;
3365 
3366  case serial_iterator_type::e_gap_block:
3367  {
3368  // Single bit-in-block optimization
3369  if (sit.get_block_type() == set_block_bit_1bit)
3370  {
3371  if (op == set_AND)
3372  {
3373  unsigned bit_idx = sit.get_bit();
3374  unsigned bn = (bv_block_idx << bm::set_block_shift) | bit_idx;
3375  bool bval_mask = bv_mask.test(bn);
3376  bv_target.set_bit(bn, bval_mask);
3377  break;
3378  }
3379  }
3380 
3381  const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
3382 
3383  sit.get_gap_block(gap_temp_block);
3384  // gap_word_t gap_head = gap_temp_block[0];
3385 
3386  unsigned len = gap_length(gap_temp_block);
3387  int level = gap_calc_level(len, bman_target.glen());
3388  --len;
3389 
3390 
3391  if (!blk_mask)
3392  {
3393  if (op == set_OR || op == set_XOR)
3394  {
3395  bman_target.set_gap_block(bv_block_idx, gap_temp_block, level);
3396  }
3397  }
3398  else // mask block exists
3399  {
3400  bm::operation bop = bm::setop2op(op);
3401  bman_target.copy_block(bv_block_idx, bman_mask);
3402  bv_target.combine_operation_with_block(
3403  bv_block_idx,
3404  (bm::word_t*)gap_temp_block,
3405  1, // GAP
3406  bop);
3407  }
3408 
3409  }
3410  break;
3411 
3412  default:
3413  BM_ASSERT(0);
3414  } // switch
3415 
3416 
3417  } // for (deserialization)
3418 
3419  bv_target.forget_count();
3420 }
3421 
3422 
3423 
3424 template<class BV, class SerialIterator>
3425 unsigned
3427  bvector_type& bv,
3428  serial_iterator_type& sit,
3429  bm::word_t* temp_block,
3430  set_operation op,
3431  bool exit_on_one)
3432 {
3433  BM_ASSERT(temp_block);
3434 
3435  unsigned count = 0;
3436  gap_word_t gap_temp_block[bm::gap_equiv_len*3];
3437  gap_temp_block[0] = 0;
3438 
3439  blocks_manager_type& bman = bv.get_blocks_manager();
3440  if (!bman.is_init())
3441  {
3442  bman.init_tree();
3443  }
3444 
3445  bv.forget_count();
3446  if (sit.bv_size() && (sit.bv_size() > bv.size()))
3447  {
3448  bv.resize(sit.bv_size());
3449  }
3450 
3452 
3453  typename serial_iterator_type::iterator_state state;
3454  state = sit.get_state();
3455  if (state == serial_iterator_type::e_list_ids)
3456  {
3457  count = process_id_list(bv, sit, op);
3458  return count;
3459  }
3460 
3461  unsigned bv_block_idx = 0;
3462 
3463  for (;1;)
3464  {
3465  bm::set_operation sop = op;
3466  if (sit.is_eof()) // argument stream ended
3467  {
3468  count += finalize_target_vector(bman, op, bv_block_idx);
3469  return count;
3470  }
3471 
3472  state = sit.state();
3473  switch (state)
3474  {
3475  case serial_iterator_type::e_blocks:
3476  sit.next();
3477  continue;
3478  case serial_iterator_type::e_bit_block:
3479  {
3480  BM_ASSERT(sit.block_idx() == bv_block_idx);
3481  bm::word_t* blk = bman.get_block(bv_block_idx);
3482 
3483  if (!blk)
3484  {
3485  switch (op)
3486  {
3487  case set_AND: case set_SUB: case set_COUNT_AND:
3488  case set_COUNT_SUB_AB: case set_COUNT_A:
3489  // one arg is 0, so the operation gives us zero
3490  // all we need to do is to seek the input stream
3491  sop = set_ASSIGN;
3492  break;
3493 
3494  case set_OR: case set_XOR: case set_ASSIGN:
3495  blk = bman.make_bit_block(bv_block_idx);
3496  break;
3497 
3498  case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
3499  case set_COUNT_SUB_BA: case set_COUNT_B:
3500  // first arg is not required (should work as is)
3501  sop = set_COUNT;
3502  break;
3503 
3504  case set_END:
3505  default:
3506  BM_ASSERT(0);
3507  }
3508  }
3509  else // block exists
3510  {
3511  int is_gap = BM_IS_GAP(blk);
3512  if (is_gap || IS_FULL_BLOCK(blk))
3513  {
3514  if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
3515  {
3516  }
3517  else
3518  {
3519  // TODO: make sure const operations do not
3520  // deoptimize GAP blocks
3521  blk = bman.deoptimize_block(bv_block_idx);
3522  }
3523  }
3524  }
3525 
3526  // 2 bit blocks recombination
3527  unsigned c = sit.get_bit_block(blk, temp_block, sop);
3528  count += c;
3529  if (exit_on_one && count) // early exit
3530  return count;
3531 
3532  }
3533  break;
3534 
3535  case serial_iterator_type::e_zero_blocks:
3536  {
3537  BM_ASSERT(bv_block_idx == sit.block_idx());
3538  bm::word_t* blk = bman.get_block(bv_block_idx);
3539  sit.next();
3540 
3541  if (blk)
3542  {
3543  switch (op)
3544  {
3545  case set_AND: case set_ASSIGN:
3546  // the result is 0
3547  //blk =
3548  bman.zero_block(bv_block_idx);
3549  break;
3550 
3551  case set_SUB: case set_COUNT_AND: case set_OR:
3552  case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
3553  // nothing to do
3554  break;
3555 
3556  case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
3557  case set_COUNT: case set_COUNT_XOR:
3558  // valid bit block recombined with 0 block
3559  // results with same block data
3560  // all we need is to bitcount bv block
3561  {
3562  count += blk ? bman.block_bitcount(blk) : 0;
3563  if (exit_on_one && count) // early exit
3564  return count;
3565  }
3566  break;
3567  case set_END:
3568  default:
3569  BM_ASSERT(0);
3570  } // switch op
3571  } // if blk
3572  }
3573  break;
3574 
3575  case serial_iterator_type::e_one_blocks:
3576  {
3577  BM_ASSERT(bv_block_idx == sit.block_idx());
3578  bm::word_t* blk = bman.get_block(bv_block_idx);
3579  sit.next();
3580 
3581  switch (op)
3582  {
3583  case set_OR: case set_ASSIGN:
3584  bman.set_block_all_set(bv_block_idx);
3585  break;
3586  case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
3587  // block becomes all set
3588  count += bm::bits_in_block;
3589  break;
3590  case set_SUB:
3591  //blk =
3592  bman.zero_block(bv_block_idx);
3593  break;
3594  case set_COUNT_SUB_AB: case set_AND:
3595  // nothing to do
3596  break;
3597  case set_COUNT_AND: case set_COUNT_A:
3598  count += blk ? bman.block_bitcount(blk) : 0;
3599  break;
3600  default:
3601  if (blk)
3602  {
3603  switch (op)
3604  {
3605  case set_XOR:
3606  blk = bman.deoptimize_block(bv_block_idx);
3608  break;
3609  case set_COUNT_XOR:
3610  {
3611  count +=
3613  blk,
3615  bm::COUNT_XOR);
3616  }
3617  break;
3618  case set_COUNT_SUB_BA:
3619  {
3620  count +=
3622  blk,
3625  }
3626  break;
3627  default:
3628  BM_ASSERT(0);
3629  } // switch
3630  }
3631  else // blk == 0
3632  {
3633  switch (op)
3634  {
3635  case set_XOR:
3636  // 0 XOR 1 = 1
3637  bman.set_block_all_set(bv_block_idx);
3638  break;
3639  case set_COUNT_XOR:
3640  count += bm::bits_in_block;
3641  break;
3642  case set_COUNT_SUB_BA:
3643  // 1 - 0 = 1
3644  count += bm::bits_in_block;
3645  break;
3646  default:
3647  break;
3648  } // switch
3649  } // else
3650  } // switch
3651  if (exit_on_one && count) // early exit
3652  return count;
3653 
3654  }
3655  break;
3656 
3657  case serial_iterator_type::e_gap_block:
3658  {
3659  BM_ASSERT(bv_block_idx == sit.block_idx());
3660  bm::word_t* blk = bman.get_block(bv_block_idx);
3661 
3662  sit.get_gap_block(gap_temp_block);
3663 
3664  unsigned len = gap_length(gap_temp_block);
3665  int level = gap_calc_level(len, bman.glen());
3666  --len;
3667 
3668  bool const_op = is_const_set_operation(op);
3669  if (const_op)
3670  {
3671  distance_metric metric = operation2metric(op);
3672  bm::word_t* gptr = (bm::word_t*)gap_temp_block;
3673  BMSET_PTRGAP(gptr);
3674  unsigned c =
3676  blk,
3677  gptr,
3678  metric);
3679  count += c;
3680  if (exit_on_one && count) // early exit
3681  return count;
3682 
3683  }
3684  else // non-const set operation
3685  {
3686  if ((sop == set_ASSIGN) && blk) // target block override
3687  {
3688  //blk =
3689  bman.zero_block(bv_block_idx);
3690  sop = set_OR;
3691  }
3692  if (blk == 0) // target block not found
3693  {
3694  switch (sop)
3695  {
3696  case set_AND: case set_SUB:
3697  break;
3698  case set_OR: case set_XOR: case set_ASSIGN:
3699  bman.set_gap_block(
3700  bv_block_idx, gap_temp_block, level);
3701  break;
3702 
3703  default:
3704  BM_ASSERT(0);
3705  } // switch
3706  }
3707  else // target block exists
3708  {
3709  bm::operation bop = bm::setop2op(op);
3710  if (level == -1) // too big to GAP
3711  {
3712  gap_convert_to_bitset(temp_block, gap_temp_block);
3713  bv.combine_operation_with_block(bv_block_idx,
3714  temp_block,
3715  0, // BIT
3716  bop);
3717  }
3718  else // GAP fits
3719  {
3720  set_gap_level(gap_temp_block, level);
3721  bv.combine_operation_with_block(
3722  bv_block_idx,
3723  (bm::word_t*)gap_temp_block,
3724  1, // GAP
3725  bop);
3726  }
3727  }
3728  if (exit_on_one)
3729  {
3730  blk = bman.get_block(bv_block_idx);
3731  if (blk)
3732  {
3733  bool z = bm::check_block_zero(blk, true/*deep check*/);
3734  if (!z)
3735  return 1;
3736  }
3737  } // if exit_on_one
3738 
3739  } // if else non-const op
3740  }
3741  break;
3742 
3743  default:
3744  BM_ASSERT(0);
3745  } // switch
3746 
3747  ++bv_block_idx;
3748 
3749  } // for (deserialization)
3750 
3751  return count;
3752 }
3753 
3754 
3755 
3756 
3757 } // namespace bm
3758 
3759 #include "bmundef.h"
3760 
3761 #ifdef _MSC_VER
3762 #pragma warning( pop )
3763 #endif
3764 
3765 
3766 #endif
bm::id_t bit_block_count(const bm::word_t *block)
Bitcount for bit block.
Definition: bmfunc.h:3716
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
void encode_header(const BV &bv, bm::encoder &enc)
Encode serialization header information.
Definition: bmserial.h:663
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control)
Encode BIT block with repeatable runs of zeroes.
Definition: bmserial.h:839
void put_8(unsigned char c)
Puts one character into the encoding buffer.
Definition: encoding.h:595
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
Definition: bmfunc.h:179
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2079
Bit COUNT SUB AB functor.
Definition: bmfunc.h:6919
unsigned size() const
Returns size of the current encoding stream.
Definition: encoding.h:666
unsigned id_cnt_
Id counter for id list.
Definition: bmserial.h:514
one or more all-1 bit blocks
Definition: bmserial.h:440
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:5987
const unsigned char set_block_gap
Plain GAP block.
Definition: bmserial.h:77
const unsigned set_block_size
Definition: bmconst.h:48
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2433
Base deserialization class.
Definition: bmserial.h:280
serialization_flags
Bit mask flags for serialization algorithm.
Definition: bmserial.h:1140
const unsigned char set_block_32zero
Up to 4G zero blocks.
Definition: bmserial.h:70
const unsigned char set_block_32one
UP to 4G all-set blocks.
Definition: bmserial.h:71
unsigned gap_add_value(T *buf, unsigned pos)
Add new value to the end of GAP buffer.
Definition: bmfunc.h:2286
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
Definition: bmserial.h:1330
void next()
get next block
Definition: bmserial.h:1867
const unsigned set_array_shift
Definition: bmconst.h:83
unsigned char * position_type
Definition: encoding.h:45
id list stored
Definition: bmserial.h:95
deseriaizer_base< DEC >::decoder_type decoder_type
Definition: bmserial.h:400
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2720
operation
Bit operations.
Definition: bmconst.h:152
unsigned get_compression_level() const
Get compression level.
Definition: bmserial.h:637
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false)
Convert gap block into array of ints corresponding to 1 bits.
Definition: bmfunc.h:3657
void gap_set_all(T *buf, unsigned set_max, unsigned value)
Sets all bits to 0 or 1 (GAP)
Definition: bmfunc.h:3315
SerialIterator serial_iterator_type
Definition: bmserial.h:346
BV::allocator_type allocator_type
Definition: bmserial.h:322
bvector_type::blocks_manager_type blocks_manager_type
Definition: bmserial.h:150
unsigned serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serilization into memory block.
Definition: bmserial.h:912
void gamma_gap_block(bm::gap_word_t *gap_block, bm::encoder &enc)
Encode GAP block with Elias Gamma coder.
Definition: bmserial.h:702
unsigned mono_block_cnt_
number of 0 or 1 blocks
Definition: bmserial.h:520
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Construct serializer.
Definition: bmserial.h:591
save no byte-order info (save some space)
Definition: bmserial.h:1141
save no GAP info (save some space)
Definition: bmserial.h:1142
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2196
void put_32(bm::word_t w)
Puts 32 bits word into encoding buffer.
Definition: encoding.h:693
const unsigned char set_block_bit
Plain bit block.
Definition: bmserial.h:74
Definition: bm.h:69
unsigned bit_block_calc_change(const bm::word_t *block)
Definition: bmfunc.h:3907
bm::id_t last_id_
Last id from the id list.
Definition: bmserial.h:515
bvector_type::statistics statistics_type
Definition: bmserial.h:151
const unsigned gap_equiv_len
Definition: bmconst.h:74
Deserializer for bit-vector.
Definition: bmserial.h:309
size_t max_serialize_mem
Estimated maximum of memory required for serialization.
Definition: bmfunc.h:60
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
Definition: bmfunc.h:3227
pre-processor un-defines to avoid global space pollution (internal)
Bit COUNT OR functor.
Definition: bmfunc.h:6907
void for_each_dgap(const T *gap_buf, Func &func)
Definition: bmfunc.h:1785
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Definition: bmfunc.h:6749
bm::id_t get_id() const
Get last id from the id list.
Definition: bmserial.h:454
unsigned bv_size() const
serialized bitvector size
Definition: bmserial.h:405
const unsigned char set_block_bit_interval
Interval block.
Definition: bmserial.h:80
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *buf)
Returs GAP block length.
Definition: bmfunc.h:978
Bit-vector serialization class.
Definition: bmserial.h:145
unsigned serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition: bmserial.h:1188
GAP compression is ON.
Definition: bmconst.h:121
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
Definition: bmserial.h:76
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation and calculates bitcount of the result.
Definition: bmfunc.h:5440
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1134
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1303
void set_pos(unsigned char *buf_pos)
Set current memory stream position.
Definition: encoding.h:682
#define BM_SET_ONE_BLOCKS(x)
Definition: bmserial.h:122
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2315
(A ^ B).count()
Definition: bmalgo_impl.h:60
bool check_block_one(const bm::word_t *blk, bool deep_scan)
Checks if block has only 1 bits.
Definition: bmfunc.h:6603
bm::word_t sum() const
Get accumulated sum.
Definition: bmfunc.h:6786
bm::id_t count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo_impl.h:1006
one or more zero bit blocks
Definition: bmserial.h:439
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:351
const unsigned id_max
Definition: bmconst.h:44
Statistical information about bitset&#39;s memory allocation details.
Definition: bm.h:144
(B - A).count()
Definition: bmalgo_impl.h:63
Class for decoding data from memory buffer.
Definition: encoding.h:105
#define BMSET_PTRGAP(ptr)
Definition: bmdef.h:166
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size)
Inspects block for full zero words.
Definition: bmfunc.h:6148
unsigned get_id_count() const
Number of ids in the inverted list (valid for e_list_ids)
Definition: bmserial.h:451
unsigned serialize(BV &bv, unsigned char *buf, unsigned serialization_flags=0)
Saves bitvector into memory. Allocates temporary memory block for bvector.
Definition: bmserial.h:1223
get_bit_func_type bit_func_table_[bm::set_END]
Definition: bmserial.h:508
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
Definition: bmserial.h:86
int bit_find_in_block(const bm::word_t *data, unsigned nbit, bm::id_t *prev)
Searches for the next 1 bit in the BIT block.
Definition: bmfunc.h:6209
iterator_state state_
Definition: bmserial.h:513
Alloc allocator_type
Definition: bm.h:138
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2015
void gap_invert(T *buf)
Inverts all bits in the GAP buffer.
Definition: bmfunc.h:3386
unsigned int word_t
Definition: bmconst.h:36
const unsigned char set_block_arrbit
List of bits ON.
Definition: bmserial.h:79
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
Definition: bmfunc.h:2537
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:139
#define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO)
Definition: bmserial.h:102
const unsigned char set_block_azero
All other blocks zero.
Definition: bmserial.h:72
unsigned block_idx_
current block index
Definition: bmserial.h:519
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
Definition: bmserial.h:87
bm::short_t get_16()
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:794
Algorithms for bvector<>
gap_word_t glevels_[bm::gap_levels]
GAP levels.
Definition: bmserial.h:516
unsigned size() const
Returns size of the current decoding stream.
Definition: encoding.h:83
static unsigned deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Definition: bmserial.h:3426
void byte_order_serialization(bool value)
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:657
unsigned get_block_type() const
Get current block type.
Definition: bmserial.h:503
iterator_state
iterator is a state machine, this enum encodes its key value
Definition: bmserial.h:434
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2491
Bit manipulation primitives (internal)
unsigned short short_t
Definition: bmconst.h:37
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Definition: bmserial.h:85
unsigned deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Bitvector deserialization from memory.
Definition: bmserial.h:1249
Encoding utilities for serialization (internal)
Bit-block sum adapter, takes values and sums it /internal.
Definition: bmfunc.h:6779
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value)
Bitblock memset operation.
Definition: bmfunc.h:3209
static ByteOrder byte_order()
Definition: bmconst.h:441
const unsigned gap_levels
Definition: bmconst.h:76
Internal structure.
Definition: bmconst.h:415
unsigned short gap_word_t
Definition: bmconst.h:70
Iterator to walk forward the serialized stream.
Definition: bmserial.h:342
unsigned gap_set_array(T *buf, const T *arr, unsigned len)
Convert array to GAP buffer.
Definition: bmfunc.h:2418
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2131
unsigned block_idx() const
Get current block index.
Definition: bmserial.h:457
blocks_manager< Alloc > blocks_manager_type
Definition: bm.h:140
bm::id_t count_sub(const BV &bv1, const BV &bv2)
Computes bitcount of SUB operation of two bitsets.
Definition: bmalgo_impl.h:1040
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2372
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:157
int gap_calc_level(unsigned len, const T *glevel_len)
Calculates GAP block capacity level.
Definition: bmfunc.h:3427
#define BMPTR_SETBIT0(ptr)
Definition: bmdef.h:153
deseriaizer_base< DEC >::decoder_type decoder_type
Definition: bmserial.h:313
const unsigned char set_block_1zero
One all-zero block.
Definition: bmserial.h:64
BV::blocks_manager_type blocks_manager_type
Definition: bmserial.h:321
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true)
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
Definition: bmserial.h:2791
Bit SUB BA functor.
Definition: bmfunc.h:6930
void set_gap_level(T *buf, int level)
Sets GAP block capacity level.
Definition: bmfunc.h:3405
const unsigned char set_block_8zero
Up to 256 zero blocks.
Definition: bmserial.h:66
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
Definition: bmserial.h:84
void put_16(bm::short_t s)
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:605
void gap_length_serialization(bool value)
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:651
#define FULL_BLOCK_REAL_ADDR
Definition: bmdef.h:135
Definitions(internal)
bm::id_t count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo_impl.h:1074
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len)
Definition: bmfunc.h:3582
set_operation
Codes of set operations.
Definition: bmconst.h:129
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation and calculates bitcount of the result.
Definition: bmfunc.h:6097
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition: bmfunc.h:5766
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:5332
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count)
Encode 8-bit prefix + an array.
Definition: encoding.h:578
const unsigned bits_in_block
Definition: bmconst.h:87
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false)
Encode GAP block as delta-array with Elias Gamma coder.
Definition: bmserial.h:739
unsigned deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block)
Definition: bmserial.h:1526
unsigned char get_8()
Reads character from the decoding buffer.
Definition: encoding.h:80
const unsigned gap_max_bits
Definition: bmconst.h:73
#define BM_IS_GAP(ptr)
Definition: bmdef.h:167
Utilities for bit transposition (internal) (experimental!)
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count)
Encode 8-bit prefix + an array.
Definition: encoding.h:567
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:73
bool is_eof() const
Returns true if end of bit-stream reached.
Definition: bmserial.h:408
bm::operation setop2op(bm::set_operation op)
Convert set operation to operation.
Definition: bmfunc.h:777
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size)
Definition: bmfunc.h:6829
unsigned char * get_pos() const
Get current memory stream position.
Definition: encoding.h:674
serialization_header_mask
Definition: bmserial.h:92
#define BM_SET_MMX_GUARD
Definition: bmdef.h:232
iterator_state state() const
Returns iterator internal state.
Definition: bmserial.h:447
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Definition: bmserial.h:75
#define BMGAP_PTR(ptr)
Definition: bmdef.h:165
static unsigned deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:2874
unsigned(serial_stream_iterator< DEC >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
Definition: bmserial.h:464
const unsigned char set_block_16one
UP to 65536 all-set blocks.
Definition: bmserial.h:69
const unsigned set_array_size
Definition: bmconst.h:82
const unsigned char set_block_end
End of serialization.
Definition: bmserial.h:63
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:1583
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:491
const unsigned char set_block_8one
Up to 256 all-set blocks.
Definition: bmserial.h:67
void skip_mono_blocks()
skip all zero or all-one blocks
Definition: bmserial.h:1998
bvector_type::allocator_type allocator_type
Definition: bmserial.h:149
unsigned int id_t
Definition: bmconst.h:35
resized vector
Definition: bmserial.h:94
Functor for Elias Gamma encoding.
Definition: encoding.h:489
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:4733
const unsigned char set_block_16zero
Up to 65536 zero blocks.
Definition: bmserial.h:68
bm::word_t * temp_block_
Definition: bmserial.h:331
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:119
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0)
Convert bit block into an array of ints corresponding to 1 bits.
Definition: bmfunc.h:6545
#define BM_ASSERT
Definition: bmdef.h:116
no byte-order
Definition: bmserial.h:96
iterator_state get_state() const
Definition: bmserial.h:449
const unsigned set_total_blocks
Definition: bmconst.h:85
const unsigned char set_block_1one
One block all-set (1111...)
Definition: bmserial.h:65
void gamma(unsigned value)
Definition: encoding.h:251
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2256
id64_t wordop_t
Definition: bmconst.h:101
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:57
Serialization stream iterator.
Definition: bmserial.h:397
unsigned block_type_
current block type
Definition: bmserial.h:518
const unsigned char set_block_aone
All other blocks one.
Definition: bmserial.h:73
decoder_type & decoder()
Get low level access to the decoder (use carefully)
Definition: bmserial.h:429
const unsigned char set_block_gapbit
GAP compressed bitblock.
Definition: bmserial.h:78
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2567
const unsigned set_block_shift
Definition: bmconst.h:49
strategy
Block allocation strategies.
Definition: bmconst.h:118
unsigned dec_size() const
Return current decoder size.
Definition: bmserial.h:426
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:768
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2643
Memory encoding.
Definition: encoding.h:42
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:5372
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
Definition: bmserial.h:2840
void set_compression_level(unsigned clevel)
Set compression level.
Definition: bmserial.h:631
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
Definition: bmserial.h:83
ByteOrder
Byte orders recognized by the library.
Definition: bmconst.h:405
byte_buffer< allocator_type > buffer
Definition: bmserial.h:153
void encode_gap_block(bm::gap_word_t *gap_block, bm::encoder &enc)
Encode GAP block.
Definition: bmserial.h:786
Bit manipulation primitives (internal)
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Definition: bmserial.h:2858
bool check_block_zero(const bm::word_t *blk, bool deep_scan)
Checks all conditions and returns true if block consists of only 0 bits.
Definition: bmfunc.h:6580
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:534
const unsigned char set_block_arrgap
List of bits ON (GAP block)
Definition: bmserial.h:81
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
Definition: bmserial.h:82
#define BMRESTRICT
Definition: bmdef.h:179
no GAP levels
Definition: bmserial.h:97
bm::word_t get_32()
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:809
Bit COUNT XOR functor.
Definition: bmfunc.h:6896