BitMagicC++Library
bmserial.h
Go to the documentation of this file.
1 #ifndef BMSERIAL__H__INCLUDED__
2 #define BMSERIAL__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 
22 /*!
23  \defgroup bvserial bvector<> serialization
24  Serialization for bvector<> container
25  \ingroup bvector
26 
27  */
28 
29 #ifndef BM__H__INCLUDED__
30 #define BM__H__INCLUDED__
31 
32 #include "bm.h"
33 
34 #endif
35 
36 #ifdef _MSC_VER
37 #pragma warning( push )
38 #pragma warning( disable : 4311 4312 4127)
39 #endif
40 
41 
42 
43 #include "encoding.h"
44 #include "bmdef.h"
45 #include "bmfunc.h"
46 #include "bmtrans.h"
47 #include "bmalgo_impl.h"
48 #include "bmutil.h"
49 #include "bmbuffer.h"
50 
51 //#include "bmgamma.h"
52 
53 
54 namespace bm
55 {
56 
57 
58 // Serialization stream markup constants
59 
60 
61 const unsigned char set_block_end = 0; //!< End of serialization
62 const unsigned char set_block_1zero = 1; //!< One all-zero block
63 const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
64 const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
65 const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
66 const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
67 const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
68 const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
69 const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
70 const unsigned char set_block_azero = 9; //!< All other blocks zero
71 const unsigned char set_block_aone = 10; //!< All other blocks one
72 const unsigned char set_block_bit = 11; //!< Plain bit block
73 const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
74 const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
75 const unsigned char set_block_gap = 14; //!< Plain GAP block
76 const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
77 const unsigned char set_block_arrbit = 16; //!< List of bits ON
78 const unsigned char set_block_bit_interval = 17; //!< Interval block
79 const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
80 const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
81 const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
82 const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
83 const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
84 const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
85 const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
86 
87 
88 /// \internal
89 /// \ingroup bvserial
92  BM_HM_RESIZE = (1 << 1), ///< resized vector
93  BM_HM_ID_LIST = (1 << 2), ///< id list stored
94  BM_HM_NO_BO = (1 << 3), ///< no byte-order
95  BM_HM_NO_GAPL = (1 << 4) ///< no GAP levels
96 };
97 
98 
99 
100 #define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO) \
101  if (nb == 1) \
102  enc.put_8(B_1ZERO); \
103  else if (nb < 256) \
104  { \
105  enc.put_8(B_8ZERO); \
106  enc.put_8((unsigned char)nb); \
107  } \
108  else if (nb < 65536) \
109  { \
110  enc.put_8(B_16ZERO); \
111  enc.put_16((unsigned short)nb); \
112  } \
113  else \
114  {\
115  enc.put_8(B_32ZERO); \
116  enc.put_32(nb); \
117  }
118 
119 
120 #define BM_SET_ONE_BLOCKS(x) \
121  {\
122  unsigned end_block = i + x; \
123  for (;i < end_block; ++i) \
124  bman.set_block_all_set(i); \
125  } \
126  --i
127 
128 
129 
130 
131 /**
132  Bit-vector serialization class.
133 
134  Class designed to convert sparse bit-vectors into a single block of memory
135  ready for file or database storage or network transfer.
136 
137  Reuse of this class for multiple serializations may offer some performance
138  advantage.
139 
140  @ingroup bvserial
141 */
142 template<class BV>
144 {
145 public:
146  typedef BV bvector_type;
147  typedef typename bvector_type::allocator_type allocator_type;
148  typedef typename bvector_type::blocks_manager_type blocks_manager_type;
149  typedef typename bvector_type::statistics statistics_type;
150 
151  typedef byte_buffer<allocator_type> buffer;
152 public:
153  /**
154  Construct serializer
155 
156  \param alloc - memory allocator
157  \param temp_block - temporary block for various operations
158  (if NULL it will be allocated and managed by serializer class)
159  Temp block is used as a scratch memory during serialization,
160  use of external temp block allows to avoid unnecessary re-allocations.
161 
162  Temp block attached is not owned by the class and NOT deallocated on
163  destruction.
164  */
165  serializer(const allocator_type& alloc = allocator_type(),
166  bm::word_t* temp_block = 0);
167 
168  serializer(bm::word_t* temp_block);
169 
170  ~serializer();
171 
172  /**
173  Set compression level. Higher compression takes more time to process.
174  @param clevel - compression level (0-4)
175  */
176  void set_compression_level(unsigned clevel);
177 
178  /**
179  Get compression level
180  */
181  unsigned get_compression_level() const;
182 
183  /**
184  Bitvector serilization into memory block
185 
186  @param bv - input bitvector
187  @param buf - out buffer (pre-allocated)
188  No range checking is done in this method.
189  It is responsibility of caller to allocate sufficient
190  amount of memory using information from calc_stat() function.
191 
192  @param buf_size - size of the output buffer
193 
194  @return Size of serialization block.
195  @sa calc_stat
196  */
197  unsigned serialize(const BV& bv,
198  unsigned char* buf, size_t buf_size);
199 
200  /**
201  Bitvector serilization into buffer object (it gets resized automatically)
202 
203  @param bv - input bitvector
204  @param buf - output buffer object
205  @param bv_stat - input (optional) bit-vector statistics object
206  if NULL, serizlize will compute statistics
207  */
208  void serialize(const BV& bv, typename serializer<BV>::buffer& buf, const statistics_type* bv_stat);
209 
210 
211  /**
212  Set GAP length serialization (serializes GAP levels of the original vector)
213 
214  @param value - when TRUE serialized vector includes GAP levels parameters
215  */
216  void gap_length_serialization(bool value);
217  /**
218  Set byte-order serialization (for cross platform compatibility)
219 
220  @param value - TRUE serialization format includes byte-order marker
221  */
222  void byte_order_serialization(bool value);
223 
224 protected:
225  /**
226  Encode serialization header information
227  */
228  void encode_header(const BV& bv, bm::encoder& enc);
229 
230  /**
231  Encode GAP block
232  */
233  void encode_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
234 
235  /**
236  Encode GAP block with Elias Gamma coder
237  */
238  void gamma_gap_block(bm::gap_word_t* gap_block, bm::encoder& enc);
239 
240  /**
241  Encode GAP block as delta-array with Elias Gamma coder
242  */
243  void gamma_gap_array(const bm::gap_word_t* gap_block,
244  unsigned arr_len,
245  bm::encoder& enc,
246  bool inverted = false);
247 
248  /**
249  Encode BIT block with repeatable runs of zeroes
250  */
251  void encode_bit_interval(const bm::word_t* blk,
252  bm::encoder& enc,
253  unsigned size_control);
254 
255 private:
256  serializer(const serializer&);
257  serializer& operator=(const serializer&);
258 
259 private:
260 
263 
264 private:
265  allocator_type alloc_;
266  bool gap_serial_;
267  bool byte_order_serial_;
268  bm::word_t* temp_block_;
269  unsigned compression_level_;
270  bool own_temp_block_;
271 };
272 
273 /**
274  Base deserialization class
275  \ingroup bvserial
276 */
277 template<class DEC>
279 {
280 public:
281  typedef DEC decoder_type;
282 protected:
284 
285  /// Read GAP block from the stream
286  void read_gap_block(decoder_type& decoder,
287  unsigned block_type,
288  bm::gap_word_t* dst_block,
289  bm::gap_word_t& gap_head);
290 
291  /// Read list of bit ids
292  ///
293  /// @return number of ids
294  unsigned read_id_list(decoder_type& decoder,
295  unsigned block_type,
296  bm::gap_word_t* dst_arr);
297 
298 protected:
300 };
301 
302 /**
303  Deserializer for bit-vector
304  \ingroup bvserial
305 */
306 template<class BV, class DEC>
307 class deserializer : protected deseriaizer_base<DEC>
308 {
309 public:
310  typedef BV bvector_type;
312 public:
313  deserializer() : temp_block_(0) {}
314 
315  unsigned deserialize(bvector_type& bv,
316  const unsigned char* buf,
317  bm::word_t* temp_block);
318 protected:
319  typedef typename BV::blocks_manager_type blocks_manager_type;
320  typedef typename BV::allocator_type allocator_type;
321 
322 protected:
323  void deserialize_gap(unsigned char btype, decoder_type& dec,
324  bvector_type& bv, blocks_manager_type& bman,
325  unsigned i,
326  bm::word_t* blk);
327 protected:
328  bm::gap_word_t gap_temp_block_[bm::gap_equiv_len * 4];
330 };
331 
332 
333 /**
334  Iterator to walk forward the serialized stream.
335 
336  \internal
337  \ingroup bvserial
338 */
339 template<class BV, class SerialIterator>
341 {
342 public:
343  typedef BV bvector_type;
344  typedef SerialIterator serial_iterator_type;
345 public:
346  static
347  unsigned deserialize(bvector_type& bv,
348  serial_iterator_type& sit,
349  bm::word_t* temp_block,
351  bool exit_on_one = false);
352 
353  /// experimental 3 way deserialization
354  /// target = mask %OPERATION% BLOB
355  ///
356  static
357  void deserialize(bvector_type& bv_target,
358  const bvector_type& bv_mask,
359  serial_iterator_type& sit,
360  bm::word_t* temp_block,
361  set_operation op);
362 
363 private:
364  typedef typename BV::blocks_manager_type blocks_manager_type;
365 
366  /// load data from the iterator of type "id list"
367  static
368  void load_id_list(bvector_type& bv,
369  serial_iterator_type& sit,
370  unsigned id_count,
371  bool set_clear);
372 
373  /// Finalize the deserialization (zero target vector tail or bit-count tail)
374  static
375  unsigned finalize_target_vector(blocks_manager_type& bman,
376  set_operation op,
377  unsigned bv_block_idx);
378 
379  /// Process (obsolete) id-list serialization format
380  static
381  unsigned process_id_list(bvector_type& bv,
382  serial_iterator_type& sit,
383  set_operation op);
384 
385 
386 };
387 
388 /*!
389  @brief Serialization stream iterator
390 
391  Iterates blocks and control tokens of serialized bit-stream
392  \ingroup bvserial
393 */
394 template<class DEC>
396 {
397 public:
399 public:
400  serial_stream_iterator(const unsigned char* buf);
401 
402  /// serialized bitvector size
403  unsigned bv_size() const { return bv_size_; }
404 
405  /// Returns true if end of bit-stream reached
406  bool is_eof() const { return end_of_stream_; }
407 
408  /// get next block
409  void next();
410 
411  /// skip all zero or all-one blocks
412  void skip_mono_blocks();
413 
414  /// read bit block, using logical operation
415  unsigned get_bit_block(bm::word_t* dst_block,
416  bm::word_t* tmp_block,
417  set_operation op);
418 
419 
420  /// Read gap block data (with head)
421  void get_gap_block(bm::gap_word_t* dst_block);
422 
423  /// Return current decoder size
424  unsigned dec_size() const { return decoder_.size(); }
425 
426  /// Get low level access to the decoder (use carefully)
427  decoder_type& decoder() { return decoder_; }
428 
429  /// iterator is a state machine, this enum encodes
430  /// its key value
431  ///
433  {
434  e_unknown = 0,
435  e_list_ids, ///< plain int array
436  e_blocks, ///< stream of blocks
437  e_zero_blocks, ///< one or more zero bit blocks
438  e_one_blocks, ///< one or more all-1 bit blocks
439  e_bit_block, ///< one bit block
440  e_gap_block ///< one gap block
441 
442  };
443 
444  /// Returns iterator internal state
445  iterator_state state() const { return this->state_; }
446 
447  iterator_state get_state() const { return this->state_; }
448  /// Number of ids in the inverted list (valid for e_list_ids)
449  unsigned get_id_count() const { return this->id_cnt_; }
450 
451  /// Get last id from the id list
452  bm::id_t get_id() const { return this->last_id_; }
453 
454  /// Get current block index
455  unsigned block_idx() const { return this->block_idx_; }
456 
457 public:
458  /// member function pointer for bitset-bitset get operations
459  ///
460  typedef
461  unsigned (serial_stream_iterator<DEC>::*get_bit_func_type)
463 
464  unsigned
465  get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
466  unsigned
467  get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
468  unsigned
469  get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
470  unsigned
471  get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
472  unsigned
473  get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
474  unsigned
475  get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
476  unsigned
477  get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
478  unsigned
479  get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
480  unsigned
481  get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
482  unsigned
483  get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
484  unsigned
485  get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
486  unsigned
487  get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
488  unsigned
490  {
491  return get_bit_block_COUNT(dst_block, tmp_block);
492  }
493 
494  /// Get array of bits out of the decoder into bit block
495  /// (Converts inverted list into bits)
496  /// Returns number of words (bits) being read
497  unsigned get_arr_bit(bm::word_t* dst_block,
498  bool clear_target=true);
499 
500  /// Get current block type
501  unsigned get_block_type() const { return block_type_; }
502 
503  unsigned get_bit();
504 
505 protected:
506  get_bit_func_type bit_func_table_[bm::set_END];
507 
510  unsigned bv_size_;
512  unsigned id_cnt_; ///< Id counter for id list
513  bm::id_t last_id_; ///< Last id from the id list
514  gap_word_t glevels_[bm::gap_levels]; ///< GAP levels
515 
516  unsigned block_type_; ///< current block type
517  unsigned block_idx_; ///< current block index
518  unsigned mono_block_cnt_; ///< number of 0 or 1 blocks
519 
521 };
522 
523 /**
524  Deserializer, performs logical operations between bit-vector and
525  serialized bit-vector. This utility class potentially provides faster
526  and/or more memory efficient operation than more conventional deserialization
527  into memory bvector and then logical operation
528 
529  \ingroup bvserial
530 */
531 template<class BV>
533 {
534 public:
535  typedef BV bvector_type;
536 public:
537  /**
538  \brief Deserialize bvector using buffer as set operation argument
539 
540  \param bv - target bvector
541  \param buf - serialized buffer as a logical argument
542  \param temp_block - temporary block to avoid re-allocations
543  \param op - set algebra operation (default: OR)
544  \param exit_on_one - quick exit if set operation found some result
545 
546  \return bitcount
547  */
548  static
549  unsigned deserialize(bvector_type& bv,
550  const unsigned char* buf,
551  bm::word_t* temp_block,
553  bool exit_on_one = false ///<! exit early if any one are found
554  );
555 private:
556  /** experimental 3-way deserializator TARGET = MASK (OR/AND/XOR) BUF
557  \param bv_target - target bvector
558  \param bv_mask - mask bvector (MASK)
559  \param buf - buffer argument
560  \param temp_block - operational temporary block to avoid re-allocations
561  \param op - logical operation
562  */
563  static
564  void deserialize(bvector_type& bv_target,
565  const bvector_type& bv_mask,
566  const unsigned char* buf,
567  bm::word_t* temp_block,
568  set_operation op);
569 
570 private:
571  typedef
572  typename BV::blocks_manager_type blocks_manager_type;
573  typedef
575  typedef
577  typedef
579 
580 };
581 
582 
583 
584 
585 
586 //---------------------------------------------------------------------
587 
588 template<class BV>
590  bm::word_t* temp_block)
591 : alloc_(alloc),
592  gap_serial_(false),
593  byte_order_serial_(true),
594  compression_level_(4)
595 {
596  if (temp_block == 0)
597  {
598  temp_block_ = alloc_.alloc_bit_block();
599  own_temp_block_ = true;
600  }
601  else
602  {
603  temp_block_ = temp_block;
604  own_temp_block_ = false;
605  }
606 }
607 
608 template<class BV>
610 : alloc_(allocator_type()),
611  gap_serial_(false),
612  byte_order_serial_(true),
613  compression_level_(4)
614 {
615  if (temp_block == 0)
616  {
617  temp_block_ = alloc_.alloc_bit_block();
618  own_temp_block_ = true;
619  }
620  else
621  {
622  temp_block_ = temp_block;
623  own_temp_block_ = false;
624  }
625 }
626 
627 
628 template<class BV>
630 {
631  compression_level_ = clevel;
632 }
633 
634 template<class BV>
636 {
637  return compression_level_;
638 }
639 
640 template<class BV>
642 {
643  if (own_temp_block_)
644  alloc_.free_bit_block(temp_block_);
645 }
646 
647 
648 template<class BV>
650 {
651  gap_serial_ = value;
652 }
653 
654 template<class BV>
656 {
657  byte_order_serial_ = value;
658 }
659 
660 template<class BV>
662 {
663  const blocks_manager_type& bman = bv.get_blocks_manager();
664 
665  unsigned char header_flag = 0;
666  if (bv.size() == bm::id_max) // no dynamic resize
667  header_flag |= BM_HM_DEFAULT;
668  else
669  header_flag |= BM_HM_RESIZE;
670 
671  if (!byte_order_serial_)
672  header_flag |= BM_HM_NO_BO;
673 
674  if (!gap_serial_)
675  header_flag |= BM_HM_NO_GAPL;
676 
677  enc.put_8(header_flag);
678 
679  if (byte_order_serial_)
680  {
682  enc.put_8((unsigned char)bo);
683  }
684 
685  // keep GAP levels information
686  if (gap_serial_)
687  {
688  enc.put_16(bman.glen(), bm::gap_levels);
689  }
690 
691  // save size (only if bvector has been down-sized)
692  if (header_flag & BM_HM_RESIZE)
693  {
694  enc.put_32(bv.size());
695  }
696 
697 }
698 
699 template<class BV>
701 {
702  unsigned len = gap_length(gap_block);
703 
704  // Use Elias Gamma encoding
705  if (len > 6 && (compression_level_ > 3))
706  {
707  encoder::position_type enc_pos0 = enc.get_pos();
708  {
709  bit_out_type bout(enc);
710  gamma_encoder_func gamma(bout);
711 
712  enc.put_8(set_block_gap_egamma);
713  enc.put_16(gap_block[0]);
714 
715  for_each_dgap(gap_block, gamma);
716  }
717 
718  // evaluate gamma coding efficiency
719  encoder::position_type enc_pos1 = enc.get_pos();
720  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
721  if (gamma_size > (len-1)*sizeof(gap_word_t))
722  {
723  enc.set_pos(enc_pos0);
724  }
725  else
726  {
727  return;
728  }
729  }
730 
731  // save as plain GAP block
732  enc.put_8(set_block_gap);
733  enc.put_16(gap_block, len-1);
734 }
735 
736 template<class BV>
738  unsigned arr_len,
739  bm::encoder& enc,
740  bool inverted)
741 {
742  if (compression_level_ > 3 && arr_len > 25)
743  {
744  encoder::position_type enc_pos0 = enc.get_pos();
745  {
746  bit_out_type bout(enc);
747 
748  enc.put_8(
749  inverted ? set_block_arrgap_egamma_inv
750  : set_block_arrgap_egamma);
751 
752  bout.gamma(arr_len);
753 
754  gap_word_t prev = gap_array[0];
755  bout.gamma(prev + 1);
756 
757  for (unsigned i = 1; i < arr_len; ++i)
758  {
759  gap_word_t curr = gap_array[i];
760  bout.gamma(curr - prev);
761  prev = curr;
762  }
763  }
764 
765  encoder::position_type enc_pos1 = enc.get_pos();
766  unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
767  if (gamma_size > (arr_len)*sizeof(gap_word_t))
768  {
769  enc.set_pos(enc_pos0);
770  }
771  else
772  {
773  return;
774  }
775  }
776 
777  // save as an plain array
778  enc.put_prefixed_array_16(inverted ? set_block_arrgap_inv : set_block_arrgap,
779  gap_array, arr_len, true);
780 }
781 
782 
783 template<class BV>
785 {
786  if (compression_level_ > 2)
787  {
788  gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
789  gap_word_t arr_len;
790 
791  unsigned bc = gap_bit_count_unr(gap_block);
792  if (bc == 1)
793  {
794  arr_len = gap_convert_to_arr(gap_temp_block,
795  gap_block,
796  bm::gap_equiv_len-10);
797  BM_ASSERT(arr_len == 1);
798  enc.put_8(set_block_bit_1bit);
799  enc.put_16(gap_temp_block[0]);
800  return;
801  }
802 
803  unsigned len = gap_length(gap_block);
804  bool invert, use_array;
805  invert = use_array = false;
806 
807  if (bc < len-1)
808  {
809  use_array = true;
810  }
811  else // inverted array is a better alternative?
812  {
813  unsigned inverted_bc = bm::gap_max_bits - bc;
814  if (inverted_bc < len-1)
815  {
816  use_array = invert = true;
817  }
818  }
819  if (use_array)
820  {
821  arr_len = gap_convert_to_arr(gap_temp_block,
822  gap_block,
824  invert);
825  if (arr_len)
826  {
827  gamma_gap_array(gap_temp_block, arr_len, enc, invert);
828  return;
829  }
830  }
831  }
832 
833  gamma_gap_block(gap_block, enc);
834 }
835 
836 template<class BV>
838  bm::encoder& enc,
839  unsigned //size_control
840  )
841 {
842  enc.put_8(set_block_bit_0runs);
843  enc.put_8((blk[0]==0) ? 0 : 1); // encode start
844 
845  unsigned i,j;//,k;
846 
847  for (i = 0; i < bm::set_block_size; ++i)
848  {
849  if (blk[i] == 0)
850  {
851  // scan fwd to find 0 island length
852  for (j = i+1; j < bm::set_block_size; ++j)
853  {
854  if (blk[j] != 0)
855  break;
856  }
857  enc.put_16((gap_word_t)(j-i));
858  i = j - 1;
859  }
860  else
861  {
862  // scan fwd to find non-0 island length
863  for (j = i+1; j < bm::set_block_size; ++j)
864  {
865  if (blk[j] == 0)
866  {
867  // look ahead to identify and ignore short 0-run
868  if (((j+1 < bm::set_block_size) && blk[j+1]) ||
869  ((j+2 < bm::set_block_size) && blk[j+2])
870  )
871  {
872  ++j; // skip zero word
873  continue;
874  }
875  break;
876  }
877  }
878  enc.put_16((gap_word_t)(j-i));
879  // stream all bit-words now
880  BM_ASSERT(i < j);
881  enc.put_32(blk + i, j - i);
882  i = j - 1;
883  }
884  }
885 }
886 
887 template<class BV>
888 void serializer<BV>::serialize(const BV& bv,
889  typename serializer<BV>::buffer& buf,
890  const statistics_type* bv_stat)
891 {
892  statistics_type stat;
893  if (!bv_stat)
894  {
895  bv.calc_stat(&stat);
896  bv_stat = &stat;
897  }
898 
899  buf.resize(bv_stat->max_serialize_mem);
900 
901  unsigned slen = this->serialize(bv, buf.data(), buf.size());
902  BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
903  BM_ASSERT(slen);
904 
905  buf.resize(slen);
906 }
907 
908 
909 template<class BV>
910 unsigned serializer<BV>::serialize(const BV& bv,
911  unsigned char* buf, size_t buf_size)
912 {
913  BM_ASSERT(temp_block_);
914 
915  const blocks_manager_type& bman = bv.get_blocks_manager();
916 
917  gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
918 
919  bm::encoder enc(buf, buf_size); // create the encoder
920  encode_header(bv, enc);
921 
922  unsigned i,j;
923 
924 
925  // save blocks.
926  for (i = 0; i < bm::set_total_blocks; ++i)
927  {
928  bm::word_t* blk = bman.get_block(i);
929  // -----------------------------------------
930  // Empty or ONE block serialization
931 
932  bool flag;
933  flag = bman.is_block_zero(i, blk, false);
934  if (flag)
935  {
936  zero_block:
937  unsigned next_nb = bman.find_next_nz_block(i+1, false);
938  if (next_nb == bm::set_total_blocks) // no more blocks
939  {
940  enc.put_8(set_block_azero);
941  return enc.size();
942  }
943  unsigned nb = next_nb - i;
944 
945  if (nb > 1 && nb < 128)
946  {
947  // special (but frequent) case -- GAP delta fits in 7bits
948  unsigned char c = (unsigned char)((1u << 7) | nb);
949  enc.put_8(c);
950  }
951  else
952  {
953  SER_NEXT_GRP(enc, nb, set_block_1zero,
954  set_block_8zero,
955  set_block_16zero,
956  set_block_32zero)
957  }
958  i = next_nb - 1;
959  continue;
960  }
961  else
962  {
963  flag = bman.is_block_one(i, blk, false);
964  if (flag)
965  {
966  // Look ahead for similar blocks
967  for(j = i+1; j < bm::set_total_blocks; ++j)
968  {
969  bm::word_t* blk_next = bman.get_block(j);
970  if (flag != bman.is_block_one(j, blk_next, false))
971  break;
972  }
973  if (j == bm::set_total_blocks)
974  {
975  enc.put_8(set_block_aone);
976  break;
977  }
978  else
979  {
980  unsigned nb = j - i;
981  SER_NEXT_GRP(enc, nb, set_block_1one,
982  set_block_8one,
983  set_block_16one,
984  set_block_32one)
985  }
986  i = j - 1;
987  continue;
988  }
989  }
990 
991  // ------------------------------
992  // GAP serialization
993 
994  if (BM_IS_GAP(blk))
995  {
996  gap_word_t* gblk = BMGAP_PTR(blk);
997  encode_gap_block(gblk, enc);
998  continue;
999  }
1000 
1001  // ----------------------------------------------
1002  // BIT BLOCK serialization
1003 
1004  {
1005  if (compression_level_ <= 1)
1006  {
1007  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1008  continue;
1009  }
1010 
1011  // compute bit-block statistics: bit-count and number of GAPS
1012  unsigned block_bc = 0;
1013  bm::id_t bit_gaps =
1015  &block_bc);
1016  unsigned block_bc_inv = bm::gap_max_bits - block_bc;
1017  switch (block_bc)
1018  {
1019  case 1: // corner case: only 1 bit on
1020  {
1021  bm::id_t bit_idx = 0;
1022  bit_find_in_block(blk, bit_idx, &bit_idx);
1023  enc.put_8(set_block_bit_1bit); enc.put_16((short)bit_idx);
1024  continue;
1025  }
1026  case 0: goto zero_block; // empty block
1027  }
1028 
1029 
1030  // compute alternative representation sizes
1031  //
1032  unsigned arr_block_size = unsigned(sizeof(gap_word_t) + (block_bc * sizeof(gap_word_t)));
1033  unsigned arr_block_size_inv = unsigned(sizeof(gap_word_t) + (block_bc_inv * sizeof(gap_word_t)));
1034  unsigned gap_block_size = unsigned(sizeof(gap_word_t) + ((bit_gaps+1) * sizeof(gap_word_t)));
1035  unsigned interval_block_size;
1036  interval_block_size = bit_count_nonzero_size(blk, bm::set_block_size);
1037 
1038  bool inverted = false;
1039 
1040  if (arr_block_size_inv < arr_block_size &&
1041  arr_block_size_inv < gap_block_size &&
1042  arr_block_size_inv < interval_block_size)
1043  {
1044  inverted = true;
1045  goto bit_as_array;
1046  }
1047 
1048  // if interval representation is not a good alternative
1049  if ((interval_block_size > arr_block_size) ||
1050  (interval_block_size > gap_block_size))
1051  {
1052  if (gap_block_size < (bm::gap_equiv_len-64) &&
1053  gap_block_size < arr_block_size)
1054  {
1055  unsigned len = bit_convert_to_gap(gap_temp_block,
1056  blk,
1058  bm::gap_equiv_len-64);
1059  if (len) // save as GAP
1060  {
1061  gamma_gap_block(gap_temp_block, enc);
1062  continue;
1063  }
1064  }
1065 
1066  if (arr_block_size < ((bm::gap_equiv_len-64) * sizeof(gap_word_t)))
1067  {
1068  bit_as_array:
1069  gap_word_t arr_len;
1070  unsigned mask = inverted ? ~0 : 0;
1071  arr_len = bit_convert_to_arr(gap_temp_block,
1072  blk,
1074  bm::gap_equiv_len-64,
1075  mask);
1076  if (arr_len)
1077  {
1078  gamma_gap_array(gap_temp_block, arr_len, enc, inverted);
1079  continue;
1080  }
1081 
1082  }
1083  // full bit-block
1084  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1085  continue;
1086  }
1087 
1088  // if interval block is a winner
1089  // it needs to have a compelling advantage of 25% over bit block
1090  //
1091  unsigned threashold_block_size =
1092  bm::set_block_size * sizeof(bm::word_t);
1093  threashold_block_size -= threashold_block_size / 4;
1094 
1095  if (interval_block_size < arr_block_size &&
1096  interval_block_size < gap_block_size &&
1097  interval_block_size < (bm::set_block_size * sizeof(bm::word_t))
1098  )
1099  {
1100  encode_bit_interval(blk, enc, interval_block_size);
1101  continue;
1102  }
1103 
1104  if (gap_block_size < bm::gap_equiv_len &&
1105  gap_block_size < arr_block_size)
1106  {
1107  unsigned len = bit_convert_to_gap(gap_temp_block,
1108  blk,
1110  bm::gap_equiv_len-64);
1111  if (len) // save as GAP
1112  {
1113  gamma_gap_block(gap_temp_block, enc);
1114  continue;
1115  }
1116  }
1117 
1118 
1119  // if array is best
1120  if (arr_block_size < bm::gap_equiv_len-64)
1121  {
1122  goto bit_as_array;
1123  }
1124  // full bit-block
1125  enc.put_prefixed_array_32(set_block_bit, blk, bm::set_block_size);
1126  continue;
1127  }
1128  }
1129 
1130  enc.put_8(set_block_end);
1131 
1132  unsigned encoded_size = enc.size();
1133  return encoded_size;
1134 
1135 }
1136 
1137 
1138 
1139 /// Bit mask flags for serialization algorithm
1140 /// \ingroup bvserial
1142  BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
1143  BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
1144 };
1145 
1146 /*!
1147  \brief Saves bitvector into memory.
1148 
1149  Function serializes content of the bitvector into memory.
1150  Serialization adaptively uses compression(variation of GAP encoding)
1151  when it is benefitial.
1152 
1153  \param bv - source bvecor
1154  \param buf - pointer on target memory area. No range checking in the
1155  function. It is responsibility of programmer to allocate sufficient
1156  amount of memory using information from calc_stat function.
1157 
1158  \param temp_block - pointer on temporary memory block. Cannot be 0;
1159  If you want to save memory across multiple bvectors
1160  allocate temporary block using allocate_tempblock and pass it to
1161  serialize.
1162  (Serialize does not deallocate temp_block.)
1163 
1164  \param serialization_flags
1165  Flags controlling serilization (bit-mask)
1166  (use OR-ed serialization flags)
1167 
1168  \ingroup bvserial
1169 
1170  \return Size of serialization block.
1171  \sa calc_stat, serialization_flags
1172 
1173 */
1174 /*!
1175  Serialization format:
1176  <pre>
1177 
1178  | HEADER | BLOCKS |
1179 
1180  Header structure:
1181  BYTE : Serialization header (bit mask of BM_HM_*)
1182  BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
1183  INT16: Reserved (0)
1184  INT16: Reserved Flags (0)
1185 
1186  </pre>
1187 */
1188 template<class BV>
1189 unsigned serialize(const BV& bv,
1190  unsigned char* buf,
1191  bm::word_t* temp_block = 0,
1192  unsigned serialization_flags = 0)
1193 {
1194  bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
1196  bv_serial.byte_order_serialization(false);
1197 
1199  bv_serial.gap_length_serialization(false);
1200  else
1201  bv_serial.gap_length_serialization(true);
1202 
1203  bv_serial.set_compression_level(4);
1204 
1205  return bv_serial.serialize(bv, buf, 0);
1206 }
1207 
1208 /*!
1209  @brief Saves bitvector into memory.
1210  Allocates temporary memory block for bvector.
1211 
1212  \param bv - source bvecor
1213  \param buf - pointer on target memory area. No range checking in the
1214  function. It is responsibility of programmer to allocate sufficient
1215  amount of memory using information from calc_stat function.
1216 
1217  \param serialization_flags
1218  Flags controlling serilization (bit-mask)
1219  (use OR-ed serialization flags)
1220 
1221  \ingroup bvserial
1222 */
1223 template<class BV>
1224 unsigned serialize(BV& bv,
1225  unsigned char* buf,
1226  unsigned serialization_flags=0)
1227 {
1228  return bm::serialize(bv, buf, 0, serialization_flags);
1229 }
1230 
1231 
1232 
1233 /*!
1234  @brief Bitvector deserialization from memory.
1235 
1236  @param bv - target bvector
1237  @param buf - pointer on memory which keeps serialized bvector
1238  @param temp_block - pointer on temporary block,
1239  if NULL bvector allocates own.
1240  @return Number of bytes consumed by deserializer.
1241 
1242  Function desrializes bitvector from memory block containig results
1243  of previous serialization. Function does not remove bits
1244  which are currently set. Effectively it means OR logical operation
1245  between current bitset and previously serialized one.
1246 
1247  @ingroup bvserial
1248 */
1249 template<class BV>
1250 unsigned deserialize(BV& bv,
1251  const unsigned char* buf,
1252  bm::word_t* temp_block=0)
1253 {
1254  ByteOrder bo_current = globals<true>::byte_order();
1255 
1256  bm::decoder dec(buf);
1257  unsigned char header_flag = dec.get_8();
1258  ByteOrder bo = bo_current;
1259  if (!(header_flag & BM_HM_NO_BO))
1260  {
1261  bo = (bm::ByteOrder) dec.get_8();
1262  }
1263 
1264  if (bo_current == bo)
1265  {
1267  return deserial.deserialize(bv, buf, temp_block);
1268  }
1269  switch (bo_current)
1270  {
1271  case BigEndian:
1272  {
1274  return deserial.deserialize(bv, buf, temp_block);
1275  }
1276  case LittleEndian:
1277  {
1279  return deserial.deserialize(bv, buf, temp_block);
1280  }
1281  default:
1282  BM_ASSERT(0);
1283  };
1284  return 0;
1285 }
1286 
1287 template<class DEC>
1289  unsigned block_type,
1290  bm::gap_word_t* dst_arr)
1291 {
1292  typedef bit_in<DEC> bit_in_type;
1293 
1294  gap_word_t len = 0;
1295 
1296  switch (block_type)
1297  {
1298  case set_block_bit_1bit:
1299  dst_arr[0] = decoder.get_16();
1300  len = 1;
1301  break;
1302  case set_block_arrgap:
1303  case set_block_arrgap_inv:
1304  len = decoder.get_16();
1305  decoder.get_16(dst_arr, len);
1306  break;
1309  {
1310  bit_in_type bin(decoder);
1311  len = (gap_word_t)bin.gamma();
1312  gap_word_t prev = 0;
1313  for (gap_word_t k = 0; k < len; ++k)
1314  {
1315  gap_word_t bit_idx = (gap_word_t)bin.gamma();
1316  if (k == 0) --bit_idx; // TODO: optimization
1317  bit_idx = (gap_word_t)(bit_idx + prev);
1318  prev = bit_idx;
1319  dst_arr[k] = bit_idx;
1320  } // for
1321  }
1322  break;
1323  default:
1324  BM_ASSERT(0);
1325  }
1326  return len;
1327 }
1328 
1329 
1330 template<class DEC>
1332  unsigned block_type,
1333  bm::gap_word_t* dst_block,
1334  bm::gap_word_t& gap_head)
1335 {
1336  typedef bit_in<DEC> bit_in_type;
1337 
1338  switch (block_type)
1339  {
1340  case set_block_gap:
1341  {
1342  unsigned len = gap_length(&gap_head);
1343  --len;
1344  *dst_block = gap_head;
1345  decoder.get_16(dst_block+1, len - 1);
1346  dst_block[len] = gap_max_bits - 1;
1347  }
1348  break;
1349  case set_block_bit_1bit:
1350  {
1351  gap_set_all(dst_block, bm::gap_max_bits, 0);
1352  gap_word_t bit_idx = decoder.get_16();
1353  gap_add_value(dst_block, bit_idx);
1354  }
1355  break;
1356  case set_block_arrgap:
1357  case set_block_arrgap_inv:
1358  {
1359  gap_set_all(dst_block, bm::gap_max_bits, 0);
1360  gap_word_t len = decoder.get_16();
1361 
1362  for (gap_word_t k = 0; k < len; ++k)
1363  {
1364  gap_word_t bit_idx = decoder.get_16();
1365  gap_add_value(dst_block, bit_idx);
1366  } // for
1367  }
1368  break;
1371  {
1372  unsigned arr_len = read_id_list(decoder, block_type, id_array_);
1373  dst_block[0] = 0;
1374  //unsigned gap_len =
1375  gap_set_array(dst_block, id_array_, arr_len);
1376  }
1377  break;
1378  case set_block_gap_egamma:
1379  {
1380  unsigned len = (gap_head >> 3);
1381  --len;
1382  // read gamma GAP block into a dest block
1383  {
1384  *dst_block = gap_head;
1385  gap_word_t* gap_data_ptr = dst_block + 1;
1386 
1387  bit_in_type bin(decoder);
1388  {
1389  gap_word_t v = (gap_word_t)bin.gamma();
1390  gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
1391  for (unsigned i = 1; i < len; ++i)
1392  {
1393  v = (gap_word_t)bin.gamma();
1394  gap_sum = (gap_word_t)(gap_sum + v);
1395  *(++gap_data_ptr) = gap_sum;
1396  }
1397  dst_block[len+1] = bm::gap_max_bits - 1;
1398  }
1399  }
1400 
1401  }
1402  break;
1403  default:
1404  BM_ASSERT(0);
1405  }
1406 
1407  if (block_type == set_block_arrgap_egamma_inv ||
1408  block_type == set_block_arrgap_inv)
1409  {
1410  gap_invert(dst_block);
1411  }
1412 }
1413 
1414 
1415 template<class BV, class DEC>
1416 void
1418  bvector_type& bv, blocks_manager_type& bman,
1419  unsigned i,
1420  bm::word_t* blk)
1421 {
1422  //typedef bit_in<DEC> bit_in_type;
1423  gap_word_t gap_head;
1424 
1425  switch (btype)
1426  {
1427  case set_block_gap:
1428  case set_block_gapbit:
1429  {
1430  gap_head = (gap_word_t)
1431  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
1432 
1433  unsigned len = gap_length(&gap_head);
1434  int level = gap_calc_level(len, bman.glen());
1435  --len;
1436  if (level == -1) // Too big to be GAP: convert to BIT block
1437  {
1438  *gap_temp_block_ = gap_head;
1439  dec.get_16(gap_temp_block_+1, len - 1);
1440  gap_temp_block_[len] = gap_max_bits - 1;
1441 
1442  if (blk == 0) // block does not exist yet
1443  {
1444  blk = bman.get_allocator().alloc_bit_block();
1445  bman.set_block(i, blk);
1446  gap_convert_to_bitset(blk, gap_temp_block_);
1447  }
1448  else // We have some data already here. Apply OR operation.
1449  {
1450  gap_convert_to_bitset(temp_block_,
1451  gap_temp_block_);
1452  bv.combine_operation_with_block(i,
1453  temp_block_,
1454  0,
1455  BM_OR);
1456  }
1457  return;
1458  } // level == -1
1459 
1460  set_gap_level(&gap_head, level);
1461 
1462  if (blk == 0)
1463  {
1464  gap_word_t* gap_blk =
1465  bman.get_allocator().alloc_gap_block(level, bman.glen());
1466  gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
1467  *gap_blk_ptr = gap_head;
1468  set_gap_level(gap_blk_ptr, level);
1469  bman.set_block(i, (bm::word_t*)gap_blk);
1470  bman.set_block_gap(i);
1471  dec.get_16(gap_blk + 1, len - 1);
1472  gap_blk[len] = bm::gap_max_bits - 1;
1473  }
1474  else // target block exists
1475  {
1476  // read GAP block into a temp memory and perform OR
1477  *gap_temp_block_ = gap_head;
1478  dec.get_16(gap_temp_block_ + 1, len - 1);
1479  gap_temp_block_[len] = bm::gap_max_bits - 1;
1480  break;
1481  }
1482  return;
1483  }
1484  case set_block_arrgap:
1486  {
1487  unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
1488  gap_temp_block_[0] = 0; // reset unused bits in gap header
1489  unsigned gap_len =
1490  gap_set_array(gap_temp_block_, this->id_array_, arr_len);
1491 
1492  int level = gap_calc_level(gap_len, bman.glen());
1493  if (level == -1) // Too big to be GAP: convert to BIT block
1494  {
1495  gap_convert_to_bitset(temp_block_, gap_temp_block_);
1496  bv.combine_operation_with_block(i,
1497  temp_block_,
1498  0,
1499  BM_OR);
1500  return;
1501  }
1502 
1503  break;
1504  }
1505  case set_block_gap_egamma:
1506  gap_head = (gap_word_t)
1507  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
1509  case set_block_arrgap_inv:
1510  this->read_gap_block(dec, btype, gap_temp_block_, gap_head);
1511  break;
1512  default:
1513  BM_ASSERT(0);
1514  }
1515 
1516  bv.combine_operation_with_block(i,
1517  (bm::word_t*)gap_temp_block_,
1518  1,
1519  BM_OR);
1520 
1521 }
1522 
1523 
1524 template<class BV, class DEC>
1526  const unsigned char* buf,
1527  bm::word_t* temp_block)
1528 {
1529  blocks_manager_type& bman = bv.get_blocks_manager();
1530  if (!bman.is_init())
1531  {
1532  bman.init_tree();
1533  }
1534 
1535  bm::wordop_t* tmp_buf =
1536  temp_block ? (bm::wordop_t*) temp_block
1537  : (bm::wordop_t*)bman.check_allocate_tempblock();
1538 
1539  temp_block_ = temp_block = (word_t*)tmp_buf;
1540  bm::strategy strat = bv.get_new_blocks_strat();
1541  bv.set_new_blocks_strat(BM_GAP);
1542 
1543  decoder_type dec(buf);
1544 
1546 
1547  // Reading header
1548 
1549  unsigned char header_flag = dec.get_8();
1550  if (!(header_flag & BM_HM_NO_BO))
1551  {
1552  /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
1553  }
1554 
1555  if (header_flag & BM_HM_ID_LIST)
1556  {
1557  // special case: the next comes plain list of integers
1558  if (header_flag & BM_HM_RESIZE)
1559  {
1560  unsigned bv_size = dec.get_32();
1561  if (bv_size > bv.size())
1562  {
1563  bv.resize(bv_size);
1564  }
1565  }
1566 
1567 
1568  for (unsigned cnt = dec.get_32(); cnt; --cnt) {
1569  bm::id_t id = dec.get_32();
1570  bv.set(id);
1571  } // for
1572  // -1 for compatibility with other deserialization branches
1573  return dec.size()-1;
1574  }
1575 
1576  unsigned i;
1577 
1578  if (!(header_flag & BM_HM_NO_GAPL))
1579  {
1580  //gap_word_t glevels[bm::gap_levels];
1581  // read GAP levels information
1582  for (i = 0; i < bm::gap_levels; ++i)
1583  {
1584  /*glevels[i] =*/ dec.get_16();
1585  }
1586  }
1587 
1588  if (header_flag & (1 << 1))
1589  {
1590  unsigned bv_size = dec.get_32();
1591  if (bv_size > bv.size())
1592  {
1593  bv.resize(bv_size);
1594  }
1595  }
1596 
1597  unsigned char btype;
1598  unsigned nb;
1599 
1600  for (i = 0; i < bm::set_total_blocks; ++i)
1601  {
1602  btype = dec.get_8();
1603  bm::word_t* blk = bman.get_block(i);
1604  // pre-check if we have short zero-run packaging here
1605  //
1606  if (btype & (1 << 7))
1607  {
1608  nb = btype & ~(1 << 7);
1609  i += nb-1;
1610  continue;
1611  }
1612 
1613  switch (btype)
1614  {
1615  case set_block_azero:
1616  case set_block_end:
1618  break;
1619  case set_block_1zero:
1620  continue;
1621  case set_block_8zero:
1622  nb = dec.get_8();
1623  i += nb-1;
1624  continue;
1625  case set_block_16zero:
1626  nb = dec.get_16();
1627  i += nb-1;
1628  continue;
1629  case set_block_32zero:
1630  nb = dec.get_32();
1631  i += nb-1;
1632  continue;
1633  case set_block_aone:
1634  for (;i < bm::set_total_blocks; ++i)
1635  {
1636  bman.set_block_all_set(i);
1637  }
1638  break;
1639  case set_block_1one:
1640  bman.set_block_all_set(i);
1641  continue;
1642  case set_block_8one:
1643  BM_SET_ONE_BLOCKS(dec.get_8());
1644  continue;
1645  case set_block_16one:
1646  BM_SET_ONE_BLOCKS(dec.get_16());
1647  continue;
1648  case set_block_32one:
1649  BM_SET_ONE_BLOCKS(dec.get_32());
1650  continue;
1651  case set_block_bit:
1652  {
1653  if (blk == 0)
1654  {
1655  blk = bman.get_allocator().alloc_bit_block();
1656  bman.set_block(i, blk);
1657  dec.get_32(blk, bm::set_block_size);
1658  continue;
1659  }
1660 
1661  dec.get_32(temp_block, bm::set_block_size);
1662  bv.combine_operation_with_block(i,
1663  temp_block,
1664  0, BM_OR);
1665 
1666  continue;
1667  }
1668  case set_block_bit_1bit:
1669  {
1670  unsigned bit_idx = dec.get_16();
1671  bit_idx += i * bm::bits_in_block;
1672  bv.set_bit(bit_idx);
1673  continue;
1674  }
1675  case set_block_bit_0runs:
1676  {
1677  //TODO: optimization if block exists
1678  bit_block_set(temp_block, 0);
1679 
1680  unsigned char run_type = dec.get_8();
1681  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
1682  {
1683  unsigned run_length = dec.get_16();
1684  if (run_type)
1685  {
1686  unsigned run_end = j + run_length;
1687  for (;j < run_end; ++j)
1688  {
1689  BM_ASSERT(j < bm::set_block_size);
1690  temp_block[j] = dec.get_32();
1691  }
1692  }
1693  else
1694  {
1695  j += run_length;
1696  }
1697  } // for
1698 
1699  bv.combine_operation_with_block(i,
1700  temp_block,
1701  0, BM_OR);
1702  continue;
1703  }
1704  case set_block_bit_interval:
1705  {
1706  unsigned head_idx, tail_idx;
1707  head_idx = dec.get_16();
1708  tail_idx = dec.get_16();
1709 
1710  if (blk == 0)
1711  {
1712  blk = bman.get_allocator().alloc_bit_block();
1713  bman.set_block(i, blk);
1714  for (unsigned k = 0; k < head_idx; ++k)
1715  {
1716  blk[k] = 0;
1717  }
1718  dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
1719  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
1720  {
1721  blk[j] = 0;
1722  }
1723  continue;
1724  }
1725  bit_block_set(temp_block, 0);
1726  dec.get_32(temp_block + head_idx, tail_idx - head_idx + 1);
1727 
1728  bv.combine_operation_with_block(i,
1729  temp_block,
1730  0, BM_OR);
1731  continue;
1732  }
1733  case set_block_gap:
1734  case set_block_gapbit:
1735  case set_block_arrgap:
1736  case set_block_gap_egamma:
1739  case set_block_arrgap_inv:
1740  deserialize_gap(btype, dec, bv, bman, i, blk);
1741  continue;
1742  case set_block_arrbit:
1743  {
1744  gap_word_t len = (gap_word_t)
1745  (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
1746 
1747  if (bman.is_block_gap(i))
1748  {
1749  // Here we most probably does not want to keep
1750  // the block GAP since generic bitblock offers better
1751  // performance.
1752  blk = bman.convert_gap2bitset(i);
1753  }
1754  else
1755  {
1756  if (blk == 0) // block does not exists yet
1757  {
1758  blk = bman.get_allocator().alloc_bit_block();
1759  bit_block_set(blk, 0);
1760  bman.set_block(i, blk);
1761  }
1762  }
1763 
1764  // Get the array one by one and set the bits.
1765  for (unsigned k = 0; k < len; ++k)
1766  {
1767  gap_word_t bit_idx = dec.get_16();
1768  set_bit(blk, bit_idx);
1769  }
1770  continue;
1771  }
1772  default:
1773  BM_ASSERT(0); // unknown block type
1774  } // switch
1775  } // for i
1776 
1777  bv.forget_count();
1778  bv.set_new_blocks_strat(strat);
1779 
1780  return dec.size();
1781 }
1782 
1783 
1784 
1785 template<class DEC>
1787  : decoder_(buf),
1788  end_of_stream_(false),
1789  bv_size_(0),
1790  state_(e_unknown),
1791  id_cnt_(0),
1792  block_idx_(0),
1793  mono_block_cnt_(0)
1794 {
1795  ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
1796 
1823 
1824 
1825  // reading stream header
1826  unsigned char header_flag = decoder_.get_8();
1827  if (!(header_flag & BM_HM_NO_BO))
1828  {
1829  /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
1830  }
1831 
1832  // check if bitvector comes as an inverted, sorted list of ints
1833  //
1834  if (header_flag & BM_HM_ID_LIST)
1835  {
1836  // special case: the next comes plain list of unsigned integers
1837  if (header_flag & BM_HM_RESIZE)
1838  {
1839  bv_size_ = decoder_.get_32();
1840  }
1841 
1842  state_ = e_list_ids;
1843  id_cnt_ = decoder_.get_32();
1844  next(); // read first id
1845  }
1846  else
1847  {
1848  if (!(header_flag & BM_HM_NO_GAPL))
1849  {
1850  unsigned i;
1851  // keep GAP levels info
1852  for (i = 0; i < bm::gap_levels; ++i)
1853  {
1854  glevels_[i] = decoder_.get_16();
1855  }
1856  }
1857 
1858  if (header_flag & (1 << 1))
1859  {
1860  bv_size_ = decoder_.get_32();
1861  }
1862  state_ = e_blocks;
1863  }
1864 }
1865 
1866 template<class DEC>
1868 {
1869  if (is_eof())
1870  {
1871  ++block_idx_;
1872  return;
1873  }
1874 
1875  switch (state_)
1876  {
1877  case e_list_ids:
1878  // read inverted ids one by one
1879  if (id_cnt_ == 0)
1880  {
1881  end_of_stream_ = true;
1882  state_ = e_unknown;
1883  break;
1884  }
1885  last_id_ = decoder_.get_32();
1886  --id_cnt_;
1887  break;
1888 
1889  case e_blocks:
1891  {
1892  end_of_stream_ = true;
1893  state_ = e_unknown;
1894  break;
1895  }
1896 
1897  block_type_ = decoder_.get_8();
1898 
1899  // pre-check for 7-bit zero block
1900  //
1901  if (block_type_ & (1 << 7))
1902  {
1903  mono_block_cnt_ = (block_type_ & ~(1 << 7)) - 1;
1905  break;
1906  }
1907 
1908  switch (block_type_)
1909  {
1910  case set_block_azero:
1911  case set_block_end:
1912  end_of_stream_ = true; state_ = e_unknown;
1913  break;
1914  case set_block_1zero:
1916  mono_block_cnt_ = 0;
1917  break;
1918  case set_block_8zero:
1920  mono_block_cnt_ = decoder_.get_8()-1;
1921  break;
1922  case set_block_16zero:
1924  mono_block_cnt_ = decoder_.get_16()-1;
1925  break;
1926  case set_block_32zero:
1928  mono_block_cnt_ = decoder_.get_32()-1;
1929  break;
1930  case set_block_aone:
1931  state_ = e_one_blocks;
1933  break;
1934  case set_block_1one:
1935  state_ = e_one_blocks;
1936  mono_block_cnt_ = 0;
1937  break;
1938  case set_block_8one:
1939  state_ = e_one_blocks;
1940  mono_block_cnt_ = decoder_.get_8()-1;
1941  break;
1942  case set_block_16one:
1943  state_ = e_one_blocks;
1944  mono_block_cnt_ = decoder_.get_16()-1;
1945  break;
1946  case set_block_32one:
1947  state_ = e_one_blocks;
1948  mono_block_cnt_ = decoder_.get_32()-1;
1949  break;
1950 
1951  case set_block_bit:
1953  case set_block_bit_0runs:
1954  case set_block_arrbit:
1955  state_ = e_bit_block;
1956  break;
1957 
1958  case set_block_gap:
1959  case set_block_gap_egamma:
1960  gap_head_ = (gap_word_t)
1961  (sizeof(gap_word_t) == 2 ?
1962  decoder_.get_16() : decoder_.get_32());
1963  case set_block_arrgap:
1966  case set_block_arrgap_inv:
1967  case set_block_bit_1bit:
1968  state_ = e_gap_block;
1969  break;
1970  case set_block_gapbit:
1971  state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
1972  break;
1973 
1974  default:
1975  BM_ASSERT(0);
1976  }// switch
1977 
1978  break;
1979 
1980  case e_zero_blocks:
1981  case e_one_blocks:
1982  ++block_idx_;
1983  if (!mono_block_cnt_)
1984  {
1985  state_ = e_blocks; // get new block token
1986  break;
1987  }
1988  --mono_block_cnt_;
1989  break;
1990 
1991  case e_unknown:
1992  default:
1993  BM_ASSERT(0);
1994  } // switch
1995 }
1996 
1997 template<class DEC>
1999 {
2001  if (!mono_block_cnt_)
2002  {
2003  ++block_idx_;
2004  }
2005  else
2006  {
2008  mono_block_cnt_ = 0;
2009  }
2010  state_ = e_blocks;
2011 }
2012 
2013 template<class DEC>
2014 unsigned
2016  bm::word_t* dst_block,
2017  bm::word_t* /*tmp_block*/)
2018 {
2019  BM_ASSERT(this->state_ == e_bit_block);
2020  unsigned count = 0;
2021 
2022  switch (this->block_type_)
2023  {
2024  case set_block_bit:
2025  decoder_.get_32(dst_block, bm::set_block_size);
2026  break;
2027  case set_block_bit_0runs:
2028  {
2029  if (dst_block)
2030  bit_block_set(dst_block, 0);
2031  unsigned char run_type = decoder_.get_8();
2032  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2033  {
2034  unsigned run_length = decoder_.get_16();
2035  if (run_type)
2036  {
2037  decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
2038  }
2039  j += run_length;
2040  } // for
2041  }
2042  break;
2044  {
2045  unsigned head_idx = decoder_.get_16();
2046  unsigned tail_idx = decoder_.get_16();
2047  if (dst_block)
2048  {
2049  for (unsigned i = 0; i < head_idx; ++i)
2050  dst_block[i] = 0;
2051  decoder_.get_32(dst_block + head_idx,
2052  tail_idx - head_idx + 1);
2053  for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
2054  dst_block[j] = 0;
2055  }
2056  else
2057  {
2058  decoder_.seek((tail_idx - head_idx + 1) * 4);
2059  }
2060  }
2061  break;
2062  case set_block_arrbit:
2063  case set_block_bit_1bit:
2064  get_arr_bit(dst_block, true /*clear target*/);
2065  break;
2066  case set_block_gapbit:
2067  BM_ASSERT(0);
2068  break;
2069  default:
2070  BM_ASSERT(0);
2071  } // switch
2072  return count;
2073 }
2074 
2075 template<class DEC>
2076 unsigned
2078  bm::word_t* /*tmp_block*/)
2079 {
2080  BM_ASSERT(this->state_ == e_bit_block);
2081  unsigned count = 0;
2082  switch (block_type_)
2083  {
2084  case set_block_bit:
2085  {
2086 #ifdef BMAVX2OPT
2087  for (unsigned i = 0; i < bm::set_block_size; i+=8)
2088  {
2089  unsigned i0 = decoder_.get_32();
2090  unsigned i1 = decoder_.get_32();
2091  unsigned i2 = decoder_.get_32();
2092  unsigned i3 = decoder_.get_32();
2093  unsigned i4 = decoder_.get_32();
2094  unsigned i5 = decoder_.get_32();
2095  unsigned i6 = decoder_.get_32();
2096  unsigned i7 = decoder_.get_32();
2097 
2098  __m256i ymm0 = _mm256_setr_epi32(i0, i1, i2, i3, i4, i5, i6, i7);
2099  __m256i ymm1 = _mm256_load_si256((__m256i*)(dst_block+i));
2100  ymm0 = _mm256_or_si256(ymm0, ymm1);
2101  _mm256_store_si256((__m256i*)(dst_block+i), ymm0);
2102  }
2103 #else
2104  for (unsigned i = 0; i < bm::set_block_size; i+=4)
2105  {
2106  dst_block[i+0] |= decoder_.get_32();
2107  dst_block[i+1] |= decoder_.get_32();
2108  dst_block[i+2] |= decoder_.get_32();
2109  dst_block[i+3] |= decoder_.get_32();
2110  }
2111 #endif
2112  }
2113 
2114  break;
2116  {
2117  unsigned head_idx = decoder_.get_16();
2118  unsigned tail_idx = decoder_.get_16();
2119  for (unsigned i = head_idx; i <= tail_idx; ++i)
2120  dst_block[i] |= decoder_.get_32();
2121  }
2122  break;
2123  case set_block_bit_0runs:
2124  {
2125  unsigned char run_type = decoder_.get_8();
2126  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2127  {
2128  unsigned run_length = decoder_.get_16();
2129  if (run_type)
2130  {
2131  unsigned run_end = j + run_length;
2132  for (;j < run_end; ++j)
2133  {
2134  BM_ASSERT(j < bm::set_block_size);
2135  dst_block[j] |= decoder_.get_32();
2136  }
2137  }
2138  else
2139  {
2140  j += run_length;
2141  }
2142  } // for
2143  }
2144  break;
2145  case set_block_bit_1bit:
2146  case set_block_arrbit:
2147  get_arr_bit(dst_block, false /*don't clear target*/);
2148  break;
2149  default:
2150  BM_ASSERT(0);
2151  } // switch
2152  return count;
2153 }
2154 
2155 template<class DEC>
2156 unsigned
2158  bm::word_t* BMRESTRICT tmp_block)
2159 {
2160  BM_ASSERT(this->state_ == e_bit_block);
2161  BM_ASSERT(dst_block != tmp_block);
2162  unsigned count = 0;
2163  switch (block_type_)
2164  {
2165  case set_block_bit:
2166 
2167 #ifdef BMAVX2OPT
2168  for (unsigned i = 0; i < bm::set_block_size; i+=8)
2169  {
2170  unsigned i0 = decoder_.get_32();
2171  unsigned i1 = decoder_.get_32();
2172  unsigned i2 = decoder_.get_32();
2173  unsigned i3 = decoder_.get_32();
2174  unsigned i4 = decoder_.get_32();
2175  unsigned i5 = decoder_.get_32();
2176  unsigned i6 = decoder_.get_32();
2177  unsigned i7 = decoder_.get_32();
2178 
2179  __m256i ymm0 = _mm256_setr_epi32(i0, i1, i2, i3, i4, i5, i6, i7);
2180  __m256i ymm1 = _mm256_load_si256((__m256i*)(dst_block+i));
2181  ymm0 = _mm256_and_si256(ymm0, ymm1);
2182  _mm256_store_si256((__m256i*)(dst_block+i), ymm0);
2183  }
2184 #else
2185  for (unsigned i = 0; i < bm::set_block_size; i+=4)
2186  {
2187  dst_block[i+0] &= decoder_.get_32();
2188  dst_block[i+1] &= decoder_.get_32();
2189  dst_block[i+2] &= decoder_.get_32();
2190  dst_block[i+3] &= decoder_.get_32();
2191  }
2192 #endif
2193  break;
2194  case set_block_bit_0runs:
2195  {
2196  unsigned char run_type = decoder_.get_8();
2197  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2198  {
2199  unsigned run_length = decoder_.get_16();
2200 
2201  unsigned run_end = j + run_length;
2202  if (run_type)
2203  {
2204  for (;j < run_end; ++j)
2205  {
2206  BM_ASSERT(j < bm::set_block_size);
2207  dst_block[j] &= decoder_.get_32();
2208  }
2209  }
2210  else
2211  {
2212  for (;j < run_end; ++j)
2213  {
2214  BM_ASSERT(j < bm::set_block_size);
2215  dst_block[j] = 0;
2216  }
2217  }
2218  } // for
2219  }
2220  break;
2222  {
2223  unsigned head_idx = decoder_.get_16();
2224  unsigned tail_idx = decoder_.get_16();
2225  unsigned i;
2226  for ( i = 0; i < head_idx; ++i)
2227  dst_block[i] = 0;
2228  for ( i = head_idx; i <= tail_idx; ++i)
2229  dst_block[i] &= decoder_.get_32();
2230  for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
2231  dst_block[i] = 0;
2232  }
2233  break;
2234  case set_block_bit_1bit:
2235  case set_block_arrbit:
2236  get_arr_bit(tmp_block, true /*clear target*/);
2237  if (dst_block)
2238  bit_block_and(dst_block, tmp_block);
2239  break;
2240  default:
2241  BM_ASSERT(0);
2242  } // switch
2243  return count;
2244 }
2245 
2246 template<class DEC>
2247 unsigned
2249  bm::word_t* tmp_block)
2250 {
2251  BM_ASSERT(this->state_ == e_bit_block);
2252  BM_ASSERT(dst_block != tmp_block);
2253 
2254  unsigned count = 0;
2255  switch (block_type_)
2256  {
2257  case set_block_bit:
2258  for (unsigned i = 0; i < bm::set_block_size; ++i)
2259  dst_block[i] ^= decoder_.get_32();
2260  break;
2261  case set_block_bit_0runs:
2262  {
2263  unsigned char run_type = decoder_.get_8();
2264  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2265  {
2266  unsigned run_length = decoder_.get_16();
2267  if (run_type)
2268  {
2269  unsigned run_end = j + run_length;
2270  for (;j < run_end; ++j)
2271  {
2272  BM_ASSERT(j < bm::set_block_size);
2273  dst_block[j] ^= decoder_.get_32();
2274  }
2275  }
2276  else
2277  {
2278  j += run_length;
2279  }
2280  } // for
2281  }
2282  break;
2284  {
2285  unsigned head_idx = decoder_.get_16();
2286  unsigned tail_idx = decoder_.get_16();
2287  for (unsigned i = head_idx; i <= tail_idx; ++i)
2288  dst_block[i] ^= decoder_.get_32();
2289  }
2290  break;
2291  case set_block_bit_1bit:
2292  // TODO: optimization
2293  case set_block_arrbit:
2294  get_arr_bit(tmp_block, true /*clear target*/);
2295  if (dst_block)
2296  {
2297  bit_block_xor(dst_block, tmp_block);
2298  }
2299  break;
2300  default:
2301  BM_ASSERT(0);
2302  } // switch
2303  return count;
2304 }
2305 
2306 template<class DEC>
2307 unsigned
2309  bm::word_t* tmp_block)
2310 {
2311  BM_ASSERT(this->state_ == e_bit_block);
2312  BM_ASSERT(dst_block != tmp_block);
2313 
2314  unsigned count = 0;
2315  switch (block_type_)
2316  {
2317  case set_block_bit:
2318  for (unsigned i = 0; i < bm::set_block_size; ++i)
2319  dst_block[i] &= ~decoder_.get_32();
2320  break;
2321  case set_block_bit_0runs:
2322  {
2323  unsigned char run_type = decoder_.get_8();
2324  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2325  {
2326  unsigned run_length = decoder_.get_16();
2327  if (run_type)
2328  {
2329  unsigned run_end = j + run_length;
2330  for (;j < run_end; ++j)
2331  {
2332  BM_ASSERT(j < bm::set_block_size);
2333  dst_block[j] &= ~decoder_.get_32();
2334  }
2335  }
2336  else
2337  {
2338  j += run_length;
2339  }
2340  } // for
2341  }
2342  break;
2344  {
2345  unsigned head_idx = decoder_.get_16();
2346  unsigned tail_idx = decoder_.get_16();
2347  for (unsigned i = head_idx; i <= tail_idx; ++i)
2348  dst_block[i] &= ~decoder_.get_32();
2349  }
2350  break;
2351  case set_block_bit_1bit:
2352  // TODO: optimization
2353  case set_block_arrbit:
2354  get_arr_bit(tmp_block, true /*clear target*/);
2355  if (dst_block)
2356  bit_block_sub(dst_block, tmp_block);
2357  break;
2358  default:
2359  BM_ASSERT(0);
2360  } // switch
2361  return count;
2362 }
2363 
2364 
2365 template<class DEC>
2366 unsigned
2368  bm::word_t* /*tmp_block*/)
2369 {
2370  BM_ASSERT(this->state_ == e_bit_block);
2371 
2372  unsigned count = 0;
2373  switch (block_type_)
2374  {
2375  case set_block_bit:
2376  for (unsigned i = 0; i < bm::set_block_size; ++i)
2377  count += word_bitcount(decoder_.get_32());
2378  break;
2379  case set_block_bit_0runs:
2380  {
2381  //count = 0;
2382  unsigned char run_type = decoder_.get_8();
2383  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2384  {
2385  unsigned run_length = decoder_.get_16();
2386  if (run_type)
2387  {
2388  unsigned run_end = j + run_length;
2389  for (;j < run_end; ++j)
2390  {
2391  count += word_bitcount(decoder_.get_32());
2392  }
2393  }
2394  else
2395  {
2396  j += run_length;
2397  }
2398  } // for
2399  return count;
2400  }
2402  {
2403  unsigned head_idx = decoder_.get_16();
2404  unsigned tail_idx = decoder_.get_16();
2405  for (unsigned i = head_idx; i <= tail_idx; ++i)
2406  count += word_bitcount(decoder_.get_32());
2407  }
2408  break;
2409  case set_block_arrbit:
2410  count += get_arr_bit(0);
2411  break;
2412  case set_block_bit_1bit:
2413  ++count;
2414  decoder_.get_16();
2415  break;
2416  default:
2417  BM_ASSERT(0);
2418  } // switch
2419  return count;
2420 }
2421 
2422 template<class DEC>
2423 unsigned
2425  bm::word_t* /*tmp_block*/)
2426 {
2427  BM_ASSERT(this->state_ == e_bit_block);
2428  unsigned count = 0;
2429  if (dst_block)
2430  {
2431  // count the block bitcount
2432  count =
2433  bit_block_calc_count(dst_block,
2434  dst_block + bm::set_block_size);
2435  }
2436 
2437  switch (block_type_)
2438  {
2439  case set_block_bit:
2440  decoder_.get_32(0, bm::set_block_size);
2441  break;
2442  case set_block_bit_0runs:
2443  {
2444  unsigned char run_type = decoder_.get_8();
2445  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2446  {
2447  unsigned run_length = decoder_.get_16();
2448  if (run_type)
2449  {
2450  unsigned run_end = j + run_length;
2451  for (;j < run_end; ++j)
2452  {
2453  decoder_.get_32();
2454  }
2455  }
2456  else
2457  {
2458  j += run_length;
2459  }
2460  } // for
2461  }
2462  break;
2463 
2465  {
2466  unsigned head_idx = decoder_.get_16();
2467  unsigned tail_idx = decoder_.get_16();
2468  for (unsigned i = head_idx; i <= tail_idx; ++i)
2469  decoder_.get_32();
2470  }
2471  break;
2472  case set_block_arrbit:
2473  get_arr_bit(0);
2474  break;
2475  case set_block_bit_1bit:
2476  decoder_.get_16();
2477  break;
2478  default:
2479  BM_ASSERT(0);
2480  } // switch
2481  return count;
2482 }
2483 
2484 
2485 template<class DEC>
2486 unsigned
2488  bm::word_t* tmp_block)
2489 {
2490  BM_ASSERT(this->state_ == e_bit_block);
2491  BM_ASSERT(dst_block);
2492 
2493  unsigned count = 0;
2494  switch (block_type_)
2495  {
2496  case set_block_bit:
2497  for (unsigned i = 0; i < bm::set_block_size; ++i)
2498  count += word_bitcount(dst_block[i] & decoder_.get_32());
2499  break;
2500  case set_block_bit_0runs:
2501  {
2502  //count = 0;
2503  unsigned char run_type = decoder_.get_8();
2504  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2505  {
2506  unsigned run_length = decoder_.get_16();
2507  if (run_type)
2508  {
2509  unsigned run_end = j + run_length;
2510  for (;j < run_end; ++j)
2511  {
2512  count += word_bitcount(dst_block[j] & decoder_.get_32());
2513  }
2514  }
2515  else
2516  {
2517  j += run_length;
2518  }
2519  } // for
2520  return count;
2521  }
2523  {
2524  unsigned head_idx = decoder_.get_16();
2525  unsigned tail_idx = decoder_.get_16();
2526  for (unsigned i = head_idx; i <= tail_idx; ++i)
2527  count += word_bitcount(dst_block[i] & decoder_.get_32());
2528  }
2529  break;
2530  case set_block_bit_1bit:
2531  // TODO: optimization
2532  case set_block_arrbit:
2533  get_arr_bit(tmp_block, true /*clear target*/);
2534  count +=
2535  bit_operation_and_count(dst_block, dst_block + bm::set_block_size,
2536  tmp_block);
2537  break;
2538  default:
2539  BM_ASSERT(0);
2540  } // switch
2541  return count;
2542 }
2543 
2544 template<class DEC>
2545 unsigned
2547  bm::word_t* tmp_block)
2548 {
2549  BM_ASSERT(this->state_ == e_bit_block);
2550  BM_ASSERT(dst_block);
2551 
2552  bitblock_sum_adapter count_adapter;
2553  switch (block_type_)
2554  {
2555  case set_block_bit:
2556  {
2557  bitblock_get_adapter ga(dst_block);
2559 
2560  bit_recomb(ga,
2561  decoder_,
2562  func,
2563  count_adapter
2564  );
2565  }
2566  break;
2567  case set_block_bit_0runs:
2568  {
2569  unsigned count = 0;
2570  unsigned char run_type = decoder_.get_8();
2571  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2572  {
2573  unsigned run_length = decoder_.get_16();
2574  unsigned run_end = j + run_length;
2575  if (run_type)
2576  {
2577  for (;j < run_end; ++j)
2578  {
2579  BM_ASSERT(j < bm::set_block_size);
2580  count += word_bitcount(dst_block[j] | decoder_.get_32());
2581  }
2582  }
2583  else
2584  {
2585  for (;j < run_end; ++j)
2586  {
2587  BM_ASSERT(j < bm::set_block_size);
2588  count += word_bitcount(dst_block[j]);
2589  }
2590  }
2591  } // for
2592  return count;
2593  }
2595  {
2596  unsigned head_idx = decoder_.get_16();
2597  unsigned tail_idx = decoder_.get_16();
2598  unsigned count = 0;
2599  unsigned i;
2600  for (i = 0; i < head_idx; ++i)
2601  count += word_bitcount(dst_block[i]);
2602  for (i = head_idx; i <= tail_idx; ++i)
2603  count += word_bitcount(dst_block[i] | decoder_.get_32());
2604  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
2605  count += word_bitcount(dst_block[i]);
2606  return count;
2607  }
2608  case set_block_bit_1bit:
2609  // TODO: optimization
2610  case set_block_arrbit:
2611  get_arr_bit(tmp_block, true /* clear target*/);
2612  return
2613  bit_operation_or_count(dst_block,
2614  dst_block + bm::set_block_size,
2615  tmp_block);
2616  default:
2617  BM_ASSERT(0);
2618  } // switch
2619  return count_adapter.sum();
2620 }
2621 
2622 template<class DEC>
2623 unsigned
2625  bm::word_t* tmp_block)
2626 {
2627  BM_ASSERT(this->state_ == e_bit_block);
2628  BM_ASSERT(dst_block);
2629 
2630  bitblock_sum_adapter count_adapter;
2631  switch (block_type_)
2632  {
2633  case set_block_bit:
2634  {
2635  bitblock_get_adapter ga(dst_block);
2637 
2638  bit_recomb(ga,
2639  decoder_,
2640  func,
2641  count_adapter
2642  );
2643  }
2644  break;
2645  case set_block_bit_0runs:
2646  {
2647  unsigned count = 0;
2648  unsigned char run_type = decoder_.get_8();
2649  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2650  {
2651  unsigned run_length = decoder_.get_16();
2652  unsigned run_end = j + run_length;
2653  if (run_type)
2654  {
2655  for (;j < run_end; ++j)
2656  {
2657  BM_ASSERT(j < bm::set_block_size);
2658  count += word_bitcount(dst_block[j] ^ decoder_.get_32());
2659  }
2660  }
2661  else
2662  {
2663  for (;j < run_end; ++j)
2664  {
2665  BM_ASSERT(j < bm::set_block_size);
2666  count += word_bitcount(dst_block[j]);
2667  }
2668  }
2669  } // for
2670  return count;
2671  }
2673  {
2674  unsigned head_idx = decoder_.get_16();
2675  unsigned tail_idx = decoder_.get_16();
2676  unsigned count = 0;
2677  unsigned i;
2678  for (i = 0; i < head_idx; ++i)
2679  count += word_bitcount(dst_block[i]);
2680  for (i = head_idx; i <= tail_idx; ++i)
2681  count += word_bitcount(dst_block[i] ^ decoder_.get_32());
2682  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
2683  count += word_bitcount(dst_block[i]);
2684  return count;
2685  }
2686  case set_block_bit_1bit:
2687  // TODO: optimization
2688  case set_block_arrbit:
2689  get_arr_bit(tmp_block, true /* clear target*/);
2690  return
2691  bit_operation_xor_count(dst_block,
2692  dst_block + bm::set_block_size,
2693  tmp_block);
2694  default:
2695  BM_ASSERT(0);
2696  } // switch
2697  return count_adapter.sum();
2698 }
2699 
2700 template<class DEC>
2701 unsigned
2703  bm::word_t* tmp_block)
2704 {
2705  BM_ASSERT(this->state_ == e_bit_block);
2706  BM_ASSERT(dst_block);
2707 
2708  bitblock_sum_adapter count_adapter;
2709  switch (block_type_)
2710  {
2711  case set_block_bit:
2712  {
2713  bitblock_get_adapter ga(dst_block);
2715 
2716  bit_recomb(ga,
2717  decoder_,
2718  func,
2719  count_adapter
2720  );
2721  }
2722  break;
2723  case set_block_bit_0runs:
2724  {
2725  unsigned count = 0;
2726  unsigned char run_type = decoder_.get_8();
2727  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2728  {
2729  unsigned run_length = decoder_.get_16();
2730  unsigned run_end = j + run_length;
2731  if (run_type)
2732  {
2733  for (;j < run_end; ++j)
2734  {
2735  BM_ASSERT(j < bm::set_block_size);
2736  count += word_bitcount(dst_block[j] & ~decoder_.get_32());
2737  }
2738  }
2739  else
2740  {
2741  for (;j < run_end; ++j)
2742  {
2743  BM_ASSERT(j < bm::set_block_size);
2744  count += word_bitcount(dst_block[j]);
2745  }
2746  }
2747  } // for
2748  return count;
2749  }
2751  {
2752  unsigned head_idx = decoder_.get_16();
2753  unsigned tail_idx = decoder_.get_16();
2754  unsigned count = 0;
2755  unsigned i;
2756  for (i = 0; i < head_idx; ++i)
2757  count += word_bitcount(dst_block[i]);
2758  for (i = head_idx; i <= tail_idx; ++i)
2759  count += word_bitcount(dst_block[i] & (~decoder_.get_32()));
2760  for (i = tail_idx + 1; i < bm::set_block_size; ++i)
2761  count += word_bitcount(dst_block[i]);
2762  return count;
2763  }
2764  break;
2765  case set_block_bit_1bit:
2766  //TODO: optimization
2767  case set_block_arrbit:
2768  get_arr_bit(tmp_block, true /* clear target*/);
2769  return
2770  bit_operation_sub_count(dst_block,
2771  dst_block + bm::set_block_size,
2772  tmp_block);
2773  default:
2774  BM_ASSERT(0);
2775  } // switch
2776  return count_adapter.sum();
2777 }
2778 
2779 template<class DEC>
2780 unsigned
2782  bm::word_t* tmp_block)
2783 {
2784  BM_ASSERT(this->state_ == e_bit_block);
2785  BM_ASSERT(dst_block);
2786 
2787  bitblock_sum_adapter count_adapter;
2788  switch (block_type_)
2789  {
2790  case set_block_bit:
2791  {
2792  bitblock_get_adapter ga(dst_block);
2794 
2795  bit_recomb(ga,
2796  decoder_,
2797  func,
2798  count_adapter
2799  );
2800  }
2801  break;
2802  case set_block_bit_0runs:
2803  {
2804  unsigned count = 0;
2805  unsigned char run_type = decoder_.get_8();
2806  for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
2807  {
2808  unsigned run_length = decoder_.get_16();
2809  unsigned run_end = j + run_length;
2810  if (run_type)
2811  {
2812  for (;j < run_end; ++j)
2813  {
2814  BM_ASSERT(j < bm::set_block_size);
2815  count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
2816  }
2817  }
2818  else
2819  {
2820  j += run_length;
2821  }
2822  } // for
2823  return count;
2824  }
2825 
2827  {
2828  unsigned head_idx = decoder_.get_16();
2829  unsigned tail_idx = decoder_.get_16();
2830  unsigned count = 0;
2831  unsigned i;
2832  for (i = head_idx; i <= tail_idx; ++i)
2833  count += word_bitcount(decoder_.get_32() & (~dst_block[i]));
2834  return count;
2835  }
2836  break;
2837  case set_block_bit_1bit:
2838  // TODO: optimization
2839  case set_block_arrbit:
2840  get_arr_bit(tmp_block, true /* clear target*/);
2841  return
2842  bit_operation_sub_count(tmp_block,
2843  tmp_block + bm::set_block_size,
2844  dst_block);
2845  default:
2846  BM_ASSERT(0);
2847  } // switch
2848  return count_adapter.sum();
2849 }
2850 
2851 
2852 
2853 template<class DEC>
2855  bool clear_target)
2856 {
2857  BM_ASSERT(this->block_type_ == set_block_arrbit ||
2858  this->block_type_ == set_block_bit_1bit);
2859 
2860  gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
2861  if (dst_block)
2862  {
2863  if (clear_target)
2864  bit_block_set(dst_block, 0);
2865 
2866  if (this->block_type_ == set_block_bit_1bit)
2867  {
2868  // len contains idx of 1 bit set
2869  set_bit(dst_block, len);
2870  return 1;
2871  }
2872 
2873  for (unsigned k = 0; k < len; ++k)
2874  {
2875  gap_word_t bit_idx = decoder_.get_16();
2876  set_bit(dst_block, bit_idx);
2877  }
2878  }
2879  else
2880  {
2881  if (this->block_type_ == set_block_bit_1bit)
2882  {
2883  return 1; // nothing to do: len var already consumed 16bits
2884  }
2885  // fwd the decocing stream
2886  decoder_.seek(len * 2);
2887  }
2888  return len;
2889 }
2890 
2891 template<class DEC>
2893 {
2894  BM_ASSERT(this->block_type_ == set_block_bit_1bit);
2895  ++(this->block_idx_);
2896  this->state_ = e_blocks;
2897 
2898  return decoder_.get_16(); // 1bit_idx
2899 }
2900 
2901 template<class DEC>
2902 void
2904 {
2905  BM_ASSERT(this->state_ == e_gap_block ||
2906  this->block_type_ == set_block_bit_1bit);
2907  BM_ASSERT(dst_block);
2908 
2909  this->read_gap_block(this->decoder_,
2910  this->block_type_,
2911  dst_block,
2912  this->gap_head_);
2913 
2914  ++(this->block_idx_);
2915  this->state_ = e_blocks;
2916 }
2917 
2918 
2919 template<class DEC>
2920 unsigned
2922  bm::word_t* tmp_block,
2923  set_operation op)
2924 {
2925  BM_ASSERT(this->state_ == e_bit_block);
2926  get_bit_func_type bit_func = bit_func_table_[op];
2927  BM_ASSERT(bit_func);
2928  unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
2929  this->state_ = e_blocks;
2930  ++(this->block_idx_);
2931  return cnt;
2932 }
2933 
2934 
2935 
2936 template<class BV>
2938  bvector_type& bv,
2939  const unsigned char* buf,
2940  bm::word_t* temp_block,
2941  set_operation op,
2942  bool exit_on_one
2943  )
2944 {
2945  ByteOrder bo_current = globals<true>::byte_order();
2946 
2947  bm::decoder dec(buf);
2948  unsigned char header_flag = dec.get_8();
2949  ByteOrder bo = bo_current;
2950  if (!(header_flag & BM_HM_NO_BO))
2951  {
2952  bo = (bm::ByteOrder) dec.get_8();
2953  }
2954 
2955  blocks_manager_type& bman = bv.get_blocks_manager();
2956  bit_block_guard<blocks_manager_type> bg(bman);
2957  if (temp_block == 0)
2958  {
2959  temp_block = bg.allocate();
2960  }
2961 
2962  if (bo_current == bo)
2963  {
2964  serial_stream_current ss(buf);
2965  return
2967  deserialize(bv, ss, temp_block, op, exit_on_one);
2968  }
2969  switch (bo_current)
2970  {
2971  case BigEndian:
2972  {
2973  serial_stream_be ss(buf);
2974  return
2976  deserialize(bv, ss, temp_block, op, exit_on_one);
2977  }
2978  case LittleEndian:
2979  {
2980  serial_stream_le ss(buf);
2981  return
2983  deserialize(bv, ss, temp_block, op, exit_on_one);
2984  }
2985  default:
2986  BM_ASSERT(0);
2987  };
2988  return 0;
2989 }
2990 
2991 
2992 template<class BV>
2994  bvector_type& bv_target,
2995  const bvector_type& bv_mask,
2996  const unsigned char* buf,
2997  bm::word_t* temp_block,
2998  set_operation op)
2999 {
3000  ByteOrder bo_current = globals<true>::byte_order();
3001 
3002  bm::decoder dec(buf);
3003  unsigned char header_flag = dec.get_8();
3004  ByteOrder bo = bo_current;
3005  if (!(header_flag & BM_HM_NO_BO))
3006  {
3007  bo = (bm::ByteOrder) dec.get_8();
3008  }
3009 
3010  blocks_manager_type& bman = bv_target.get_blocks_manager();
3011  if (!bman.is_init())
3012  {
3013  bman.init_tree();
3014  }
3015  bit_block_guard<blocks_manager_type> bg(bman);
3016  if (temp_block == 0)
3017  {
3018  temp_block = bg.allocate();
3019  }
3020 
3021  if (bo_current == bo)
3022  {
3023  serial_stream_current ss(buf);
3025  deserialize(bv_target, bv_mask, ss, temp_block, op);
3026  return;
3027  }
3028  switch (bo_current)
3029  {
3030  case BigEndian:
3031  {
3032  serial_stream_be ss(buf);
3034  deserialize(bv_target, bv_mask, ss, temp_block, op);
3035  }
3036  case LittleEndian:
3037  {
3038  serial_stream_le ss(buf);
3040  deserialize(bv_target, bv_mask, ss, temp_block, op);
3041  }
3042  default:
3043  BM_ASSERT(0);
3044  };
3045 }
3046 
3047 
3048 template<class BV, class SerialIterator>
3050  bvector_type& bv,
3051  serial_iterator_type& sit,
3052  unsigned id_count,
3053  bool set_clear)
3054 {
3055  const unsigned win_size = 64;
3056  bm::id_t id_buffer[win_size+1];
3057 
3058  if (set_clear) // set bits
3059  {
3060  for (unsigned i = 0; i <= id_count;)
3061  {
3062  unsigned j;
3063  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
3064  {
3065  id_buffer[j] = sit.get_id();
3066  sit.next();
3067  } // for j
3068  bm::combine_or(bv, id_buffer, id_buffer + j);
3069  } // for i
3070  }
3071  else // clear bits
3072  {
3073  for (unsigned i = 0; i <= id_count;)
3074  {
3075  unsigned j;
3076  for (j = 0; j < win_size && i <= id_count; ++j, ++i)
3077  {
3078  id_buffer[j] = sit.get_id();
3079  sit.next();
3080  } // for j
3081  bm::combine_sub(bv, id_buffer, id_buffer + j);
3082  } // for i
3083  }
3084 }
3085 
3086 template<class BV, class SerialIterator>
3087 unsigned
3089  blocks_manager_type& bman,
3090  set_operation op,
3091  unsigned bv_block_idx)
3092 {
3093  unsigned count = 0;
3094  switch (op)
3095  {
3096  case set_OR: case set_SUB: case set_XOR:
3097  case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
3098  case set_COUNT_SUB_BA:
3099  // nothing to do
3100  break;
3101  case set_AND: case set_ASSIGN:
3102  // clear the rest of the target vector
3103  {
3104  unsigned i, j;
3105  bman.get_block_coord(bv_block_idx, &i, &j);
3106  bm::word_t*** blk_root = bman.top_blocks_root();
3107  unsigned effective_top_size =
3108  bman.effective_top_block_size();
3109  for (;i < effective_top_size; ++i)
3110  {
3111  bm::word_t** blk_blk = blk_root[i];
3112  if (blk_blk == 0)
3113  {
3114  bv_block_idx+=bm::set_array_size-j;
3115  j = 0;
3116  continue;
3117  }
3118  for (;j < bm::set_array_size; ++j, ++bv_block_idx)
3119  {
3120  if (blk_blk[j])
3121  bman.zero_block(bv_block_idx);
3122  } // for j
3123  j = 0;
3124  } // for i
3125 
3126  }
3127  break;
3128  case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
3129  case set_COUNT_SUB_AB:
3130  // count bits in the target vector
3131  {
3132  unsigned i, j;
3133  bman.get_block_coord(bv_block_idx, &i, &j);
3134  bm::word_t*** blk_root = bman.top_blocks_root();
3135  unsigned effective_top_size =
3136  bman.effective_top_block_size();
3137  for (;i < effective_top_size; ++i)
3138  {
3139  bm::word_t** blk_blk = blk_root[i];
3140  if (blk_blk == 0)
3141  {
3142  bv_block_idx+=bm::set_array_size-j;
3143  j = 0;
3144  continue;
3145  }
3146  for (;j < bm::set_array_size; ++j, ++bv_block_idx)
3147  {
3148  if (blk_blk[j])
3149  count += bman.block_bitcount(blk_blk[j]);//, bv_block_idx);
3150  } // for j
3151  j = 0;
3152  } // for i
3153 
3154  }
3155  break;
3156  default:
3157  BM_ASSERT(0);
3158  }
3159  return count;
3160 }
3161 
3162 template<class BV, class SerialIterator>
3163 unsigned
3165  bvector_type& bv,
3166  serial_iterator_type& sit,
3167  set_operation op)
3168 {
3169  unsigned count = 0;
3170  unsigned id_count = sit.get_id_count();
3171  bool set_clear = true;
3172  switch (op)
3173  {
3174  case set_AND:
3175  {
3176  // TODO: use some more optimal algorithm without temp vector
3177  BV bv_tmp(BM_GAP);
3178  load_id_list(bv_tmp, sit, id_count, true);
3179  bv &= bv_tmp;
3180  }
3181  break;
3182  case set_ASSIGN:
3183  bv.clear(true);
3184  // intentional case fall through here (not a bug)
3185  case set_OR:
3186  set_clear = true;
3187  // fall to SUB
3188  case set_SUB:
3189  load_id_list(bv, sit, id_count, set_clear);
3190  break;
3191  case set_XOR:
3192  for (unsigned i = 0; i < id_count; ++i)
3193  {
3194  bm::id_t id = sit.get_id();
3195  bv[id] ^= true;
3196  sit.next();
3197  } // for
3198  break;
3199  case set_COUNT: case set_COUNT_B:
3200  for (unsigned i = 0; i < id_count; ++i)
3201  {
3202  /* bm::id_t id = */ sit.get_id();
3203  ++count;
3204  sit.next();
3205  } // for
3206  break;
3207  case set_COUNT_A:
3208  return bv.count();
3209  case set_COUNT_AND:
3210  for (unsigned i = 0; i < id_count; ++i)
3211  {
3212  bm::id_t id = sit.get_id();
3213  count += bv.get_bit(id);
3214  sit.next();
3215  } // for
3216  break;
3217  case set_COUNT_XOR:
3218  {
3219  // TODO: get rid of the temp vector
3220  BV bv_tmp(BM_GAP);
3221  load_id_list(bv_tmp, sit, id_count, true);
3222  count += count_xor(bv, bv_tmp);
3223  }
3224  break;
3225  case set_COUNT_OR:
3226  {
3227  // TODO: get rid of the temp. vector
3228  BV bv_tmp(BM_GAP);
3229  load_id_list(bv_tmp, sit, id_count, true);
3230  count += count_or(bv, bv_tmp);
3231  }
3232  break;
3233  case set_COUNT_SUB_AB:
3234  {
3235  // TODO: get rid of the temp. vector
3236  BV bv_tmp(bv);
3237  load_id_list(bv_tmp, sit, id_count, false);
3238  count += bv_tmp.count();
3239  }
3240  break;
3241  default:
3242  BM_ASSERT(0);
3243  } // switch
3244 
3245  return count;
3246 }
3247 
3248 
3249 template<class BV, class SerialIterator>
3251  bvector_type& bv_target,
3252  const bvector_type& bv_mask,
3253  serial_iterator_type& sit,
3254  bm::word_t* temp_block,
3255  set_operation op)
3256 {
3257  BM_ASSERT(temp_block);
3258  BM_ASSERT(op == bm::set_AND ||
3259  op == bm::set_OR || op == bm::set_XOR || op == bm::set_SUB);
3260 
3261  gap_word_t gap_temp_block[bm::gap_equiv_len*3];
3262  gap_temp_block[0] = 0;
3263 
3264  bv_target.clear(true); // clear and free the memory
3265 
3266  const blocks_manager_type& bman_mask = bv_mask.get_blocks_manager();
3267  blocks_manager_type& bman_target = bv_target.get_blocks_manager();
3268 
3269  if (!bman_target.is_init())
3270  {
3271  bman_target.init_tree();
3272  }
3273 
3274  unsigned bv_size = sit.bv_size();
3275  if (bv_mask.size() > bv_size)
3276  {
3277  bv_size = bv_mask.size();
3278  }
3279  if (bv_target.size() < bv_size)
3280  {
3281  bv_target.resize(bv_size);
3282  }
3283 
3284  unsigned top_blocks = bman_mask.effective_top_block_size();
3285 
3287 
3288  typename serial_iterator_type::iterator_state state;
3289  state = sit.get_state();
3290  if (state == serial_iterator_type::e_list_ids)
3291  {
3292  bv_target = bv_mask;
3293  process_id_list(bv_target, sit, op);
3294  return;
3295  }
3296 
3297  unsigned bv_block_idx = 0;
3298  for (;1;)
3299  {
3300  bv_block_idx = sit.block_idx();
3301  // early exit check to avoid over-scan
3302  {
3303  unsigned tb_idx = bv_block_idx >> bm::set_array_shift; // current top block
3304  if (tb_idx > top_blocks)
3305  {
3306  if (op == bm::set_AND)
3307  break;
3308  if (sit.is_eof())
3309  break;
3310  }
3311  }
3312 
3313  if (sit.is_eof()) // argument stream ended
3314  {
3315  if (op == bm::set_AND)
3316  break;
3317  // (OR, SUB, XOR) need to scan fwd until mask vector ends
3318  state = serial_iterator_type::e_zero_blocks;
3319  }
3320  else
3321  {
3322  state = sit.state();
3323  }
3324 
3325  switch (state)
3326  {
3327  case serial_iterator_type::e_blocks:
3328  sit.next();
3329  continue;
3330  case serial_iterator_type::e_bit_block:
3331  {
3332  bm::set_operation sop = op;
3333  const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
3334  bm::word_t* blk_target = 0;
3335  if (!blk_mask)
3336  {
3337  switch (op)
3338  {
3339  case set_AND: case set_SUB:
3340  // first arg is 0, so the operation gives us zero
3341  // all we need to do is to seek the input stream
3342  sop = set_ASSIGN;
3343  break;
3344  case set_OR: case set_XOR:
3345  blk_target = bman_target.make_bit_block(bv_block_idx);
3346  break;
3347  default:
3348  BM_ASSERT(0);
3349  }
3350  }
3351  else // block exists
3352  {
3353  int is_gap = BM_IS_GAP(blk_mask);
3354  blk_target = bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
3355  }
3356 
3357  // 2 bit blocks recombination
3358  sit.get_bit_block(blk_target, temp_block, sop);
3359  }
3360  break;
3361 
3362  case serial_iterator_type::e_zero_blocks:
3363  {
3364  if (op == set_AND)
3365  {
3366  sit.skip_mono_blocks();
3367  break;
3368  }
3369  sit.next();
3370  // set_SUB: set_OR: set_XOR:
3371  bman_target.copy_block(bv_block_idx, bman_mask);
3372  }
3373  break;
3374 
3375  case serial_iterator_type::e_one_blocks:
3376  {
3377  BM_ASSERT(bv_block_idx == sit.block_idx());
3378  const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
3379  sit.next();
3380 
3381  switch (op)
3382  {
3383  case set_OR:
3384  bman_target.set_block_all_set(bv_block_idx);
3385  break;
3386  case set_SUB:
3387  break;
3388  case set_AND:
3389  bman_target.copy_block(bv_block_idx, bman_mask);
3390  break;
3391  case set_XOR:
3392  if (blk_mask)
3393  {
3394  int is_gap = BM_IS_GAP(blk_mask);
3395  bm::word_t* blk_target =
3396  bman_target.copy_bit_block(bv_block_idx, blk_mask, is_gap);
3397  bit_block_xor(blk_target, FULL_BLOCK_REAL_ADDR);
3398  }
3399  else
3400  {
3401  // 0 XOR 1 = 1
3402  bman_target.set_block_all_set(bv_block_idx);
3403  }
3404  break;
3405  default:
3406  BM_ASSERT(0);
3407  } // switch
3408 
3409  }
3410  break;
3411 
3412  case serial_iterator_type::e_gap_block:
3413  {
3414  // Single bit-in-block optimization
3415  if (sit.get_block_type() == set_block_bit_1bit)
3416  {
3417  if (op == set_AND)
3418  {
3419  unsigned bit_idx = sit.get_bit();
3420  unsigned bn = (bv_block_idx << bm::set_block_shift) | bit_idx;
3421  bool bval_mask = bv_mask.test(bn);
3422  bv_target.set_bit(bn, bval_mask);
3423  break;
3424  }
3425  }
3426 
3427  const bm::word_t* blk_mask = bman_mask.get_block(bv_block_idx);
3428 
3429  sit.get_gap_block(gap_temp_block);
3430  // gap_word_t gap_head = gap_temp_block[0];
3431 
3432  unsigned len = gap_length(gap_temp_block);
3433  int level = gap_calc_level(len, bman_target.glen());
3434  --len;
3435 
3436 
3437  if (!blk_mask)
3438  {
3439  if (op == set_OR || op == set_XOR)
3440  {
3441  bman_target.set_gap_block(bv_block_idx, gap_temp_block, level);
3442  }
3443  }
3444  else // mask block exists
3445  {
3446  bm::operation bop = bm::setop2op(op);
3447  bman_target.copy_block(bv_block_idx, bman_mask);
3448  bv_target.combine_operation_with_block(
3449  bv_block_idx,
3450  (bm::word_t*)gap_temp_block,
3451  1, // GAP
3452  bop);
3453  }
3454 
3455  }
3456  break;
3457 
3458  default:
3459  BM_ASSERT(0);
3460  } // switch
3461 
3462 
3463  } // for (deserialization)
3464 
3465  bv_target.forget_count();
3466 }
3467 
3468 
3469 
3470 template<class BV, class SerialIterator>
3471 unsigned
3473  bvector_type& bv,
3474  serial_iterator_type& sit,
3475  bm::word_t* temp_block,
3476  set_operation op,
3477  bool exit_on_one)
3478 {
3479  BM_ASSERT(temp_block);
3480 
3481  unsigned count = 0;
3482  gap_word_t gap_temp_block[bm::gap_equiv_len*3];
3483  gap_temp_block[0] = 0;
3484 
3485  blocks_manager_type& bman = bv.get_blocks_manager();
3486  if (!bman.is_init())
3487  {
3488  bman.init_tree();
3489  }
3490 
3491  bv.forget_count();
3492  if (sit.bv_size() && (sit.bv_size() > bv.size()))
3493  {
3494  bv.resize(sit.bv_size());
3495  }
3496 
3498 
3499  typename serial_iterator_type::iterator_state state;
3500  state = sit.get_state();
3501  if (state == serial_iterator_type::e_list_ids)
3502  {
3503  count = process_id_list(bv, sit, op);
3504  return count;
3505  }
3506 
3507  unsigned bv_block_idx = 0;
3508 
3509  for (;1;)
3510  {
3511  bm::set_operation sop = op;
3512  if (sit.is_eof()) // argument stream ended
3513  {
3514  count += finalize_target_vector(bman, op, bv_block_idx);
3515  return count;
3516  }
3517 
3518  state = sit.state();
3519  switch (state)
3520  {
3521  case serial_iterator_type::e_blocks:
3522  sit.next();
3523  continue;
3524  case serial_iterator_type::e_bit_block:
3525  {
3526  BM_ASSERT(sit.block_idx() == bv_block_idx);
3527  bm::word_t* blk = bman.get_block(bv_block_idx);
3528 
3529  if (!blk)
3530  {
3531  switch (op)
3532  {
3533  case set_AND: case set_SUB: case set_COUNT_AND:
3534  case set_COUNT_SUB_AB: case set_COUNT_A:
3535  // one arg is 0, so the operation gives us zero
3536  // all we need to do is to seek the input stream
3537  sop = set_ASSIGN;
3538  break;
3539 
3540  case set_OR: case set_XOR: case set_ASSIGN:
3541  blk = bman.make_bit_block(bv_block_idx);
3542  break;
3543 
3544  case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
3545  case set_COUNT_SUB_BA: case set_COUNT_B:
3546  // first arg is not required (should work as is)
3547  sop = set_COUNT;
3548  break;
3549 
3550  case set_END:
3551  default:
3552  BM_ASSERT(0);
3553  }
3554  }
3555  else // block exists
3556  {
3557  int is_gap = BM_IS_GAP(blk);
3558  if (is_gap || IS_FULL_BLOCK(blk))
3559  {
3560  if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
3561  {
3562  }
3563  else
3564  {
3565  // TODO: make sure const operations do not
3566  // deoptimize GAP blocks
3567  blk = bman.deoptimize_block(bv_block_idx);
3568  }
3569  }
3570  }
3571 
3572  // 2 bit blocks recombination
3573  unsigned c = sit.get_bit_block(blk, temp_block, sop);
3574  count += c;
3575  if (exit_on_one && count) // early exit
3576  return count;
3577 
3578  }
3579  break;
3580 
3581  case serial_iterator_type::e_zero_blocks:
3582  {
3583  BM_ASSERT(bv_block_idx == sit.block_idx());
3584  bm::word_t* blk = bman.get_block(bv_block_idx);
3585  sit.next();
3586 
3587  if (blk)
3588  {
3589  switch (op)
3590  {
3591  case set_AND: case set_ASSIGN:
3592  // the result is 0
3593  blk = bman.zero_block(bv_block_idx);
3594  break;
3595 
3596  case set_SUB: case set_COUNT_AND: case set_OR:
3597  case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
3598  // nothing to do
3599  break;
3600 
3601  case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
3602  case set_COUNT: case set_COUNT_XOR:
3603  // valid bit block recombined with 0 block
3604  // results with same block data
3605  // all we need is to bitcount bv block
3606  {
3607  unsigned c = bman.block_bitcount(blk);//, bv_block_idx);
3608  count += c;
3609  if (exit_on_one && count) // early exit
3610  return count;
3611  }
3612  break;
3613  case set_END:
3614  default:
3615  BM_ASSERT(0);
3616  } // switch op
3617  } // if blk
3618  }
3619  break;
3620 
3621  case serial_iterator_type::e_one_blocks:
3622  {
3623  BM_ASSERT(bv_block_idx == sit.block_idx());
3624  bm::word_t* blk = bman.get_block(bv_block_idx);
3625  sit.next();
3626 
3627  switch (op)
3628  {
3629  case set_OR: case set_ASSIGN:
3630  bman.set_block_all_set(bv_block_idx);
3631  break;
3632  case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
3633  // block becomes all set
3634  count += bm::bits_in_block;
3635  break;
3636  case set_SUB:
3637  blk = bman.zero_block(bv_block_idx);
3638  break;
3639  case set_COUNT_SUB_AB: case set_AND:
3640  // nothing to do
3641  break;
3642  case set_COUNT_AND: case set_COUNT_A:
3643  count += bman.block_bitcount(blk);//, bv_block_idx);
3644  break;
3645  default:
3646  if (blk)
3647  {
3648  switch (op)
3649  {
3650  case set_XOR:
3651  blk = bman.deoptimize_block(bv_block_idx);
3653  break;
3654  case set_COUNT_XOR:
3655  {
3656  count +=
3658  blk,
3660  bm::COUNT_XOR);
3661  }
3662  break;
3663  case set_COUNT_SUB_BA:
3664  {
3665  count +=
3667  blk,
3670  }
3671  break;
3672  default:
3673  BM_ASSERT(0);
3674  } // switch
3675  }
3676  else // blk == 0
3677  {
3678  switch (op)
3679  {
3680  case set_XOR:
3681  // 0 XOR 1 = 1
3682  bman.set_block_all_set(bv_block_idx);
3683  break;
3684  case set_COUNT_XOR:
3685  count += bm::bits_in_block;
3686  break;
3687  case set_COUNT_SUB_BA:
3688  // 1 - 0 = 1
3689  count += bm::bits_in_block;
3690  break;
3691  default:
3692  break;
3693  } // switch
3694  } // else
3695  } // switch
3696  if (exit_on_one && count) // early exit
3697  return count;
3698 
3699  }
3700  break;
3701 
3702  case serial_iterator_type::e_gap_block:
3703  {
3704  BM_ASSERT(bv_block_idx == sit.block_idx());
3705  bm::word_t* blk = bman.get_block(bv_block_idx);
3706 
3707  sit.get_gap_block(gap_temp_block);
3708 
3709  unsigned len = gap_length(gap_temp_block);
3710  int level = gap_calc_level(len, bman.glen());
3711  --len;
3712 
3713  bool const_op = is_const_set_operation(op);
3714  if (const_op)
3715  {
3716  distance_metric metric = operation2metric(op);
3717  bm::word_t* gptr = (bm::word_t*)gap_temp_block;
3718  BMSET_PTRGAP(gptr);
3719  unsigned c =
3721  blk,
3722  gptr,
3723  metric);
3724  count += c;
3725  if (exit_on_one && count) // early exit
3726  return count;
3727 
3728  }
3729  else // non-const set operation
3730  {
3731  if ((sop == set_ASSIGN) && blk) // target block override
3732  {
3733  blk = bman.zero_block(bv_block_idx);
3734  sop = set_OR;
3735  }
3736  if (blk == 0) // target block not found
3737  {
3738  switch (sop)
3739  {
3740  case set_AND: case set_SUB:
3741  break;
3742  case set_OR: case set_XOR: case set_ASSIGN:
3743  bman.set_gap_block(
3744  bv_block_idx, gap_temp_block, level);
3745  break;
3746  default:
3747  BM_ASSERT(0);
3748  } // switch
3749  }
3750  else // target block exists
3751  {
3752  bm::operation bop = bm::setop2op(op);
3753  if (level == -1) // too big to GAP
3754  {
3755  gap_convert_to_bitset(temp_block, gap_temp_block);
3756  bv.combine_operation_with_block(bv_block_idx,
3757  temp_block,
3758  0, // BIT
3759  bop);
3760  }
3761  else // GAP fits
3762  {
3763  set_gap_level(gap_temp_block, level);
3764  bv.combine_operation_with_block(
3765  bv_block_idx,
3766  (bm::word_t*)gap_temp_block,
3767  1, // GAP
3768  bop);
3769  }
3770  }
3771  if (exit_on_one)
3772  {
3773  blk = bman.get_block(bv_block_idx);
3774  if (blk)
3775  {
3776  bool z = bman.is_block_zero(bv_block_idx, blk);
3777  if (!z)
3778  return 1;
3779  }
3780  } // if exit_on_one
3781 
3782  } // if else non-const op
3783  }
3784  break;
3785 
3786  default:
3787  BM_ASSERT(0);
3788  } // switch
3789 
3790  ++bv_block_idx;
3791 
3792  } // for (deserialization)
3793 
3794  return count;
3795 }
3796 
3797 
3798 
3799 
3800 } // namespace bm
3801 
3802 #include "bmundef.h"
3803 
3804 #ifdef _MSC_VER
3805 #pragma warning( pop )
3806 #endif
3807 
3808 
3809 #endif
void encode_header(const BV &bv, bm::encoder &enc)
Encode serialization header information.
Definition: bmserial.h:661
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control)
Encode BIT block with repeatable runs of zeroes.
Definition: bmserial.h:837
void put_8(unsigned char c)
Puts one character into the encoding buffer.
Definition: encoding.h:555
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
Definition: bmfunc.h:125
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2077
Bit COUNT SUB AB functor.
Definition: bmfunc.h:5400
unsigned size() const
Returns size of the current encoding stream.
Definition: encoding.h:626
unsigned id_cnt_
Id counter for id list.
Definition: bmserial.h:512
one or more all-1 bit blocks
Definition: bmserial.h:438
const unsigned char set_block_gap
Plain GAP block.
Definition: bmserial.h:75
const unsigned set_block_size
Definition: bmconst.h:44
operation
Bit operations enumeration.
Definition: bmfunc.h:260
set_operation
Nomenclature of set operations.
Definition: bmfunc.h:231
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2487
Base deserialization class.
Definition: bmserial.h:278
serialization_flags
Bit mask flags for serialization algorithm.
Definition: bmserial.h:1141
const unsigned char set_block_32zero
Up to 4G zero blocks.
Definition: bmserial.h:68
const unsigned char set_block_32one
UP to 4G all-set blocks.
Definition: bmserial.h:69
unsigned gap_add_value(T *buf, unsigned pos)
Add new value to the end of GAP buffer.
Definition: bmfunc.h:1452
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
Definition: bmserial.h:1331
void next()
get next block
Definition: bmserial.h:1867
const unsigned set_array_shift
Definition: bmconst.h:73
unsigned char * position_type
Definition: encoding.h:40
id list stored
Definition: bmserial.h:93
deseriaizer_base< DEC >::decoder_type decoder_type
Definition: bmserial.h:398
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2781
unsigned get_compression_level() const
Get compression level.
Definition: bmserial.h:635
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false)
Convert gap block into array of ints corresponding to 1 bits.
Definition: bmfunc.h:2853
void gap_set_all(T *buf, unsigned set_max, unsigned value)
Sets all bits to 0 or 1 (GAP)
Definition: bmfunc.h:2434
SerialIterator serial_iterator_type
Definition: bmserial.h:344
int gap_calc_level(int len, const T *glevel_len)
Calculates GAP block capacity level.
Definition: bmfunc.h:2636
BV::allocator_type allocator_type
Definition: bmserial.h:320
bvector_type::blocks_manager_type blocks_manager_type
Definition: bmserial.h:148
unsigned serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serilization into memory block.
Definition: bmserial.h:910
void gamma_gap_block(bm::gap_word_t *gap_block, bm::encoder &enc)
Encode GAP block with Elias Gamma coder.
Definition: bmserial.h:700
unsigned mono_block_cnt_
number of 0 or 1 blocks
Definition: bmserial.h:518
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Construct serializer.
Definition: bmserial.h:589
save no byte-order info (save some space)
Definition: bmserial.h:1142
save no GAP info (save some space)
Definition: bmserial.h:1143
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2248
const unsigned char set_block_bit
Plain bit block.
Definition: bmserial.h:72
Definition: bm.h:59
bm::id_t last_id_
Last id from the id list.
Definition: bmserial.h:513
bvector_type::statistics statistics_type
Definition: bmserial.h:149
T gap_length(const T *buf)
Returs GAP block length.
Definition: bmfunc.h:2563
const unsigned gap_equiv_len
Definition: bmconst.h:64
Deserializer for bit-vector.
Definition: bmserial.h:307
void gap_convert_to_bitset(unsigned *dest, const T *buf)
GAP block to bitblock conversion.
Definition: bmfunc.h:2343
Bit COUNT OR functor.
Definition: bmfunc.h:5388
void for_each_dgap(const T *gap_buf, Func &func)
Definition: bmfunc.h:951
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Definition: bmfunc.h:5230
bm::id_t get_id() const
Get last id from the id list.
Definition: bmserial.h:452
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock OR operation and calculates bitcount of the result.
Definition: bmfunc.h:4314
unsigned bv_size() const
serialized bitvector size
Definition: bmserial.h:403
const unsigned char set_block_bit_interval
Interval block.
Definition: bmserial.h:78
Bit-vector serialization class.
Definition: bmserial.h:143
unsigned serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition: bmserial.h:1189
GAP compression is ON.
Definition: bmconst.h:105
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
Definition: bmserial.h:74
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1317
void set_pos(unsigned char *buf_pos)
Set current memory stream position.
Definition: encoding.h:642
#define BM_SET_ONE_BLOCKS(x)
Definition: bmserial.h:120
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2367
(A ^ B).count()
Definition: bmalgo_impl.h:54
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock XOR operation and calculates bitcount of the result.
Definition: bmfunc.h:4649
bm::word_t sum() const
Get accumulated sum.
Definition: bmfunc.h:5267
bm::id_t count_xor(const BV &bv1, const BV &bv2)
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo_impl.h:1020
one or more zero bit blocks
Definition: bmserial.h:437
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:341
const unsigned id_max
Definition: bmconst.h:40
(B - A).count()
Definition: bmalgo_impl.h:57
Class for decoding data from memory buffer.
Definition: encoding.h:100
#define BMSET_PTRGAP(ptr)
Definition: bmdef.h:160
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size)
Inspects block for full zero words.
Definition: bmfunc.h:4704
unsigned get_id_count() const
Number of ids in the inverted list (valid for e_list_ids)
Definition: bmserial.h:449
unsigned serialize(BV &bv, unsigned char *buf, unsigned serialization_flags=0)
Saves bitvector into memory. Allocates temporary memory block for bvector.
Definition: bmserial.h:1224
get_bit_func_type bit_func_table_[bm::set_END]
Definition: bmserial.h:506
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
Definition: bmserial.h:84
int bit_find_in_block(const bm::word_t *data, unsigned nbit, bm::id_t *prev)
Searches for the next 1 bit in the BIT block.
Definition: bmfunc.h:4765
iterator_state state_
Definition: bmserial.h:511
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:4185
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2015
void gap_invert(T *buf)
Inverts all bits in the GAP buffer.
Definition: bmfunc.h:2505
unsigned int word_t
Definition: bmconst.h:35
const unsigned char set_block_arrbit
List of bits ON.
Definition: bmserial.h:77
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos)
Set 1 bit in a block.
Definition: bmfunc.h:1652
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:135
#define SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO)
Definition: bmserial.h:100
const unsigned char set_block_azero
All other blocks zero.
Definition: bmserial.h:70
unsigned block_idx_
current block index
Definition: bmserial.h:517
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
Definition: bmserial.h:85
gap_word_t glevels_[bm::gap_levels]
GAP levels.
Definition: bmserial.h:514
static unsigned deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Definition: bmserial.h:3472
void byte_order_serialization(bool value)
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:655
unsigned get_block_type() const
Get current block type.
Definition: bmserial.h:501
iterator_state
iterator is a state machine, this enum encodes its key value
Definition: bmserial.h:432
void put_32(bm::word_t w)
Puts 32 bits word into encoding buffer.
Definition: encoding.h:653
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2546
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Definition: bmserial.h:83
unsigned deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Bitvector deserialization from memory.
Definition: bmserial.h:1250
Bit-block sum adapter, takes values and sums it /internal.
Definition: bmfunc.h:5260
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value)
Bitblock memset operation.
Definition: bmfunc.h:2325
void bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:3764
static ByteOrder byte_order()
Definition: bmfunc.h:433
const unsigned gap_levels
Definition: bmconst.h:66
unsigned bit_convert_to_gap(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len)
Converts bit block to GAP.
Definition: bmfunc.h:2708
Internal structure.
Definition: bmfunc.h:399
unsigned short gap_word_t
Definition: bmconst.h:60
Iterator to walk forward the serialized stream.
Definition: bmserial.h:340
unsigned gap_set_array(T *buf, const T *arr, unsigned len)
Convert array to GAP buffer.
Definition: bmfunc.h:1537
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2157
unsigned block_idx() const
Get current block index.
Definition: bmserial.h:455
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2424
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:148
deseriaizer_base< DEC >::decoder_type decoder_type
Definition: bmserial.h:311
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2)
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:4233
const unsigned char set_block_1zero
One all-zero block.
Definition: bmserial.h:62
void bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:4570
BV::blocks_manager_type blocks_manager_type
Definition: bmserial.h:319
bm::id_t bit_block_calc_count_change(const bm::word_t *block, const bm::word_t *block_end, unsigned *bit_count)
Definition: bmfunc.h:3053
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true)
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
Definition: bmserial.h:2854
Bit SUB BA functor.
Definition: bmfunc.h:5411
const unsigned char set_block_8zero
Up to 256 zero blocks.
Definition: bmserial.h:64
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
Definition: bmserial.h:82
void put_16(bm::short_t s)
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:565
void gap_length_serialization(bool value)
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:649
#define FULL_BLOCK_REAL_ADDR
Definition: bmdef.h:131
bm::id_t count_or(const BV &bv1, const BV &bv2)
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo_impl.h:1088
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count)
Encode 8-bit prefix + an array.
Definition: encoding.h:538
const unsigned bits_in_block
Definition: bmconst.h:77
void bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition: bmfunc.h:4474
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false)
Encode GAP block as delta-array with Elias Gamma coder.
Definition: bmserial.h:737
unsigned deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block)
Definition: bmserial.h:1525
unsigned char get_8()
Reads character from the decoding buffer.
Definition: encoding.h:75
const unsigned gap_max_bits
Definition: bmconst.h:63
#define BM_IS_GAP(ptr)
Definition: bmdef.h:161
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count)
Encode 8-bit prefix + an array.
Definition: encoding.h:527
distance_metric operation2metric(set_operation op)
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:67
bool is_eof() const
Returns true if end of bit-stream reached.
Definition: bmserial.h:406
bm::operation setop2op(bm::set_operation op)
Convert set operation to operation.
Definition: bmfunc.h:272
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size)
Definition: bmfunc.h:5310
unsigned char * get_pos() const
Get current memory stream position.
Definition: encoding.h:634
serialization_header_mask
Definition: bmserial.h:90
#define BM_SET_MMX_GUARD
Definition: bmdef.h:201
iterator_state state() const
Returns iterator internal state.
Definition: bmserial.h:445
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Definition: bmserial.h:73
#define BMGAP_PTR(ptr)
Definition: bmdef.h:159
static unsigned deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:2937
bm::id_t bit_block_calc_count(const bm::word_t *block, const bm::word_t *block_end)
Bitcount for bit string.
Definition: bmfunc.h:2912
unsigned(serial_stream_iterator< DEC >::* get_bit_func_type)(bm::word_t *, bm::word_t *)
member function pointer for bitset-bitset get operations
Definition: bmserial.h:462
const unsigned char set_block_16one
UP to 65536 all-set blocks.
Definition: bmserial.h:67
const unsigned set_array_size
Definition: bmconst.h:72
const unsigned char set_block_end
End of serialization.
Definition: bmserial.h:61
unsigned gap_bit_count_unr(const T *buf)
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:804
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:489
const unsigned char set_block_8one
Up to 256 all-set blocks.
Definition: bmserial.h:65
void skip_mono_blocks()
skip all zero or all-one blocks
Definition: bmserial.h:1998
bvector_type::allocator_type allocator_type
Definition: bmserial.h:147
unsigned int id_t
Definition: bmconst.h:34
resized vector
Definition: bmserial.h:92
Functor for Elias Gamma encoding.
Definition: encoding.h:449
const unsigned char set_block_16zero
Up to 65536 zero blocks.
Definition: bmserial.h:66
bm::word_t * temp_block_
Definition: bmserial.h:329
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end)
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:113
T bit_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len, unsigned mask=0)
Convert bit block into an array of ints corresponding to 1 bits.
Definition: bmfunc.h:5071
#define BM_ASSERT
Definition: bmdef.h:112
no byte-order
Definition: bmserial.h:94
iterator_state get_state() const
Definition: bmserial.h:447
const unsigned set_total_blocks
Definition: bmconst.h:75
const unsigned char set_block_1one
One block all-set (1111...)
Definition: bmserial.h:63
void gamma(unsigned value)
Definition: encoding.h:236
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2308
id64_t wordop_t
Definition: bmconst.h:83
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:51
Serialization stream iterator.
Definition: bmserial.h:395
unsigned block_type_
current block type
Definition: bmserial.h:516
const unsigned char set_block_aone
All other blocks one.
Definition: bmserial.h:71
decoder_type & decoder()
Get low level access to the decoder (use carefully)
Definition: bmserial.h:427
const unsigned char set_block_gapbit
GAP compressed bitblock.
Definition: bmserial.h:76
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2624
const unsigned set_block_shift
Definition: bmconst.h:45
strategy
Block allocation strategies.
Definition: bmconst.h:102
unsigned dec_size() const
Return current decoder size.
Definition: bmserial.h:424
bool is_const_set_operation(set_operation op)
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:252
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:2702
Memory encoding.
Definition: encoding.h:37
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
Definition: bmserial.h:2903
void set_compression_level(unsigned clevel)
Set compression level.
Definition: bmserial.h:629
void set_gap_level(T *buf, unsigned level)
Sets GAP block capacity level.
Definition: bmfunc.h:2620
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
Definition: bmserial.h:81
ByteOrder
Byte orders recognized by the library.
Definition: bmfunc.h:389
byte_buffer< allocator_type > buffer
Definition: bmserial.h:151
void encode_gap_block(bm::gap_word_t *gap_block, bm::encoder &enc)
Encode GAP block.
Definition: bmserial.h:784
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Definition: bmserial.h:2921
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:532
const unsigned char set_block_arrgap
List of bits ON (GAP block)
Definition: bmserial.h:79
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
Definition: bmserial.h:80
#define BMRESTRICT
Definition: bmdef.h:173
no GAP levels
Definition: bmserial.h:95
Bit COUNT XOR functor.
Definition: bmfunc.h:5377