BitMagic-C++
bmserial.h
Go to the documentation of this file.
1#ifndef BMSERIAL__H__INCLUDED__
2#define BMSERIAL__H__INCLUDED__
3/*
4Copyright(c) 2002-2019 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmserial.h
22 \brief Serialization / compression of bvector<>.
23 Set theoretical operations on compressed BLOBs.
24*/
25
26/*!
27 \defgroup bvserial bvector<> serialization
28 Serialization for bvector<> container
29
30 \ingroup bvector
31*/
32
33#ifndef BM__H__INCLUDED__
34// BitMagic utility headers do not include main "bm.h" declaration
35// #include "bm.h" or "bm64.h" explicitly
36# error missing include (bm.h or bm64.h)
37#endif
38
39
40#ifdef _MSC_VER
41#pragma warning( push )
42#pragma warning( disable : 4311 4312 4127)
43#endif
44
45
46
47#include "encoding.h"
48#include "bmfunc.h"
49#include "bmtrans.h"
50#include "bmalgo.h"
51#include "bmutil.h"
52#include "bmbuffer.h"
53#include "bmdef.h"
54#include "bmxor.h"
55
56namespace bm
57{
58
59const unsigned set_compression_max = 6; ///< Maximum supported compression level
60const unsigned set_compression_default = 6; ///< Default compression level
61
62/**
63 Bit-vector serialization class.
64
65 Class designed to convert sparse bit-vectors into a single block of memory
66 ready for file or database storage or network transfer.
67
68 Reuse of this class for multiple serializations (but not across threads).
69 Class resue offers some performance advantage (helps with temp memory
70 reallocations).
71
72 @ingroup bvserial
73*/
74template<class BV>
76{
77public:
78 typedef BV bvector_type;
84
85 typedef byte_buffer<allocator_type> buffer;
88
89 typedef
91public:
92 /**
93 Constructor
94
95 \param alloc - memory allocator
96 \param temp_block - temporary block for various operations
97 (if NULL it will be allocated and managed by serializer class)
98 Temp block is used as a scratch memory during serialization,
99 use of external temp block allows to avoid unnecessary re-allocations.
100
101 Temp block attached is not owned by the class and NOT deallocated on
102 destruction.
103 */
105 bm::word_t* temp_block = 0);
106
107 serializer(bm::word_t* temp_block);
108
110
111 /*! @name Compression level settings */
112 //@{
113
114 // --------------------------------------------------------------------
115 /**
116 Set compression level. Higher compression takes more time to process.
117 @param clevel - compression level (0-6)
118 0 - take as is
119 1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical
120 compression, intervals encoding
121 3 - variant of 2 with different cut-offs
122 4 - delta transforms plus Elias Gamma encoding where possible legacy)
123 5 - Binary Interpolative Coding (BIC) - light settings
124 6 - Binary Interpolative Coding (BIC) - harder settings
125
126 @sa get_compression_level
127 */
128 void set_compression_level(unsigned clevel) BMNOEXCEPT;
129
130 /**
131 Get current compression level.
132 */
134 { return compression_level_; }
135
136
137 //@}
138
139
140 // --------------------------------------------------------------------
141 /*! @name Serialization Methods */
142 //@{
143
144 /**
145 Bitvector serialization into memory block
146
147 @param bv - input bitvector
148 @param buf - out buffer (pre-allocated)
149 No range checking is done in this method.
150 It is responsibility of caller to allocate sufficient
151 amount of memory using information from calc_stat() function.
152
153 @param buf_size - size of the output buffer
154
155 @return Size of serialization block.
156 @sa calc_stat
157 */
158 size_type serialize(const BV& bv,
159 unsigned char* buf, size_t buf_size);
160
161 /**
162 Bitvector serialization into buffer object (resized automatically)
163
164 @param bv - input bitvector
165 @param buf - output buffer object
166 @param bv_stat - input (optional) bit-vector statistics object
167 if NULL, serialize will compute the statistics
168 */
169 void serialize(const BV& bv,
170 typename serializer<BV>::buffer& buf,
171 const statistics_type* bv_stat = 0);
172
173 /**
174 Bitvector serialization into buffer object (resized automatically)
175 Input bit-vector gets optimized and then destroyed, content is
176 NOT guaranteed after this operation.
177 Effectively it moves data into the buffer.
178
179 The reason this operation exsists is because it is faster to do
180 all three operations in one single pass.
181 This is a destructive serialization!
182
183 @param bv - input/output bitvector
184 @param buf - output buffer object
185 */
187 typename serializer<BV>::buffer& buf);
188
189 //@}
190 // --------------------------------------------------------------------
191
192 /**
193 Return serialization counter vector
194 @internal
195 */
197 { return compression_stat_; }
198
199 /**
200 Enable/disable statistics reset on each serilaization
201 */
203 { allow_stat_reset_ = allow; }
204
205 /// Reset all accumulated compression statistics
207
208 /**
209 Set GAP length serialization (serializes GAP levels of the original vector)
210
211 @param value - when TRUE serialized vector includes GAP levels parameters
212 */
214
215 /**
216 Set byte-order serialization (for cross platform compatibility)
217 @param value - TRUE serialization format includes byte-order marker
218 */
220
221 /**
222 Add skip-markers to serialization BLOB for faster range decode
223 at the expense of some BLOB size increase
224
225 @param enable - TRUE searilization will add bookmark codes
226 @param bm_interval - bookmark interval in (number of blocks)
227 suggested values between 4 and 512 (block size is 64K bits)
228 smaller interval means more bookmarks added to the skip list
229 allows faster range deserialization at the expense of
230 somewhat increased BLOB size.
231 */
232 void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT;
233
234 /**
235 Fine tuning for Binary Interpolative Compression (levels 5+)
236 The parameter sets average population count per block (64Kbits)
237 below which block is considered very sparse.
238 If super block (group of 256 blocks) is very sparse it applies
239 block size expansion (for the compression purposes) to
240 improve compression rates.
241 */
242 void set_sparse_cutoff(unsigned cutoff) BMNOEXCEPT;
243
244 /**
245 Attach collection of reference vectors for XOR serialization
246 (no transfer of ownership for the pointers)
247 @internal
248 */
249 void set_ref_vectors(const bv_ref_vector_type* ref_vect);
250
251 /**
252 Calculate XOR similarity model for ref_vector
253 refernece vector must be associated before
254
255 @param sim_model - [out] similarity model to compute
256 @param ref_vect - [in] reference vectors
257 @param params - parameters to regulate search depth
258 @return true - if similarity model created successfully
259
260 @sa set_ref_vectors
261 @internal
262 */
264 const bv_ref_vector_type& ref_vect,
265 const bm::xor_sim_params& params);
266
267 /**
268 Atach XOR similarity model (must be computed by the same ref vector)
269 @internal
270 */
272
273 /**
274 Set current index in rer.vector collection
275 (not a row idx or plain idx)
276 */
278
279
280protected:
281 /**
282 Encode serialization header information
283 */
284 void encode_header(const BV& bv, bm::encoder& enc) BMNOEXCEPT;
285
286 /*! Encode GAP block */
287 void encode_gap_block(const bm::gap_word_t* gap_block, bm::encoder& enc);
288
289 /*! Encode GAP block with Elias Gamma coder */
290 void gamma_gap_block(const bm::gap_word_t* gap_block,
291 bm::encoder& enc) BMNOEXCEPT;
292
293 /**
294 Encode GAP block as delta-array with Elias Gamma coder
295 */
296 void gamma_gap_array(const bm::gap_word_t* gap_block,
297 unsigned arr_len,
298 bm::encoder& enc,
299 bool inverted = false) BMNOEXCEPT;
300
301 /// Encode bit-block as an array of bits
302 void encode_bit_array(const bm::word_t* block,
303 bm::encoder& enc, bool inverted) BMNOEXCEPT;
304
305 void gamma_gap_bit_block(const bm::word_t* block,
306 bm::encoder& enc) BMNOEXCEPT;
307
308 void gamma_arr_bit_block(const bm::word_t* block,
309 bm::encoder& enc, bool inverted) BMNOEXCEPT;
310
311 void bienc_arr_bit_block(const bm::word_t* block,
312 bm::encoder& enc, bool inverted) BMNOEXCEPT;
313
314 void bienc_arr_sblock(const BV& bv, unsigned sb,
315 bm::encoder& enc) BMNOEXCEPT;
316
317 /// encode bit-block as interpolated bit block of gaps
318 void bienc_gap_bit_block(const bm::word_t* block,
319 bm::encoder& enc) BMNOEXCEPT;
320
322 bm::encoder& enc, bool inverted) BMNOEXCEPT;
323 /// encode bit-block as interpolated gap block
325 bm::encoder& enc) BMNOEXCEPT;
326
327 /**
328 Encode GAP block as an array with binary interpolated coder
329 */
330 void interpolated_gap_array(const bm::gap_word_t* gap_block,
331 unsigned arr_len,
332 bm::encoder& enc,
333 bool inverted) BMNOEXCEPT;
335 unsigned arr_len,
336 bm::encoder& enc,
337 bool inverted) BMNOEXCEPT;
338
339
340 /*! Encode GAP block with using binary interpolated encoder */
342 const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT;
343
344 /**
345 Encode BIT block with repeatable runs of zeroes
346 */
347 void encode_bit_interval(const bm::word_t* blk,
348 bm::encoder& enc,
349 unsigned size_control) BMNOEXCEPT;
350 /**
351 Encode bit-block using digest (hierarchical compression)
352 */
353 void encode_bit_digest(const bm::word_t* blk,
354 bm::encoder& enc,
355 bm::id64_t d0) BMNOEXCEPT;
356 /**
357 Encode XOR match chain
358 */
360 const block_match_chain_type& mchain) BMNOEXCEPT;
361
362 /**
363 Determine best representation for GAP block based
364 on current set compression level
365
366 @return set_block_bit, set_block_bit_1bit, set_block_arrgap
367 set_block_arrgap_egamma, set_block_arrgap_bienc
368 set_block_arrgap_inv, set_block_arrgap_egamma_inv
369 set_block_arrgap_bienc_inv, set_block_gap_egamma
370 set_block_gap_bienc
371
372 @internal
373 */
374 unsigned char
376
377 /// Determine best representation for a bit-block
378 unsigned char find_bit_best_encoding(const bm::word_t* block) BMNOEXCEPT;
379
380 /// Determine best representation for a bit-block (level 5)
381 unsigned char find_bit_best_encoding_l5(const bm::word_t* block) BMNOEXCEPT;
382
383 void reset_models() BMNOEXCEPT { mod_size_ = 0; }
384 void add_model(unsigned char mod, unsigned score) BMNOEXCEPT;
385protected:
386
387 /// Bookmark state structure
389 {
391 : ptr_(0), nb_(0),
392 nb_range_(nb_range), bm_type_(0)
393 {
394 min_bytes_range_ = nb_range * 8;
395 if (min_bytes_range_ < 512)
396 min_bytes_range_ = 512;
397
398 if (nb_range < 15)
399 bm_type_ = 2; // 16-bit offset
400 else
401 if (nb_range < 255)
402 bm_type_ = 1; // 24-bit offset
403 }
404
405 unsigned char* ptr_; ///< bookmark pointer
406 block_idx_type nb_; ///< bookmark block idx
407 block_idx_type nb_range_; ///< target bookmark range in blocks
408 unsigned bm_type_; ///< 0:32-bit, 1: 24-bit, 2: 16-bit
409 size_t min_bytes_range_; ///< minumal distance (bytes) between marks
410 };
411
412 /**
413 Check if bookmark needs to be placed and if so, encode it
414 into serialization BLOB
415
416 @param nb - block idx
417 @param bookm - bookmark state structure
418 @param enc - BLOB encoder
419 */
420 static
421 void process_bookmark(block_idx_type nb, bookmark_state& bookm,
423
424 /**
425 Compute digest based XOR product, place into tmp XOR block
426 */
428 const bm::word_t* s_block,
429 const block_match_chain_type& mchain,
430 unsigned i, unsigned j) BMNOEXCEPT;
431
432private:
433 serializer(const serializer&);
434 serializer& operator=(const serializer&);
435
436private:
439 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
440 typedef bm::heap_vector<unsigned, allocator_type, true> sblock_arridx_type;
441 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
442
443private:
444 bm::id64_t digest0_;
445 unsigned bit_model_d0_size_; ///< memory (bytes) by d0 method (bytes)
446 unsigned bit_model_0run_size_; ///< memory (bytes) by run-0 method (bytes)
447 block_arridx_type bit_idx_arr_;
448 sblock_arridx_type sb_bit_idx_arr_;
449 unsigned scores_[bm::block_waves];
450 unsigned char models_[bm::block_waves];
451 unsigned mod_size_;
452
453 allocator_type alloc_;
454 size_type* compression_stat_;
455 bool allow_stat_reset_ = true; ///< controls zeroing of telemetry
456 bool gap_serial_;
457 bool byte_order_serial_;
458
459 bool sb_bookmarks_; ///< Bookmarks flag
460 unsigned sb_range_; ///< Desired bookmarks interval
461
462 bm::word_t* temp_block_;
463 unsigned compression_level_;
464 bool own_temp_block_;
465
466 bool optimize_; ///< flag to optimize the input vector
467 bool free_; ///< flag to free the input vector
468 allocator_pool_type pool_;
469
470
471 unsigned char* enc_header_pos_; ///< pos of top level header to roll back
472 unsigned char header_flag_; ///< set of masks used to save
473
474 // XOR compression
475 //
476 const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
477 const xor_sim_model_type* sim_model_; ///< similarity model matrix
478 bm::xor_scanner<BV> xor_scan_; ///< scanner for XOR similarity
479 size_type ref_idx_; ///< current reference index
480 bm::word_t* xor_tmp_block_; ///< tmp area for xor product
481 bm::word_t* xor_tmp1_;
482 bm::word_t* xor_tmp2_;
483
484 unsigned sparse_cutoff_; ///< number of bits per blocks to consider sparse
485};
486
487/**
488 Base deserialization class
489 \ingroup bvserial
490 @internal
491*/
492template<typename DEC, typename BLOCK_IDX>
494{
495protected:
496 typedef DEC decoder_type;
497 typedef BLOCK_IDX block_idx_type;
499
500protected:
503 {}
504
505 /// Read GAP block from the stream
507 unsigned block_type,
508 bm::gap_word_t* dst_block,
509 bm::gap_word_t& gap_head);
510
511 /// Read list of bit ids
512 ///
513 /// @return number of ids
515 unsigned block_type,
516 bm::gap_word_t* dst_arr);
517
518 /// Read binary interpolated list into a bit-set
520 bm::word_t* blk, unsigned block_type) BMNOEXCEPT;
521
522 /// Read list of bit ids for super-blocks
523 ///
524 /// @return number of ids
526 unsigned block_type,
527 unsigned* dst_arr,
528 unsigned* sb_idx);
529
530 /// Read binary interpolated gap blocks into a bitset
532
533 /// Read inverted binary interpolated list into a bit-set
535
536 /// Read digest0-type bit-block
538
539
540 /// read bit-block encoded as runs
541 static
543
544 static
545 const char* err_msg() BMNOEXCEPT { return "BM::Invalid serialization format"; }
546
547 /// Try to skip if skip bookmark is available within reach
548 /// @return new block idx if skip went well
549 ///
552 block_idx_type expect_nb) BMNOEXCEPT;
553
554protected:
555 bm::gap_word_t* id_array_; ///< ptr to idx array for temp decode use
556 unsigned* sb_id_array_; ///< ptr to super-block idx array (temp)
557
558 block_idx_type bookmark_idx_;///< last bookmark block index
559 unsigned skip_offset_; ///< bookmark to skip 256 encoded blocks
560 const unsigned char* skip_pos_; ///< decoder skip position
561};
562
563/**
564 Deserializer for bit-vector
565 \ingroup bvserial
566*/
567template<class BV, class DEC>
569 protected deseriaizer_base<DEC, typename BV::block_idx_type>
570{
571public:
572 typedef BV bvector_type;
574 typedef typename BV::size_type size_type;
579
580public:
583
584 /*! Deserialize bit-vector (equivalent to logical OR)
585 @param bv - target bit-vector
586 @param buf - BLOB memory pointer
587 @param temp_block - temporary buffer [block size] (not used)
588
589 @return number of consumed bytes
590 */
592 const unsigned char* buf,
593 bm::word_t* temp_block = 0);
594
595 // ----------------------------------------------------------------
596 /**
597 Attach collection of reference vectors for XOR de-serialization
598 (no transfer of ownership for the pointer)
599 @internal
600 */
601 void set_ref_vectors(const bv_ref_vector_type* ref_vect);
602
603
604 /**
605 set deserialization range [from, to]
606 This is NOT exact, approximate range, content outside range
607 is not guaranteed to be absent
608 @sa unset_range()
609 */
611 {
612 is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
613 }
614
615 /**
616 Disable range deserialization
617 @sa set_range()
618 */
620
621 /** reset range deserialization and reference vectors
622 @sa set_range()
623 */
625 {
627 }
628protected:
629 typedef typename BV::blocks_manager_type blocks_manager_type;
630
631protected:
635
636 void deserialize_gap(unsigned char btype, decoder_type& dec,
639 bm::word_t* blk);
640 void decode_bit_block(unsigned char btype, decoder_type& dec,
643 bm::word_t* blk);
644
646 bvector_type& bv,
648 bm::word_t* blk);
649
651 bvector_type& bv,
653 bm::word_t* blk);
654
656 bvector_type& bv,
658 bm::word_t* blk);
659
660 void decode_arr_sblock(unsigned char btype, decoder_type& dec,
661 bvector_type& bv);
662
663
664protected:
665 typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> block_arridx_type;
666 typedef bm::heap_vector<bm::word_t, allocator_type, true> sblock_arridx_type;
668
669protected:
674
677
678 // XOR compression memory
679 //
680 const bv_ref_vector_type* ref_vect_; ///< ref.vector for XOR compression
681 bm::word_t* xor_block_; ///< xor product
684
685 // XOR decode FSM
686 //
692// bool x_ref_gap_;
693
694 // Range deserialization settings
695 //
699
700
701};
702
703
704/**
705 Iterator to walk forward the serialized stream.
706
707 \internal
708 \ingroup bvserial
709*/
710template<class BV, class SerialIterator>
712{
713public:
714 typedef BV bvector_type;
716 typedef SerialIterator serial_iterator_type;
717public:
718
719 /// set deserialization range [from, to]
720 void set_range(size_type from, size_type to);
721
722 /// disable range filtration
724
726 serial_iterator_type& sit,
727 bm::word_t* temp_block,
729 bool exit_on_one = false);
730
731private:
732 typedef typename BV::blocks_manager_type blocks_manager_type;
734
735 /// load data from the iterator of type "id list"
736 static
737 void load_id_list(bvector_type& bv,
738 serial_iterator_type& sit,
739 unsigned id_count,
740 bool set_clear);
741
742 /// Finalize the deserialization (zero target vector tail or bit-count tail)
743 static
744 size_type finalize_target_vector(blocks_manager_type& bman,
745 set_operation op,
746 size_type bv_block_idx);
747
748 /// Process (obsolete) id-list serialization format
749 static
750 size_type process_id_list(bvector_type& bv,
751 serial_iterator_type& sit,
752 set_operation op);
753 static
754 const char* err_msg() BMNOEXCEPT
755 { return "BM::de-serialization format error"; }
756private:
757 bool is_range_set_ = false;
758 size_type nb_range_from_ = 0;
759 size_type nb_range_to_ = 0;
760};
761
762/*!
763 @brief Serialization stream iterator
764
765 Iterates blocks and control tokens of serialized bit-stream
766 \ingroup bvserial
767 @internal
768*/
769template<class DEC, typename BLOCK_IDX>
770class serial_stream_iterator : protected deseriaizer_base<DEC, BLOCK_IDX>
771{
772public:
774 typedef BLOCK_IDX block_idx_type;
776
777public:
778 serial_stream_iterator(const unsigned char* buf);
780
781 /// serialized bitvector size
782 block_idx_type bv_size() const { return bv_size_; }
783
784 /// Returns true if end of bit-stream reached
785 bool is_eof() const { return end_of_stream_; }
786
787 /// get next block
788 void next();
789
790 /// skip all zero or all-one blocks
791 block_idx_type skip_mono_blocks() BMNOEXCEPT;
792
793 /// read bit block, using logical operation
794 unsigned get_bit_block(bm::word_t* dst_block,
795 bm::word_t* tmp_block,
796 set_operation op);
797
798
799 /// Read gap block data (with head)
800 void get_gap_block(bm::gap_word_t* dst_block);
801
802 /// Return current decoder size
803 unsigned dec_size() const { return decoder_.size(); }
804
805 /// Get low level access to the decoder (use carefully)
806 decoder_type& decoder() { return decoder_; }
807
808 /// iterator is a state machine, this enum encodes
809 /// its key value
810 ///
812 {
813 e_unknown = 0,
814 e_list_ids, ///< plain int array
815 e_blocks, ///< stream of blocks
816 e_zero_blocks, ///< one or more zero bit blocks
817 e_one_blocks, ///< one or more all-1 bit blocks
818 e_bit_block, ///< one bit block
819 e_gap_block ///< one gap block
820
821 };
822
823 /// Returns iterator internal state
824 iterator_state state() const BMNOEXCEPT { return this->state_; }
825
826 iterator_state get_state() const BMNOEXCEPT { return this->state_; }
827 /// Number of ids in the inverted list (valid for e_list_ids)
828 unsigned get_id_count() const BMNOEXCEPT { return this->id_cnt_; }
829
830 /// Get last id from the id list
831 bm::id_t get_id() const BMNOEXCEPT { return this->last_id_; }
832
833 /// Get current block index
834 block_idx_type block_idx() const BMNOEXCEPT { return this->block_idx_; }
835
836public:
837 /// member function pointer for bitset-bitset get operations
838 ///
839 typedef
840 unsigned (serial_stream_iterator<DEC, BLOCK_IDX>::*get_bit_func_type)
842
843 unsigned
844 get_bit_block_ASSIGN(bm::word_t* dst_block, bm::word_t* tmp_block);
845 unsigned
846 get_bit_block_OR (bm::word_t* dst_block, bm::word_t* tmp_block);
847 unsigned
848 get_bit_block_AND (bm::word_t* dst_block, bm::word_t* tmp_block);
849 unsigned
850 get_bit_block_SUB (bm::word_t* dst_block, bm::word_t* tmp_block);
851 unsigned
852 get_bit_block_XOR (bm::word_t* dst_block, bm::word_t* tmp_block);
853 unsigned
854 get_bit_block_COUNT (bm::word_t* dst_block, bm::word_t* tmp_block);
855 unsigned
856 get_bit_block_COUNT_AND(bm::word_t* dst_block, bm::word_t* tmp_block);
857 unsigned
858 get_bit_block_COUNT_OR(bm::word_t* dst_block, bm::word_t* tmp_block);
859 unsigned
860 get_bit_block_COUNT_XOR(bm::word_t* dst_block, bm::word_t* tmp_block);
861 unsigned
862 get_bit_block_COUNT_SUB_AB(bm::word_t* dst_block, bm::word_t* tmp_block);
863 unsigned
864 get_bit_block_COUNT_SUB_BA(bm::word_t* dst_block, bm::word_t* tmp_block);
865 unsigned
866 get_bit_block_COUNT_A(bm::word_t* dst_block, bm::word_t* tmp_block);
867 unsigned
869 {
870 return get_bit_block_COUNT(dst_block, tmp_block);
871 }
872
873 /// Get array of bits out of the decoder into bit block
874 /// (Converts inverted list into bits)
875 /// Returns number of words (bits) being read
876 unsigned get_arr_bit(bm::word_t* dst_block,
877 bool clear_target=true) BMNOEXCEPT;
878
879 /// Get current block type
880 unsigned get_block_type() const BMNOEXCEPT { return block_type_; }
881
882 unsigned get_bit() BMNOEXCEPT;
883
884 void get_inv_arr(bm::word_t* block) BMNOEXCEPT;
885
886 /// Try to skip if skip bookmark is available within reach
887 /// @return true if skip went well
888 ///
890 {
891 block_idx_type new_nb = parent_type::try_skip(decoder_, nb, expect_nb);
892 if (new_nb)
893 {
894 block_idx_ = new_nb; state_ = e_blocks;
895 }
896 return new_nb;
897 }
898
899protected:
900 get_bit_func_type bit_func_table_[bm::set_END];
901
902 unsigned char header_flag_;
903
908 unsigned id_cnt_; ///< Id counter for id list
909 bm::id_t last_id_; ///< Last id from the id list
910 gap_word_t glevels_[bm::gap_levels]; ///< GAP levels
911
912 unsigned block_type_; ///< current block type
913 block_idx_type block_idx_; ///< current block index
914 block_idx_type mono_block_cnt_; ///< number of 0 or 1 blocks
915
918};
919
920/**
921 Deserializer, performs logical operations between bit-vector and
922 serialized bit-vector. This utility class potentially provides faster
923 and/or more memory efficient operation than more conventional deserialization
924 into memory bvector and then logical operation
925
926 \ingroup bvserial
927*/
928template<typename BV>
930{
931public:
932 typedef BV bvector_type;
933 typedef typename BV::allocator_type allocator_type;
936
937public:
940
941 /*!
942 \brief Deserialize bvector using buffer as set operation argument
943
944 \param bv - target bvector
945 \param buf - serialized buffer used as as a logical operation argument
946 \param op - set algebra operation (default: OR)
947 \param exit_on_one - quick exit if set operation found some result
948
949 \return bitcount (for COUNT_* operations)
950 */
952 const unsigned char* buf,
953 set_operation op,
954 bool exit_on_one = false);
955
956 /*!
957 Deserialize range using mask vector (AND)
958 \param bv - target bvector (should be set ranged)
959 \param buf - serialized buffer pointer
960 \param idx_from - range of bits set for deserialization [from..to]
961 \param idx_to - range of bits [from..to]
962 */
964 const unsigned char* buf,
965 size_type idx_from,
966 size_type idx_to);
967
968
969
970
971 // -----------------------------------------------------------------
972 // obsolete methods (switch to new ones, without team_block)
973 //
974
975 /*!
976 \brief Obsolete! Deserialize bvector using buffer as set operation argument
977
978 \param bv - target bvector
979 \param buf - serialized buffer as a logical argument
980 \param op - set algebra operation (default: OR)
981 \param exit_on_one - quick exit if set operation found some result
982
983 \return bitcount (for COUNT_* operations)
984 */
986 const unsigned char* buf,
987 bm::word_t*, /*temp_block,*/
989 bool exit_on_one = false)
990 { return deserialize(bv, buf, op, exit_on_one); }
991
992 /*!
993 Deserialize range using mask vector (AND)
994 \param bv - target bvector (should be set ranged)
995 \param buf - serialized buffer pointer
996 \param idx_from - range of bits set for deserialization [from..to]
997 \param idx_to - range of bits [from..to]
998 */
1000 const unsigned char* buf,
1001 bm::word_t*, /* temp_block, */
1002 size_type idx_from,
1003 size_type idx_to)
1004 { deserialize_range(bv, buf, idx_from, idx_to); }
1005 // -----------------------------------------------------------------
1006
1007 /**
1008 Attach collection of reference vectors for XOR serialization
1009 (no transfer of ownership for the pointer)
1010 @internal
1011 */
1013 { ref_vect_ = ref_vect; }
1014
1015private:
1016 size_type deserialize_xor(bvector_type& bv,
1017 const unsigned char* buf,
1018 set_operation op,
1019 bool exit_on_one);
1020 static
1021 size_type deserialize_xor(bvector_type& bv,
1022 bvector_type& bv_tmp,
1023 set_operation op);
1024
1025 void deserialize_xor_range(bvector_type& bv,
1026 const unsigned char* buf,
1027 size_type idx_from,
1028 size_type idx_to);
1029
1030private:
1032 typedef
1033 typename BV::blocks_manager_type blocks_manager_type;
1034 typedef
1036 typedef
1038 typedef
1040 typedef
1042 typedef
1044
1045private:
1048
1049 /// default stream iterator (same endian)
1051 /// big-endian stream iterator
1053 /// little-endian stream iterator
1055
1057 deserializer_le de_le_;
1058 deserializer_be de_be_;
1059
1060 bvector_type bv_tmp_;
1061
1062
1063 // XOR compression related fields
1064 //
1065 const bv_ref_vector_type* ref_vect_; ///< xor ref.vector
1066
1067
1068};
1069
1070
1071
1072
1073//----------------------------------------------------------------------------
1074//
1075//----------------------------------------------------------------------------
1076
1077
1078/// \internal
1079/// \ingroup bvserial
1082 BM_HM_RESIZE = (1 << 1), ///< resized vector
1083 BM_HM_ID_LIST = (1 << 2), ///< id list stored
1084 BM_HM_NO_BO = (1 << 3), ///< no byte-order
1085 BM_HM_NO_GAPL = (1 << 4), ///< no GAP levels
1086 BM_HM_64_BIT = (1 << 5), ///< 64-bit vector
1087 BM_HM_HXOR = (1 << 6), ///< horizontal XOR compression turned ON
1088 BM_HM_SPARSE = (1 << 7) ///< very sparse vector
1090
1091
1092// --------------------------------------------------------------------
1093// Serialization stream encoding constants
1094//
1095
1096const unsigned char set_block_end = 0; //!< End of serialization
1097const unsigned char set_block_1zero = 1; //!< One all-zero block
1098const unsigned char set_block_1one = 2; //!< One block all-set (1111...)
1099const unsigned char set_block_8zero = 3; //!< Up to 256 zero blocks
1100const unsigned char set_block_8one = 4; //!< Up to 256 all-set blocks
1101const unsigned char set_block_16zero = 5; //!< Up to 65536 zero blocks
1102const unsigned char set_block_16one = 6; //!< UP to 65536 all-set blocks
1103const unsigned char set_block_32zero = 7; //!< Up to 4G zero blocks
1104const unsigned char set_block_32one = 8; //!< UP to 4G all-set blocks
1105const unsigned char set_block_azero = 9; //!< All other blocks zero
1106const unsigned char set_block_aone = 10; //!< All other blocks one
1107const unsigned char set_block_bit = 11; //!< Plain bit block
1108const unsigned char set_block_sgapbit = 12; //!< SGAP compressed bitblock
1109const unsigned char set_block_sgapgap = 13; //!< SGAP compressed GAP block
1110const unsigned char set_block_gap = 14; //!< Plain GAP block
1111const unsigned char set_block_gapbit = 15; //!< GAP compressed bitblock
1112const unsigned char set_block_arrbit = 16; //!< List of bits ON
1113const unsigned char set_block_bit_interval = 17; //!< Interval block
1114const unsigned char set_block_arrgap = 18; //!< List of bits ON (GAP block)
1115const unsigned char set_block_bit_1bit = 19; //!< Bit block with 1 bit ON
1116const unsigned char set_block_gap_egamma = 20; //!< Gamma compressed GAP block
1117const unsigned char set_block_arrgap_egamma = 21; //!< Gamma compressed delta GAP array
1118const unsigned char set_block_bit_0runs = 22; //!< Bit block with encoded zero intervals
1119const unsigned char set_block_arrgap_egamma_inv = 23; //!< Gamma compressed inverted delta GAP array
1120const unsigned char set_block_arrgap_inv = 24; //!< List of bits OFF (GAP block)
1121const unsigned char set_block_64zero = 25; //!< lots of zero blocks
1122const unsigned char set_block_64one = 26; //!< lots of all-set blocks
1123
1124const unsigned char set_block_gap_bienc = 27; //!< Interpolated GAP block (legacy)
1125const unsigned char set_block_arrgap_bienc = 28; //!< Interpolated GAP array
1126const unsigned char set_block_arrgap_bienc_inv = 29; //!< Interpolated GAP array (inverted)
1127const unsigned char set_block_arrbit_inv = 30; //!< List of bits OFF
1128const unsigned char set_block_arr_bienc = 31; //!< Interpolated block as int array
1129const unsigned char set_block_arr_bienc_inv = 32; //!< Interpolated inverted block int array
1130const unsigned char set_block_bitgap_bienc = 33; //!< Interpolated bit-block as GAPs
1131const unsigned char set_block_bit_digest0 = 34; //!< H-compression with digest mask
1132
1133const unsigned char set_block_ref_eq = 35; //!< block is a copy of a reference block
1134const unsigned char set_block_xor_ref8 = 36; //!< block is masked XOR of a reference block (8-bit)
1135const unsigned char set_block_xor_ref16 = 37; //!< block is masked XOR of a reference block (16-bit)
1136const unsigned char set_block_xor_ref32 = 38; //!< ..... 32-bit (should never happen)
1137const unsigned char set_block_xor_gap_ref8 = 39; //!< ..... 8-bit
1138const unsigned char set_block_xor_gap_ref16 = 40; //!< ..... 16-bit
1139const unsigned char set_block_xor_gap_ref32 = 41; //!< ..... 32-bit (should never happen)
1140const unsigned char set_block_xor_chain = 42; //!< XOR chain (composit of sub-blocks)
1141
1142const unsigned char set_block_gap_bienc_v2 = 43; //!< Interpolated GAP block (v2)
1143const unsigned char set_block_arrgap_bienc_v2 = 44; //!< //!< Interpolated GAP array (v2)
1144const unsigned char set_block_arrgap_bienc_inv_v2 = 45; //!< Interpolated GAP array (inverted)
1145const unsigned char set_block_bitgap_bienc_v2 = 46; //!< Interpolated bit-block as GAPs (v2 - reseved)
1146
1147const unsigned char set_nb_bookmark16 = 47; //!< jump ahead mark (16-bit)
1148const unsigned char set_nb_bookmark24 = 48; //!< jump ahead mark (24-bit)
1149const unsigned char set_nb_bookmark32 = 49; //!< jump ahead mark (32-bit)
1150const unsigned char set_nb_sync_mark8 = 50; //!< bookmark sync point (8-bits)
1151const unsigned char set_nb_sync_mark16 = 51;
1152const unsigned char set_nb_sync_mark24 = 52;
1153const unsigned char set_nb_sync_mark32 = 53;
1154const unsigned char set_nb_sync_mark48 = 54;
1155const unsigned char set_nb_sync_mark64 = 55; //!< ..... 64-bit (should never happen)
1156
1157const unsigned char set_sblock_bienc = 56; //!< super-block interpolated list
1158const unsigned char set_block_arr_bienc_8bh = 57; //!< BIC block 8bit header
1159
1160const unsigned char set_block_xor_ref8_um = 58; //!< block is un-masked XOR of a reference block (8-bit)
1161const unsigned char set_block_xor_ref16_um = 59; //!< block is un-masked XOR of a reference block (16-bit)
1162const unsigned char set_block_xor_ref32_um = 60; //!< ..... 32-bit (should never happen)
1163
1164
1165
1166const unsigned sparse_max_l5 = 48;
1167const unsigned sparse_max_l6 = 256;
1168
1169template<class BV>
1171 bm::word_t* temp_block)
1172: alloc_(alloc),
1173 compression_stat_(0),
1174 gap_serial_(false),
1175 byte_order_serial_(true),
1176 sb_bookmarks_(false),
1177 sb_range_(0),
1178 compression_level_(bm::set_compression_default),
1179 enc_header_pos_(0), header_flag_(0),
1180 ref_vect_(0),
1181 sim_model_(0),
1182 ref_idx_(0),
1183 xor_tmp_block_(0),
1184 sparse_cutoff_(sparse_max_l6)
1185{
1186 bit_idx_arr_.resize(bm::gap_max_bits);
1187 if (temp_block == 0)
1188 {
1189 temp_block_ = alloc_.alloc_bit_block();
1190 own_temp_block_ = true;
1191 }
1192 else
1193 {
1194 temp_block_ = temp_block;
1195 own_temp_block_ = false;
1196 }
1197 compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1198 optimize_ = free_ = false;
1199 xor_tmp1_ = xor_tmp2_ = 0;
1200}
1201
1202template<class BV>
1204: alloc_(allocator_type()),
1205 compression_stat_(0),
1206 gap_serial_(false),
1207 byte_order_serial_(true),
1208 sb_bookmarks_(false),
1209 sb_range_(0),
1210 compression_level_(bm::set_compression_default),
1211 enc_header_pos_(0), header_flag_(0),
1212 ref_vect_(0),
1213 sim_model_(0),
1214 ref_idx_(0),
1215 xor_tmp_block_(0),
1216 sparse_cutoff_(sparse_max_l6)
1217{
1218 bit_idx_arr_.resize(bm::gap_max_bits);
1219 if (temp_block == 0)
1220 {
1221 temp_block_ = alloc_.alloc_bit_block();
1222 own_temp_block_ = true;
1223 }
1224 else
1225 {
1226 temp_block_ = temp_block;
1227 own_temp_block_ = false;
1228 }
1229 compression_stat_ = (size_type*) alloc_.alloc_bit_block();
1230 optimize_ = free_ = false;
1231 xor_tmp1_ = xor_tmp2_ = 0;
1232}
1233
1234template<class BV>
1236{
1237 if (own_temp_block_)
1238 alloc_.free_bit_block(temp_block_);
1239 if (compression_stat_)
1240 alloc_.free_bit_block((bm::word_t*)compression_stat_);
1241 if (xor_tmp_block_)
1242 alloc_.free_bit_block(xor_tmp_block_, 3);
1243}
1244
1245
1246template<class BV>
1248{
1249 for (unsigned i = 0; i < 256; ++i)
1250 compression_stat_[i] = 0;
1251}
1252
1253template<class BV>
1255{
1256 if (clevel <= bm::set_compression_max)
1257 compression_level_ = clevel;
1258 if (compression_level_ == 5)
1259 sparse_cutoff_ = sparse_max_l5;
1260 else if (compression_level_ == 6)
1261 sparse_cutoff_ = sparse_max_l6;
1262}
1263
1264template<class BV>
1266{
1267 BM_ASSERT(cutoff <= sparse_max_l6);
1268 if (cutoff <= sparse_max_l6)
1269 sparse_cutoff_ = cutoff;
1270 else
1271 sparse_cutoff_ = sparse_max_l6;
1272}
1273
1274template<class BV>
1276{
1277 gap_serial_ = value;
1278}
1279
1280template<class BV>
1282{
1283 byte_order_serial_ = value;
1284}
1285
1286template<class BV>
1287void serializer<BV>::set_bookmarks(bool enable, unsigned bm_interval) BMNOEXCEPT
1288{
1289 sb_bookmarks_ = enable;
1290 if (enable)
1291 {
1292 if (bm_interval > 512)
1293 bm_interval = 512;
1294 else
1295 if (bm_interval < 4)
1296 bm_interval = 4;
1297 }
1298 sb_range_ = bm_interval;
1299}
1300
1301template<class BV>
1303{
1304 ref_vect_ = ref_vect;
1305 sim_model_ = 0;
1306 xor_scan_.set_ref_vector(ref_vect);
1307 if (!xor_tmp_block_ && ref_vect)
1308 {
1309 xor_tmp_block_ = alloc_.alloc_bit_block(3);
1310 xor_tmp1_ = &xor_tmp_block_[bm::set_block_size];
1311 xor_tmp2_ = &xor_tmp_block_[bm::set_block_size*2];
1312 }
1313}
1314
1315template<class BV>
1317 const bv_ref_vector_type& ref_vect,
1318 const bm::xor_sim_params& params)
1319{
1320 return xor_scan_.compute_sim_model(sim_model, ref_vect, params);
1321}
1322
1323template<class BV>
1325{
1326 sim_model_ = sim_model;
1327}
1328
1329template<class BV>
1331{
1332 ref_idx_ = ref_idx;
1333}
1334
1335template<class BV>
1337{
1338 const blocks_manager_type& bman = bv.get_blocks_manager();
1339
1340 header_flag_ = 0;
1341 if (bv.size() == bm::id_max) // no dynamic resize
1342 header_flag_ |= BM_HM_DEFAULT;
1343 else
1344 header_flag_ |= BM_HM_RESIZE;
1345
1346 if (!byte_order_serial_)
1347 header_flag_ |= BM_HM_NO_BO;
1348
1349 if (!gap_serial_)
1350 header_flag_ |= BM_HM_NO_GAPL;
1351
1352 #ifdef BM64ADDR
1353 header_flag_ |= BM_HM_64_BIT;
1354 #endif
1355
1356 if (ref_vect_)
1357 {
1358 // TODO: check if XOR search found anything at all
1359 header_flag_ |= BM_HM_HXOR; // XOR compression turned ON
1360 }
1361
1362 enc_header_pos_ = enc.get_pos();
1363 enc.put_8(header_flag_);
1364
1365 if (byte_order_serial_)
1366 {
1368 enc.put_8((unsigned char)bo);
1369 }
1370 // keep GAP levels information
1371 if (gap_serial_)
1372 {
1373 enc.put_16(bman.glen(), bm::gap_levels);
1374 }
1375
1376 // save size (only if bvector has been down-sized)
1377 if (header_flag_ & BM_HM_RESIZE)
1378 {
1379 #ifdef BM64ADDR
1380 enc.put_64(bv.size());
1381 #else
1382 enc.put_32(bv.size());
1383 #endif
1384 }
1385}
1386
1387template<class BV>
1389 const bm::gap_word_t* gap_block, bm::encoder& enc) BMNOEXCEPT
1390{
1391 unsigned len = bm::gap_length(gap_block);
1392 if (len > 4) // BIC encoding
1393 {
1394 encoder::position_type enc_pos0 = enc.get_pos();
1395 BM_ASSERT(gap_block[len-1] == 65535);
1396
1397 bm::gap_word_t head = gap_block[0];
1398 head &= bm::gap_word_t(~(3 << 1)); // clear the level flags
1399 bm::gap_word_t min_v = gap_block[1];
1400 bm::gap_word_t max_v = gap_block[len-2];
1401 bm::gap_word_t tail_delta = bm::gap_word_t(65535 - max_v);
1402
1403 if (min_v < 256)
1404 head |= (1 << 1);
1405 if (tail_delta < 256)
1406 head |= (1 << 2);
1407
1408 enc.put_8(bm::set_block_gap_bienc_v2);
1409 enc.put_16(head);
1410 if (min_v < 256)
1411 enc.put_8((unsigned char)min_v);
1412 else
1413 enc.put_16(min_v);
1414
1415 if (tail_delta < 256)
1416 enc.put_8((unsigned char)tail_delta);
1417 else
1418 enc.put_16(tail_delta);
1419
1420 bit_out_type bout(enc);
1421 bout.bic_encode_u16(&gap_block[2], len-4, min_v, max_v);
1422 bout.flush();
1423
1424 // re-evaluate coding efficiency
1425 //
1426 encoder::position_type enc_pos1 = enc.get_pos();
1427 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1428 if (gamma_size > (len-1)*sizeof(gap_word_t))
1429 {
1430 enc.set_pos(enc_pos0);
1431 }
1432 else
1433 {
1434 compression_stat_[bm::set_block_gap_bienc]++;
1435 return;
1436 }
1437 }
1438
1439 // save as plain GAP block
1440 enc.put_8(bm::set_block_gap);
1441 enc.put_16(gap_block, len-1);
1442
1443 compression_stat_[bm::set_block_gap]++;
1444}
1445
1446
1447template<class BV>
1450{
1451 unsigned len = gap_length(gap_block);
1452 if (len > 3 && (compression_level_ > 3)) // Use Elias Gamma encoding
1453 {
1454 encoder::position_type enc_pos0 = enc.get_pos();
1455 {
1456 bit_out_type bout(enc);
1457 gamma_encoder_func gamma(bout);
1458
1459 enc.put_8(bm::set_block_gap_egamma);
1460 enc.put_16(gap_block[0]);
1461
1462 for_each_dgap(gap_block, gamma);
1463 }
1464 // re-evaluate coding efficiency
1465 //
1466 encoder::position_type enc_pos1 = enc.get_pos();
1467 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1468 if (gamma_size > (len-1)*sizeof(gap_word_t))
1469 {
1470 enc.set_pos(enc_pos0);
1471 }
1472 else
1473 {
1474 compression_stat_[bm::set_block_gap_egamma]++;
1475 return;
1476 }
1477 }
1478
1479 // save as plain GAP block
1480 enc.put_8(bm::set_block_gap);
1481 enc.put_16(gap_block, len-1);
1482
1483 compression_stat_[bm::set_block_gap]++;
1484}
1485
1486template<class BV>
1488 unsigned arr_len,
1489 bm::encoder& enc,
1490 bool inverted) BMNOEXCEPT
1491{
1492 unsigned char scode = inverted ? bm::set_block_arrgap_egamma_inv
1494 if (compression_level_ > 3 && arr_len > 1)
1495 {
1496 encoder::position_type enc_pos0 = enc.get_pos();
1497 {
1498 bit_out_type bout(enc);
1499 enc.put_8(scode);
1500 bout.gamma(arr_len);
1501 gap_word_t prev = gap_array[0];
1502 bout.gamma(prev + 1);
1503
1504 for (unsigned i = 1; i < arr_len; ++i)
1505 {
1506 gap_word_t curr = gap_array[i];
1507 bout.gamma(curr - prev);
1508 prev = curr;
1509 }
1510 }
1511 encoder::position_type enc_pos1 = enc.get_pos();
1512 unsigned gamma_size = (unsigned)(enc_pos1 - enc_pos0);
1513 unsigned plain_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1514 if (gamma_size >= plain_size)
1515 {
1516 enc.set_pos(enc_pos0); // rollback the bit stream
1517 }
1518 else
1519 {
1520 compression_stat_[scode]++;
1521 return;
1522 }
1523 }
1524 // save as a plain array
1526 enc.put_prefixed_array_16(scode, gap_array, arr_len, true);
1527 compression_stat_[scode]++;
1528}
1529
1530
1531template<class BV>
1533 const bm::gap_word_t* gap_block,
1534 unsigned arr_len,
1535 bm::encoder& enc,
1536 bool inverted) BMNOEXCEPT
1537{
1538 BM_ASSERT(arr_len <= 65535);
1539 unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv
1541 if (arr_len > 4)
1542 {
1543 encoder::position_type enc_pos0 = enc.get_pos();
1544 {
1545 bit_out_type bout(enc);
1546
1547 bm::gap_word_t min_v = gap_block[0];
1548 bm::gap_word_t max_v = gap_block[arr_len-1];
1549 BM_ASSERT(max_v > min_v);
1550
1551 enc.put_8(scode);
1552 enc.put_16(min_v);
1553 enc.put_16(max_v);
1554
1555 bout.gamma(arr_len-4);
1556 bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1557 bout.flush();
1558 }
1559 encoder::position_type enc_pos1 = enc.get_pos();
1560 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1561 unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1562 if (enc_size >= raw_size)
1563 {
1564 enc.set_pos(enc_pos0); // rollback the bit stream
1565 }
1566 else
1567 {
1568 compression_stat_[scode]++;
1569 return;
1570 }
1571 }
1572 // save as a plain array
1574 enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1575 compression_stat_[scode]++;
1576}
1577
1578
1579template<class BV>
1581 unsigned arr_len,
1582 bm::encoder& enc,
1583 bool inverted) BMNOEXCEPT
1584{
1585 BM_ASSERT(arr_len <= 65535);
1586
1587 unsigned char scode = inverted ? bm::set_block_arrgap_bienc_inv_v2
1589 if (arr_len > 4)
1590 {
1591 bm::gap_word_t min_v = gap_block[0];
1592 bm::gap_word_t max_v = gap_block[arr_len-1];
1593 bm::gap_word_t tail = bm::gap_word_t(max_v - min_v);
1594
1595 if (min_v >= 256 && tail >= 256)// || arr_len > 128)
1596 {
1597 interpolated_gap_array_v0(gap_block, arr_len, enc, inverted);
1598 return;
1599 }
1600
1601 BM_ASSERT(arr_len < 16383);
1602 encoder::position_type enc_pos0 = enc.get_pos();
1603 {
1604 bit_out_type bout(enc);
1605
1606 BM_ASSERT(max_v > min_v);
1607
1608 enc.put_8(scode);
1609
1610 BM_ASSERT((arr_len & (3 << 14)) == 0);
1611 arr_len <<= 2;
1612 if (min_v < 256)
1613 arr_len |= 1;
1614 if (tail < 256)
1615 arr_len |= (1 << 1);
1616
1617 enc.put_16(bm::gap_word_t(arr_len));
1618 if (min_v < 256)
1619 enc.put_8((unsigned char)min_v);
1620 else
1621 enc.put_16(min_v);
1622
1623 if (tail < 256)
1624 enc.put_8((unsigned char)tail);
1625 else
1626 enc.put_16(tail);
1627
1628 arr_len >>= 2;
1629
1630 bout.bic_encode_u16(&gap_block[1], arr_len-2, min_v, max_v);
1631 bout.flush();
1632 }
1633 encoder::position_type enc_pos1 = enc.get_pos();
1634 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
1635 unsigned raw_size = (unsigned)(sizeof(gap_word_t)+arr_len*sizeof(gap_word_t));
1636 if (enc_size >= raw_size)
1637 {
1638 enc.set_pos(enc_pos0); // rollback the bit stream
1639 }
1640 else
1641 {
1642 compression_stat_[scode]++;
1643 return;
1644 }
1645 }
1646 // save as a plain array
1648 enc.put_prefixed_array_16(scode, gap_block, arr_len, true);
1649 compression_stat_[scode]++;
1650}
1651
1652
1653
1654template<class BV>
1655void serializer<BV>::add_model(unsigned char mod, unsigned score) BMNOEXCEPT
1656{
1657 BM_ASSERT(mod_size_ < 64); // too many models (memory corruption?)
1658 scores_[mod_size_] = score; models_[mod_size_] = mod;
1659 ++mod_size_;
1660}
1661
1662template<class BV>
1663unsigned char
1665{
1666 const float bie_bits_per_int = compression_level_ < 6 ? 3.75f : 2.5f;
1667 const unsigned bie_limit = unsigned(float(bm::gap_max_bits) / bie_bits_per_int);
1668
1669 unsigned bc, ibc, gc;
1670
1671 add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1672
1673 bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1674 add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1675
1676 bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1677 if (!d0)
1678 return bm::set_block_azero;
1679
1680 unsigned d0_bc = word_bitcount64(d0);
1681 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1682 if (d0 != ~0ull)
1683 add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1684
1685 bm::bit_block_change_bc(block, &gc, &bc);
1686 ibc = bm::gap_max_bits - bc;
1687 if (bc == 1)
1689 if (!ibc)
1690 return bm::set_block_aone;
1691 {
1692 unsigned arr_size =
1693 unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1694 unsigned arr_size_inv =
1695 unsigned(sizeof(gap_word_t) + (ibc * sizeof(gap_word_t)));
1696
1697 add_model(bm::set_block_arrbit, arr_size * 8);
1698 add_model(bm::set_block_arrbit_inv, arr_size_inv * 8);
1699 }
1700
1701 float gcf=float(gc);
1702
1703 if (gc > 3 && gc < bm::gap_max_buff_len)
1704 add_model(bm::set_block_gap_bienc,
1705 32 + unsigned((gcf-1) * bie_bits_per_int));
1706
1707
1708 float bcf=float(bc), ibcf=float(ibc);
1709
1710 if (bc < bie_limit)
1711 add_model(bm::set_block_arr_bienc, 16 * 3 + unsigned(bcf * bie_bits_per_int));
1712 else
1713 {
1714 if (ibc < bie_limit)
1716 16 * 3 + unsigned(ibcf * bie_bits_per_int));
1717 }
1718
1719 gc -= gc > 2 ? 2 : 0;
1720 gcf = float(gc);
1721 if (gc < bm::gap_max_buff_len) // GAP block
1722 {
1724 16 * 4 + unsigned(gcf * bie_bits_per_int));
1725 }
1726 else
1727 {
1728 if (gc < bie_limit)
1730 16 * 4 + unsigned(gcf * bie_bits_per_int));
1731 }
1732
1733 // find the best representation based on computed approx.models
1734 //
1735 unsigned min_score = bm::gap_max_bits;
1736 unsigned char model = bm::set_block_bit;
1737 for (unsigned i = 0; i < mod_size_; ++i)
1738 {
1739 if (scores_[i] < min_score)
1740 {
1741 min_score = scores_[i];
1742 model = models_[i];
1743 }
1744 }
1745#if 0
1746 if (model == set_block_bit_0runs)
1747 {
1748 std::cout << " 0runs=" << (bit_model_0run_size_ * 8) << std::endl;
1749 std::cout << " GAP BIC=" << (16 * 4 + unsigned(gcf * bie_bits_per_int)) << std::endl;
1750 std::cout << " ARR BIC=" << (16 * 3 + unsigned(bcf * bie_bits_per_int)) << std::endl;
1751 std::cout << "BC,GC=[" << bc <<", " << gc << "]" << std::endl;
1752 std::cout << "bie_limit=" << bie_limit << std::endl;
1753
1754 }
1755 switch(model)
1756 {
1757 case set_block_bit: std::cout << "BIT=" << "[" << bc <<", " << gc << "]"; break;
1758 case set_block_bit_1bit: std::cout << "1bit "; break;
1759 case set_block_arrgap: std::cout << "arr_gap "; break;
1760 case set_block_arrgap_egamma: std::cout << "arrgap_egamma "; break;
1761 case set_block_arrgap_bienc: std::cout << "arrgap_BIC "; break;
1762 case set_block_arrgap_inv: std::cout << "arrgap_INV "; break;
1763 case set_block_arrgap_egamma_inv: std::cout << "arrgap_egamma_INV "; break;
1764 case set_block_arrgap_bienc_inv: std::cout << "arrgap_BIC_INV "; break;
1765 case set_block_gap_egamma: std::cout << "gap_egamma "; break;
1766 case set_block_gap_bienc: std::cout << "gap_BIC "; break;
1767 case set_block_bitgap_bienc: std::cout << "bitgap_BIC "; break;
1768 case set_block_bit_digest0: std::cout << "D0 "; break;
1769 case set_block_bit_0runs: std::cout << "0runs=[" << bc <<", " << gc << " lmt=" << bie_limit << "]"; break;
1770 case set_block_arr_bienc_inv: std::cout << "arr_BIC_INV "; break;
1771 case set_block_arr_bienc: std::cout << "arr_BIC "; break;
1772 default: std::cout << "UNK=" << int(model); break;
1773 }
1774#endif
1775 return model;
1776}
1777
1778template<class BV>
1779unsigned char
1781{
1782 // experimental code
1783#if 0
1784 {
1785 const float bie_bits_per_int = compression_level_ < 6 ? 3.75f : 2.5f;
1786
1787 unsigned wave_matches[e_bit_end] = {0, };
1788
1790 unsigned s_gc, s_bc;
1791 bm::compute_s_block_descr(block, x_descr, &s_gc, &s_bc);
1792 const unsigned wave_max_bits = bm::set_block_digest_wave_size * 32;
1793
1794 for (unsigned i = 0; i < bm::block_waves; ++i)
1795 {
1796 s_gc = x_descr.sb_gc[i];
1797 s_bc = x_descr.sb_bc[i];
1798 unsigned curr_best;
1799 bm::bit_representation best_rep =
1800 bm::best_representation(s_bc, s_gc, wave_max_bits, bie_bits_per_int, &curr_best);
1801 wave_matches[best_rep]++;
1802 } // for
1803 unsigned sum_cases = 0;
1804 bool sub_diff = false;
1805 unsigned v_count = 0;
1806 for (unsigned i = 0; i < e_bit_end; ++i)
1807 {
1808 sum_cases += wave_matches[i];
1809 if (wave_matches[i] != 0 && wave_matches[i] < 64)
1810 {
1811 sub_diff = true; v_count++;
1812 }
1813 }
1814 if (sub_diff)
1815 {
1816 std::cout << "-" << v_count;
1817 if (v_count > 2)
1818 {
1819 for (unsigned i=0; i < e_bit_end; ++i)
1820 {
1821 if (wave_matches[i])
1822 {
1823 switch (i)
1824 {
1825 case e_bit_GAP: std::cout << " G" << wave_matches[i]; break;
1826 case e_bit_INT: std::cout << " I" << wave_matches[i]; break;
1827 case e_bit_IINT: std::cout << "iI" << wave_matches[i]; break;
1828 case e_bit_1: std::cout << " 1s" << wave_matches[i]; break;
1829 case e_bit_0: std::cout << " 0s" << wave_matches[i]; break;
1830 case e_bit_bit: std::cout << " B" << wave_matches[i]; break;
1831 }
1832 }
1833 } // for
1834 std::cout << " |";
1835 }
1836 }
1837 else
1838 {
1839 //std::cout << "=";
1840 }
1841 BM_ASSERT(sum_cases == 64);
1842 }
1843#endif
1844 reset_models();
1845
1846 if (compression_level_ >= 5)
1847 return find_bit_best_encoding_l5(block);
1848
1849 unsigned bc, bit_gaps;
1850
1851 // heuristics and hard-coded rules to determine
1852 // the best representation for bit-block
1853 //
1854 add_model(bm::set_block_bit, bm::gap_max_bits); // default model (bit-block)
1855
1856 if (compression_level_ <= 1)
1857 return bm::set_block_bit;
1858
1859 // check if it is a very sparse block with some areas of dense areas
1860 bit_model_0run_size_ = bm::bit_count_nonzero_size(block, bm::set_block_size);
1861 if (compression_level_ <= 5)
1862 add_model(bm::set_block_bit_0runs, bit_model_0run_size_ * 8);
1863
1864 if (compression_level_ >= 2)
1865 {
1866 bm::id64_t d0 = digest0_ = bm::calc_block_digest0(block);
1867 if (!d0)
1868 {
1869 add_model(bm::set_block_azero, 0);
1870 return bm::set_block_azero;
1871 }
1872 unsigned d0_bc = word_bitcount64(d0);
1873 bit_model_d0_size_ = unsigned(8 + (32 * d0_bc * sizeof(bm::word_t)));
1874 if (d0 != ~0ull)
1875 add_model(bm::set_block_bit_digest0, bit_model_d0_size_ * 8);
1876
1877 if (compression_level_ >= 4)
1878 {
1879 bm::bit_block_change_bc(block, &bit_gaps, &bc);
1880 }
1881 else
1882 {
1883 bc = bm::bit_block_count(block);
1884 bit_gaps = 65535;
1885 }
1886 BM_ASSERT(bc);
1887
1888 if (bc == 1)
1889 {
1890 add_model(bm::set_block_bit_1bit, 16);
1892 }
1893 unsigned inverted_bc = bm::gap_max_bits - bc;
1894 if (!inverted_bc)
1895 {
1896 add_model(bm::set_block_aone, 0);
1897 return bm::set_block_aone;
1898 }
1899
1900 if (compression_level_ >= 3)
1901 {
1902 unsigned arr_size =
1903 unsigned(sizeof(gap_word_t) + (bc * sizeof(gap_word_t)));
1904 unsigned arr_size_inv =
1905 unsigned(sizeof(gap_word_t) + (inverted_bc * sizeof(gap_word_t)));
1906
1907 add_model(bm::set_block_arrbit, arr_size*8);
1908 add_model(bm::set_block_arrbit_inv, arr_size_inv*8);
1909
1910 if (compression_level_ >= 4)
1911 {
1912 const unsigned gamma_bits_per_int = 6;
1913 //unsigned bit_gaps = bm::bit_block_calc_change(block);
1914
1915 if (compression_level_ == 4)
1916 {
1917 if (bit_gaps > 3 && bit_gaps < bm::gap_max_buff_len)
1918 add_model(bm::set_block_gap_egamma,
1919 16 + (bit_gaps-1) * gamma_bits_per_int);
1920 if (bc < bit_gaps && bc < bm::gap_equiv_len)
1922 16 + bc * gamma_bits_per_int);
1923 if (inverted_bc > 3 && inverted_bc < bit_gaps && inverted_bc < bm::gap_equiv_len)
1925 16 + inverted_bc * gamma_bits_per_int);
1926 }
1927 } // level >= 3
1928 } // level >= 3
1929 } // level >= 2
1930
1931 // find the best representation based on computed approx.models
1932 //
1933 unsigned min_score = bm::gap_max_bits;
1934 unsigned char model = bm::set_block_bit;
1935 for (unsigned i = 0; i < mod_size_; ++i)
1936 {
1937 if (scores_[i] < min_score)
1938 {
1939 min_score = scores_[i];
1940 model = models_[i];
1941 }
1942 }
1943 return model;
1944}
1945
1946template<class BV>
1947unsigned char
1949{
1950 // heuristics and hard-coded rules to determine
1951 // the best representation for d-GAP block
1952 //
1953 if (compression_level_ <= 2)
1954 return bm::set_block_gap;
1955
1956 unsigned len = bm::gap_length(gap_block);
1957 if (len == 2)
1958 return bm::set_block_gap;
1959
1960 unsigned bc = bm::gap_bit_count_unr(gap_block);
1961 unsigned ibc = bm::gap_max_bits - bc;
1962
1963 if (bc == 1)
1965
1966 bc += 2; ibc += 2; // correct counts because effective GAP len = len - 2
1967 if (bc < len)
1968 {
1969 if (compression_level_ < 4 || len < 6)
1970 return bm::set_block_arrgap;
1971
1972 if (compression_level_ == 4)
1974
1976 }
1977 if (ibc < len)
1978 {
1979 if (compression_level_ < 4 || len < 6)
1981
1982 if (compression_level_ == 4)
1985 }
1986 if (len < 6)
1987 return bm::set_block_gap;
1988
1989 if (compression_level_ == 4)
1991
1993}
1994
1995
1996
1997
1998template<class BV>
2000{
2001 gap_word_t* gap_temp_block = (gap_word_t*) temp_block_;
2002
2003 gap_word_t arr_len;
2004 bool invert = false;
2005
2006 unsigned char enc_choice = find_gap_best_encoding(gap_block);
2007 switch (enc_choice)
2008 {
2009 case bm::set_block_gap:
2010 gamma_gap_block(gap_block, enc); // TODO: use plain encode (non-gamma)
2011 break;
2012
2014 arr_len = bm::gap_convert_to_arr(gap_temp_block,
2015 gap_block,
2017 BM_ASSERT(arr_len == 1);
2019 enc.put_16(gap_temp_block[0]);
2020 compression_stat_[bm::set_block_bit_1bit]++;
2021 break;
2024 invert = true;
2026 // fall through
2029 // fall through
2031 arr_len = gap_convert_to_arr(gap_temp_block,
2032 gap_block,
2034 invert);
2035 BM_ASSERT(arr_len);
2036 gamma_gap_array(gap_temp_block, arr_len, enc, invert);
2037 break;
2039 interpolated_encode_gap_block(gap_block, enc);
2040 break;
2042 invert = true;
2044 // fall through
2046 arr_len = gap_convert_to_arr(gap_temp_block,
2047 gap_block,
2049 invert);
2050 BM_ASSERT(arr_len);
2051 interpolated_gap_array(gap_temp_block, arr_len, enc, invert);
2052 break;
2053 default:
2054 gamma_gap_block(gap_block, enc);
2055 } // switch
2056}
2057
2058template<class BV>
2060 bm::encoder& enc,
2061 unsigned //size_control
2062 ) BMNOEXCEPT
2063{
2064 enc.put_8(bm::set_block_bit_0runs);
2065 enc.put_8((blk[0]==0) ? 0 : 1); // encode start
2066
2067 unsigned i, j;
2068 for (i = 0; i < bm::set_block_size; ++i)
2069 {
2070 if (blk[i] == 0)
2071 {
2072 // scan fwd to find 0 island length
2073 for (j = i+1; j < bm::set_block_size; ++j)
2074 {
2075 if (blk[j] != 0)
2076 break;
2077 }
2078 BM_ASSERT(j-i);
2079 enc.put_16((gap_word_t)(j-i));
2080 i = j - 1;
2081 }
2082 else
2083 {
2084 // scan fwd to find non-0 island length
2085 for (j = i+1; j < bm::set_block_size; ++j)
2086 {
2087 if (blk[j] == 0)
2088 {
2089 // look ahead to identify and ignore short 0-run
2090 if (((j+1 < bm::set_block_size) && blk[j+1]) ||
2091 ((j+2 < bm::set_block_size) && blk[j+2]))
2092 {
2093 ++j; // skip zero word
2094 continue;
2095 }
2096 break;
2097 }
2098 }
2099 BM_ASSERT(j-i);
2100 enc.put_16((gap_word_t)(j-i));
2101 enc.put_32(blk + i, j - i); // stream all bit-words now
2102
2103 i = j - 1;
2104 }
2105 }
2106 compression_stat_[bm::set_block_bit_0runs]++;
2107}
2108
2109
2110template<class BV>
2112 bm::encoder& enc,
2114{
2115 // evaluate a few "sure" models here and pick the best
2116 //
2117 if (d0 != ~0ull)
2118 {
2119 if (bit_model_0run_size_ < bit_model_d0_size_)
2120 {
2121 encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
2122 return;
2123 }
2124
2125 // encode using digest0 method
2126 //
2127 enc.put_8(bm::set_block_bit_digest0);
2128 enc.put_64(d0);
2129 while (d0)
2130 {
2131 bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
2132
2133 unsigned wave = bm::word_bitcount64(t - 1);
2134 unsigned off = wave * bm::set_block_digest_wave_size;
2135 unsigned j = 0;
2136 do
2137 {
2138 enc.put_32(block[off+j+0]);
2139 enc.put_32(block[off+j+1]);
2140 enc.put_32(block[off+j+2]);
2141 enc.put_32(block[off+j+3]);
2142 j += 4;
2143 } while (j < bm::set_block_digest_wave_size);
2144
2145 d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
2146 } // while
2147
2148 compression_stat_[bm::set_block_bit_digest0]++;
2149 }
2150 else
2151 {
2152 if (bit_model_0run_size_ < unsigned(bm::set_block_size*sizeof(bm::word_t)))
2153 {
2154 encode_bit_interval(block, enc, 0); // TODO: get rid of param 3 (0)
2155 return;
2156 }
2157
2158 enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
2159 compression_stat_[bm::set_block_bit]++;
2160 }
2161}
2162
2163template<class BV>
2165 const block_match_chain_type& mchain) BMNOEXCEPT
2166{
2167 size_type chain_size = (size_type)mchain.chain_size;
2168 BM_ASSERT(chain_size);
2169
2170 unsigned char vbr_flag = bm::check_pair_vect_vbr(mchain, *ref_vect_);
2171
2172 enc.put_8(bm::set_block_xor_chain);
2173 enc.put_8(vbr_flag); // flag (reserved)
2174
2175 size_type ridx = mchain.ref_idx[0];
2176 bm::id64_t d64 = mchain.xor_d64[0];
2177 ridx = ref_vect_->get_row_idx(ridx);
2178
2179 switch(vbr_flag)
2180 {
2181 case 1: enc.put_8((unsigned char)ridx); break;
2182 case 2: enc.put_16((unsigned short)ridx); break;
2183 case 0: enc.put_32((unsigned)ridx); break;
2184 default: BM_ASSERT(0); break;
2185 } // switch
2186 enc.put_h64(d64);
2187 enc.put_8((unsigned char) (chain_size-1));
2188
2189 for (unsigned ci = 1; ci < chain_size; ++ci)
2190 {
2191 ridx = mchain.ref_idx[ci];
2192 d64 = mchain.xor_d64[ci];
2193 ridx = ref_vect_->get_row_idx(ridx);
2194 switch(vbr_flag)
2195 {
2196 case 1: enc.put_8((unsigned char)ridx); break;
2197 case 2: enc.put_16((unsigned short)ridx); break;
2198 case 0: enc.put_32((unsigned)ridx); break;
2199 default: BM_ASSERT(0); break;
2200 } // switch
2201 enc.put_h64(d64);
2202 } // for ci
2203 compression_stat_[bm::set_block_xor_chain]++;
2204}
2205
2206
2207
2208template<class BV>
2210 const bm::word_t* s_block,
2211 const block_match_chain_type& mchain,
2212 unsigned i, unsigned j) BMNOEXCEPT
2213{
2214 if (BM_IS_GAP(s_block))
2215 {
2216 bm::gap_convert_to_bitset(xor_tmp1_, BMGAP_PTR(s_block));
2217 s_block = xor_tmp1_;
2218 }
2219 size_type ridx = mchain.ref_idx[0];
2220 const bm::word_t* ref_block = xor_scan_.get_ref_block(ridx, i, j);
2221 if (BM_IS_GAP(ref_block))
2222 {
2223 bm::gap_convert_to_bitset(xor_tmp2_, BMGAP_PTR(ref_block));
2224 ref_block = xor_tmp2_;
2225 }
2226 bm::id64_t d64 = mchain.xor_d64[0];
2227 bm::bit_block_xor(xor_tmp_block_, s_block, ref_block, d64);
2228 for (unsigned k = 1; k < mchain.chain_size; ++k)
2229 {
2230 ridx = mchain.ref_idx[k];
2231 ref_block = xor_scan_.get_ref_block(ridx, i, j);
2232 if (BM_IS_GAP(ref_block))
2233 {
2234 bm::gap_convert_to_bitset(xor_tmp2_, BMGAP_PTR(ref_block));
2235 ref_block = xor_tmp2_;
2236 }
2237 d64 = mchain.xor_d64[k];
2238 bm::bit_block_xor(xor_tmp_block_, ref_block, d64);
2239 } // for i
2240}
2241
2242
2243template<class BV>
2245 typename serializer<BV>::buffer& buf,
2246 const statistics_type* bv_stat)
2247{
2248 statistics_type stat;
2249 if (!bv_stat)
2250 {
2251 bv.calc_stat(&stat);
2252 bv_stat = &stat;
2253 }
2254
2255 buf.resize(bv_stat->max_serialize_mem, false); // no-copy resize
2256 optimize_ = free_ = false;
2257
2258 unsigned char* data_buf = buf.data();
2259 size_t buf_size = buf.size();
2260 size_type slen = this->serialize(bv, data_buf, buf_size);
2261 BM_ASSERT(slen <= buf.size()); // or we have a BIG problem with prediction
2262 BM_ASSERT(slen);
2263
2264 buf.resize(slen);
2265}
2266
2267template<class BV>
2269 typename serializer<BV>::buffer& buf)
2270{
2271 statistics_type st;
2272 optimize_ = true;
2273 if (!ref_vect_) // do not use block-free if XOR compression is setup
2274 free_ = true; // set the destructive mode
2275
2276 typename bvector_type::mem_pool_guard mp_g_z;
2277 mp_g_z.assign_if_not_set(pool_, bv);
2278
2279 bv.optimize(temp_block_, BV::opt_compress, &st);
2280 serialize(bv, buf, &st);
2281
2282 optimize_ = free_ = false; // restore the default mode
2283}
2284
2285template<class BV>
2287 bm::encoder& enc,
2288 bool inverted) BMNOEXCEPT
2289{
2290 unsigned arr_len =
2291 bm::bit_block_convert_to_arr(bit_idx_arr_.data(), block, inverted);
2292
2293 if (arr_len)
2294 {
2295 unsigned char scode =
2297 enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len, true);
2298 compression_stat_[scode]++;
2299 return;
2300 }
2301 encode_bit_digest(block, enc, digest0_);
2302}
2303
2304template<class BV>
2307{
2308 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_equiv_len);
2309 BM_ASSERT(len); (void)len;
2310 gamma_gap_block(bit_idx_arr_.data(), enc);
2311}
2312
2313template<class BV>
2315 bm::encoder& enc,
2316 bool inverted) BMNOEXCEPT
2317{
2318 unsigned arr_len =
2319 bm::bit_block_convert_to_arr(bit_idx_arr_.data(), block, inverted);
2320 if (arr_len)
2321 {
2322 gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2323 return;
2324 }
2325 enc.put_prefixed_array_32(bm::set_block_bit, block, bm::set_block_size);
2326 compression_stat_[bm::set_block_bit]++;
2327}
2328
2329template<class BV>
2331 bm::encoder& enc,
2332 bool inverted) BMNOEXCEPT
2333{
2334 unsigned arr_len =
2335 bm::bit_block_convert_to_arr(bit_idx_arr_.data(), block, inverted);
2336 if (arr_len)
2337 {
2338 interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2339 return;
2340 }
2341 encode_bit_digest(block, enc, digest0_);
2342}
2343
2344template<class BV>
2347{
2348 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2349 BM_ASSERT(len); (void)len;
2350 interpolated_encode_gap_block(bit_idx_arr_.data(), enc);
2351}
2352
2353
2354template<class BV>
2357{
2358 unsigned len = bm::bit_to_gap(bit_idx_arr_.data(), block, bm::gap_max_bits);
2359 BM_ASSERT(len); (void)len;
2360
2361 const unsigned char scode = bm::set_block_bitgap_bienc;
2362
2363 encoder::position_type enc_pos0 = enc.get_pos();
2364 {
2365 bit_out_type bout(enc);
2366
2367 bm::gap_word_t head = (bit_idx_arr_[0] & 1); // isolate start flag
2368 bm::gap_word_t min_v = bit_idx_arr_[1];
2369
2370 BM_ASSERT(bit_idx_arr_[len] == 65535);
2371 BM_ASSERT(bit_idx_arr_[len] > min_v);
2372
2373 enc.put_8(scode);
2374
2375 enc.put_8((unsigned char)head);
2376 enc.put_16(bm::gap_word_t(len));
2377 enc.put_16(min_v);
2378 bout.bic_encode_u16(&bit_idx_arr_[2], len-2, min_v, 65535);
2379 bout.flush();
2380 }
2381 encoder::position_type enc_pos1 = enc.get_pos();
2382 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2383 unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2384 if (enc_size >= raw_size)
2385 {
2386 enc.set_pos(enc_pos0); // rollback the bit stream
2387 }
2388 else
2389 {
2390 compression_stat_[scode]++;
2391 return;
2392 }
2393 // if we got to this point it means coding was not efficient
2394 // and we rolled back to simpler method
2395 //
2396 encode_bit_digest(block, enc, digest0_);
2397}
2398
2399//--------------------------------------------------------------------
2400//
2401const unsigned sblock_flag_sb16 = (1u << 0); ///< 16-bit SB index (8-bit by default)
2402const unsigned sblock_flag_sb32 = (1u << 1); ///< 32-bit SB index
2403
2404const unsigned sblock_flag_min16 = (1u << 2); ///< 16-bit minv
2405const unsigned sblock_flag_min24 = (1u << 3); ///< 24-bit minv
2407
2408const unsigned sblock_flag_len16 = (1u << 4); ///< 16-bit len (8-bit by default)
2409const unsigned sblock_flag_max16 = (1u << 5);
2410const unsigned sblock_flag_max24 = (1u << 6);
2412
2413template<class BV>
2414void serializer<BV>::bienc_arr_sblock(const BV& bv, unsigned sb,
2416{
2417 unsigned sb_flag = 0;
2418
2419 unsigned char scode = bm::set_sblock_bienc;
2420 bm::convert_sub_to_arr(bv, sb, sb_bit_idx_arr_);
2421 unsigned len = (unsigned)sb_bit_idx_arr_.size();
2422
2423 BM_ASSERT(sb_bit_idx_arr_.size() < 65536);
2424 BM_ASSERT(sb_bit_idx_arr_.size());
2425
2426 bm::word_t min_v = sb_bit_idx_arr_[0];
2427 bm::word_t max_v = sb_bit_idx_arr_[len - 1];
2429 bm::word_t max_v_delta = bm::set_sub_total_bits - max_v;
2430
2431 // build decoding flags
2432 if (sb > 65535)
2433 sb_flag |= bm::sblock_flag_sb32;
2434 else if (sb > 255)
2435 sb_flag |= bm::sblock_flag_sb16;
2436
2437 if (len > 255)
2438 sb_flag |= bm::sblock_flag_len16;
2439
2440 if (min_v > 65535)
2441 if (min_v < 0xFFFFFF)
2442 sb_flag |= bm::sblock_flag_min24;
2443 else
2444 sb_flag |= bm::sblock_flag_min32; // 24 and 16
2445 else if (min_v > 255)
2446 sb_flag |= bm::sblock_flag_min16;
2447
2448 if (max_v_delta > 65535)
2449 if (max_v_delta < 0xFFFFFF)
2450 sb_flag |= bm::sblock_flag_max24;
2451 else
2452 sb_flag |= bm::sblock_flag_max32;
2453 else if (max_v_delta > 255)
2454 sb_flag |= bm::sblock_flag_max16;
2455
2456 // encoding header
2457 //
2458 enc.put_8(scode);
2459 enc.put_8((unsigned char)sb_flag);
2460
2461 if (sb > 65535)
2462 enc.put_32(sb);
2463 else if (sb > 255)
2464 enc.put_16((unsigned short)sb);
2465 else
2466 enc.put_8((unsigned char)sb);
2467
2468 if (len > 255)
2469 enc.put_16((unsigned short)len);
2470 else
2471 enc.put_8((unsigned char)len);
2472
2473 if (min_v > 65535)
2474 if (min_v < 0xFFFFFF)
2475 enc.put_24(min_v);
2476 else
2477 enc.put_32(min_v);
2478 else if (min_v > 255)
2479 enc.put_16((unsigned short)min_v);
2480 else
2481 enc.put_8((unsigned char)min_v);
2482
2483 if (max_v_delta > 65535)
2484 if (max_v < 0xFFFFFF)
2485 enc.put_24(max_v_delta);
2486 else
2487 enc.put_32(max_v_delta);
2488 else if (max_v_delta > 255)
2489 enc.put_16((unsigned short)max_v_delta);
2490 else
2491 enc.put_8((unsigned char)max_v_delta);
2492
2493 bit_out_type bout(enc);
2494 bout.bic_encode_u32_cm(sb_bit_idx_arr_.data()+1,
2495 (unsigned)sb_bit_idx_arr_.size()-2,
2496 min_v, max_v);
2497
2498 compression_stat_[scode]++;
2499}
2500
2501
2502
2503template<class BV>
2504void
2506 bm::encoder& enc,
2507 bool inverted) BMNOEXCEPT
2508{
2509 unsigned arr_len = bm::bit_block_convert_to_arr(
2510 bit_idx_arr_.data(), block, inverted);
2511
2512 if (arr_len)
2513 {
2514 unsigned char scode =
2516
2517 encoder::position_type enc_pos0 = enc.get_pos();
2518 {
2519 bit_out_type bout(enc);
2520
2521 bm::gap_word_t min_v = bit_idx_arr_[0];
2522 bm::gap_word_t max_v = bit_idx_arr_[arr_len-1];
2523 BM_ASSERT(max_v > min_v);
2524 bm::gap_word_t max_delta = bm::gap_word_t(65536 - max_v);
2525
2526 if (!inverted && min_v <= 0xFF && max_delta <= 0xFF) // 8-bit header
2527 {
2528 enc.put_8(bm::set_block_arr_bienc_8bh);
2529 enc.put_8((unsigned char)min_v);
2530 enc.put_8((unsigned char)max_delta);
2531 }
2532 else
2533 {
2534 enc.put_8(scode);
2535 enc.put_16(min_v);
2536 enc.put_16(max_v);
2537 }
2538 enc.put_16(bm::gap_word_t(arr_len));
2539
2540 bout.bic_encode_u16(&bit_idx_arr_[1], arr_len-2, min_v, max_v);
2541 bout.flush();
2542 }
2543 encoder::position_type enc_pos1 = enc.get_pos();
2544 unsigned enc_size = (unsigned)(enc_pos1 - enc_pos0);
2545 unsigned raw_size = sizeof(word_t) * bm::set_block_size;
2546 if (enc_size >= raw_size)
2547 {
2548 enc.set_pos(enc_pos0); // rollback the bit stream
2549 }
2550 else
2551 {
2552 if (digest0_ != ~0ull && enc_size > bit_model_d0_size_)
2553 {
2554 enc.set_pos(enc_pos0); // rollback the bit stream
2555 }
2556 else
2557 {
2558 compression_stat_[scode]++;
2559 return;
2560 }
2561 }
2562 }
2563 // coding did not result in best compression
2564 // use simpler method
2565 encode_bit_digest(block, enc, digest0_);
2566}
2567
2568
2569
2570#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \
2571 if (nb == 1u) \
2572 enc.put_8(B_1ZERO); \
2573 else if (nb < 256u) \
2574 { \
2575 enc.put_8(B_8ZERO); \
2576 enc.put_8((unsigned char)nb); \
2577 } \
2578 else if (nb < 65536u) \
2579 { \
2580 enc.put_8(B_16ZERO); \
2581 enc.put_16((unsigned short)nb); \
2582 } \
2583 else if (nb < bm::id_max32) \
2584 { \
2585 enc.put_8(B_32ZERO); \
2586 enc.put_32(unsigned(nb)); \
2587 } \
2588 else \
2589 {\
2590 enc.put_8(B_64ZERO); \
2591 enc.put_64(nb); \
2592 }
2593
2594
2595template<class BV>
2597 bookmark_state& bookm,
2599{
2600 BM_ASSERT(bookm.nb_range_);
2601
2602 block_idx_type nb_delta = nb - bookm.nb_;
2603 if (bookm.ptr_ && nb_delta >= bookm.nb_range_)
2604 {
2605 unsigned char* curr = enc.get_pos();
2606 size_t bytes_delta = size_t(curr - bookm.ptr_);
2607 if (bytes_delta > bookm.min_bytes_range_)
2608 {
2609 enc.set_pos(bookm.ptr_); // rewind back and save the skip
2610 switch (bookm.bm_type_)
2611 {
2612 case 0: // 32-bit mark
2613 bytes_delta -= sizeof(unsigned);
2614 if (bytes_delta < 0xFFFFFFFF) // range overflow check
2615 enc.put_32(unsigned(bytes_delta));
2616 // if range is somehow off, bookmark remains 0 (NULL)
2617 break;
2618 case 1: // 24-bit mark
2619 bytes_delta -= (sizeof(unsigned)-1);
2620 if (bytes_delta < 0xFFFFFF)
2621 enc.put_24(unsigned(bytes_delta));
2622 break;
2623 case 2: // 16-bit mark
2624 bytes_delta -= sizeof(unsigned short);
2625 if (bytes_delta < 0xFFFF)
2626 enc.put_16((unsigned short)bytes_delta);
2627 break;
2628 default:
2629 BM_ASSERT(0);
2630 break;
2631 } // switch
2632
2633 enc.set_pos(curr); // restore and save the sync mark
2634
2635 if (nb_delta < 0xFF)
2636 {
2637 enc.put_8(set_nb_sync_mark8);
2638 enc.put_8((unsigned char) nb_delta);
2639 }
2640 else
2641 if (nb_delta < 0xFFFF)
2642 {
2643 enc.put_8(set_nb_sync_mark16);
2644 enc.put_16((unsigned short) nb_delta);
2645 }
2646 else
2647 if (nb_delta < 0xFFFFFF)
2648 {
2649 enc.put_8(set_nb_sync_mark24);
2650 enc.put_24(unsigned(nb_delta));
2651 }
2652 else
2653 if (nb_delta < ~0U)
2654 {
2655 enc.put_8(set_nb_sync_mark32);
2656 enc.put_32(unsigned(nb_delta));
2657 }
2658 else
2659 {
2660 #ifdef BM64ADDR
2661 if (nb_delta < 0xFFFFFFFFFFFFUL)
2662 {
2663 enc.put_8(set_nb_sync_mark48);
2664 enc.put_48(nb_delta);
2665 }
2666 else
2667 {
2668 enc.put_8(set_nb_sync_mark64);
2669 enc.put_64(nb_delta);
2670 }
2671 #endif
2672 }
2673 bookm.ptr_ = 0;
2674 }
2675 }
2676
2677 if (!bookm.ptr_) // start new bookmark
2678 {
2679 // bookmarks use VBR to save offset
2680 bookm.nb_ = nb;
2681 bookm.ptr_ = enc.get_pos() + 1;
2682 switch (bookm.bm_type_)
2683 {
2684 case 0: // 32-bit mark
2685 enc.put_8(bm::set_nb_bookmark32);
2686 enc.put_32(0); // put NULL mark at first
2687 break;
2688 case 1: // 24-bit mark
2689 enc.put_8(bm::set_nb_bookmark24);
2690 enc.put_24(0);
2691 break;
2692 case 2: // 16-bit mark
2693 enc.put_8(bm::set_nb_bookmark16);
2694 enc.put_16(0);
2695 break;
2696 default:
2697 BM_ASSERT(0);
2698 break;
2699 } // switch
2700 }
2701}
2702
2703
2704template<class BV>
2707 unsigned char* buf, size_t buf_size)
2708{
2709 BM_ASSERT(temp_block_);
2710
2711 if (allow_stat_reset_)
2712 reset_compression_stats();
2713 const blocks_manager_type& bman = bv.get_blocks_manager();
2714
2715 bm::encoder enc(buf, buf_size); // create the encoder
2716 enc_header_pos_ = 0;
2717 encode_header(bv, enc);
2718
2719 bookmark_state sb_bookmark(sb_range_);
2720
2721 unsigned i_last = ~0u;
2722
2723 block_idx_type i, j;
2724 for (i = 0; i < bm::set_total_blocks; ++i)
2725 {
2726 unsigned i0, j0;
2727 bm::get_block_coord(i, i0, j0);
2728
2729 if (i0 < bman.top_block_size())
2730 {
2731 // ----------------------------------------------------
2732 // bookmark the stream to allow fast skip on decode
2733 //
2734 if (sb_bookmarks_)
2735 {
2736 process_bookmark(i, sb_bookmark, enc);
2737 }
2738
2739 // ---------------------------------------------------
2740 // check if top level block is embarassingly sparse
2741 // and can be encoded as an array of unsigned
2742 //
2743 if ((compression_level_ >= 5) && (i0 != i_last))
2744 {
2745 i_last = i0;
2746 bool is_sparse_sub = bman.is_sparse_sblock(i0, sparse_cutoff_);
2747 if (is_sparse_sub)
2748 {
2749 header_flag_ |= BM_HM_SPARSE;
2750 bienc_arr_sblock(bv, i0, enc);
2751 i += (bm::set_sub_array_size - j0) - 1;
2752 continue;
2753 }
2754 }
2755 }
2756 else
2757 {
2758 break;
2759 }
2760
2761 const bm::word_t* blk = bman.get_block(i0, j0);
2762
2763 // ----------------------------------------------------
2764 // Empty or ONE block serialization
2765 //
2766 // TODO: make a function to check this in ONE pass
2767 //
2768 bool flag;
2769 flag = bm::check_block_zero(blk, false/*shallow check*/);
2770 if (flag)
2771 {
2772 zero_block:
2773 flag = 1;
2774 block_idx_type next_nb = bman.find_next_nz_block(i+1, false);
2775 if (next_nb == bm::set_total_blocks) // no more blocks
2776 {
2778 size_type sz = (size_type)enc.size();
2779
2780 // rewind back to save header flag
2781 enc.set_pos(enc_header_pos_);
2782 enc.put_8(header_flag_);
2783 return sz;
2784 }
2785 block_idx_type nb = next_nb - i;
2786
2787 if (nb > 1 && nb < 128)
2788 {
2789 // special (but frequent) case -- GAP delta fits in 7bits
2790 unsigned char c = (unsigned char)((1u << 7) | nb);
2791 enc.put_8(c);
2792 }
2793 else
2794 {
2800 }
2801 i = next_nb - 1;
2802 continue;
2803 }
2804 else
2805 {
2806 flag = bm::check_block_one(blk, false); // shallow scan
2807 if (flag)
2808 {
2809 full_block:
2810 flag = 1;
2811 // Look ahead for similar blocks
2812 for(j = i+1; j < bm::set_total_blocks; ++j)
2813 {
2814 bm::get_block_coord(j, i0, j0);
2815 if (!j0) // look ahead if the whole superblock is 0xFF
2816 {
2817 bm::word_t*** blk_root = bman.top_blocks_root();
2818 if ((bm::word_t*)blk_root[i0] == FULL_BLOCK_FAKE_ADDR)
2819 {
2820 j += 255;
2821 continue;
2822 }
2823 }
2824 const bm::word_t* blk_next = bman.get_block(i0, j0);
2825 if (flag != bm::check_block_one(blk_next, true)) // deep scan
2826 break;
2827 } // for j
2828
2829 // TODO: improve this condition, because last block is always
2830 // has 0 at the end, thus this never happen in practice
2831 if (j == bm::set_total_blocks)
2832 {
2833 enc.put_8(set_block_aone);
2834 break;
2835 }
2836 else
2837 {
2838 block_idx_type nb = j - i;
2844 }
2845 i = j - 1;
2846 continue;
2847 }
2848 }
2849
2850 if (ref_vect_) // XOR filter
2851 {
2852 // Similarity model must be attached with the ref.vectors
2853 // for XOR filter to work
2854 BM_ASSERT(sim_model_);
2855
2856 bool nb_indexed = sim_model_->bv_blocks.test(i);
2857 BM_ASSERT(nb_indexed); // Model is correctly computed from ref-vector
2858 if (nb_indexed)
2859 {
2860 // TODO: use rs-index or count blocks
2861 size_type rank = sim_model_->bv_blocks.count_range(0, i);
2862 BM_ASSERT(rank);
2863 --rank;
2864 const block_match_chain_type& mchain =
2865 sim_model_->matr.get(ref_idx_, rank);
2866 BM_ASSERT(mchain.nb == i);
2867 switch (mchain.match)
2868 {
2869 case e_no_xor_match:
2870 break;
2871 case e_xor_match_EQ:
2872 {
2873 BM_ASSERT(mchain.chain_size == 1);
2874 size_type ridx = mchain.ref_idx[0];
2875 size_type plain_idx = ref_vect_->get_row_idx(ridx);
2877 enc.put_32(unsigned(plain_idx));
2878 compression_stat_[bm::set_block_ref_eq]++;
2879 continue;
2880 }
2881 break;
2882 case e_xor_match_GC:
2883 case e_xor_match_BC:
2884 case e_xor_match_iBC:
2885 {
2886 BM_ASSERT(mchain.chain_size);
2887 xor_tmp_product(blk, mchain, i0, j0);
2888 // TODO: validate xor_tmp_block_
2889 if (mchain.chain_size == 1)
2890 {
2891 size_type ridx = mchain.ref_idx[0];
2892 bm::id64_t d64 = mchain.xor_d64[0];
2893 size_type plain_idx = ref_vect_->get_row_idx(ridx);
2894 if (d64 == ~0ull)
2895 {
2896 enc.put_8_16_32(unsigned(plain_idx),
2900 }
2901 else
2902 {
2903 enc.put_8_16_32(unsigned(plain_idx),
2907 enc.put_64(d64); // xor digest mask
2908 }
2909 compression_stat_[bm::set_block_xor_ref32]++;
2910 }
2911 else // chain
2912 {
2913 encode_xor_match_chain(enc, mchain);
2914 }
2915 blk = xor_tmp_block_; // substitute with XOR product
2916 }
2917 break;
2918 default:
2919 BM_ASSERT(0);
2920 } // switch xor_match
2921 }
2922 }
2923
2924 // --------------------------------------------------
2925 // GAP serialization
2926 //
2927 if (BM_IS_GAP(blk))
2928 {
2929 encode_gap_block(BMGAP_PTR(blk), enc);
2930 }
2931 else // bit-block
2932 {
2933 // ----------------------------------------------
2934 // BIT BLOCK serialization
2935 //
2936
2937 unsigned char model = find_bit_best_encoding(blk);
2938 switch (model)
2939 {
2940 case bm::set_block_bit:
2942 break;
2944 {
2945 unsigned bit_idx = 0;
2946 bm::bit_block_find(blk, bit_idx, &bit_idx);
2947 BM_ASSERT(bit_idx < 65536);
2948 enc.put_8(bm::set_block_bit_1bit); enc.put_16(bm::short_t(bit_idx));
2949 compression_stat_[bm::set_block_bit_1bit]++;
2950 continue;
2951 }
2952 break;
2953 case bm::set_block_azero: // empty block all of the sudden ?
2954 goto zero_block;
2955 case bm::set_block_aone:
2956 goto full_block;
2958 encode_bit_array(blk, enc, false);
2959 break;
2961 encode_bit_array(blk, enc, true);
2962 break;
2964 gamma_gap_bit_block(blk, enc);
2965 break;
2967 encode_bit_interval(blk, enc, 0); // TODO: get rid of param 3 (0)
2968 break;
2970 gamma_arr_bit_block(blk, enc, false);
2971 break;
2973 gamma_arr_bit_block(blk, enc, true);
2974 break;
2976 bienc_arr_bit_block(blk, enc, false);
2977 break;
2979 bienc_arr_bit_block(blk, enc, true);
2980 break;
2982 interpolated_arr_bit_block(blk, enc, false);
2983 break;
2985 interpolated_arr_bit_block(blk, enc, true); // inverted
2986 break;
2988 interpolated_gap_bit_block(blk, enc);
2989 break;
2991 bienc_gap_bit_block(blk, enc);
2992 break;
2994 encode_bit_digest(blk, enc, digest0_);
2995 break;
2996 default:
2997 BM_ASSERT(0); // predictor returned an unknown model
2999 }
3000 } // bit-block processing
3001
3002 // destructive serialization mode
3003 //
3004 if (free_)
3005 {
3006 // const cast is ok, because it is a requested mode
3007 const_cast<blocks_manager_type&>(bman).zero_block(i);
3008 }
3009
3010 } // for i
3011
3012 enc.put_8(set_block_end);
3013 size_type sz = (size_type)enc.size();
3014
3015 // rewind back to save header flag
3016 enc.set_pos(enc_header_pos_);
3017 enc.put_8(header_flag_);
3018
3019 return sz;
3020}
3021
3022
3023
3024/// Bit mask flags for serialization algorithm
3025/// \ingroup bvserial
3027 BM_NO_BYTE_ORDER = 1, ///< save no byte-order info (save some space)
3028 BM_NO_GAP_LENGTH = (1 << 1) ///< save no GAP info (save some space)
3030
3031/*!
3032 \brief Saves bitvector into memory.
3033
3034 Function serializes content of the bitvector into memory.
3035 Serialization adaptively uses compression(variation of GAP encoding)
3036 when it is benefitial.
3037
3038 \param bv - source bvecor
3039 \param buf - pointer on target memory area. No range checking in the
3040 function. It is responsibility of programmer to allocate sufficient
3041 amount of memory using information from calc_stat function.
3042
3043 \param temp_block - pointer on temporary memory block. Cannot be 0;
3044 If you want to save memory across multiple bvectors
3045 allocate temporary block using allocate_tempblock and pass it to
3046 serialize.
3047 (Serialize does not deallocate temp_block.)
3048
3049 \param serialization_flags
3050 Flags controlling serilization (bit-mask)
3051 (use OR-ed serialization flags)
3052
3053 \ingroup bvserial
3054
3055 \return Size of serialization block.
3056 \sa calc_stat, serialization_flags
3057
3058*/
3059/*!
3060 Serialization format:
3061 <pre>
3062
3063 | HEADER | BLOCKS |
3064
3065 Header structure:
3066 BYTE : Serialization header (bit mask of BM_HM_*)
3067 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
3068 INT16: Reserved (0)
3069 INT16: Reserved Flags (0)
3070
3071 </pre>
3072*/
3073template<class BV>
3074size_t serialize(const BV& bv,
3075 unsigned char* buf,
3076 bm::word_t* temp_block = 0,
3077 unsigned serialization_flags = 0)
3078{
3079 bm::serializer<BV> bv_serial(bv.get_allocator(), temp_block);
3080
3082 bv_serial.byte_order_serialization(false);
3083
3085 bv_serial.gap_length_serialization(false);
3086 else
3087 bv_serial.gap_length_serialization(true);
3088
3089 return (size_t) bv_serial.serialize(bv, buf, 0);
3090}
3091
3092/*!
3093 @brief Saves bitvector into memory.
3094 Allocates temporary memory block for bvector.
3095
3096 \param bv - source bvecor
3097 \param buf - pointer on target memory area. No range checking in the
3098 function. It is responsibility of programmer to allocate sufficient
3099 amount of memory using information from calc_stat function.
3100
3101 \param serialization_flags
3102 Flags controlling serilization (bit-mask)
3103 (use OR-ed serialization flags)
3104
3105 \ingroup bvserial
3106*/
3107template<class BV>
3108size_t serialize(BV& bv,
3109 unsigned char* buf,
3110 unsigned serialization_flags=0)
3111{
3112 return bm::serialize(bv, buf, 0, serialization_flags);
3113}
3114
3115
3116
3117/*!
3118 @brief Bitvector deserialization from a memory BLOB
3119
3120 @param bv - target bvector
3121 @param buf - pointer on memory which keeps serialized bvector
3122 @param temp_block - pointer on temporary block,
3123 if NULL bvector allocates own.
3124 @param ref_vect - [in] optional pointer to a list of
3125 reference vectors used for
3126 XOR compression.
3127
3128 @return Number of bytes consumed by deserializer.
3129
3130 Function deserializes bitvector from memory block containig results
3131 of previous serialization. Function does not remove bits
3132 which are currently set. Effectively it is OR logical operation
3133 between the target bit-vector and serialized one.
3134
3135 @sa deserialize_range
3136
3137 @ingroup bvserial
3138*/
3139template<class BV>
3140size_t deserialize(BV& bv,
3141 const unsigned char* buf,
3142 bm::word_t* temp_block = 0,
3143 const bm::bv_ref_vector<BV>* ref_vect = 0)
3144{
3145 ByteOrder bo_current = globals<true>::byte_order();
3146
3147 bm::decoder dec(buf);
3148 unsigned char header_flag = dec.get_8();
3149 ByteOrder bo = bo_current;
3150 if (!(header_flag & BM_HM_NO_BO))
3151 {
3152 bo = (bm::ByteOrder) dec.get_8();
3153 }
3154
3155 if (bo_current == bo)
3156 {
3158 deserial.set_ref_vectors(ref_vect);
3159 return deserial.deserialize(bv, buf, temp_block);
3160 }
3161 switch (bo_current)
3162 {
3163 case BigEndian:
3164 {
3166 deserial.set_ref_vectors(ref_vect);
3167 return deserial.deserialize(bv, buf, temp_block);
3168 }
3169 case LittleEndian:
3170 {
3172 deserial.set_ref_vectors(ref_vect);
3173 return deserial.deserialize(bv, buf, temp_block);
3174 }
3175 default:
3176 BM_ASSERT(0);
3177 };
3178 return 0;
3179}
3180
3181
3182/*!
3183 @brief Bitvector range deserialization from a memory BLOB
3184
3185 @param bv - target bvector
3186 @param buf - pointer on memory which keeps serialized bvector
3187 @param from - bit-vector index to deserialize from
3188 @param to - bit-vector index to deserialize to
3189 @param ref_vect - [in] optional pointer to a list of
3190 reference vectors used for
3191 XOR compression.
3192
3193 Function deserializes bitvector from memory block containig results
3194 of previous serialization. Function does not remove bits
3195 which are currently set. Effectively it is OR logical operation
3196 between the target bit-vector and serialized one.
3197
3198 @sa deserialize
3199
3200 @ingroup bvserial
3201*/
3202template<class BV>
3204 const unsigned char* buf,
3205 typename BV::size_type from,
3206 typename BV::size_type to,
3207 const bm::bv_ref_vector<BV>* ref_vect = 0)
3208{
3209 ByteOrder bo_current = globals<true>::byte_order();
3210
3211 bm::decoder dec(buf);
3212 unsigned char header_flag = dec.get_8();
3213 ByteOrder bo = bo_current;
3214 if (!(header_flag & BM_HM_NO_BO))
3215 {
3216 bo = (bm::ByteOrder) dec.get_8();
3217 }
3218
3219 if (bo_current == bo)
3220 {
3222 deserial.set_ref_vectors(ref_vect);
3223 deserial.set_range(from, to);
3224 deserial.deserialize(bv, buf);
3225 }
3226 else
3227 {
3228 switch (bo_current)
3229 {
3230 case BigEndian:
3231 {
3233 deserial.set_ref_vectors(ref_vect);
3234 deserial.set_range(from, to);
3235 deserial.deserialize(bv, buf);
3236 }
3237 break;
3238 case LittleEndian:
3239 {
3241 deserial.set_ref_vectors(ref_vect);
3242 deserial.set_range(from, to);
3243 deserial.deserialize(bv, buf);
3244 }
3245 break;;
3246 default:
3247 BM_ASSERT(0);
3248 };
3249 }
3250 bv.keep_range(from, to);
3251}
3252
3253
3254template<typename DEC, typename BLOCK_IDX>
3257 unsigned block_type,
3258 bm::gap_word_t* dst_arr)
3259{
3260 bm::gap_word_t len = 0;
3261
3262 switch (block_type)
3263 {
3264 case set_block_bit_1bit:
3265 dst_arr[0] = decoder.get_16();
3266 len = 1;
3267 break;
3268 case set_block_arrgap:
3270 len = decoder.get_16();
3271 decoder.get_16(dst_arr, len);
3272 break;
3275 {
3276 bit_in_type bin(decoder);
3277 len = (gap_word_t)bin.gamma();
3278 gap_word_t prev = 0;
3279 for (gap_word_t k = 0; k < len; ++k)
3280 {
3281 gap_word_t bit_idx = (gap_word_t)bin.gamma();
3282 if (k == 0) --bit_idx; // TODO: optimization
3283 bit_idx = (gap_word_t)(bit_idx + prev);
3284 prev = bit_idx;
3285 dst_arr[k] = bit_idx;
3286 } // for
3287 }
3288 break;
3291 {
3292 bm::gap_word_t min_v = decoder.get_16();
3293 bm::gap_word_t max_v = decoder.get_16();
3294
3295 bit_in_type bin(decoder);
3296 len = bm::gap_word_t(bin.gamma() + 4);
3297 dst_arr[0] = min_v;
3298 dst_arr[len-1] = max_v;
3299 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
3300 }
3301 break;
3304 {
3305 bm::gap_word_t min_v, max_v;
3306 len = decoder.get_16();
3307 if (len & 1)
3308 min_v = decoder.get_8();
3309 else
3310 min_v = decoder.get_16();
3311
3312 if (len & (1<<1))
3313 max_v = decoder.get_8();
3314 else
3315 max_v = decoder.get_16();
3316 max_v = bm::gap_word_t(min_v + max_v);
3317
3318 len = bm::gap_word_t(len >> 2);
3319
3320 bit_in_type bin(decoder);
3321 dst_arr[0] = min_v;
3322 dst_arr[len-1] = max_v;
3323 bin.bic_decode_u16(&dst_arr[1], len-2, min_v, max_v);
3324 }
3325 break;
3326
3327 default: // TODO: get rid of exception here to classify as "noexept"
3328 BM_ASSERT(0);
3329 #ifndef BM_NO_STL
3330 throw std::logic_error(err_msg());
3331 #else
3332 BM_THROW(BM_ERR_SERIALFORMAT);
3333 #endif
3334 }
3335 return len;
3336}
3337
3338template<typename DEC, typename BLOCK_IDX>
3339void
3341 bm::word_t* blk,
3342 unsigned block_type) BMNOEXCEPT
3343{
3344 BM_ASSERT(!BM_IS_GAP(blk));
3345 bm::gap_word_t min_v, max_v, max_delta, arr_len;
3346
3347 switch (block_type)
3348 {
3350 min_v = dec.get_16();
3351 max_v = dec.get_16();
3352 break;
3354 min_v = dec.get_8();
3355 max_delta = dec.get_8();
3356 max_v = bm::gap_word_t(65536 - max_delta);
3357 break;
3358 default:
3359 BM_ASSERT(0);
3360 return;
3361 }
3362
3363 arr_len = dec.get_16();
3364
3365 bit_in_type bin(dec);
3366
3367 if (!IS_VALID_ADDR(blk))
3368 {
3369 bin.bic_decode_u16_dry(arr_len-2, min_v, max_v);
3370 return;
3371 }
3372 bm::set_bit(blk, min_v);
3373 bm::set_bit(blk, max_v);
3374 bin.bic_decode_u16_bitset(blk, arr_len-2, min_v, max_v);
3375}
3376
3377template<typename DEC, typename BLOCK_IDX>
3379 decoder_type& dec,
3380 unsigned block_type,
3381 unsigned* dst_arr,
3382 unsigned* sb_idx)
3383{
3384 unsigned len(0), sb_flag(0);
3385
3386 bit_in_type bin(dec);
3387
3388 switch (block_type)
3389 {
3391 {
3392 sb_flag = dec.get_8();
3393
3394 if (sb_flag & bm::sblock_flag_sb32)
3395 *sb_idx = dec.get_32();
3396 else if (sb_flag & bm::sblock_flag_sb16)
3397 *sb_idx = dec.get_16();
3398 else
3399 *sb_idx = dec.get_8();
3400
3401 if (sb_flag & bm::sblock_flag_len16)
3402 len = dec.get_16();
3403 else
3404 len = dec.get_8();
3405
3406 bm::word_t min_v;
3407 if (sb_flag & bm::sblock_flag_min24)
3408 if (sb_flag & bm::sblock_flag_min16) // 24 and 16
3409 min_v = dec.get_32();
3410 else
3411 min_v = dec.get_24(); // 24 but not 16
3412 else if (sb_flag & bm::sblock_flag_min16)
3413 min_v = dec.get_16();
3414 else
3415 min_v = dec.get_8();
3416
3417 bm::word_t max_v;// = dec.get_32();
3418 if (sb_flag & bm::sblock_flag_max24)
3419 if (sb_flag & bm::sblock_flag_max16)
3420 max_v = dec.get_32(); // 24 and 16
3421 else
3422 max_v = dec.get_24(); // 24 but not 16
3423 else if (sb_flag & bm::sblock_flag_max16)
3424 max_v = dec.get_16();
3425 else
3426 max_v = dec.get_8();
3427 max_v = bm::set_sub_total_bits - max_v;
3428
3429 dst_arr[0] = min_v;
3430 dst_arr[len-1] = max_v;
3431 bin.bic_decode_u32_cm(&dst_arr[1], len-2, min_v, max_v);
3432 }
3433 break;
3434 default: // TODO: get rid of exception here to classify as "noexept"
3435 BM_ASSERT(0);
3436 #ifndef BM_NO_STL
3437 throw std::logic_error(err_msg());
3438 #else
3439 BM_THROW(BM_ERR_SERIALFORMAT);
3440 #endif
3441 } // switch
3442 return len;
3443}
3444
3445
3446template<typename DEC, typename BLOCK_IDX>
3447void
3450{
3451 // TODO: optimization
3452 bm::bit_block_set(blk, 0);
3453 this->read_bic_arr(decoder, blk, bm::set_block_arr_bienc);
3454 bm::bit_invert(blk);
3455}
3456
3457template<typename DEC, typename BLOCK_IDX>
3460{
3461 BM_ASSERT(!BM_IS_GAP(blk));
3462
3463 bm::gap_word_t head = dec.get_8();
3464 unsigned arr_len = dec.get_16();
3465 bm::gap_word_t min_v = dec.get_16();
3466
3467
3468 id_array_[0] = head;
3469 id_array_[1] = min_v;
3470 id_array_[arr_len] = 65535;
3471
3472 bit_in_type bin(dec);
3473 bin.bic_decode_u16(&id_array_[2], arr_len-2, min_v, 65535);
3474
3475 if (!IS_VALID_ADDR(blk))
3476 return;
3477 bm::gap_add_to_bitset(blk, id_array_, arr_len);
3478}
3479
3480template<typename DEC, typename BLOCK_IDX>
3482 decoder_type& dec,
3483 bm::word_t* block) BMNOEXCEPT
3484{
3485 bm::id64_t d0 = dec.get_64();
3486 while (d0)
3487 {
3488 bm::id64_t t = bm::bmi_blsi_u64(d0); // d & -d;
3489
3490 unsigned wave = bm::word_bitcount64(t - 1);
3491 unsigned off = wave * bm::set_block_digest_wave_size;
3492 unsigned j = 0;
3493 if (!IS_VALID_ADDR(block))
3494 {
3495 do
3496 {
3497 dec.get_32();
3498 dec.get_32();
3499 dec.get_32();
3500 dec.get_32();
3501 j += 4;
3502 } while (j < bm::set_block_digest_wave_size);
3503 }
3504 else
3505 {
3506 do
3507 {
3508 block[off+j+0] |= dec.get_32();
3509 block[off+j+1] |= dec.get_32();
3510 block[off+j+2] |= dec.get_32();
3511 block[off+j+3] |= dec.get_32();
3512 j += 4;
3513 } while (j < bm::set_block_digest_wave_size);
3514 }
3515
3516 d0 = bm::bmi_bslr_u64(d0); // d &= d - 1;
3517 } // while
3518}
3519
3520template<typename DEC, typename BLOCK_IDX>
3522 decoder_type& dec,
3524{
3525 //TODO: optimization if block exists and it is OR-ed read
3526 bm::bit_block_set(blk, 0);
3527
3528 unsigned char run_type = dec.get_8();
3529 for (unsigned j = 0; j < bm::set_block_size; run_type = !run_type)
3530 {
3531 unsigned run_length = dec.get_16();
3532 if (run_type)
3533 {
3534 unsigned run_end = j + run_length;
3535 BM_ASSERT(run_end <= bm::set_block_size);
3536 for (;j < run_end; ++j)
3537 {
3538 unsigned w = dec.get_32();
3539 blk[j] = w;
3540 }
3541 }
3542 else
3543 {
3544 j += run_length;
3545 }
3546 } // for j
3547}
3548
3549
3550template<typename DEC, typename BLOCK_IDX>
3551void
3553 unsigned block_type,
3554 bm::gap_word_t* dst_block,
3555 bm::gap_word_t& gap_head)
3556{
3557// typedef bit_in<DEC> bit_in_type;
3558 switch (block_type)
3559 {
3560 case set_block_gap:
3561 {
3562 unsigned len = gap_length(&gap_head);
3563 --len;
3564 *dst_block = gap_head;
3565 decoder.get_16(dst_block+1, len - 1);
3566 dst_block[len] = gap_max_bits - 1;
3567 }
3568 break;
3569 case set_block_bit_1bit:
3570 {
3571 gap_set_all(dst_block, bm::gap_max_bits, 0);
3572 gap_word_t bit_idx = decoder.get_16();
3573 gap_add_value(dst_block, bit_idx);
3574 }
3575 break;
3576 case set_block_arrgap:
3578 {
3579 gap_set_all(dst_block, bm::gap_max_bits, 0);
3580 gap_word_t len = decoder.get_16();
3581 for (gap_word_t k = 0; k < len; ++k)
3582 {
3583 gap_word_t bit_idx = decoder.get_16();
3584 bm::gap_add_value(dst_block, bit_idx);
3585 } // for
3586 }
3587 break;
3594 {
3595 unsigned arr_len = read_id_list(decoder, block_type, id_array_);
3596 dst_block[0] = 0;
3597 unsigned gap_len =
3598 gap_set_array(dst_block, id_array_, arr_len);
3599 BM_ASSERT(gap_len == bm::gap_length(dst_block));
3600 (void)(gap_len);
3601 }
3602 break;
3604 {
3605 unsigned len = (gap_head >> 3);
3606 --len;
3607 // read gamma GAP block into a dest block
3608 *dst_block = gap_head;
3609 gap_word_t* gap_data_ptr = dst_block + 1;
3610
3611 bit_in_type bin(decoder);
3612 gap_word_t v = (gap_word_t)bin.gamma();
3613 gap_word_t gap_sum = *gap_data_ptr = (gap_word_t)(v - 1);
3614 for (unsigned i = 1; i < len; ++i)
3615 {
3616 v = (gap_word_t)bin.gamma();
3617 gap_sum = (gap_word_t)(gap_sum + v);
3618 *(++gap_data_ptr) = gap_sum;
3619 }
3620 dst_block[len+1] = bm::gap_max_bits - 1;
3621 }
3622 break;
3624 {
3625 unsigned len = (gap_head >> 3);
3626 *dst_block = gap_head;
3627 bm::gap_word_t min_v = decoder.get_16();
3628 dst_block[1] = min_v;
3629 bit_in_type bin(decoder);
3630 bin.bic_decode_u16(&dst_block[2], len-2, min_v, 65535);
3631 dst_block[len] = bm::gap_max_bits - 1;
3632 }
3633 break;
3635 {
3636 bm::gap_word_t min_v, max_v;
3637 unsigned len = (gap_head >> 3);
3638 bm::gap_word_t min8 = gap_head & (1 << 1);
3639 bm::gap_word_t tail8 = gap_head & (1 << 2);
3640 gap_head &= bm::gap_word_t(~(3 << 1)); // clear the flags
3641 if (min8)
3642 min_v = decoder.get_8();
3643 else
3644 min_v = decoder.get_16();
3645 if (tail8)
3646 max_v = decoder.get_8();
3647 else
3648 max_v = decoder.get_16();
3649 max_v = bm::gap_word_t(65535 - max_v); // tail correction
3650
3651 dst_block[0] = gap_head;
3652 dst_block[1] = min_v;
3653 bit_in_type bin(decoder);
3654 bin.bic_decode_u16(&dst_block[2], len-3, min_v, max_v);
3655 dst_block[len-1] = max_v;
3656 dst_block[len] = bm::gap_max_bits - 1;
3657 }
3658 break;
3659 default:
3660 BM_ASSERT(0);
3661 #ifndef BM_NO_STL
3662 throw std::logic_error(err_msg());
3663 #else
3664 BM_THROW(BM_ERR_SERIALFORMAT);
3665 #endif
3666 }
3667
3668 if (block_type == set_block_arrgap_egamma_inv ||
3669 block_type == set_block_arrgap_inv ||
3670 block_type == set_block_arrgap_bienc_inv ||
3671 block_type == set_block_arrgap_bienc_inv_v2)
3672 {
3673 gap_invert(dst_block);
3674 }
3675}
3676
3677template<typename DEC, typename BLOCK_IDX>
3681 block_idx_type nb,
3682 block_idx_type expect_nb) BMNOEXCEPT
3683{
3684 if (skip_offset_) // skip bookmark is available
3685 {
3686 const unsigned char* save_pos = decoder.get_pos(); // save position
3687
3688 if (save_pos > skip_pos_) // checkpoint passed
3689 {
3690 skip_offset_ = 0;
3691 return false;
3692 }
3693 decoder.set_pos(skip_pos_);
3694
3695 unsigned char sync_mark = decoder.get_8();
3696 block_idx_type nb_sync;
3697 switch (sync_mark)
3698 {
3699 case set_nb_sync_mark8:
3700 nb_sync = decoder.get_8();
3701 break;
3702 case set_nb_sync_mark16:
3703 nb_sync = decoder.get_16();
3704 break;
3705 case set_nb_sync_mark24:
3706 nb_sync = decoder.get_24();
3707 break;
3708 case set_nb_sync_mark32:
3709 nb_sync = decoder.get_32();
3710 break;
3711 case set_nb_sync_mark48:
3712 nb_sync = block_idx_type(decoder.get_48());
3713 #ifndef BM64ADDR
3714 BM_ASSERT(0);
3715 decoder.set_pos(save_pos);
3716 skip_offset_ = 0;
3717 return 0; // invalid bookmark from 64-bit serialization
3718 #endif
3719 break;
3720 case set_nb_sync_mark64:
3721 nb_sync = block_idx_type(decoder.get_64());
3722 #ifndef BM64ADDR
3723 BM_ASSERT(0);
3724 decoder.set_pos(save_pos);
3725 skip_offset_ = 0;
3726 return 0; // invalid bookmark from 64-bit serialization
3727 #endif
3728 break;
3729 default:
3730 BM_ASSERT(0);
3731 decoder.set_pos(save_pos);
3732 skip_offset_ = 0;
3733 return 0;
3734 } // switch
3735
3736 nb_sync += nb;
3737 if (nb_sync <= expect_nb) // within reach
3738 {
3739 skip_offset_ = 0;
3740 return nb_sync;
3741 }
3742 skip_offset_ = 0;
3743 decoder.set_pos(save_pos);
3744 }
3745 return 0;
3746}
3747
3748
3749// -------------------------------------------------------------------------
3750
3751template<class BV, class DEC>
3753: ref_vect_(0),
3754 xor_block_(0),
3755 or_block_(0),
3756 or_block_idx_(0),
3757 is_range_set_(0)
3758{
3759 temp_block_ = alloc_.alloc_bit_block();
3760
3762 this->id_array_ = bit_idx_arr_.data();
3763
3765 this->sb_id_array_ = sb_bit_idx_arr_.data();
3766
3768
3769 alloc_.set_pool(&pool_);
3770}
3771
3772template<class BV, class DEC>
3774{
3775 alloc_.free_bit_block(temp_block_);
3776 if (xor_block_)
3777 alloc_.free_bit_block(xor_block_, 2);
3778 BM_ASSERT(!or_block_);
3779}
3780
3781template<class BV, class DEC>
3783{
3784 ref_vect_ = ref_vect;
3785 if (ref_vect_ && !xor_block_)
3786 xor_block_ = alloc_.alloc_bit_block(2);
3787}
3788
3789template<class BV, class DEC>
3790void
3793 block_idx_type nb,
3794 bm::word_t* blk)
3795{
3796 gap_word_t gap_head;
3797 bm::gap_word_t* gap_temp_block = gap_temp_block_.data();
3798
3799 bool inv_flag = false;
3800 unsigned i0, j0;
3801 bm::get_block_coord(nb, i0, j0);
3802 bman.reserve_top_blocks(i0+1);
3803 bman.check_alloc_top_subblock(i0);
3804
3805 switch (btype)
3806 {
3807 case set_block_gap:
3809 case set_block_gapbit:
3810 {
3811 gap_head = (gap_word_t)
3812 (sizeof(gap_word_t) == 2 ? dec.get_16() : dec.get_32());
3813
3814 unsigned len = gap_length(&gap_head);
3815 int level = gap_calc_level(len, bman.glen());
3816 --len;
3817 if (level == -1) // Too big to be GAP: convert to BIT block
3818 {
3819 *gap_temp_block = gap_head;
3820 dec.get_16(gap_temp_block+1, len - 1);
3821 gap_temp_block[len] = gap_max_bits - 1;
3822
3823 if (blk == 0) // block does not exist yet
3824 {
3825 blk = bman.get_allocator().alloc_bit_block();
3826 bman.set_block(nb, blk);
3827 gap_convert_to_bitset(blk, gap_temp_block);
3828 }
3829 else // We have some data already here. Apply OR operation.
3830 {
3831 gap_convert_to_bitset(temp_block_,
3832 gap_temp_block);
3833 bv.combine_operation_block_or(i0, j0, blk, temp_block_);
3834 }
3835 return;
3836 } // level == -1
3837
3838 set_gap_level(&gap_head, level);
3839
3840 if (blk == 0)
3841 {
3842 BM_ASSERT(level >= 0);
3843 gap_word_t* gap_blk =
3844 bman.get_allocator().alloc_gap_block(unsigned(level), bman.glen());
3845 gap_word_t* gap_blk_ptr = BMGAP_PTR(gap_blk);
3846 *gap_blk_ptr = gap_head;
3847 bm::set_gap_level(gap_blk_ptr, level);
3848 blk = bman.set_block(nb, (bm::word_t*)BMPTR_SETBIT0(gap_blk));
3849 BM_ASSERT(blk == 0);
3850
3851 if (len > 1)
3852 dec.get_16(gap_blk + 1, len - 1);
3853 gap_blk[len] = bm::gap_max_bits - 1;
3854 }
3855 else // target block exists
3856 {
3857 // read GAP block into a temp memory and perform OR
3858 *gap_temp_block = gap_head;
3859 dec.get_16(gap_temp_block + 1, len - 1);
3860 gap_temp_block[len] = bm::gap_max_bits - 1;
3861 break;
3862 }
3863 return;
3864 }
3866 inv_flag = true;
3868 case set_block_arrgap:
3875 {
3876 unsigned arr_len = this->read_id_list(dec, btype, this->id_array_);
3877 gap_temp_block[0] = 0; // reset unused bits in gap header
3878 unsigned gap_len =
3879 bm::gap_set_array(gap_temp_block, this->id_array_, arr_len);
3880 if (inv_flag)
3881 bm::gap_invert(gap_temp_block);
3882
3883 BM_ASSERT(gap_len == bm::gap_length(gap_temp_block));
3884 int level = bm::gap_calc_level(gap_len, bman.glen());
3885 if (level == -1) // Too big to be GAP: convert to BIT block
3886 {
3887 bm::gap_convert_to_bitset(temp_block_, gap_temp_block);
3888 bv.combine_operation_block_or(i0, j0, blk, temp_block_);
3889 return;
3890 }
3891 break;
3892 }
3894 gap_head = dec.get_16();
3901 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3902 break;
3906 gap_head = dec.get_16();
3907 this->read_gap_block(dec, btype, gap_temp_block, gap_head);
3908 break;
3909 default:
3910 BM_ASSERT(0);
3911 #ifndef BM_NO_STL
3912 throw std::logic_error(this->err_msg());
3913 #else
3914 BM_THROW(BM_ERR_SERIALFORMAT);
3915 #endif
3916 }
3917
3918 if (x_ref_d64_) // this is a delayed bit-XOR to be played here
3919 { // deoptimize the operation it will turn to bit block anyways
3920 if (!blk)
3921 {
3922 blk = bman.get_allocator().alloc_bit_block();
3923 bm::gap_convert_to_bitset(blk, gap_temp_block);
3924 bman.set_block_ptr(i0, j0, blk);
3925 }
3926 else
3927 {
3928 bm::gap_convert_to_bitset(temp_block_, gap_temp_block);
3929 bv.combine_operation_block_or(i0, j0, blk, temp_block_);
3930 }
3931 }
3932 else
3933 {
3934 bm::word_t* tmp_blk = (bm::word_t*)gap_temp_block;
3935 BMSET_PTRGAP(tmp_blk);
3936 bv.combine_operation_block_or(i0, j0, blk, tmp_blk);
3937 }
3938}
3939
3940template<class BV, class DEC>
3942 decoder_type& dec,
3943 blocks_manager_type& bman,
3944 block_idx_type nb,
3945 bm::word_t* blk)
3946{
3947 if (!blk)
3948 {
3949 blk = bman.get_allocator().alloc_bit_block();
3950 bman.set_block(nb, blk);
3951 bm::bit_block_set(blk, 0);
3952 }
3953 else
3954 if (BM_IS_GAP(blk))
3955 blk = bman.deoptimize_block(nb);
3956
3957 BM_ASSERT(blk != temp_block_);
3958
3959 switch (btype)
3960 {
3962 if (IS_FULL_BLOCK(blk))
3963 blk = bman.deoptimize_block(nb);
3964 bm::bit_block_set(temp_block_, ~0u);
3965 {
3966 gap_word_t len = dec.get_16();
3967 for (unsigned k = 0; k < len; ++k)
3968 {
3969 gap_word_t bit_idx = dec.get_16();
3970 bm::clear_bit(temp_block_, bit_idx);
3971 } // for
3972 }
3973 bm::bit_block_or(blk, temp_block_);
3974 break;
3977 this->read_bic_arr(dec, blk, btype);
3978 break;
3980 BM_ASSERT(blk != temp_block_);
3981 if (IS_FULL_BLOCK(blk))
3982 blk = bman.deoptimize_block(nb);
3983 // TODO: optimization
3984 bm::bit_block_set(temp_block_, 0);
3985 this->read_bic_arr(dec, temp_block_, bm::set_block_arr_bienc);
3986 bm::bit_invert(temp_block_);
3987 bm::bit_block_or(blk, temp_block_);
3988 break;
3990 this->read_bic_gap(dec, blk);
3991 break;
3993 this->read_digest0_block(dec, blk);
3994 break;
3995 default:
3996 BM_ASSERT(0);
3997 #ifndef BM_NO_STL
3998 throw std::logic_error(this->err_msg());
3999 #else
4000 BM_THROW(BM_ERR_SERIALFORMAT);
4001 #endif
4002 } // switch
4003}
4004
4005template<class BV, class DEC>
4007 decoder_type& dec,
4008 bvector_type& bv)
4009{
4010 unsigned sb;
4011 unsigned* arr = this->sb_id_array_;
4012 unsigned len = this->read_bic_sb_arr(dec, btype, arr, &sb);
4013 const typename BV::size_type sb_max_bc = bm::set_sub_array_size * bm::gap_max_bits;
4014 typename BV::size_type from = sb * sb_max_bc;
4015
4016 if (is_range_set_)
4017 {
4018 for (typename BV::size_type i = 0; i < len; ++i)
4019 {
4020 typename BV::size_type idx = from + arr[i];
4021 if (idx > idx_to_)
4022 break;
4023 if (idx < idx_from_)
4024 continue;
4025 bv.set_bit_no_check(idx);
4026 } // for
4027 }
4028 else // range restriction is not set
4029 {
4030 for (typename BV::size_type i = 0; i < len; ++i)
4031 {
4032 typename BV::size_type idx = from + arr[i];
4033 bv.set_bit_no_check(idx);
4034 } // for
4035 }
4036}
4037
4038
4039template<class BV, class DEC>
4041 bvector_type& bv,
4042 block_idx_type nb,
4043 bm::word_t* blk)
4044{
4045 if (!blk)
4046 {
4047 blocks_manager_type& bman = bv.get_blocks_manager();
4048 blk = bman.get_allocator().alloc_bit_block();
4049 bman.set_block(nb, blk);
4050 dec.get_32(blk, bm::set_block_size);
4051 }
4052 else
4053 {
4054 dec.get_32(temp_block_, bm::set_block_size);
4055 bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
4056 }
4057}
4058
4059template<class BV, class DEC>
4061 bvector_type& bv,
4062 block_idx_type nb,
4063 bm::word_t* blk)
4064{
4065 unsigned head_idx = dec.get_16();
4066 unsigned tail_idx = dec.get_16();
4067 if (!blk)
4068 {
4069 blocks_manager_type& bman = bv.get_blocks_manager();
4070 blk = bman.get_allocator().alloc_bit_block();
4071 bman.set_block(nb, blk);
4072 for (unsigned k = 0; k < head_idx; ++k)
4073 blk[k] = 0;
4074 dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
4075 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
4076 blk[j] = 0;
4077 }
4078 else
4079 {
4080 bm::bit_block_set(temp_block_, 0);
4081 dec.get_32(temp_block_ + head_idx, tail_idx - head_idx + 1);
4082 bv.combine_operation_with_block(nb, temp_block_, 0, BM_OR);
4083 }
4084}
4085
4086template<class BV, class DEC>
4088 bvector_type& bv,
4089 block_idx_type nb,
4090 bm::word_t* blk)
4091{
4092 blocks_manager_type& bman = bv.get_blocks_manager();
4093 bm::gap_word_t len = dec.get_16();
4094 if (BM_IS_GAP(blk))
4095 {
4096 blk = bman.deoptimize_block(nb);
4097 }
4098 else
4099 {
4100 if (!blk) // block does not exists yet
4101 {
4102 blk = bman.get_allocator().alloc_bit_block();
4103 bm::bit_block_set(blk, 0);
4104 bman.set_block(nb, blk);
4105 }
4106 else
4107 if (IS_FULL_BLOCK(blk)) // nothing to do
4108 {
4109 for (unsigned k = 0; k < len; ++k) // dry read
4110 dec.get_16();
4111 return;
4112 }
4113 }
4114 // Get the array one by one and set the bits.
4115 for (unsigned k = 0; k < len; ++k)
4116 {
4117 gap_word_t bit_idx = dec.get_16();
4118 bm::set_bit(blk, bit_idx);
4119 }
4120}
4121
4122
4123
4124template<class BV, class DEC>
4126 const unsigned char* buf,
4127 bm::word_t* /*temp_block*/)
4128{
4129 blocks_manager_type& bman = bv.get_blocks_manager();
4130 if (!bman.is_init())
4131 bman.init_tree();
4132
4133 bm::word_t* temp_block = temp_block_;
4134
4135 bm::strategy strat = bv.get_new_blocks_strat();
4136 bv.set_new_blocks_strat(BM_GAP);
4137 typename bvector_type::mem_pool_guard mp_guard_bv;
4138 mp_guard_bv.assign_if_not_set(pool_, bv);
4139
4140
4141 decoder_type dec(buf);
4142
4143 // Reading th serialization header
4144 //
4145 unsigned char header_flag = dec.get_8();
4146 if (!(header_flag & BM_HM_NO_BO))
4147 {
4148 /*ByteOrder bo = (bm::ByteOrder)*/dec.get_8();
4149 }
4150 if (header_flag & BM_HM_64_BIT)
4151 {
4152 #ifndef BM64ADDR
4153 BM_ASSERT(0); // 64-bit address vector cannot read on 32
4154 #ifndef BM_NO_STL
4155 throw std::logic_error(this->err_msg());
4156 #else
4157 BM_THROW(BM_ERR_SERIALFORMAT);
4158 #endif
4159 #endif
4160 }
4161
4162 if (header_flag & BM_HM_ID_LIST)
4163 {
4164 // special case: the next comes plain list of integers
4165 if (header_flag & BM_HM_RESIZE)
4166 {
4167 block_idx_type bv_size;
4168 if (header_flag & BM_HM_64_BIT)
4169 {
4170 BM_ASSERT(sizeof(block_idx_type)==8);
4171 bv_size = (block_idx_type)dec.get_64();
4172 }
4173 else
4174 bv_size = dec.get_32();
4175 if (bv_size > bv.size())
4176 bv.resize(bv_size);
4177 }
4178 for (unsigned cnt = dec.get_32(); cnt; --cnt)
4179 {
4180 bm::id_t idx = dec.get_32();
4181 bv.set(idx);
4182 } // for
4183 // -1 for compatibility with other deserialization branches
4184 return dec.size()-1;
4185 }
4186
4187 if (!(header_flag & BM_HM_NO_GAPL))
4188 {
4189 for (unsigned k = 0; k < bm::gap_levels; ++k) // read GAP levels
4190 {
4191 /*glevels[i] =*/ dec.get_16();
4192 }
4193 }
4194 if (header_flag & BM_HM_RESIZE)
4195 {
4196 block_idx_type bv_size;
4197 if (header_flag & BM_HM_64_BIT)
4198 {
4199 // 64-bit vector cannot be deserialized into 32-bit
4200 BM_ASSERT(sizeof(block_idx_type)==8);
4201 bv_size = (block_idx_type)dec.get_64();
4202 #ifndef BM64ADDR
4203 #ifndef BM_NO_STL
4204 throw std::logic_error(this->err_msg());
4205 #else
4206 BM_THROW(BM_ERR_SERIALFORMAT);
4207 #endif
4208 #endif
4209 }
4210 else
4211 bv_size = dec.get_32();
4212 if (bv_size > bv.size())
4213 bv.resize(bv_size);
4214 }
4215
4216 if (header_flag & BM_HM_HXOR) // XOR compression mode
4217 {
4218 if (!xor_block_)
4219 xor_block_ = alloc_.alloc_bit_block();
4220 }
4221
4222 bv.set_new_blocks_strat(bm::BM_GAP); // minimize deserialization footprint
4223
4224 size_type full_blocks = 0;
4225
4226 // reference XOR compression FSM fields
4227 //
4228 xor_reset();
4229
4230 unsigned row_idx(0);
4231
4232 block_idx_type nb_sync;
4233
4234 unsigned char btype;
4235 block_idx_type nb;
4236 unsigned i0, j0;
4237
4238 block_idx_type nb_i = 0;
4239 do
4240 {
4241 if (is_range_set_)
4242 {
4243 block_idx_type nb_to = (idx_to_ >> bm::set_block_shift);
4244 if (nb_i > nb_to)
4245 break; // early exit (out of target range)
4246 }
4247
4248 btype = dec.get_8();
4249 if (btype & (1 << 7)) // check if short zero-run packed here
4250 {
4251 nb = btype & ~(1 << 7);
4252 BM_ASSERT(nb);
4253 nb_i += nb;
4254 continue;
4255 }
4256 bm::get_block_coord(nb_i, i0, j0);
4257 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4258
4259 switch (btype)
4260 {
4261 case set_block_azero:
4262 case set_block_end:
4263 nb_i = bm::set_total_blocks;
4264 break;
4265 case set_block_1zero:
4266 break;
4267 case set_block_8zero:
4268 nb = dec.get_8();
4269 BM_ASSERT(nb);
4270 nb_i += nb;
4271 continue; // bypass ++nb_i;
4272 case set_block_16zero:
4273 nb = dec.get_16();
4274 BM_ASSERT(nb);
4275 nb_i += nb;
4276 continue; // bypass ++nb_i;
4277 case set_block_32zero:
4278 nb = dec.get_32();
4279 BM_ASSERT(nb);
4280 nb_i += nb;
4281 continue; // bypass ++nb_i;
4282 case set_block_64zero:
4283 #ifdef BM64ADDR
4284 nb = dec.get_64();
4285 BM_ASSERT(nb);
4286 nb_i += nb;
4287 continue; // bypass ++nb_i;
4288 #else
4289 BM_ASSERT(0); // attempt to read 64-bit vector in 32-bit mode
4290 #ifndef BM_NO_STL
4291 throw std::logic_error(this->err_msg());
4292 #else
4293 BM_THROW(BM_ERR_SERIALFORMAT);
4294 #endif
4295 #endif
4296 break;
4297 case set_block_aone:
4298 bman.set_all_set(nb_i, bm::set_total_blocks-1);
4299 nb_i = bm::set_total_blocks;
4300 break;
4301 case set_block_1one:
4302 bman.set_block_all_set(nb_i);
4303 break;
4304 case set_block_8one:
4305 full_blocks = dec.get_8();
4306 goto process_full_blocks;
4307 break;
4308 case set_block_16one:
4309 full_blocks = dec.get_16();
4310 goto process_full_blocks;
4311 break;
4312 case set_block_32one:
4313 full_blocks = dec.get_32();
4314 goto process_full_blocks;
4315 break;
4316 case set_block_64one:
4317 #ifdef BM64ADDR
4318 full_blocks = dec.get_64();
4319 goto process_full_blocks;
4320 #else
4321 BM_ASSERT(0); // 32-bit vector cannot read 64-bit
4322 dec.get_64();
4323 #ifndef BM_NO_STL
4324 throw std::logic_error(this->err_msg());
4325 #else
4326 BM_THROW(BM_ERR_SERIALFORMAT);
4327 #endif
4328 #endif
4329 process_full_blocks:
4330 {
4331 BM_ASSERT(full_blocks);
4332 size_type from = nb_i * bm::gap_max_bits;
4333 size_type to = from + full_blocks * bm::gap_max_bits;
4334 bv.set_range(from, to-1);
4335 nb_i += full_blocks-1;
4336 }
4337 break;
4338 case set_block_bit:
4339 decode_block_bit(dec, bv, nb_i, blk);
4340 break;
4341 case set_block_bit_1bit:
4342 {
4343 size_type bit_idx = dec.get_16();
4344 bit_idx += nb_i * bm::bits_in_block;
4345 bv.set_bit_no_check(bit_idx);
4346 break;
4347 }
4349 {
4350 //TODO: optimization if block exists
4351 this->read_0runs_block(dec, temp_block);
4352 bv.combine_operation_with_block(nb_i, temp_block, 0, BM_OR);
4353 break;
4354 }
4356 decode_block_bit_interval(dec, bv, nb_i, blk);
4357 break;
4358 case set_block_gap:
4359 case set_block_gapbit:
4360 case set_block_arrgap:
4371 deserialize_gap(btype, dec, bv, bman, nb_i, blk);
4372 break;
4373 case set_block_arrbit:
4374 decode_arrbit(dec, bv, nb_i, blk);
4375 break;
4381 decode_bit_block(btype, dec, bman, nb_i, blk);
4382 break;
4384 decode_bit_block(btype, dec, bman, nb_i, blk);
4385 break;
4386
4387 // --------------------------------------- super-block encodings
4389 decode_arr_sblock(btype, dec, bv);
4390 nb_i += (bm::set_sub_array_size - j0);
4391 continue; // bypass ++i;
4392
4393 // --------------------------------------- bookmarks and skip jumps
4394 //
4395 case set_nb_bookmark32:
4396 this->bookmark_idx_ = nb_i;
4397 this->skip_offset_ = dec.get_32();
4398 goto process_bookmark;
4399 case set_nb_bookmark24:
4400 this->bookmark_idx_ = nb_i;
4401 this->skip_offset_ = dec.get_24();
4402 goto process_bookmark;
4403 case set_nb_bookmark16:
4404 this->bookmark_idx_ = nb_i;
4405 this->skip_offset_ = dec.get_16();
4406 process_bookmark:
4407 if (is_range_set_)
4408 {
4409 this->skip_pos_ = dec.get_pos() + this->skip_offset_;
4410 block_idx_type nb_from = (idx_from_ >> bm::set_block_shift);
4411 nb_from = this->try_skip(dec, nb_i, nb_from);
4412 if (nb_from)
4413 nb_i = nb_from;
4414 }
4415 continue; // bypass ++i;
4416
4417 case set_nb_sync_mark8:
4418 nb_sync = dec.get_8();
4419 goto process_nb_sync;
4420 case set_nb_sync_mark16:
4421 nb_sync = dec.get_16();
4422 goto process_nb_sync;
4423 case set_nb_sync_mark24:
4424 nb_sync = dec.get_24();
4425 goto process_nb_sync;
4426 case set_nb_sync_mark32:
4427 nb_sync = dec.get_32();
4428 goto process_nb_sync;
4429 case set_nb_sync_mark48:
4430 nb_sync = block_idx_type(dec.get_48());
4431 goto process_nb_sync;
4432 case set_nb_sync_mark64:
4433 nb_sync = block_idx_type(dec.get_64());
4434 process_nb_sync:
4435 BM_ASSERT(nb_i == this->bookmark_idx_ + nb_sync);
4436 if (nb_i != this->bookmark_idx_ + nb_sync)
4437 {
4438 #ifndef BM_NO_STL
4439 throw std::logic_error(this->err_msg());
4440 #else
4441 BM_THROW(BM_ERR_SERIALFORMAT);
4442 #endif
4443 }
4444
4445 continue; // bypass ++i;
4446
4447 // ------------------------------------ XOR compression markers
4449 {
4450 BM_ASSERT(ref_vect_); // reference vector has to be attached
4451 if (x_ref_d64_) // previous delayed XOR post proc.
4452 xor_decode(bman);
4453
4454 row_idx = dec.get_32();
4455 size_type idx = ref_vect_->find(row_idx);
4456 if (idx == ref_vect_->not_found())
4457 {
4458 BM_ASSERT(0);
4459 goto throw_err;
4460 }
4461 const bvector_type* ref_bv = ref_vect_->get_bv(idx);
4462 BM_ASSERT(ref_bv); // some incorrect work with the ref.vector
4463 BM_ASSERT(ref_bv != &bv);
4464 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
4465 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
4466 if (ref_blk)
4467 bv.combine_operation_with_block(nb_i, ref_blk,
4468 BM_IS_GAP(ref_blk), BM_OR);
4469 break;
4470 }
4473 if (x_ref_d64_) // previous delayed XOR post proc.
4474 xor_decode(bman);
4475 row_idx = dec.get_8();
4476 goto process_xor;
4479 if (x_ref_d64_) // previous delayed XOR post proc.
4480 xor_decode(bman);
4481 row_idx = dec.get_16();
4482 goto process_xor;
4485 {
4486 if (x_ref_d64_) // previous delayed XOR post proc.
4487 xor_decode(bman);
4488 row_idx = dec.get_32();
4489 process_xor:
4490 if (btype <= bm::set_block_xor_ref32)
4491 x_ref_d64_ = dec.get_64();
4492 else
4493 x_ref_d64_ = ~0ull;
4494 process_xor_ref:
4495 BM_ASSERT(ref_vect_); // reference vector has to be attached
4496 x_ref_idx_ = ref_vect_->find(row_idx);
4497 x_nb_ = nb_i;
4498 if (x_ref_idx_ == ref_vect_->not_found())
4499 {
4500 BM_ASSERT(0); // XOR de-filter setup issue cannot find ref vector
4501 goto throw_err;
4502 }
4503 if (blk)
4504 {
4505 BM_ASSERT(!or_block_);
4506 or_block_ = bman.deoptimize_block(nb_i);
4507 bman.set_block_ptr(nb_i, 0); // borrow OR target before XOR
4508 or_block_idx_ = nb_i;
4509 }
4510 }
4511 continue; // important! cont to avoid inc(i)
4513 if (x_ref_d64_) // previous delayed XOR post proc.
4514 xor_decode(bman);
4515 row_idx = dec.get_8();
4516 x_ref_d64_ = ~0ULL;
4517 goto process_xor_ref;
4519 if (x_ref_d64_) // previous delayed XOR post proc.
4520 xor_decode(bman);
4521 row_idx = dec.get_16();
4522 x_ref_d64_ = ~0ULL;
4523 goto process_xor_ref;
4525 if (x_ref_d64_) // previous delayed XOR post proc.
4526 xor_decode(bman);
4527 row_idx = dec.get_32();
4528 x_ref_d64_ = ~0ULL;
4529 goto process_xor_ref;
4531 if (x_ref_d64_) // previous delayed XOR post proc.
4532 xor_decode(bman);
4533 {
4534 unsigned char vbr_flag = dec.get_8(); // reserved
4535 switch (vbr_flag)
4536 {
4537 case 1: row_idx = dec.get_8(); break;
4538 case 2: row_idx = dec.get_16(); break;
4539 case 0: row_idx = dec.get_32(); break;
4540 default: BM_ASSERT(0); break;
4541 } // switch
4542 bm::id64_t acc64 = x_ref_d64_ = dec.get_h64();
4543 BM_ASSERT(!xor_chain_size_);
4544 xor_chain_size_ = dec.get_8();
4545 BM_ASSERT(xor_chain_size_);
4546 for (unsigned ci = 0; ci < xor_chain_size_; ++ci)
4547 {
4548 switch (vbr_flag)
4549 {
4550 case 1: xor_chain_[ci].ref_idx = dec.get_8(); break;
4551 case 2: xor_chain_[ci].ref_idx = dec.get_16(); break;
4552 case 0: xor_chain_[ci].ref_idx = dec.get_32(); break;
4553 default: BM_ASSERT(0); break;
4554 } // switch
4555 xor_chain_[ci].xor_d64 = dec.get_h64();
4556
4557 BM_ASSERT((xor_chain_[ci].xor_d64 & acc64) == 0);
4558 acc64 |= xor_chain_[ci].xor_d64; // control
4559 } // for
4560 }
4561 goto process_xor_ref;
4562
4563 default:
4564 BM_ASSERT(0); // unknown block type
4565 throw_err:
4566 #ifndef BM_NO_STL
4567 throw std::logic_error(this->err_msg());
4568 #else
4569 BM_THROW(BM_ERR_SERIALFORMAT);
4570 #endif
4571 } // switch
4572
4573 ++nb_i;
4574 } while (nb_i < bm::set_total_blocks);
4575
4576 // process the last delayed XOR ref here
4577 //
4578 if (x_ref_d64_) // XOR post proc.
4579 xor_decode(bman);
4580
4581 bv.set_new_blocks_strat(strat);
4582
4583 return dec.size();
4584}
4585
4586// ---------------------------------------------------------------------------
4587
4588template<class BV, class DEC>
4590{
4591 BM_ASSERT(xor_chain_size_);
4592
4593 unsigned i0, j0;
4594 bm::get_block_coord(x_nb_, i0, j0);
4595 for (unsigned ci = 0; ci < xor_chain_size_; ++ci)
4596 {
4597 unsigned ref_idx = (unsigned)ref_vect_->find(xor_chain_[ci].ref_idx);
4598 const bvector_type* BMRESTRICT ref_bv = ref_vect_->get_bv(ref_idx);
4599 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
4600 BM_ASSERT(ref_bv);
4601
4602 const bm::word_t* BMRESTRICT ref_blk = ref_bman.get_block_ptr(i0, j0);
4603 if (BM_IS_GAP(ref_blk))
4604 {
4605 bm::gap_word_t* BMRESTRICT gap_block = BMGAP_PTR(ref_blk);
4606 bm::gap_convert_to_bitset(xor_block_, gap_block);
4607 ref_blk = xor_block_;
4608 }
4609 else
4610 if (IS_FULL_BLOCK(ref_blk))
4611 ref_blk = FULL_BLOCK_REAL_ADDR;
4612 if (ref_blk)
4613 bm::bit_block_xor(blk, ref_blk, xor_chain_[ci].xor_d64);
4614 } // for ci
4615}
4616
4617// ---------------------------------------------------------------------------
4618
4619template<class BV, class DEC>
4621{
4622 BM_ASSERT(ref_vect_);
4623
4624 unsigned i0, j0;
4625 const bm::word_t* ref_blk;
4626
4627 {
4628 const bvector_type* ref_bv = ref_vect_->get_bv(x_ref_idx_);
4629 const blocks_manager_type& ref_bman = ref_bv->get_blocks_manager();
4630 BM_ASSERT(ref_bv);
4631 BM_ASSERT(&ref_bman != &bman); // some incorrect work with the ref.vect
4632 bm::get_block_coord(x_nb_, i0, j0);
4633 ref_blk = ref_bman.get_block_ptr(i0, j0);
4634 }
4635 BM_ASSERT(!or_block_ || or_block_idx_ == x_nb_);
4636
4637 if (!ref_blk)
4638 {
4639 if (or_block_)
4640 {
4641 bm::word_t* blk = bman.deoptimize_block(i0, j0, true); // true to alloc
4642 if (blk)
4643 {
4644 bm::bit_block_or(blk, or_block_);
4645 alloc_.free_bit_block(or_block_);
4646 }
4647 else
4648 {
4649 bman.set_block_ptr(x_nb_, or_block_); // return OR block
4650 }
4651 or_block_ = 0; or_block_idx_ = 0;
4652 }
4653
4654 if (xor_chain_size_)
4655 {
4656 bm::word_t* blk = bman.deoptimize_block(i0, j0, true);
4657 xor_decode_chain(blk);
4658 }
4659 xor_reset();
4660 return;
4661 }
4662 if (IS_FULL_BLOCK(ref_blk))
4663 ref_blk = FULL_BLOCK_REAL_ADDR;
4664 else
4665 {
4666 if (BM_IS_GAP(ref_blk))
4667 {
4668 bm::word_t* blk = bman.get_block_ptr(i0, j0);
4669 bm::gap_word_t* gap_block = BMGAP_PTR(ref_blk);
4670 if (BM_IS_GAP(blk) && (x_ref_d64_==~0ULL) && !or_block_) // two GAPs no FULL digest
4671 {
4672 BM_ASSERT(!xor_chain_size_); // since x_ref_d64_ == 0xFF...FF
4673 bm::gap_word_t* tmp_buf = (bm::gap_word_t*)xor_block_;
4674 const bm::gap_word_t* res;
4675 unsigned res_len;
4677 gap_block,
4678 tmp_buf,
4679 res_len);
4680 BM_ASSERT(res == tmp_buf);
4681 bman.assign_gap_check(i0, j0, res, ++res_len, blk, tmp_buf);
4682
4683 xor_reset();
4684 return;
4685 }
4686
4687 bm::gap_convert_to_bitset(xor_block_, gap_block);
4688 ref_blk = xor_block_;
4689 }
4690 else
4691 {
4692 if (IS_FULL_BLOCK(ref_blk))
4693 ref_blk = FULL_BLOCK_REAL_ADDR;
4694 }
4695 }
4696 bm::word_t* blk = bman.deoptimize_block(i0, j0, true); // true to alloc
4697
4698 if (x_ref_d64_)
4699 {
4700 bm::bit_block_xor(blk, blk, ref_blk, x_ref_d64_);
4701 if (xor_chain_size_)
4702 xor_decode_chain(blk);
4703 }
4704 else
4705 {
4706 BM_ASSERT(xor_chain_size_ == 0);
4707 bm::bit_block_xor(blk, ref_blk);
4708 }
4709 xor_reset();
4710
4711 if (or_block_)
4712 {
4713 bm::bit_block_or(blk, or_block_);
4714 alloc_.free_bit_block(or_block_);
4715 or_block_ = 0;
4716 or_block_idx_ = 0;
4717 }
4718
4719 // target BLOCK post-processing
4720 //
4721 // check if we range setting overrides the need for block optimization
4722 //
4723 if (is_range_set_)
4724 {
4725 block_idx_type nb_from = (idx_from_ >> bm::set_block_shift);
4726 block_idx_type nb_to = (idx_to_ >> bm::set_block_shift);
4727 if (nb_from == x_nb_ || nb_to == x_nb_)
4728 return;
4729 }
4730 bman.optimize_bit_block(i0, j0, BV::opt_compress);
4731
4732}
4733// ---------------------------------------------------------------------------
4734
4735template<class BV, class DEC>
4737{
4738 x_ref_idx_ = 0; x_ref_d64_ = 0; xor_chain_size_ = 0;
4739 x_nb_ = 0;
4740}
4741
4742// ---------------------------------------------------------------------------
4743
4744template<typename DEC, typename BLOCK_IDX>
4746 : header_flag_(0),
4747 decoder_(buf),
4748 end_of_stream_(false),
4749 bv_size_(0),
4750 state_(e_unknown),
4751 id_cnt_(0),
4752 block_idx_(0),
4753 mono_block_cnt_(0),
4754 block_idx_arr_(0)
4755{
4756 ::memset(bit_func_table_, 0, sizeof(bit_func_table_));
4757
4760
4785
4786
4787 // reading stream header
4788 header_flag_ = decoder_.get_8();
4789 if (!(header_flag_ & BM_HM_NO_BO))
4790 {
4791 /*ByteOrder bo = (bm::ByteOrder)*/decoder_.get_8();
4792 }
4793
4794 // check if bitvector comes as an inverted, sorted list of ints
4795 //
4797 {
4798 // special case: the next comes plain list of unsigned integers
4800 {
4802 {
4803 BM_ASSERT(sizeof(block_idx_type)==8);
4804 bv_size_ = (block_idx_type)decoder_.get_64();
4805 }
4806 else
4807 bv_size_ = decoder_.get_32();
4808 }
4809
4811 id_cnt_ = decoder_.get_32();
4812 next(); // read first id
4813 }
4814 else
4815 {
4816 if (!(header_flag_ & BM_HM_NO_GAPL))
4817 {
4818 for (unsigned i = 0; i < bm::gap_levels; ++i) // load the GAP levels
4819 glevels_[i] = decoder_.get_16();
4820 }
4822 {
4824 {
4825 BM_ASSERT(sizeof(block_idx_type)==8);
4826 bv_size_ = (block_idx_type)decoder_.get_64();
4827 }
4828 else
4829 bv_size_ = decoder_.get_32();
4830 }
4831 state_ = e_blocks;
4832 }
4834 if (!block_idx_arr_)
4835 {
4836 #ifndef BM_NO_STL
4837 throw std::bad_alloc();
4838 #else
4839 BM_THROW(BM_ERR_BADALLOC);
4840 #endif
4841 }
4842
4843 this->id_array_ = block_idx_arr_;
4844}
4845
4846template<typename DEC, typename BLOCK_IDX>
4848{
4849 if (block_idx_arr_)
4850 ::free(block_idx_arr_);
4851}
4852
4853
4854template<typename DEC, typename BLOCK_IDX>
4856{
4857 if (is_eof())
4858 {
4859 ++block_idx_;
4860 return;
4861 }
4862 block_idx_type nb_sync;
4863
4864 switch (state_)
4865 {
4866 case e_list_ids:
4867 // read inverted ids one by one
4868 if (id_cnt_ == 0)
4869 {
4870 end_of_stream_ = true;
4871 state_ = e_unknown;
4872 break;
4873 }
4874 last_id_ = decoder_.get_32();
4875 --id_cnt_;
4876 break;
4877
4878 case e_blocks:
4879 if (block_idx_ == bm::set_total_blocks)
4880 {
4881 end_of_stream_ = true;
4882 state_ = e_unknown;
4883 break;
4884 }
4885
4886 block_type_ = decoder_.get_8();
4887
4888 // pre-check for 7-bit zero block
4889 //
4890 if (block_type_ & (1u << 7u))
4891 {
4892 mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
4893 state_ = e_zero_blocks;
4894 break;
4895 }
4896
4897 switch (block_type_)
4898 {
4899 case set_block_azero:
4900 case set_block_end:
4901 end_of_stream_ = true; state_ = e_unknown;
4902 break;
4903 case set_block_1zero:
4904 state_ = e_zero_blocks;
4905 mono_block_cnt_ = 0;
4906 break;
4907 case set_block_8zero:
4908 state_ = e_zero_blocks;
4909 mono_block_cnt_ = decoder_.get_8()-1;
4910 break;
4911 case set_block_16zero:
4912 state_ = e_zero_blocks;
4913 mono_block_cnt_ = decoder_.get_16()-1;
4914 break;
4915 case set_block_32zero:
4916 state_ = e_zero_blocks;
4917 mono_block_cnt_ = decoder_.get_32()-1;
4918 break;
4919 case set_block_aone:
4920 state_ = e_one_blocks;
4921 mono_block_cnt_ = bm::set_total_blocks - block_idx_;
4922 break;
4923 case set_block_1one:
4924 state_ = e_one_blocks;
4925 mono_block_cnt_ = 0;
4926 break;
4927 case set_block_8one:
4928 state_ = e_one_blocks;
4929 mono_block_cnt_ = decoder_.get_8()-1;
4930 break;
4931 case set_block_16one:
4932 state_ = e_one_blocks;
4933 mono_block_cnt_ = decoder_.get_16()-1;
4934 break;
4935 case set_block_32one:
4936 state_ = e_one_blocks;
4937 mono_block_cnt_ = decoder_.get_32()-1;
4938 break;
4939 case set_block_64one:
4940 state_ = e_one_blocks;
4941 mono_block_cnt_ = block_idx_type(decoder_.get_64()-1);
4942 break;
4943
4944 case bm::set_block_bit:
4954 state_ = e_bit_block;
4955 break;
4956 case set_block_gap:
4963 gap_head_ = decoder_.get_16();
4965 case set_block_arrgap:
4973 case set_block_bit_1bit:
4982 state_ = e_gap_block;
4983 break;
4984 case set_block_gapbit:
4985 state_ = e_gap_block; //e_bit_block; // TODO: make a better decision here
4986 break;
4987
4988 // --------------------------------------------- bookmarks and syncs
4989 //
4990 case set_nb_bookmark32:
4991 this->bookmark_idx_ = block_idx_;
4992 this->skip_offset_ = decoder_.get_32();
4993 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4994 break;
4995 case set_nb_bookmark24:
4996 this->bookmark_idx_ = block_idx_;
4997 this->skip_offset_ = decoder_.get_24();
4998 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
4999 break;
5000 case set_nb_bookmark16:
5001 this->bookmark_idx_ = block_idx_;
5002 this->skip_offset_ = decoder_.get_16();
5003 this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
5004 break;
5005
5006 case set_nb_sync_mark8:
5007 nb_sync = decoder_.get_8();
5008 goto process_nb_sync;
5009 case set_nb_sync_mark16:
5010 nb_sync = decoder_.get_16();
5011 goto process_nb_sync;
5012 case set_nb_sync_mark24:
5013 nb_sync = decoder_.get_24();
5014 goto process_nb_sync;
5015 case set_nb_sync_mark32:
5016 nb_sync = decoder_.get_32();
5017 goto process_nb_sync;
5018 case set_nb_sync_mark48:
5019 nb_sync = block_idx_type(decoder_.get_48());
5020 goto process_nb_sync;
5021 case set_nb_sync_mark64:
5022 nb_sync = block_idx_type(decoder_.get_64());
5023 process_nb_sync:
5024 BM_ASSERT(block_idx_ == this->bookmark_idx_ + nb_sync);
5025 if (block_idx_ != this->bookmark_idx_ + nb_sync)
5026 {
5027 #ifndef BM_NO_STL
5028 throw std::logic_error(this->err_msg());
5029 #else
5030 BM_THROW(BM_ERR_SERIALFORMAT);
5031 #endif
5032 }
5033 break;
5034
5035
5036
5037 // --------------------------------------------------------------
5038 // XOR deserials are incompatible with the operation deserialial
5039 // throw or assert here
5040 //
5041 case set_block_ref_eq:
5042 case set_block_xor_ref8:
5049 // fall through
5050 case set_sblock_bienc:
5051 BM_ASSERT(0);
5053 // fall through
5054 default:
5055 BM_ASSERT(0);
5056 #ifndef BM_NO_STL
5057 throw std::logic_error(this->err_msg());
5058 #else
5059 BM_THROW(BM_ERR_SERIALFORMAT);
5060 #endif
5061 } // switch
5062
5063 break;
5064
5065 case e_zero_blocks:
5066 case e_one_blocks:
5067 ++block_idx_;
5068 if (!mono_block_cnt_)
5069 {
5070 state_ = e_blocks; // get new block token
5071 break;
5072 }
5073 --mono_block_cnt_;
5074 break;
5075
5076 case e_unknown:
5077 default:
5078 BM_ASSERT(0);
5079 #ifndef BM_NO_STL
5080 throw std::logic_error(this->err_msg());
5081 #else
5082 BM_THROW(BM_ERR_SERIALFORMAT);
5083 #endif
5084 } // switch
5085}
5086
5087template<typename DEC, typename BLOCK_IDX>
5090{
5091 BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
5092 if (!mono_block_cnt_)
5093 ++block_idx_;
5094 else
5095 {
5096 block_idx_ += mono_block_cnt_+1;
5097 mono_block_cnt_ = 0;
5098 }
5099 state_ = e_blocks;
5100 return block_idx_;
5101}
5102
5103template<typename DEC, typename BLOCK_IDX>
5104void
5106{
5107 gap_word_t len = decoder_.get_16();
5108 if (block)
5109 {
5110 bm::bit_block_set(block, ~0u);
5111 for (unsigned k = 0; k < len; ++k)
5112 {
5113 bm::gap_word_t bit_idx = decoder_.get_16();
5114 bm::clear_bit(block, bit_idx);
5115 }
5116 }
5117 else // dry read
5118 {
5119 for (unsigned k = 0; k < len; ++k)
5120 decoder_.get_16();
5121 }
5122}
5123
5124
5125template<typename DEC, typename BLOCK_IDX>
5126unsigned
5128 bm::word_t* dst_block,
5129 bm::word_t* tmp_block)
5130{
5131 // ASSIGN should be ready for dry read (dst_block can be NULL)
5132 //
5133 BM_ASSERT(this->state_ == e_bit_block);
5134 unsigned count = 0;
5135
5136 switch (this->block_type_)
5137 {
5138 case set_block_bit:
5139 decoder_.get_32(dst_block, bm::set_block_size);
5140 break;
5141 case set_block_bit_0runs:
5142 {
5143 if (IS_VALID_ADDR(dst_block))
5144 bm::bit_block_set(dst_block, 0);
5145 unsigned char run_type = decoder_.get_8();
5146 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5147 {
5148 unsigned run_length = decoder_.get_16();
5149 if (run_type)
5150 {
5151 decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
5152 }
5153 j += run_length;
5154 } // for
5155 }
5156 break;
5158 {
5159 unsigned head_idx = decoder_.get_16();
5160 unsigned tail_idx = decoder_.get_16();
5161 if (dst_block)
5162 {
5163 for (unsigned i = 0; i < head_idx; ++i)
5164 dst_block[i] = 0;
5165 decoder_.get_32(dst_block + head_idx,
5166 tail_idx - head_idx + 1);
5167 for (unsigned j = tail_idx + 1; j < bm::set_block_size; ++j)
5168 dst_block[j] = 0;
5169 }
5170 else
5171 {
5172 int pos = int(tail_idx - head_idx) + 1;
5173 pos *= 4u;
5174 decoder_.seek(pos);
5175 }
5176 }
5177 break;
5178 case set_block_arrbit:
5179 case set_block_bit_1bit:
5180 get_arr_bit(dst_block, true /*clear target*/);
5181 break;
5182 case set_block_gapbit:
5183 BM_ASSERT(0);
5184 #ifndef BM_NO_STL
5185 throw std::logic_error(this->err_msg());
5186 #else
5187 BM_THROW(BM_ERR_SERIALFORMAT);
5188 #endif
5189 break;
5191 get_inv_arr(dst_block);
5192 break;
5195 if (IS_VALID_ADDR(dst_block))
5196 bm::bit_block_set(dst_block, 0);
5197 this->read_bic_arr(decoder_, dst_block, this->block_type_);
5198 break;
5200 this->read_bic_arr_inv(decoder_, tmp_block);
5201 if (IS_VALID_ADDR(dst_block))
5202 bm::bit_block_copy(dst_block, tmp_block);
5203 break;
5205 if (IS_VALID_ADDR(dst_block))
5206 bm::bit_block_set(dst_block, 0);
5207 this->read_bic_gap(decoder_, dst_block);
5208 break;
5210 if (IS_VALID_ADDR(dst_block))
5211 bm::bit_block_set(dst_block, 0);
5212 this->read_digest0_block(decoder_, dst_block);
5213 break;
5214 default:
5215 BM_ASSERT(0);
5216 #ifndef BM_NO_STL
5217 throw std::logic_error(this->err_msg());
5218 #else
5219 BM_THROW(BM_ERR_SERIALFORMAT);
5220 #endif
5221 } // switch
5222 return count;
5223}
5224
5225template<typename DEC, typename BLOCK_IDX>
5226unsigned
5228 bm::word_t* dst_block,
5229 bm::word_t* tmp_block)
5230{
5231 BM_ASSERT(this->state_ == e_bit_block);
5232 unsigned count = 0;
5233 switch (block_type_)
5234 {
5235 case set_block_bit:
5236 decoder_.get_32_OR(dst_block, bm::set_block_size);
5237 break;
5239 {
5240 unsigned head_idx = decoder_.get_16();
5241 unsigned tail_idx = decoder_.get_16();
5242 for (unsigned i = head_idx; i <= tail_idx; ++i)
5243 dst_block[i] |= decoder_.get_32();
5244 }
5245 break;
5247 {
5248 unsigned char run_type = decoder_.get_8();
5249 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5250 {
5251 unsigned run_length = decoder_.get_16();
5252 if (run_type)
5253 {
5254 unsigned run_end = j + run_length;
5255 for (;j < run_end; ++j)
5256 {
5258 dst_block[j] |= decoder_.get_32();
5259 }
5260 }
5261 else
5262 {
5263 j += run_length;
5264 }
5265 } // for
5266 }
5267 break;
5268 case set_block_bit_1bit:
5269 case set_block_arrbit:
5270 get_arr_bit(dst_block, false /*don't clear target*/);
5271 break;
5273 get_inv_arr(tmp_block);
5274 bm::bit_block_or(dst_block, tmp_block);
5275 break;
5278 this->read_bic_arr(decoder_, dst_block, this->block_type_);
5279 break;
5281 this->read_bic_arr_inv(decoder_, tmp_block);
5282 bm::bit_block_or(dst_block, tmp_block);
5283 break;
5285 this->read_bic_gap(decoder_, dst_block);
5286 break;
5288 this->read_digest0_block(decoder_, dst_block);
5289 break;
5290 default:
5291 BM_ASSERT(0);
5292 #ifndef BM_NO_STL
5293 throw std::logic_error(this->err_msg());
5294 #else
5295 BM_THROW(BM_ERR_SERIALFORMAT);
5296 #endif
5297 } // switch
5298 return count;
5299}
5300
5301template<typename DEC, typename BLOCK_IDX>
5302unsigned
5304 bm::word_t* BMRESTRICT dst_block,
5305 bm::word_t* BMRESTRICT tmp_block)
5306{
5307 BM_ASSERT(this->state_ == e_bit_block);
5308 BM_ASSERT(dst_block != tmp_block);
5309 unsigned count = 0;
5310 switch (block_type_)
5311 {
5312 case set_block_bit:
5313 decoder_.get_32_AND(dst_block, bm::set_block_size);
5314 break;
5316 {
5317 unsigned char run_type = decoder_.get_8();
5318 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5319 {
5320 unsigned run_length = decoder_.get_16();
5321
5322 unsigned run_end = j + run_length;
5323 if (run_type)
5324 {
5325 for (;j < run_end; ++j)
5326 {
5328 dst_block[j] &= decoder_.get_32();
5329 }
5330 }
5331 else
5332 {
5333 for (;j < run_end; ++j)
5334 {
5336 dst_block[j] = 0;
5337 }
5338 }
5339 } // for
5340 }
5341 break;
5343 {
5344 unsigned head_idx = decoder_.get_16();
5345 unsigned tail_idx = decoder_.get_16();
5346 unsigned i;
5347 for ( i = 0; i < head_idx; ++i)
5348 dst_block[i] = 0;
5349 for ( i = head_idx; i <= tail_idx; ++i)
5350 dst_block[i] &= decoder_.get_32();
5351 for ( i = tail_idx + 1; i < bm::set_block_size; ++i)
5352 dst_block[i] = 0;
5353 }
5354 break;
5355 case set_block_bit_1bit:
5356 case set_block_arrbit:
5357 get_arr_bit(tmp_block, true /*clear target*/);
5358 if (dst_block)
5359 bm::bit_block_and(dst_block, tmp_block);
5360 break;
5362 get_inv_arr(tmp_block);
5363 if (dst_block)
5364 bm::bit_block_and(dst_block, tmp_block);
5365 break;
5368 if (dst_block)
5369 {
5370 bm::bit_block_set(tmp_block, 0);
5371 this->read_bic_arr(decoder_, tmp_block, block_type_);
5372 bm::bit_block_and(dst_block, tmp_block);
5373 }
5374 else
5375 this->read_bic_arr(decoder_, 0, block_type_); // dry read
5376 break;
5378 this->read_bic_arr_inv(decoder_, tmp_block);
5379 if (dst_block)
5380 bm::bit_block_and(dst_block, tmp_block);
5381 break;
5383 if (dst_block)
5384 {
5385 BM_ASSERT(IS_VALID_ADDR(dst_block));
5386 bm::bit_block_set(tmp_block, 0);
5387 this->read_bic_gap(decoder_, tmp_block);
5388 bm::bit_block_and(dst_block, tmp_block);
5389 }
5390 else
5391 this->read_bic_gap(decoder_, 0); // dry read
5392 break;
5394 if (dst_block)
5395 {
5396 BM_ASSERT(IS_VALID_ADDR(dst_block));
5397 bm::bit_block_set(tmp_block, 0);
5398 this->read_digest0_block(decoder_, tmp_block);
5399 bm::bit_block_and(dst_block, tmp_block);
5400 }
5401 else
5402 this->read_digest0_block(decoder_, 0); // dry read
5403 break;
5404 default:
5405 BM_ASSERT(0);
5406 #ifndef BM_NO_STL
5407 throw std::logic_error(this->err_msg());
5408 #else
5409 BM_THROW(BM_ERR_SERIALFORMAT);
5410 #endif
5411 } // switch
5412 return count;
5413}
5414
5415template<typename DEC, typename BLOCK_IDX>
5416unsigned
5418 bm::word_t* dst_block,
5419 bm::word_t* tmp_block)
5420{
5421 BM_ASSERT(this->state_ == e_bit_block);
5422 BM_ASSERT(dst_block != tmp_block);
5423
5424 unsigned count = 0;
5425 switch (block_type_)
5426 {
5427 case set_block_bit:
5428 for (unsigned i = 0; i < bm::set_block_size; ++i)
5429 dst_block[i] ^= decoder_.get_32();
5430 break;
5432 {
5433 unsigned char run_type = decoder_.get_8();
5434 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5435 {
5436 unsigned run_length = decoder_.get_16();
5437 if (run_type)
5438 {
5439 unsigned run_end = j + run_length;
5440 for (;j < run_end; ++j)
5441 {
5443 dst_block[j] ^= decoder_.get_32();
5444 }
5445 }
5446 else
5447 {
5448 j += run_length;
5449 }
5450 } // for
5451 }
5452 break;
5454 {
5455 unsigned head_idx = decoder_.get_16();
5456 unsigned tail_idx = decoder_.get_16();
5457 for (unsigned i = head_idx; i <= tail_idx; ++i)
5458 dst_block[i] ^= decoder_.get_32();
5459 }
5460 break;
5461 case set_block_bit_1bit:
5462 // TODO: optimization
5463 case set_block_arrbit:
5464 get_arr_bit(tmp_block, true /*clear target*/);
5465 if (dst_block)
5466 bm::bit_block_xor(dst_block, tmp_block);
5467 break;
5469 get_inv_arr(tmp_block);
5470 if (dst_block)
5471 bm::bit_block_xor(dst_block, tmp_block);
5472 break;
5475 bm::bit_block_set(tmp_block, 0);
5476 this->read_bic_arr(decoder_, tmp_block, block_type_);
5477 if (dst_block)
5478 bm::bit_block_xor(dst_block, tmp_block);
5479 break;
5481 this->read_bic_arr_inv(decoder_, tmp_block);
5482 if (dst_block)
5483 {
5484 BM_ASSERT(IS_VALID_ADDR(dst_block));
5485 bm::bit_block_xor(dst_block, tmp_block);
5486 }
5487 break;
5489 if (dst_block)
5490 {
5491 BM_ASSERT(IS_VALID_ADDR(dst_block));
5492 bm::bit_block_set(tmp_block, 0);
5493 this->read_bic_gap(decoder_, tmp_block);
5494 bm::bit_block_xor(dst_block, tmp_block);
5495 }
5496 else
5497 this->read_bic_gap(decoder_, 0); // dry read
5498 break;
5500 if (dst_block)
5501 {
5502 BM_ASSERT(IS_VALID_ADDR(dst_block));
5503 bm::bit_block_set(tmp_block, 0);
5504 this->read_digest0_block(decoder_, tmp_block);
5505 bm::bit_block_xor(dst_block, tmp_block);
5506 }
5507 else
5508 this->read_digest0_block(decoder_, 0); // dry read
5509 break;
5510 default:
5511 BM_ASSERT(0);
5512 #ifndef BM_NO_STL
5513 throw std::logic_error(this->err_msg());
5514 #else
5515 BM_THROW(BM_ERR_SERIALFORMAT);
5516 #endif
5517 } // switch
5518 return count;
5519}
5520
5521template<typename DEC, typename BLOCK_IDX>
5522unsigned
5524 bm::word_t* dst_block,
5525 bm::word_t* tmp_block)
5526{
5527 BM_ASSERT(this->state_ == e_bit_block);
5528 BM_ASSERT(dst_block != tmp_block);
5529
5530 unsigned count = 0;
5531 switch (block_type_)
5532 {
5533 case set_block_bit:
5534 for (unsigned i = 0; i < bm::set_block_size; ++i)
5535 dst_block[i] &= ~decoder_.get_32();
5536 break;
5538 {
5539 unsigned char run_type = decoder_.get_8();
5540 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5541 {
5542 unsigned run_length = decoder_.get_16();
5543 if (run_type)
5544 {
5545 unsigned run_end = j + run_length;
5546 for (;j < run_end; ++j)
5547 {
5549 dst_block[j] &= ~~decoder_.get_32();
5550 }
5551 }
5552 else
5553 {
5554 j += run_length;
5555 }
5556 } // for
5557 }
5558 break;
5560 {
5561 unsigned head_idx = decoder_.get_16();
5562 unsigned tail_idx = decoder_.get_16();
5563 for (unsigned i = head_idx; i <= tail_idx; ++i)
5564 dst_block[i] &= ~decoder_.get_32();
5565 }
5566 break;
5567 case set_block_bit_1bit:
5568 // TODO: optimization
5569 case set_block_arrbit:
5570 get_arr_bit(tmp_block, true /*clear target*/);
5571 if (dst_block)
5572 bm::bit_block_sub(dst_block, tmp_block);
5573 break;
5575 get_inv_arr(tmp_block);
5576 if (dst_block)
5577 bm::bit_block_sub(dst_block, tmp_block);
5578 break;
5581 bm::bit_block_set(tmp_block, 0);
5582 this->read_bic_arr(decoder_, tmp_block, block_type_);
5583 if (dst_block)
5584 bm::bit_block_sub(dst_block, tmp_block);
5585 break;
5587 this->read_bic_arr_inv(decoder_, tmp_block);
5588 if (dst_block)
5589 bm::bit_block_sub(dst_block, tmp_block);
5590 break;
5592 if (dst_block)
5593 {
5594 BM_ASSERT(IS_VALID_ADDR(dst_block));
5595 bm::bit_block_set(tmp_block, 0);
5596 this->read_bic_gap(decoder_, tmp_block);
5597 bm::bit_block_sub(dst_block, tmp_block);
5598 }
5599 else
5600 this->read_bic_gap(decoder_, 0); // dry read
5601 break;
5603 if (dst_block)
5604 {
5605 BM_ASSERT(IS_VALID_ADDR(dst_block));
5606 bm::bit_block_set(tmp_block, 0);
5607 this->read_digest0_block(decoder_, tmp_block);
5608 bm::bit_block_sub(dst_block, tmp_block);
5609 }
5610 else
5611 this->read_digest0_block(decoder_, 0); // dry read
5612 break;
5613 default:
5614 BM_ASSERT(0);
5615 #ifndef BM_NO_STL
5616 throw std::logic_error(this->err_msg());
5617 #else
5618 BM_THROW(BM_ERR_SERIALFORMAT);
5619 #endif
5620 } // switch
5621 return count;
5622}
5623
5624
5625template<typename DEC, typename BLOCK_IDX>
5626unsigned
5628 bm::word_t* /*dst_block*/,
5629 bm::word_t* tmp_block)
5630{
5631 BM_ASSERT(this->state_ == e_bit_block);
5632
5633 unsigned count = 0;
5634 switch (block_type_)
5635 {
5636 case set_block_bit:
5637 for (unsigned i = 0; i < bm::set_block_size; ++i)
5638 count += bm::word_bitcount(decoder_.get_32());
5639 break;
5641 {
5642 //count = 0;
5643 unsigned char run_type = decoder_.get_8();
5644 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5645 {
5646 unsigned run_length = decoder_.get_16();
5647 if (run_type)
5648 {
5649 unsigned run_end = j + run_length;
5650 for (;j < run_end; ++j)
5651 {
5652 count += word_bitcount(decoder_.get_32());
5653 }
5654 }
5655 else
5656 {
5657 j += run_length;
5658 }
5659 } // for
5660 return count;
5661 }
5663 {
5664 unsigned head_idx = decoder_.get_16();
5665 unsigned tail_idx = decoder_.get_16();
5666 for (unsigned i = head_idx; i <= tail_idx; ++i)
5667 count += bm::word_bitcount(decoder_.get_32());
5668 }
5669 break;
5670 case set_block_arrbit:
5671 count += get_arr_bit(0);
5672 break;
5673 case set_block_bit_1bit:
5674 ++count;
5675 decoder_.get_16();
5676 break;
5678 get_inv_arr(tmp_block);
5679 goto count_tmp;
5682 bm::bit_block_set(tmp_block, 0); // TODO: just add a counted read
5683 this->read_bic_arr(decoder_, tmp_block, block_type_);
5684 goto count_tmp;
5686 this->read_bic_arr_inv(decoder_, tmp_block);
5687 goto count_tmp;
5689 bm::bit_block_set(tmp_block, 0);
5690 this->read_digest0_block(decoder_, tmp_block);
5691 goto count_tmp;
5693 bm::bit_block_set(tmp_block, 0);
5694 this->read_bic_gap(decoder_, tmp_block);
5695 count_tmp:
5696 count += bm::bit_block_count(tmp_block);
5697 break;
5698 default:
5699 BM_ASSERT(0);
5700 #ifndef BM_NO_STL
5701 throw std::logic_error(this->err_msg());
5702 #else
5703 BM_THROW(BM_ERR_SERIALFORMAT);
5704 #endif
5705
5706 } // switch
5707 return count;
5708}
5709
5710template<typename DEC, typename BLOCK_IDX>
5711unsigned
5713 bm::word_t* dst_block,
5714 bm::word_t* tmp_block)
5715{
5716 BM_ASSERT(this->state_ == e_bit_block);
5717 unsigned count = 0;
5718 if (dst_block)
5719 {
5720 // count the block bitcount
5721 count = bm::bit_block_count(dst_block);
5722 }
5723
5724 switch (block_type_)
5725 {
5726 case set_block_bit:
5727 decoder_.get_32(0, bm::set_block_size);
5728 break;
5730 {
5731 unsigned char run_type = decoder_.get_8();
5732 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5733 {
5734 unsigned run_length = decoder_.get_16();
5735 if (run_type)
5736 {
5737 unsigned run_end = j + run_length;
5738 for (;j < run_end; ++j)
5739 {
5740 decoder_.get_32();
5741 }
5742 }
5743 else
5744 {
5745 j += run_length;
5746 }
5747 } // for
5748 }
5749 break;
5750
5752 {
5753 unsigned head_idx = decoder_.get_16();
5754 unsigned tail_idx = decoder_.get_16();
5755 for (unsigned i = head_idx; i <= tail_idx; ++i)
5756 decoder_.get_32();
5757 }
5758 break;
5759 case set_block_arrbit:
5760 get_arr_bit(0);
5761 break;
5762 case set_block_bit_1bit:
5763 decoder_.get_16();
5764 break;
5766 get_inv_arr(tmp_block);
5767 break;
5770 this->read_bic_arr(decoder_, tmp_block, block_type_); // TODO: add dry read
5771 break;
5773 this->read_bic_arr_inv(decoder_, tmp_block); // TODO: add dry read
5774 break;
5776 this->read_bic_gap(decoder_, tmp_block);
5777 break;
5779 this->read_digest0_block(decoder_, 0); // dry read
5780 break;
5781 default:
5782 BM_ASSERT(0);
5783 #ifndef BM_NO_STL
5784 throw std::logic_error(this->err_msg());
5785 #else
5786 BM_THROW(BM_ERR_SERIALFORMAT);
5787 #endif
5788
5789 } // switch
5790 return count;
5791}
5792
5793
5794template<typename DEC, typename BLOCK_IDX>
5795unsigned
5797 bm::word_t* dst_block,
5798 bm::word_t* tmp_block)
5799{
5800 BM_ASSERT(this->state_ == e_bit_block);
5801 BM_ASSERT(dst_block);
5802
5803 unsigned count = 0;
5804 switch (block_type_)
5805 {
5806 case set_block_bit:
5807 for (unsigned i = 0; i < bm::set_block_size; ++i)
5808 count += word_bitcount(dst_block[i] & decoder_.get_32());
5809 break;
5811 {
5812 //count = 0;
5813 unsigned char run_type = decoder_.get_8();
5814 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5815 {
5816 unsigned run_length = decoder_.get_16();
5817 if (run_type)
5818 {
5819 unsigned run_end = j + run_length;
5820 for (;j < run_end; ++j)
5821 {
5822 count += word_bitcount(dst_block[j] & decoder_.get_32());
5823 }
5824 }
5825 else
5826 {
5827 j += run_length;
5828 }
5829 } // for
5830 return count;
5831 }
5833 {
5834 unsigned head_idx = decoder_.get_16();
5835 unsigned tail_idx = decoder_.get_16();
5836 for (unsigned i = head_idx; i <= tail_idx; ++i)
5837 count += word_bitcount(dst_block[i] & decoder_.get_32());
5838 }
5839 break;
5840 case set_block_bit_1bit:
5841 // TODO: optimization
5842 case set_block_arrbit:
5843 get_arr_bit(tmp_block, true /*clear target*/);
5844 goto count_tmp;
5845 break;
5847 get_inv_arr(tmp_block);
5848 goto count_tmp;
5849 break;
5852 bm::bit_block_set(tmp_block, 0);
5853 this->read_bic_arr(decoder_, tmp_block, block_type_);
5854 goto count_tmp;
5856 this->read_bic_arr_inv(decoder_, tmp_block);
5857 goto count_tmp;
5859 bm::bit_block_set(tmp_block, 0);
5860 this->read_digest0_block(decoder_, tmp_block);
5861 goto count_tmp;
5863 bm::bit_block_set(tmp_block, 0);
5864 this->read_bic_gap(decoder_, tmp_block);
5865 count_tmp:
5866 count += bm::bit_operation_and_count(dst_block, tmp_block);
5867 break;
5868 default:
5869 BM_ASSERT(0);
5870 #ifndef BM_NO_STL
5871 throw std::logic_error(this->err_msg());
5872 #else
5873 BM_THROW(BM_ERR_SERIALFORMAT);
5874 #endif
5875
5876 } // switch
5877 return count;
5878}
5879
5880template<typename DEC,typename BLOCK_IDX>
5881unsigned
5883 bm::word_t* dst_block,
5884 bm::word_t* tmp_block)
5885{
5886 BM_ASSERT(this->state_ == e_bit_block);
5887 BM_ASSERT(dst_block);
5888
5889 bitblock_sum_adapter count_adapter;
5890 switch (block_type_)
5891 {
5892 case set_block_bit:
5893 {
5894 bitblock_get_adapter ga(dst_block);
5896
5897 bit_recomb(ga,
5898 decoder_,
5899 func,
5900 count_adapter
5901 );
5902 }
5903 break;
5904 case set_block_bit_0runs:
5905 {
5906 unsigned count = 0;
5907 unsigned char run_type = decoder_.get_8();
5908 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
5909 {
5910 unsigned run_length = decoder_.get_16();
5911 unsigned run_end = j + run_length;
5912 if (run_type)
5913 {
5914 for (;j < run_end; ++j)
5915 {
5917 count += word_bitcount(dst_block[j] | decoder_.get_32());
5918 }
5919 }
5920 else
5921 {
5922 for (;j < run_end; ++j)
5923 {
5925 count += word_bitcount(dst_block[j]);
5926 }
5927 }
5928 } // for
5929 return count;
5930 }
5932 {
5933 unsigned head_idx = decoder_.get_16();
5934 unsigned tail_idx = decoder_.get_16();
5935 unsigned count = 0;
5936 unsigned i;
5937 for (i = 0; i < head_idx; ++i)
5938 count += bm::word_bitcount(dst_block[i]);
5939 for (i = head_idx; i <= tail_idx; ++i)
5940 count += bm::word_bitcount(dst_block[i] | decoder_.get_32());
5941 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
5942 count += bm::word_bitcount(dst_block[i]);
5943 return count;
5944 }
5945 case set_block_bit_1bit:
5946 // TODO: optimization
5947 case set_block_arrbit:
5948 get_arr_bit(tmp_block, true /* clear target*/);
5949 return bit_operation_or_count(dst_block, tmp_block);
5951 get_inv_arr(tmp_block);
5952 goto count_tmp;
5955 bm::bit_block_set(tmp_block, 0);
5956 this->read_bic_arr(decoder_, tmp_block, block_type_);
5957 goto count_tmp;
5959 this->read_bic_arr_inv(decoder_, tmp_block);
5960 goto count_tmp;
5962 bm::bit_block_set(tmp_block, 0);
5963 this->read_digest0_block(decoder_, tmp_block);
5964 goto count_tmp;
5966 bm::bit_block_set(tmp_block, 0);
5967 this->read_bic_gap(decoder_, tmp_block);
5968 count_tmp:
5969 return bm::bit_operation_or_count(dst_block, tmp_block);
5970 default:
5971 BM_ASSERT(0);
5972 #ifndef BM_NO_STL
5973 throw std::logic_error(this->err_msg());
5974 #else
5975 BM_THROW(BM_ERR_SERIALFORMAT);
5976 #endif
5977
5978 } // switch
5979 return count_adapter.sum();
5980}
5981
5982template<typename DEC, typename BLOCK_IDX>
5983unsigned
5985 bm::word_t* dst_block,
5986 bm::word_t* tmp_block)
5987{
5988 BM_ASSERT(this->state_ == e_bit_block);
5989 BM_ASSERT(dst_block);
5990
5991 bitblock_sum_adapter count_adapter;
5992 switch (block_type_)
5993 {
5994 case set_block_bit:
5995 {
5996 bitblock_get_adapter ga(dst_block);
5998
5999 bit_recomb(ga,
6000 decoder_,
6001 func,
6002 count_adapter
6003 );
6004 }
6005 break;
6006 case set_block_bit_0runs:
6007 {
6008 unsigned count = 0;
6009 unsigned char run_type = decoder_.get_8();
6010 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
6011 {
6012 unsigned run_length = decoder_.get_16();
6013 unsigned run_end = j + run_length;
6014 if (run_type)
6015 {
6016 for (;j < run_end; ++j)
6017 {
6019 count += bm::word_bitcount(dst_block[j] ^ decoder_.get_32());
6020 }
6021 }
6022 else
6023 {
6024 for (;j < run_end; ++j)
6025 {
6027 count += bm::word_bitcount(dst_block[j]);
6028 }
6029 }
6030 } // for
6031 return count;
6032 }
6034 {
6035 unsigned head_idx = decoder_.get_16();
6036 unsigned tail_idx = decoder_.get_16();
6037 unsigned count = 0;
6038 unsigned i;
6039 for (i = 0; i < head_idx; ++i)
6040 count += bm::word_bitcount(dst_block[i]);
6041 for (i = head_idx; i <= tail_idx; ++i)
6042 count += bm::word_bitcount(dst_block[i] ^ decoder_.get_32());
6043 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
6044 count += bm::word_bitcount(dst_block[i]);
6045 return count;
6046 }
6047 case set_block_bit_1bit:
6048 // TODO: optimization
6049 case set_block_arrbit:
6050 get_arr_bit(tmp_block, true /* clear target*/);
6051 goto count_tmp;
6053 get_inv_arr(tmp_block);
6054 goto count_tmp;
6057 bm::bit_block_set(tmp_block, 0);
6058 this->read_bic_arr(decoder_, tmp_block, block_type_);
6059 goto count_tmp;
6061 this->read_bic_arr_inv(decoder_, tmp_block);
6062 goto count_tmp;
6063 break;
6065 bm::bit_block_set(tmp_block, 0);
6066 this->read_digest0_block(decoder_, tmp_block);
6067 goto count_tmp;
6069 bm::bit_block_set(tmp_block, 0);
6070 this->read_bic_gap(decoder_, tmp_block);
6071 count_tmp:
6072 return bm::bit_operation_xor_count(dst_block, tmp_block);
6073 default:
6074 BM_ASSERT(0);
6075 #ifndef BM_NO_STL
6076 throw std::logic_error(this->err_msg());
6077 #else
6078 BM_THROW(BM_ERR_SERIALFORMAT);
6079 #endif
6080
6081 } // switch
6082 return count_adapter.sum();
6083}
6084
6085template<typename DEC, typename BLOCK_IDX>
6086unsigned
6088 bm::word_t* dst_block,
6089 bm::word_t* tmp_block)
6090{
6091 BM_ASSERT(this->state_ == e_bit_block);
6092 BM_ASSERT(dst_block);
6093
6094 bitblock_sum_adapter count_adapter;
6095 switch (block_type_)
6096 {
6097 case set_block_bit:
6098 {
6099 bitblock_get_adapter ga(dst_block);
6101
6102 bit_recomb(ga,
6103 decoder_,
6104 func,
6105 count_adapter
6106 );
6107 }
6108 break;
6109 case set_block_bit_0runs:
6110 {
6111 unsigned count = 0;
6112 unsigned char run_type = decoder_.get_8();
6113 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
6114 {
6115 unsigned run_length = decoder_.get_16();
6116 unsigned run_end = j + run_length;
6117 if (run_type)
6118 {
6119 for (;j < run_end; ++j)
6120 {
6122 count += word_bitcount(dst_block[j] & ~decoder_.get_32());
6123 }
6124 }
6125 else
6126 {
6127 for (;j < run_end; ++j)
6128 {
6130 count += bm::word_bitcount(dst_block[j]);
6131 }
6132 }
6133 } // for
6134 return count;
6135 }
6137 {
6138 unsigned head_idx = decoder_.get_16();
6139 unsigned tail_idx = decoder_.get_16();
6140 unsigned count = 0;
6141 unsigned i;
6142 for (i = 0; i < head_idx; ++i)
6143 count += bm::word_bitcount(dst_block[i]);
6144 for (i = head_idx; i <= tail_idx; ++i)
6145 count += bm::word_bitcount(dst_block[i] & (~decoder_.get_32()));
6146 for (i = tail_idx + 1; i < bm::set_block_size; ++i)
6147 count += bm::word_bitcount(dst_block[i]);
6148 return count;
6149 }
6150 break;
6151 case set_block_bit_1bit:
6152 //TODO: optimization
6153 case set_block_arrbit:
6154 get_arr_bit(tmp_block, true /* clear target*/);
6155 goto count_tmp;
6157 get_inv_arr(tmp_block);
6158 goto count_tmp;
6161 bm::bit_block_set(tmp_block, 0);
6162 this->read_bic_arr(decoder_, tmp_block, block_type_);
6163 goto count_tmp;
6165 this->read_bic_arr_inv(decoder_, tmp_block);
6166 goto count_tmp;
6168 bm::bit_block_set(tmp_block, 0);
6169 this->read_digest0_block(decoder_, tmp_block);
6170 goto count_tmp;
6172 bm::bit_block_set(tmp_block, 0);
6173 this->read_bic_gap(decoder_, tmp_block);
6174 count_tmp:
6175 return bm::bit_operation_sub_count(dst_block, tmp_block);
6176 default:
6177 BM_ASSERT(0);
6178 #ifndef BM_NO_STL
6179 throw std::logic_error(this->err_msg());
6180 #else
6181 BM_THROW(BM_ERR_SERIALFORMAT);
6182 #endif
6183
6184 } // switch
6185 return count_adapter.sum();
6186}
6187
6188template<typename DEC, typename BLOCK_IDX>
6189unsigned
6191 bm::word_t* dst_block,
6192 bm::word_t* tmp_block)
6193{
6194 BM_ASSERT(this->state_ == e_bit_block);
6195 BM_ASSERT(dst_block);
6196
6197 bitblock_sum_adapter count_adapter;
6198 switch (block_type_)
6199 {
6200 case set_block_bit:
6201 {
6202 bitblock_get_adapter ga(dst_block);
6204
6205 bit_recomb(ga,
6206 decoder_,
6207 func,
6208 count_adapter
6209 );
6210 }
6211 break;
6212 case set_block_bit_0runs:
6213 {
6214 unsigned count = 0;
6215 unsigned char run_type = decoder_.get_8();
6216 for (unsigned j = 0; j < bm::set_block_size;run_type = !run_type)
6217 {
6218 unsigned run_length = decoder_.get_16();
6219 unsigned run_end = j + run_length;
6220 if (run_type)
6221 {
6222 for (;j < run_end; ++j)
6223 {
6225 count += word_bitcount(decoder_.get_32() & (~dst_block[j]));
6226 }
6227 }
6228 else
6229 {
6230 j += run_length;
6231 }
6232 } // for
6233 return count;
6234 }
6236 {
6237 unsigned head_idx = decoder_.get_16();
6238 unsigned tail_idx = decoder_.get_16();
6239 unsigned count = 0;
6240 unsigned i;
6241 for (i = head_idx; i <= tail_idx; ++i)
6242 count += bm::word_bitcount(decoder_.get_32() & (~dst_block[i]));
6243 return count;
6244 }
6245 break;
6246 case set_block_bit_1bit:
6247 // TODO: optimization
6248 case set_block_arrbit:
6249 get_arr_bit(tmp_block, true /* clear target*/);
6250 goto count_tmp;
6252 get_inv_arr(tmp_block);
6253 goto count_tmp;
6256 bm::bit_block_set(tmp_block, 0);
6257 this->read_bic_arr(decoder_, tmp_block, block_type_);
6258 goto count_tmp;
6260 this->read_bic_arr_inv(decoder_, tmp_block);
6261 goto count_tmp;
6263 bm::bit_block_set(tmp_block, 0);
6264 this->read_digest0_block(decoder_, tmp_block);
6265 goto count_tmp;
6267 bm::bit_block_set(tmp_block, 0);
6268 this->read_bic_gap(decoder_, tmp_block);
6269 count_tmp:
6270 return bm::bit_operation_sub_count(tmp_block, dst_block);
6271 default:
6272 BM_ASSERT(0);
6273 #ifndef BM_NO_STL
6274 throw std::logic_error(this->err_msg());
6275 #else
6276 BM_THROW(BM_ERR_SERIALFORMAT);
6277 #endif
6278 } // switch
6279 return count_adapter.sum();
6280}
6281
6282
6283
6284template<typename DEC, typename BLOCK_IDX>
6286 bm::word_t* dst_block,
6287 bool clear_target) BMNOEXCEPT
6288{
6289 BM_ASSERT(this->block_type_ == set_block_arrbit ||
6290 this->block_type_ == set_block_bit_1bit);
6291
6292 gap_word_t len = decoder_.get_16(); // array length / 1bit_idx
6293 if (dst_block)
6294 {
6295 if (clear_target)
6296 bm::bit_block_set(dst_block, 0);
6297
6298 if (this->block_type_ == set_block_bit_1bit)
6299 {
6300 // len contains idx of 1 bit set
6301 set_bit(dst_block, len);
6302 return 1;
6303 }
6304
6305 for (unsigned k = 0; k < len; ++k)
6306 {
6307 gap_word_t bit_idx = decoder_.get_16();
6308 bm::set_bit(dst_block, bit_idx);
6309 }
6310 }
6311 else
6312 {
6313 if (this->block_type_ == set_block_bit_1bit)
6314 return 1; // nothing to do: len var already consumed 16 bits
6315
6316 // fwd the decode stream
6317 decoder_.seek(len * 2);
6318 }
6319 return len;
6320}
6321
6322template<typename DEC, typename BLOCK_IDX>
6324{
6325 BM_ASSERT(this->block_type_ == set_block_bit_1bit);
6326 ++(this->block_idx_);
6327 this->state_ = e_blocks;
6328
6329 return decoder_.get_16(); // 1bit_idx
6330}
6331
6332template<typename DEC, typename BLOCK_IDX>
6333void
6335{
6336 BM_ASSERT(this->state_ == e_gap_block ||
6337 this->block_type_ == set_block_bit_1bit);
6338 BM_ASSERT(dst_block);
6339
6340 this->read_gap_block(this->decoder_,
6341 this->block_type_,
6342 dst_block,
6343 this->gap_head_);
6344
6345 ++(this->block_idx_);
6346 this->state_ = e_blocks;
6347}
6348
6349
6350template<typename DEC, typename BLOCK_IDX>
6351unsigned
6353 bm::word_t* dst_block,
6354 bm::word_t* tmp_block,
6355 set_operation op)
6356{
6357 BM_ASSERT(this->state_ == e_bit_block);
6358
6359 get_bit_func_type bit_func = bit_func_table_[op];
6360 BM_ASSERT(bit_func);
6361 unsigned cnt = ((*this).*(bit_func))(dst_block, tmp_block);
6362 this->state_ = e_blocks;
6363 ++(this->block_idx_);
6364 return cnt;
6365}
6366
6367// ----------------------------------------------------------------------
6368//
6369// ----------------------------------------------------------------------
6370
6371
6372template<class BV>
6374: temp_block_(0), ref_vect_(0)
6375{
6376 temp_block_ = alloc_.alloc_bit_block();
6377}
6378
6379
6380template<class BV>
6382{
6383 if (temp_block_)
6384 alloc_.free_bit_block(temp_block_);
6385}
6386
6387/**
6388 Utility function to process operation using temp vector
6389 @internal
6390 */
6391template<class BV>
6392typename BV::size_type process_operation(BV& bv,
6393 BV& bv_tmp,
6395{
6396 typename BV::size_type count = 0;
6397 switch (op)
6398 {
6399 case set_AND:
6400 bv.bit_and(bv_tmp, BV::opt_compress);
6401 break;
6402 case set_ASSIGN:
6403 bv.swap(bv_tmp);
6404 break;
6405 case set_OR:
6406 bv.bit_or(bv_tmp);
6407 break;
6408 case set_SUB:
6409 bv.bit_sub(bv_tmp);
6410 break;
6411 case set_XOR:
6412 bv.bit_xor(bv_tmp);
6413 break;
6414 case set_COUNT: case set_COUNT_B:
6415 count = bv_tmp.count();
6416 break;
6417 case set_COUNT_A:
6418 count = bv.count();
6419 break;
6420 case set_COUNT_AND:
6421 count = bm::count_and(bv, bv_tmp);
6422 break;
6423 case set_COUNT_XOR:
6424 count = bm::count_xor(bv, bv_tmp);
6425 break;
6426 case set_COUNT_OR:
6427 count = bm::count_or(bv, bv_tmp);
6428 break;
6429 case set_COUNT_SUB_AB:
6430 count = bm::count_sub(bv, bv_tmp);
6431 break;
6432 case set_COUNT_SUB_BA:
6433 count = bm::count_sub(bv_tmp, bv);
6434 break;
6435 case set_END:
6436 default:
6437 BM_ASSERT(0);
6438 #ifndef BM_NO_STL
6439 //throw std::logic_error(err_msg());
6440 #else
6441 BM_THROW(BM_ERR_SERIALFORMAT);
6442 #endif
6443 } // switch
6444
6445 return count;
6446}
6447
6448
6449template<class BV>
6451operation_deserializer<BV>::deserialize_xor(bvector_type& bv,
6452 const unsigned char* buf,
6453 set_operation op,
6454 bool /*exit_on_one*/)
6455{
6456 if (op == set_OR)
6457 {
6458 bm::deserialize(bv, buf, temp_block_, ref_vect_);
6459 return 0;
6460 }
6461 bvector_type bv_tmp;
6462 bm::deserialize(bv_tmp, buf, temp_block_, ref_vect_);
6463 return deserialize_xor(bv, bv_tmp, op);
6464}
6465
6466template<class BV>
6467void operation_deserializer<BV>::deserialize_xor_range(
6468 bvector_type& bv,
6469 const unsigned char* buf,
6470 size_type idx_from,
6471 size_type idx_to)
6472{
6473 bv.clear();
6474
6475 ByteOrder bo_current = globals<true>::byte_order();
6476
6477 bm::decoder dec(buf);
6478 unsigned char header_flag = dec.get_8();
6479 ByteOrder bo = bo_current;
6480 if (!(header_flag & BM_HM_NO_BO))
6481 {
6482 bo = (bm::ByteOrder) dec.get_8();
6483 }
6484
6485 if (bo_current == bo)
6486 {
6487 de_.set_ref_vectors(ref_vect_);
6488 de_.set_range(idx_from, idx_to);
6489 de_.deserialize(bv, buf);
6490 de_.reset();
6491 }
6492 else
6493 {
6494 switch (bo_current)
6495 {
6496 case BigEndian:
6497 {
6498 deserializer<BV, bm::decoder_big_endian> deserial;
6499 deserial.set_ref_vectors(ref_vect_);
6500 deserial.set_range(idx_from, idx_to);
6501 deserial.deserialize(bv, buf);
6502 }
6503 break;
6504 case LittleEndian:
6505 {
6506 deserializer<BV, bm::decoder_little_endian> deserial;
6507 deserial.set_ref_vectors(ref_vect_);
6508 deserial.set_range(idx_from, idx_to);
6509 deserial.deserialize(bv, buf);
6510 }
6511 break;
6512 default:
6513 BM_ASSERT(0);
6514 };
6515 }
6516 bv.keep_range_no_check(idx_from, idx_to);
6517}
6518
6519
6520
6521template<class BV>
6523operation_deserializer<BV>::deserialize_xor(bvector_type& bv,
6524 bvector_type& bv_tmp,
6525 set_operation op)
6526{
6527 size_type count = 0;
6528 switch (op)
6529 {
6530 case set_AND:
6532 break;
6533 case set_ASSIGN:
6534 bv.swap(bv_tmp);
6535 break;
6536 case set_SUB:
6537 bv -= bv_tmp;
6538 break;
6539 case set_XOR:
6540 bv ^= bv_tmp;
6541 break;
6542 case set_COUNT: case set_COUNT_B:
6543 count = bv_tmp.count();
6544 break;
6545 case set_COUNT_A:
6546 return bv.count();
6547 case set_COUNT_AND:
6548 count += bm::count_and(bv, bv_tmp);
6549 break;
6550 case set_COUNT_XOR:
6551 count += bm::count_xor(bv, bv_tmp);
6552 break;
6553 case set_COUNT_OR:
6554 count += bm::count_or(bv, bv_tmp);
6555 break;
6556 case set_COUNT_SUB_AB:
6557 count += bm::count_sub(bv, bv_tmp);
6558 break;
6559 case set_COUNT_SUB_BA:
6560 count += bm::count_sub(bv_tmp, bv);
6561 break;
6562 case set_END:
6563 default:
6564 BM_ASSERT(0);
6565 #ifndef BM_NO_STL
6566 throw std::logic_error("BM: serialization error");
6567 #else
6568 BM_THROW(BM_ERR_SERIALFORMAT);
6569 #endif
6570 } // switch
6571
6572 return count;
6573}
6574
6575
6576
6577template<class BV>
6580 const unsigned char* buf,
6581 set_operation op,
6582 bool exit_on_one)
6583{
6584 ByteOrder bo_current = globals<true>::byte_order();
6585 bm::decoder dec(buf);
6586 unsigned char header_flag = dec.get_8();
6587
6588 if (header_flag & BM_HM_HXOR) // XOR compression
6589 {
6590 BM_ASSERT(ref_vect_); // reference vector must be set
6591 return deserialize_xor(bv, buf, op, exit_on_one);
6592 }
6593
6594 ByteOrder bo = bo_current;
6595 if (!(header_flag & BM_HM_NO_BO))
6596 bo = (bm::ByteOrder) dec.get_8();
6597
6598 if (op == bm::set_ASSIGN)
6599 {
6600 bv.clear(true);
6601 op = bm::set_OR;
6602 }
6603
6604 if (header_flag & BM_HM_SPARSE)
6605 {
6606 size_type count = 0;
6607 if (bo_current == bo)
6608 {
6609 if (op == bm::set_OR)
6610 {
6611 de_.reset();
6612 de_.deserialize(bv, buf);
6613 }
6614 else
6615 {
6616 bv_tmp_.clear(true);
6617 bv_tmp_.set_new_blocks_strat(bm::BM_GAP);
6618 de_.reset();
6619 de_.deserialize(bv_tmp_, buf);
6620 count = bm::process_operation(bv, bv_tmp_, op);
6621 }
6622 }
6623 else
6624 {
6625 // TODO: implement byte-order aware sparse deserialization
6626 BM_ASSERT(0);
6627 }
6628 return count;
6629 }
6630
6631 if (bo_current == bo)
6632 {
6633 serial_stream_current ss(buf);
6634 return it_d_.deserialize(bv, ss, temp_block_, op, exit_on_one);
6635 }
6636 switch (bo_current)
6637 {
6638 case BigEndian:
6639 {
6640 serial_stream_be ss(buf);
6641 return it_d_be_.deserialize(bv, ss, temp_block_, op, exit_on_one);
6642 }
6643 case LittleEndian:
6644 {
6645 serial_stream_le ss(buf);
6646 return it_d_le_.deserialize(bv, ss, temp_block_, op, exit_on_one);
6647 }
6648 default:
6649 BM_ASSERT(0);
6650 #ifndef BM_NO_STL
6651 throw std::logic_error("BM::platform error: unknown endianness");
6652 #else
6653 BM_THROW(BM_ERR_SERIALFORMAT);
6654 #endif
6655 };
6656}
6657
6658template<class BV>
6660 bvector_type& bv,
6661 const unsigned char* buf,
6662 size_type idx_from,
6663 size_type idx_to)
6664{
6665 ByteOrder bo_current = globals<true>::byte_order();
6666 bm::decoder dec(buf);
6667 unsigned char header_flag = dec.get_8();
6668 ByteOrder bo = bo_current;
6669 if (!(header_flag & BM_HM_NO_BO))
6670 bo = (bm::ByteOrder) dec.get_8();
6671
6672 // check if it is empty fresh vector, set the range then
6673 //
6674 blocks_manager_type& bman = bv.get_blocks_manager();
6675 if (!bman.is_init())
6676 bv.set_range(idx_from, idx_to);
6677 else
6678 {} // assume that the target range set outside the call
6679
6680
6681 if (header_flag & BM_HM_HXOR) // XOR compression
6682 {
6683 BM_ASSERT(ref_vect_);
6684 bvector_type bv_tmp;
6685 deserialize_xor_range(bv_tmp, buf, idx_from, idx_to);
6686 if (bv.any())
6687 {
6688 bv.bit_and(bv_tmp, bvector_type::opt_compress);
6689 }
6690 else
6691 {
6692 bv.swap(bv_tmp);
6693 }
6694 return;
6695 }
6696
6697 if (header_flag & BM_HM_SPARSE)
6698 {
6699 if (bo_current == bo)
6700 {
6701 bv_tmp_.clear(true);
6702 bv_tmp_.set_new_blocks_strat(bm::BM_GAP);
6703 de_.reset();
6704 de_.set_range(idx_from, idx_to);
6705 de_.deserialize(bv_tmp_, buf);
6706 de_.reset();
6707
6708 if (bv.any())
6709 {
6710 bv.bit_and(bv_tmp_, bvector_type::opt_compress);
6711 }
6712 else
6713 {
6714 bv.swap(bv_tmp_);
6715 }
6716
6717 }
6718 else
6719 {
6720 // TODO: implement byte-order aware sparse deserialization
6721 BM_ASSERT(0);
6722 }
6723 return;
6724 }
6725
6726 const bm::set_operation op = bm::set_AND;
6727
6728 if (bo_current == bo)
6729 {
6730 serial_stream_current ss(buf);
6731 it_d_.set_range(idx_from, idx_to);
6732 it_d_.deserialize(bv, ss, temp_block_, op, false);
6733 it_d_.unset_range();
6734 return;
6735 }
6736 switch (bo_current)
6737 {
6738 case BigEndian:
6739 {
6740 serial_stream_be ss(buf);
6741 it_d_be_.set_range(idx_from, idx_to);
6742 it_d_be_.deserialize(bv, ss, temp_block_, op, false);
6743 it_d_be_.unset_range();
6744 return;
6745 }
6746 case LittleEndian:
6747 {
6748 serial_stream_le ss(buf);
6749 it_d_le_.set_range(idx_from, idx_to);
6750 it_d_le_.deserialize(bv, ss, temp_block_, op, false);
6751 it_d_le_.unset_range();
6752 return;
6753 }
6754 default:
6755 BM_ASSERT(0);
6756 #ifndef BM_NO_STL
6757 throw std::logic_error("BM::platform error: unknown endianness");
6758 #else
6759 BM_THROW(BM_ERR_SERIALFORMAT);
6760 #endif
6761 };
6762 return;
6763}
6764
6765
6766
6767// ------------------------------------------------------------------
6768
6769template<class BV, class SerialIterator>
6771 size_type from, size_type to)
6772{
6773 is_range_set_ = true;
6774 nb_range_from_ = (from >> bm::set_block_shift);
6775 nb_range_to_ = (to >> bm::set_block_shift);
6776}
6777
6778
6779template<class BV, class SerialIterator>
6781 bvector_type& bv,
6782 serial_iterator_type& sit,
6783 unsigned id_count,
6784 bool set_clear)
6785{
6786 const unsigned win_size = 64;
6787 bm::id_t id_buffer[win_size+1];
6788
6789 if (set_clear) // set bits
6790 {
6791 for (unsigned i = 0; i <= id_count;)
6792 {
6793 unsigned j;
6794 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
6795 {
6796 id_buffer[j] = sit.get_id();
6797 sit.next();
6798 } // for j
6799 bm::combine_or(bv, id_buffer, id_buffer + j);
6800 } // for i
6801 }
6802 else // clear bits
6803 {
6804 for (unsigned i = 0; i <= id_count;)
6805 {
6806 unsigned j;
6807 for (j = 0; j < win_size && i <= id_count; ++j, ++i)
6808 {
6809 id_buffer[j] = sit.get_id();
6810 sit.next();
6811 } // for j
6812 bm::combine_sub(bv, id_buffer, id_buffer + j);
6813 } // for i
6814 }
6815}
6816
6817template<class BV, class SerialIterator>
6819iterator_deserializer<BV, SerialIterator>::finalize_target_vector(
6820 blocks_manager_type& bman,
6821 set_operation op,
6822 size_type bv_block_idx)
6823{
6824 size_type count = 0;
6825 switch (op)
6826 {
6827 case set_OR: case set_SUB: case set_XOR:
6828 case set_COUNT: case set_COUNT_B: case set_COUNT_AND:
6829 case set_COUNT_SUB_BA:
6830 // nothing to do
6831 break;
6832 case set_ASSIGN: case set_AND:
6833 {
6834 block_idx_type nblock_last = (bm::id_max >> bm::set_block_shift);
6835 if (bv_block_idx <= nblock_last)
6836 bman.set_all_zero(bv_block_idx, nblock_last); // clear the tail
6837 }
6838 break;
6839 case set_COUNT_A: case set_COUNT_OR: case set_COUNT_XOR:
6840 case set_COUNT_SUB_AB:
6841 // count bits in the target vector
6842 {
6843 unsigned i, j;
6844 bm::get_block_coord(bv_block_idx, i, j);
6845 bm::word_t*** blk_root = bman.top_blocks_root();
6846 unsigned top_size = bman.top_block_size();
6847 for (;i < top_size; ++i)
6848 {
6849 bm::word_t** blk_blk = blk_root[i];
6850 if (blk_blk == 0)
6851 {
6852 bv_block_idx+=bm::set_sub_array_size-j;
6853 j = 0;
6854 continue;
6855 }
6856 // TODO: optimize for FULL top level
6857 for (; j < bm::set_sub_array_size; ++j, ++bv_block_idx)
6858 {
6859 if (blk_blk[j])
6860 count += bman.block_bitcount(blk_blk[j]);
6861 } // for j
6862 j = 0;
6863 } // for i
6864 }
6865 break;
6866 case set_END:
6867 default:
6868 BM_ASSERT(0);
6869 #ifndef BM_NO_STL
6870 throw std::logic_error(err_msg());
6871 #else
6872 BM_THROW(BM_ERR_SERIALFORMAT);
6873 #endif
6874 }
6875 return count;
6876}
6877
6878template<class BV, class SerialIterator>
6880iterator_deserializer<BV, SerialIterator>::process_id_list(
6881 bvector_type& bv,
6882 serial_iterator_type& sit,
6883 set_operation op)
6884{
6885 size_type count = 0;
6886 unsigned id_count = sit.get_id_count();
6887 bool set_clear = true;
6888 switch (op)
6889 {
6890 case set_AND:
6891 {
6892 // TODO: use some more optimal algorithm without temp vector
6893 BV bv_tmp(BM_GAP);
6894 load_id_list(bv_tmp, sit, id_count, true);
6895 bv &= bv_tmp;
6896 }
6897 break;
6898 case set_ASSIGN:
6899 BM_ASSERT(0);
6901 // fall through
6902 case set_OR:
6903 set_clear = true;
6905 // fall through
6906 case set_SUB:
6907 load_id_list(bv, sit, id_count, set_clear);
6908 break;
6909 case set_XOR:
6910 for (unsigned i = 0; i < id_count; ++i)
6911 {
6912 bm::id_t id = sit.get_id();
6913 bv[id] ^= true;
6914 sit.next();
6915 } // for
6916 break;
6917 case set_COUNT: case set_COUNT_B:
6918 for (unsigned i = 0; i < id_count; ++i)
6919 {
6920 /* bm::id_t id = */ sit.get_id();
6921 ++count;
6922 sit.next();
6923 } // for
6924 break;
6925 case set_COUNT_A:
6926 return bv.count();
6927 case set_COUNT_AND:
6928 for (size_type i = 0; i < id_count; ++i)
6929 {
6930 bm::id_t id = sit.get_id();
6931 count += bv.get_bit(id);
6932 sit.next();
6933 } // for
6934 break;
6935 case set_COUNT_XOR:
6936 {
6937 // TODO: get rid of the temp vector
6938 BV bv_tmp(BM_GAP);
6939 load_id_list(bv_tmp, sit, id_count, true);
6940 count += bm::count_xor(bv, bv_tmp);
6941 }
6942 break;
6943 case set_COUNT_OR:
6944 {
6945 // TODO: get rid of the temp. vector
6946 BV bv_tmp(BM_GAP);
6947 load_id_list(bv_tmp, sit, id_count, true);
6948 count += bm::count_or(bv, bv_tmp);
6949 }
6950 break;
6951 case set_COUNT_SUB_AB:
6952 {
6953 // TODO: get rid of the temp. vector
6954 BV bv_tmp(bv);
6955 load_id_list(bv_tmp, sit, id_count, false);
6956 count += bv_tmp.count();
6957 }
6958 break;
6959 case set_COUNT_SUB_BA:
6960 {
6961 BV bv_tmp(BM_GAP);
6962 load_id_list(bv_tmp, sit, id_count, true);
6963 count += bm::count_sub(bv_tmp, bv);
6964 }
6965 break;
6966 case set_END:
6967 default:
6968 BM_ASSERT(0);
6969 #ifndef BM_NO_STL
6970 throw std::logic_error(err_msg());
6971 #else
6972 BM_THROW(BM_ERR_SERIALFORMAT);
6973 #endif
6974 } // switch
6975
6976 return count;
6977}
6978
6979
6980template<class BV, class SerialIterator>
6983 bvector_type& bv,
6985 bm::word_t* temp_block,
6986 set_operation op,
6987 bool exit_on_one)
6988{
6989 BM_ASSERT(temp_block);
6990
6991 size_type count = 0;
6992 gap_word_t gap_temp_block[bm::gap_equiv_len * 4];
6993 gap_temp_block[0] = 0;
6994
6995 blocks_manager_type& bman = bv.get_blocks_manager();
6996 if (!bman.is_init())
6997 bman.init_tree();
6998
6999 if (sit.bv_size() && (sit.bv_size() > bv.size()))
7000 bv.resize(sit.bv_size());
7001
7002 typename serial_iterator_type::iterator_state state;
7003 state = sit.get_state();
7004 if (state == serial_iterator_type::e_list_ids)
7005 {
7006 count = process_id_list(bv, sit, op);
7007 return count;
7008 }
7009
7010 size_type bv_block_idx = 0;
7011 size_type empty_op_cnt = 0; // counter for empty target operations
7012
7013 for (;1;)
7014 {
7015 bm::set_operation sop = op;
7016 if (sit.is_eof()) // argument stream ended
7017 {
7018 count += finalize_target_vector(bman, op, bv_block_idx);
7019 return count;
7020 }
7021
7022 state = sit.state();
7023 switch (state)
7024 {
7025 case serial_iterator_type::e_blocks:
7026 sit.next();
7027
7028 // try to skip forward
7029 //
7030 if (is_range_set_ && (bv_block_idx < nb_range_from_))
7031 {
7032 // TODO: use saved bookmark block-idx
7033 bool skip_flag = sit.try_skip(bv_block_idx, nb_range_from_);
7034 if (skip_flag)
7035 {
7036 bv_block_idx = sit.block_idx();
7037 BM_ASSERT(bv_block_idx <= nb_range_from_);
7038 BM_ASSERT(sit.state() == serial_iterator_type::e_blocks);
7039 }
7040 }
7041 continue;
7042 case serial_iterator_type::e_bit_block:
7043 {
7044 BM_ASSERT(sit.block_idx() == bv_block_idx);
7045 unsigned i0, j0;
7046 bm::get_block_coord(bv_block_idx, i0, j0);
7047 bm::word_t* blk = bman.get_block_ptr(i0, j0);
7048 if (!blk)
7049 {
7050 switch (op)
7051 {
7052 case set_AND: case set_SUB: case set_COUNT_AND:
7053 case set_COUNT_SUB_AB: case set_COUNT_A:
7054 // one arg is 0, so the operation gives us zero
7055 // all we need to do is to seek the input stream
7056 sop = set_ASSIGN;
7057 break;
7058
7059 case set_OR: case set_XOR: case set_ASSIGN:
7060 blk = bman.make_bit_block(bv_block_idx);
7061 break;
7062
7063 case set_COUNT: case set_COUNT_XOR: case set_COUNT_OR:
7064 case set_COUNT_SUB_BA: case set_COUNT_B:
7065 // first arg is not required (should work as is)
7066 sop = set_COUNT;
7067 break;
7068
7069 case set_END:
7070 default:
7071 BM_ASSERT(0);
7072 #ifndef BM_NO_STL
7073 throw std::logic_error(err_msg());
7074 #else
7075 BM_THROW(BM_ERR_SERIALFORMAT);
7076 #endif
7077 }
7078 }
7079 else // block exists
7080 {
7081 int is_gap = BM_IS_GAP(blk);
7082 if (is_gap || IS_FULL_BLOCK(blk))
7083 {
7084 if (IS_FULL_BLOCK(blk) && is_const_set_operation(op))
7085 {
7087 }
7088 else
7089 {
7090 // TODO: make sure const operations do not
7091 // deoptimize GAP blocks
7092 blk = bman.deoptimize_block(bv_block_idx);
7093 }
7094 }
7095 }
7096
7097 // 2 bit-blocks recombination
7098 unsigned c = sit.get_bit_block(blk, temp_block, sop);
7099 count += c;
7100 if (exit_on_one && count) // early exit
7101 return count;
7102 switch (op) // target block optimization for non-const operations
7103 {
7104 case set_AND: case set_SUB: case set_XOR: case set_OR:
7105 bman.optimize_bit_block(i0, j0, bvector_type::opt_compress);
7106 break;
7107 default: break;
7108 } // switch
7109
7110 }
7111 break;
7112
7113 case serial_iterator_type::e_zero_blocks:
7114 {
7115 BM_ASSERT(bv_block_idx == sit.block_idx());
7116
7117 switch (op)
7118 {
7119 case set_ASSIGN: // nothing to do to rewind fwd
7120 case set_SUB: case set_COUNT_AND: case set_OR:
7121 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
7122 bv_block_idx = sit.skip_mono_blocks();
7123 continue;
7124
7125 case set_AND: // clear the range
7126 {
7127 size_type nb_start = bv_block_idx;
7128 bv_block_idx = sit.skip_mono_blocks();
7129 bman.set_all_zero(nb_start, bv_block_idx-1);
7130 }
7131 continue;
7132 case set_END:
7133 default:
7134 break;
7135 } // switch op
7136
7137
7138 unsigned i0, j0;
7139 bm::get_block_coord(bv_block_idx, i0, j0);
7140 bm::word_t* blk = bman.get_block_ptr(i0, j0);
7141
7142 sit.next();
7143
7144 if (blk)
7145 {
7146 switch (op)
7147 {
7148 case set_AND: case set_ASSIGN:
7149 // the result is 0
7150 //blk =
7151 bman.zero_block(bv_block_idx);
7152 break;
7153
7154 case set_SUB: case set_COUNT_AND: case set_OR:
7155 case set_XOR: case set_COUNT_SUB_BA: case set_COUNT_B:
7156 // nothing to do
7157 break;
7158
7159 case set_COUNT_SUB_AB: case set_COUNT_A: case set_COUNT_OR:
7160 case set_COUNT: case set_COUNT_XOR:
7161 // valid bit block recombined with 0 block
7162 // results with same block data
7163 // all we need is to bitcount bv block
7164 {
7165 count += blk ? bman.block_bitcount(blk) : 0;
7166 if (exit_on_one && count) // early exit
7167 return count;
7168 }
7169 break;
7170 case set_END:
7171 default:
7172 BM_ASSERT(0);
7173 } // switch op
7174 } // if blk
7175 }
7176 break;
7177
7178 case serial_iterator_type::e_one_blocks:
7179 {
7180 BM_ASSERT(bv_block_idx == sit.block_idx());
7181 unsigned i0, j0;
7182 bm::get_block_coord(bv_block_idx, i0, j0);
7183 bm::word_t* blk = bman.get_block_ptr(i0, j0);
7184
7185 sit.next();
7186
7187 switch (op)
7188 {
7189 case set_OR: case set_ASSIGN:
7190 bman.set_block_all_set(bv_block_idx);
7191 break;
7192 case set_COUNT_OR: case set_COUNT_B: case set_COUNT:
7193 // block becomes all set
7194 count += bm::bits_in_block;
7195 break;
7196 case set_SUB:
7197 //blk =
7198 bman.zero_block(bv_block_idx);
7199 break;
7200 case set_COUNT_SUB_AB: case set_AND:
7201 // nothing to do, check maybe nothing else to do at all
7202 if (++empty_op_cnt > 64)
7203 {
7204 size_type last_id;
7205 bool b = bv.find_reverse(last_id);
7206 if (!b)
7207 return count;
7208 size_type last_nb = (last_id >> bm::set_block_shift);
7209 if (last_nb < bv_block_idx)
7210 return count; // early exit, nothing to do here
7211 empty_op_cnt = 0;
7212 }
7213 break;
7214 case set_COUNT_AND: case set_COUNT_A:
7215 count += blk ? bman.block_bitcount(blk) : 0;
7216 break;
7217 default:
7218 if (blk)
7219 {
7220 switch (op)
7221 {
7222 case set_XOR:
7223 blk = bman.deoptimize_block(bv_block_idx);
7225 break;
7226 case set_COUNT_XOR:
7227 {
7228 count +=
7230 blk,
7233 }
7234 break;
7235 case set_COUNT_SUB_BA:
7236 {
7237 count +=
7239 blk,
7242 }
7243 break;
7244 default:
7245 BM_ASSERT(0);
7246 } // switch
7247 }
7248 else // blk == 0
7249 {
7250 switch (op)
7251 {
7252 case set_XOR:
7253 // 0 XOR 1 = 1
7254 bman.set_block_all_set(bv_block_idx);
7255 break;
7256 case set_COUNT_XOR:
7257 count += bm::bits_in_block;
7258 break;
7259 case set_COUNT_SUB_BA:
7260 // 1 - 0 = 1
7261 count += bm::bits_in_block;
7262 break;
7263 default:
7264 break;
7265 } // switch
7266 } // else
7267 } // switch
7268 if (exit_on_one && count) // early exit
7269 return count;
7270 }
7271 break;
7272
7273 case serial_iterator_type::e_gap_block:
7274 {
7275 BM_ASSERT(bv_block_idx == sit.block_idx());
7276
7277 unsigned i0, j0;
7278 bm::get_block_coord(bv_block_idx, i0, j0);
7279 const bm::word_t* blk = bman.get_block(i0, j0);
7280
7281 sit.get_gap_block(gap_temp_block);
7282
7283 unsigned len = gap_length(gap_temp_block);
7284 int level = gap_calc_level(len, bman.glen());
7285 --len;
7286
7287 bool const_op = is_const_set_operation(op);
7288 if (const_op)
7289 {
7290 distance_metric metric = operation2metric(op);
7291 bm::word_t* gptr = (bm::word_t*)gap_temp_block;
7292 BMSET_PTRGAP(gptr);
7293 unsigned c =
7295 blk,
7296 gptr,
7297 metric);
7298 count += c;
7299 if (exit_on_one && count) // early exit
7300 return count;
7301
7302 }
7303 else // non-const set operation
7304 {
7305 if ((sop == set_ASSIGN) && blk) // target block override
7306 {
7307 bman.zero_block(bv_block_idx);
7308 sop = set_OR;
7309 }
7310 if (blk == 0) // target block not found
7311 {
7312 switch (sop)
7313 {
7314 case set_AND: case set_SUB:
7315 break;
7316 case set_OR: case set_XOR: case set_ASSIGN:
7317 bman.set_gap_block(
7318 bv_block_idx, gap_temp_block, level);
7319 break;
7320
7321 default:
7322 BM_ASSERT(0);
7323 } // switch
7324 }
7325 else // target block exists
7326 {
7327 bm::operation bop = bm::setop2op(op);
7328 if (level == -1) // too big to GAP
7329 {
7330 gap_convert_to_bitset(temp_block, gap_temp_block);
7331 bv.combine_operation_with_block(bv_block_idx,
7332 temp_block,
7333 0, // BIT
7334 bop);
7335 }
7336 else // GAP fits
7337 {
7338 set_gap_level(gap_temp_block, level);
7339 bv.combine_operation_with_block(
7340 bv_block_idx,
7341 (bm::word_t*)gap_temp_block,
7342 1, // GAP
7343 bop);
7344 }
7345 }
7346 if (exit_on_one)
7347 {
7348 bm::get_block_coord(bv_block_idx, i0, j0);
7349 blk = bman.get_block_ptr(i0, j0);
7350 if (blk)
7351 {
7352 bool z = bm::check_block_zero(blk, true/*deep check*/);
7353 if (!z)
7354 return 1;
7355 }
7356 } // if exit_on_one
7357
7358 } // if else non-const op
7359 }
7360 break;
7361
7362 default:
7363 BM_ASSERT(0);
7364 #ifndef BM_NO_STL
7365 throw std::logic_error(err_msg());
7366 #else
7367 BM_THROW(BM_ERR_SERIALFORMAT);
7368 #endif
7369 } // switch
7370
7371 ++bv_block_idx;
7372 BM_ASSERT(bv_block_idx);
7373
7374 if (is_range_set_ && (bv_block_idx > nb_range_to_))
7375 break;
7376
7377 } // for (deserialization)
7378
7379 return count;
7380}
7381
7382
7383
7384} // namespace bm
7385
7386
7387#ifdef _MSC_VER
7388#pragma warning( pop )
7389#endif
7390
7391
7392#endif
Algorithms for bvector<> (main include)
Definitions(internal)
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:162
#define IS_VALID_ADDR(addr)
Definition: bmdef.h:161
#define BM_FALLTHROUGH
Definition: bmdef.h:358
#define BMRESTRICT
Definition: bmdef.h:203
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BMPTR_SETBIT0(ptr)
Definition: bmdef.h:177
#define BMGAP_PTR(ptr)
Definition: bmdef.h:189
#define BM_IS_GAP(ptr)
Definition: bmdef.h:191
#define BMSET_PTRGAP(ptr)
Definition: bmdef.h:190
#define BM_ASSERT
Definition: bmdef.h:139
#define FULL_BLOCK_FAKE_ADDR
Definition: bmdef.h:158
#define FULL_BLOCK_REAL_ADDR
Definition: bmdef.h:157
Bit manipulation primitives (internal)
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
Definition: bmserial.h:2570
Utilities for bit transposition (internal) (experimental!)
Bit manipulation primitives (internal)
Functions and utilities for XOR filters (internal)
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:257
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1795
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition: encoding.h:275
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition: encoding.h:282
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1514
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition: encoding.h:288
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:183
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
Definition: encoding.h:230
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1294
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition: encoding.h:1187
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition: encoding.h:207
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Definition: bmfunc.h:9045
Bit-block sum adapter, takes values and sums it /internal.
Definition: bmfunc.h:9075
bm::word_t sum() const BMNOEXCEPT
Get accumulated sum.
Definition: bmfunc.h:9081
List of reference bit-vectors with their true index associations.
Definition: bmxor.h:624
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:137
size_type count() const BMNOEXCEPT
population count (count of ON bits)
Definition: bm.h:2366
bm::bvector< Alloc > & bit_and(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand AND : this := bv1 AND bv2
Definition: bm.h:5880
blocks_manager< Alloc > blocks_manager_type
Definition: bm.h:119
bvector_size_type size_type
Definition: bm.h:121
void swap(bvector< Alloc > &bvect) BMNOEXCEPT
Exchanges content of bv and this bvector.
Definition: bm.h:3931
blocks_manager_type::block_idx_type block_idx_type
Definition: bm.h:120
void clear(const size_type *ids, size_type ids_size, bm::sort_order so=bm::BM_UNKNOWN)
clear list of bits in this bitset
Definition: bm.h:4114
Alloc allocator_type
Definition: bm.h:117
bool get_bit(size_type n) const BMNOEXCEPT
returns true if bit n is set and false is bit n is 0.
Definition: bm.h:3567
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition: encoding.h:105
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition: encoding.h:93
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition: encoding.h:108
Class for decoding data from memory buffer.
Definition: encoding.h:126
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:751
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:738
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:786
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:722
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:769
Base deserialization class.
Definition: bmserial.h:494
const unsigned char * skip_pos_
decoder skip position
Definition: bmserial.h:560
static const char * err_msg() BMNOEXCEPT
Definition: bmserial.h:545
block_idx_type bookmark_idx_
last bookmark block index
Definition: bmserial.h:558
unsigned skip_offset_
bookmark to skip 256 encoded blocks
Definition: bmserial.h:559
static void read_0runs_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
read bit-block encoded as runs
Definition: bmserial.h:3521
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read inverted binary interpolated list into a bit-set.
Definition: bmserial.h:3448
block_idx_type try_skip(decoder_type &decoder, block_idx_type nb, block_idx_type expect_nb) BMNOEXCEPT
Try to skip if skip bookmark is available within reach.
Definition: bmserial.h:3679
void read_digest0_block(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read digest0-type bit-block.
Definition: bmserial.h:3481
void read_bic_gap(decoder_type &decoder, bm::word_t *blk) BMNOEXCEPT
Read binary interpolated gap blocks into a bitset.
Definition: bmserial.h:3458
bm::bit_in< DEC > bit_in_type
Definition: bmserial.h:498
unsigned * sb_id_array_
ptr to super-block idx array (temp)
Definition: bmserial.h:556
unsigned read_bic_sb_arr(decoder_type &decoder, unsigned block_type, unsigned *dst_arr, unsigned *sb_idx)
Read list of bit ids for super-blocks.
Definition: bmserial.h:3378
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
Definition: bmserial.h:3552
unsigned read_id_list(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_arr)
Read list of bit ids.
Definition: bmserial.h:3255
BLOCK_IDX block_idx_type
Definition: bmserial.h:497
bm::gap_word_t * id_array_
ptr to idx array for temp decode use
Definition: bmserial.h:555
void read_bic_arr(decoder_type &decoder, bm::word_t *blk, unsigned block_type) BMNOEXCEPT
Read binary interpolated list into a bit-set.
Definition: bmserial.h:3340
Deserializer for bit-vector.
Definition: bmserial.h:570
void reset() BMNOEXCEPT
reset range deserialization and reference vectors
Definition: bmserial.h:624
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the poi...
Definition: bmserial.h:3782
bm::match_pair xor_chain_[64]
Definition: bmserial.h:691
block_arridx_type bit_idx_arr_
Definition: bmserial.h:670
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmserial.h:667
allocator_type alloc_
Definition: bmserial.h:676
unsigned is_range_set_
Definition: bmserial.h:696
bm::id64_t x_ref_d64_
Definition: bmserial.h:688
unsigned xor_chain_size_
Definition: bmserial.h:690
void xor_decode(blocks_manager_type &bman)
Definition: bmserial.h:4620
deseriaizer_base< DEC, block_idx_type > parent_type
Definition: bmserial.h:576
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition: bmserial.h:578
void decode_arr_sblock(unsigned char btype, decoder_type &dec, bvector_type &bv)
Definition: bmserial.h:4006
void deserialize_gap(unsigned char btype, decoder_type &dec, bvector_type &bv, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
Definition: bmserial.h:3791
bvector_type::allocator_type allocator_type
Definition: bmserial.h:573
size_type or_block_idx_
Definition: bmserial.h:683
parent_type::decoder_type decoder_type
Definition: bmserial.h:577
void xor_reset() BMNOEXCEPT
Definition: bmserial.h:4736
BV::size_type size_type
Definition: bmserial.h:574
void set_range(size_type from, size_type to) BMNOEXCEPT
set deserialization range [from, to] This is NOT exact, approximate range, content outside range is n...
Definition: bmserial.h:610
allocator_pool_type pool_
Definition: bmserial.h:675
sblock_arridx_type sb_bit_idx_arr_
Definition: bmserial.h:671
size_type x_ref_idx_
Definition: bmserial.h:687
const bv_ref_vector_type * ref_vect_
ref.vector for XOR compression
Definition: bmserial.h:680
void unset_range() BMNOEXCEPT
Disable range deserialization.
Definition: bmserial.h:619
BV::blocks_manager_type blocks_manager_type
Definition: bmserial.h:629
size_type idx_to_
Definition: bmserial.h:698
block_arridx_type gap_temp_block_
Definition: bmserial.h:672
bm::word_t * xor_block_
xor product
Definition: bmserial.h:681
block_idx_type x_nb_
Definition: bmserial.h:689
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block=0)
Definition: bmserial.h:4125
bvector_type::block_idx_type block_idx_type
Definition: bmserial.h:575
void decode_arrbit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
Definition: bmserial.h:4087
void xor_decode_chain(bm::word_t *BMRESTRICT blk) BMNOEXCEPT
Definition: bmserial.h:4589
void decode_bit_block(unsigned char btype, decoder_type &dec, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
Definition: bmserial.h:3941
bm::word_t * or_block_
Definition: bmserial.h:682
bm::word_t * temp_block_
Definition: bmserial.h:673
void decode_block_bit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
Definition: bmserial.h:4040
void decode_block_bit_interval(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
Definition: bmserial.h:4060
bm::heap_vector< bm::gap_word_t, allocator_type, true > block_arridx_type
Definition: bmserial.h:665
bm::heap_vector< bm::word_t, allocator_type, true > sblock_arridx_type
Definition: bmserial.h:666
size_type idx_from_
Definition: bmserial.h:697
Memory encoding.
Definition: encoding.h:50
unsigned char * position_type
Definition: encoding.h:52
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition: encoding.h:529
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition: encoding.h:606
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition: encoding.h:434
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition: encoding.h:545
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition: encoding.h:571
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:444
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition: encoding.h:406
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT
but gat plus value based on its VBR evaluation
Definition: encoding.h:487
Functor for Elias Gamma encoding.
Definition: encoding.h:341
Iterator to walk forward the serialized stream.
Definition: bmserial.h:712
void set_range(size_type from, size_type to)
set deserialization range [from, to]
Definition: bmserial.h:6770
SerialIterator serial_iterator_type
Definition: bmserial.h:716
void unset_range() BMNOEXCEPT
disable range filtration
Definition: bmserial.h:723
bvector_type::size_type size_type
Definition: bmserial.h:715
size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
Definition: bmserial.h:6982
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:930
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition: bmserial.h:935
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition: bmserial.h:1012
void deserialize_range(bvector_type &bv, const unsigned char *buf, size_type idx_from, size_type idx_to)
Definition: bmserial.h:6659
BV::allocator_type allocator_type
Definition: bmserial.h:933
size_type deserialize(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:6579
void deserialize_range(bvector_type &bv, const unsigned char *buf, bm::word_t *, size_type idx_from, size_type idx_to)
Definition: bmserial.h:999
bvector_type::size_type size_type
Definition: bmserial.h:934
size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *, set_operation op=bm::set_OR, bool exit_on_one=false)
Obsolete! Deserialize bvector using buffer as set operation argument.
Definition: bmserial.h:985
Serialization stream iterator.
Definition: bmserial.h:771
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true) BMNOEXCEPT
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
Definition: bmserial.h:6285
bm::id_t last_id_
Last id from the id list.
Definition: bmserial.h:909
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:868
deseriaizer_base< DEC, block_idx_type > parent_type
Definition: bmserial.h:775
deseriaizer_base< DEC, BLOCK_IDX >::decoder_type decoder_type
Definition: bmserial.h:773
unsigned char header_flag_
Definition: bmserial.h:902
decoder_type & decoder()
Get low level access to the decoder (use carefully)
Definition: bmserial.h:806
void next()
get next block
Definition: bmserial.h:4855
block_idx_type bv_size_
Definition: bmserial.h:906
serial_stream_iterator(const unsigned char *buf)
Definition: bmserial.h:4745
block_idx_type mono_block_cnt_
number of 0 or 1 blocks
Definition: bmserial.h:914
unsigned block_type_
current block type
Definition: bmserial.h:912
unsigned get_id_count() const BMNOEXCEPT
Number of ids in the inverted list (valid for e_list_ids)
Definition: bmserial.h:828
unsigned get_bit() BMNOEXCEPT
Definition: bmserial.h:6323
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5523
void get_inv_arr(bm::word_t *block) BMNOEXCEPT
Definition: bmserial.h:5105
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:6087
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5303
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:6190
bm::id_t get_id() const BMNOEXCEPT
Get last id from the id list.
Definition: bmserial.h:831
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5882
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5627
bool is_eof() const
Returns true if end of bit-stream reached.
Definition: bmserial.h:785
block_idx_type block_idx_
current block index
Definition: bmserial.h:913
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5417
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5227
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5127
iterator_state state() const BMNOEXCEPT
Returns iterator internal state.
Definition: bmserial.h:824
block_idx_type block_idx() const BMNOEXCEPT
Get current block index.
Definition: bmserial.h:834
iterator_state state_
Definition: bmserial.h:907
gap_word_t * block_idx_arr_
Definition: bmserial.h:917
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5712
gap_word_t glevels_[bm::gap_levels]
GAP levels.
Definition: bmserial.h:910
iterator_state
iterator is a state machine, this enum encodes its key value
Definition: bmserial.h:812
@ e_bit_block
one bit block
Definition: bmserial.h:818
@ e_list_ids
plain int array
Definition: bmserial.h:814
@ e_zero_blocks
one or more zero bit blocks
Definition: bmserial.h:816
@ e_one_blocks
one or more all-1 bit blocks
Definition: bmserial.h:817
@ e_blocks
stream of blocks
Definition: bmserial.h:815
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
Definition: bmserial.h:6334
get_bit_func_type bit_func_table_[bm::set_END]
Definition: bmserial.h:900
unsigned id_cnt_
Id counter for id list.
Definition: bmserial.h:908
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5984
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
Definition: bmserial.h:5796
block_idx_type skip_mono_blocks() BMNOEXCEPT
skip all zero or all-one blocks
Definition: bmserial.h:5089
block_idx_type bv_size() const
serialized bitvector size
Definition: bmserial.h:782
iterator_state get_state() const BMNOEXCEPT
Definition: bmserial.h:826
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Definition: bmserial.h:6352
Bit-vector serialization class.
Definition: bmserial.h:76
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
Definition: bmserial.h:1448
void serialize(const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0)
Bitvector serialization into buffer object (resized automatically)
Definition: bmserial.h:2244
unsigned char find_bit_best_encoding(const bm::word_t *block) BMNOEXCEPT
Determine best representation for a bit-block.
Definition: bmserial.h:1780
serializer(bm::word_t *temp_block)
Definition: bmserial.h:1203
void reset_compression_stats() BMNOEXCEPT
Reset all accumulated compression statistics.
Definition: bmserial.h:1247
void bienc_arr_sblock(const BV &bv, unsigned sb, bm::encoder &enc) BMNOEXCEPT
Definition: bmserial.h:2414
void xor_tmp_product(const bm::word_t *s_block, const block_match_chain_type &mchain, unsigned i, unsigned j) BMNOEXCEPT
Compute digest based XOR product, place into tmp XOR block.
Definition: bmserial.h:2209
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:1275
void set_sim_model(const xor_sim_model_type *sim_model) BMNOEXCEPT
Atach XOR similarity model (must be computed by the same ref vector)
Definition: bmserial.h:1324
bvector_type::block_idx_type block_idx_type
Definition: bmserial.h:82
bvector_type::size_type size_type
Definition: bmserial.h:83
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block) BMNOEXCEPT
Determine best representation for GAP block based on current set compression level.
Definition: bmserial.h:1948
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition: bmserial.h:2314
void set_curr_ref_idx(size_type ref_idx) BMNOEXCEPT
Set current index in rer.vector collection (not a row idx or plain idx)
Definition: bmserial.h:1330
void interpolated_gap_array_v0(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition: bmserial.h:1532
void set_compression_level(unsigned clevel) BMNOEXCEPT
Set compression level.
Definition: bmserial.h:1254
void allow_stat_reset(bool allow) BMNOEXCEPT
Enable/disable statistics reset on each serilaization.
Definition: bmserial.h:202
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) BMNOEXCEPT
Encode bit-block using digest (hierarchical compression)
Definition: bmserial.h:2111
bm::bv_ref_vector< BV > bv_ref_vector_type
Definition: bmserial.h:86
void byte_order_serialization(bool value) BMNOEXCEPT
Set byte-order serialization (for cross platform compatibility)
Definition: bmserial.h:1281
unsigned char find_bit_best_encoding_l5(const bm::word_t *block) BMNOEXCEPT
Determine best representation for a bit-block (level 5)
Definition: bmserial.h:1664
void set_sparse_cutoff(unsigned cutoff) BMNOEXCEPT
Fine tuning for Binary Interpolative Compression (levels 5+) The parameter sets average population co...
Definition: bmserial.h:1265
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized an...
Definition: bmserial.h:2268
bvector_type::allocator_type allocator_type
Definition: bmserial.h:79
byte_buffer< allocator_type > buffer
Definition: bmserial.h:85
bool compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
Calculate XOR similarity model for ref_vector refernece vector must be associated before.
Definition: bmserial.h:1316
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Encode bit-block as an array of bits.
Definition: bmserial.h:2286
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
encode bit-block as interpolated gap block
Definition: bmserial.h:2345
bvector_type::blocks_manager_type blocks_manager_type
Definition: bmserial.h:80
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
Definition: bmserial.h:1999
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size incre...
Definition: bmserial.h:1287
unsigned get_compression_level() const BMNOEXCEPT
Get current compression level.
Definition: bmserial.h:133
void reset_models() BMNOEXCEPT
Definition: bmserial.h:383
void encode_xor_match_chain(bm::encoder &enc, const block_match_chain_type &mchain) BMNOEXCEPT
Encode XOR match chain.
Definition: bmserial.h:2164
xor_sim_model_type::block_match_chain_type block_match_chain_type
Definition: bmserial.h:90
static void process_bookmark(block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) BMNOEXCEPT
Check if bookmark needs to be placed and if so, encode it into serialization BLOB.
Definition: bmserial.h:2596
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
Definition: bmserial.h:1170
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition: bmserial.h:2505
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) BMNOEXCEPT
Encode GAP block as an array with binary interpolated coder.
Definition: bmserial.h:1580
bm::xor_sim_model< BV > xor_sim_model_type
Definition: bmserial.h:87
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) BMNOEXCEPT
Encode GAP block as delta-array with Elias Gamma coder.
Definition: bmserial.h:1487
const size_type * get_compression_stat() const BMNOEXCEPT
Return serialization counter vector.
Definition: bmserial.h:196
void add_model(unsigned char mod, unsigned score) BMNOEXCEPT
Definition: bmserial.h:1655
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
encode bit-block as interpolated bit block of gaps
Definition: bmserial.h:2355
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc) BMNOEXCEPT
Definition: bmserial.h:2305
bvector_type::statistics statistics_type
Definition: bmserial.h:81
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
Definition: bmserial.h:1302
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control) BMNOEXCEPT
Encode BIT block with repeatable runs of zeroes.
Definition: bmserial.h:2059
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) BMNOEXCEPT
Definition: bmserial.h:1388
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition: bmserial.h:2706
void encode_header(const BV &bv, bm::encoder &enc) BMNOEXCEPT
Encode serialization header information.
Definition: bmserial.h:1336
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) BMNOEXCEPT
Definition: bmserial.h:2330
XOR scanner to search for complement-similarities in collections of bit-vectors.
Definition: bmxor.h:820
Encoding utilities for serialization (internal)
unsigned bit_block_find(const bm::word_t *BMRESTRICT block, unsigned nbit, unsigned *BMRESTRICT pos) BMNOEXCEPT
Searches for the next 1 bit in the BIT block.
Definition: bmfunc.h:8389
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition: bmutil.h:573
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Bitblock copy operation.
Definition: bmfunc.h:6743
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size) BMNOEXCEPT
Inspects block for full zero words.
Definition: bmfunc.h:8333
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:6824
bm::id_t bit_block_count(const bm::word_t *block) BMNOEXCEPT
Bitcount for bit block.
Definition: bmfunc.h:5017
BMFORCEINLINE void clear_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
Definition: bmfunc.h:3716
bm::set_representation best_representation(unsigned bit_count, unsigned total_possible_bitcount, unsigned gap_count, unsigned block_size) BMNOEXCEPT
Choose best representation for a bit-block.
Definition: bmfunc.h:8706
bm::id_t bit_operation_or_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock OR operation and calculates bitcount of the result.
Definition: bmfunc.h:7605
BMFORCEINLINE unsigned word_bitcount64(bm::id64_t x) BMNOEXCEPT
Definition: bmutil.h:596
BMFORCEINLINE void set_bit(unsigned *dest, unsigned bitpos) BMNOEXCEPT
Set 1 bit in a block.
Definition: bmfunc.h:3703
unsigned bit_block_convert_to_arr(T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bool inverted) BMNOEXCEPT
Convert bit block into an array of ints corresponding to 1 bits.
Definition: bmfunc.h:8749
bm::id64_t calc_block_digest0(const bm::word_t *const block) BMNOEXCEPT
Compute digest for 64 non-zero areas.
Definition: bmfunc.h:1092
void bit_invert(T *start) BMNOEXCEPT
Definition: bmfunc.h:6023
bm::id64_t bit_block_xor(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:8166
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
Bitblock memset operation.
Definition: bmfunc.h:4422
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
Definition: bmfunc.h:7675
bm::id_t bit_operation_xor_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock XOR operation and calculates bitcount of the result.
Definition: bmfunc.h:8275
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) BMNOEXCEPT
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition: bmfunc.h:7945
bm::id_t bit_operation_and_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock AND operation and calculates bitcount of the result.
Definition: bmfunc.h:7466
bm::id_t bit_operation_sub_count(const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2) BMNOEXCEPT
Performs bitblock SUB operation and calculates bitcount of the result.
Definition: bmfunc.h:7515
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
Definition: bm.h:790
operation
Bit operations.
Definition: bmconst.h:190
set_operation
Codes of set operations.
Definition: bmconst.h:167
strategy
Block allocation strategies.
Definition: bmconst.h:145
@ BM_OR
Definition: bmconst.h:192
@ set_COUNT_SUB_AB
Definition: bmconst.h:177
@ set_OR
Definition: bmconst.h:169
@ set_COUNT_XOR
Definition: bmconst.h:175
@ set_COUNT_OR
Definition: bmconst.h:176
@ set_COUNT_B
Definition: bmconst.h:180
@ set_COUNT_SUB_BA
Definition: bmconst.h:178
@ set_ASSIGN
Definition: bmconst.h:172
@ set_SUB
Definition: bmconst.h:170
@ set_COUNT_AND
Definition: bmconst.h:174
@ set_COUNT
Definition: bmconst.h:173
@ set_AND
Definition: bmconst.h:168
@ set_XOR
Definition: bmconst.h:171
@ set_COUNT_A
Definition: bmconst.h:179
@ set_END
Definition: bmconst.h:182
@ BM_GAP
GAP compression is ON.
Definition: bmconst.h:147
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition: bmserial.h:3074
serialization_flags
Bit mask flags for serialization algorithm <>
Definition: bmserial.h:3026
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition: bmserial.h:3140
void deserialize_range(BV &bv, const unsigned char *buf, typename BV::size_type from, typename BV::size_type to, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector range deserialization from a memory BLOB.
Definition: bmserial.h:3203
@ BM_NO_GAP_LENGTH
save no GAP info (save some space)
Definition: bmserial.h:3028
@ BM_NO_BYTE_ORDER
save no byte-order info (save some space)
Definition: bmserial.h:3027
distance_metric
Distance metrics codes defined for vectors A and B.
Definition: bmalgo_impl.h:58
distance_metric operation2metric(set_operation op) BMNOEXCEPT
Convert set operation into compatible distance metric.
Definition: bmalgo_impl.h:73
@ COUNT_XOR
(A ^ B).count()
Definition: bmalgo_impl.h:60
@ COUNT_SUB_BA
(B - A).count()
Definition: bmalgo_impl.h:63
unsigned gap_bit_count_unr(const T *buf) BMNOEXCEPT
Calculates number of bits ON in GAP buffer. Loop unrolled version.
Definition: bmfunc.h:2287
D gap_convert_to_arr(D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len, bool invert=false) BMNOEXCEPT
Convert gap block into array of ints corresponding to 1 bits.
Definition: bmfunc.h:4908
gap_word_t * gap_operation_xor(const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf, unsigned &dsize) BMNOEXCEPT
GAP XOR operation.
Definition: bmfunc.h:6551
void gap_convert_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned len=0) BMNOEXCEPT
GAP block to bitblock conversion.
Definition: bmfunc.h:4441
void gap_invert(T *buf) BMNOEXCEPT
Inverts all bits in the GAP buffer.
Definition: bmfunc.h:4586
unsigned gap_set_array(T *buf, const T *arr, unsigned len) BMNOEXCEPT
Convert array to GAP buffer.
Definition: bmfunc.h:3583
void set_gap_level(T *buf, int level) BMNOEXCEPT
Sets GAP block capacity level.
Definition: bmfunc.h:4605
void gap_add_to_bitset(unsigned *BMRESTRICT dest, const T *BMRESTRICT pcurr, unsigned len) BMNOEXCEPT
Adds(OR) GAP block to bitblock.
Definition: bmfunc.h:4016
void gap_set_all(T *buf, unsigned set_max, unsigned value) BMNOEXCEPT
Sets all bits to 0 or 1 (GAP)
Definition: bmfunc.h:4518
unsigned gap_add_value(T *buf, unsigned pos) BMNOEXCEPT
Add new value to the end of GAP buffer.
Definition: bmfunc.h:3343
int gap_calc_level(unsigned len, const T *glevel_len) BMNOEXCEPT
Calculates GAP block capacity level.
Definition: bmfunc.h:4627
BMFORCEINLINE bm::gap_word_t gap_length(const bm::gap_word_t *BMRESTRICT buf) BMNOEXCEPT
Returs GAP block length.
Definition: bmfunc.h:1575
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1248
BV::size_type count_and(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of AND operation of two bitsets.
Definition: bmalgo.h:49
BV::size_type count_or(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of OR operation of two bitsets.
Definition: bmalgo.h:149
BV::size_type count_sub(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of SUB operation of two bitsets.
Definition: bmalgo.h:115
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1080
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2) BMNOEXCEPT
Computes bitcount of XOR operation of two bitsets.
Definition: bmalgo.h:81
Definition: bm.h:78
const unsigned char set_block_ref_eq
block is a copy of a reference block
Definition: bmserial.h:1133
@ e_no_xor_match
Definition: bmxor.h:82
@ e_xor_match_GC
Definition: bmxor.h:83
@ e_xor_match_BC
Definition: bmxor.h:84
@ e_xor_match_EQ
Definition: bmxor.h:86
@ e_xor_match_iBC
Definition: bmxor.h:85
const unsigned char set_block_xor_ref32_um
..... 32-bit (should never happen)
Definition: bmserial.h:1162
const unsigned set_block_digest_wave_size
Definition: bmconst.h:67
const unsigned char set_block_gap
Plain GAP block.
Definition: bmserial.h:1110
BV::size_type process_operation(BV &bv, BV &bv_tmp, bm::set_operation op)
Utility function to process operation using temp vector.
Definition: bmserial.h:6392
void bit_block_change_bc(const bm::word_t *BMRESTRICT block, unsigned *BMRESTRICT gc, unsigned *BMRESTRICT bc) BMNOEXCEPT
Definition: bmfunc.h:5218
const unsigned char set_block_arrbit_inv
List of bits OFF.
Definition: bmserial.h:1127
const unsigned char set_nb_sync_mark8
bookmark sync point (8-bits)
Definition: bmserial.h:1150
const unsigned sblock_flag_max16
Definition: bmserial.h:2409
const unsigned sblock_flag_sb16
16-bit SB index (8-bit by default)
Definition: bmserial.h:2401
const unsigned char set_block_bit_interval
Interval block.
Definition: bmserial.h:1113
const unsigned id_max
Definition: bmconst.h:109
bool check_block_zero(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks all conditions and returns true if block consists of only 0 bits.
Definition: bmfunc.h:8781
unsigned int word_t
Definition: bmconst.h:39
const unsigned char set_block_bit_digest0
H-compression with digest mask.
Definition: bmserial.h:1131
const unsigned char set_block_xor_ref32
..... 32-bit (should never happen)
Definition: bmserial.h:1136
const unsigned char set_block_arrgap_bienc_inv_v2
Interpolated GAP array (inverted)
Definition: bmserial.h:1144
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
Definition: bmserial.h:1130
unsigned char check_pair_vect_vbr(const BMChain &mchain, const RVect &ref_vect)
Check effective bit-rate for the XOR encode vector.
Definition: bmxor.h:404
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
Definition: bmserial.h:1119
bool check_block_one(const bm::word_t *blk, bool deep_scan) BMNOEXCEPT
Checks if block has only 1 bits.
Definition: bmfunc.h:8805
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
Definition: bmserial.h:1129
unsigned bit_to_gap(gap_word_t *BMRESTRICT dest, const unsigned *BMRESTRICT block, unsigned dest_len) BMNOEXCEPT
Convert bit block to GAP representation.
Definition: bmfunc.h:4836
const unsigned set_sub_array_size
Definition: bmconst.h:95
void compute_s_block_descr(const bm::word_t *BMRESTRICT block, block_waves_xor_descr &BMRESTRICT x_descr, unsigned *BMRESTRICT s_gc, unsigned *BMRESTRICT s_bc) BMNOEXCEPT
Compute reference (non-XOR) 64-dim complexity descriptor for the s-block.
Definition: bmxor.h:430
const unsigned sblock_flag_sb32
32-bit SB index
Definition: bmserial.h:2402
const unsigned char set_block_16one
UP to 65536 all-set blocks.
Definition: bmserial.h:1102
void for_each_dgap(const T *gap_buf, Func &func)
Definition: bmfunc.h:2746
BMFORCEINLINE void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) BMNOEXCEPT
Recalc linear bvector block index into 2D matrix coordinates.
Definition: bmfunc.h:172
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) BMNOEXCEPT
Internal function computes different distance metrics.
Definition: bmalgo_impl.h:189
const unsigned sblock_flag_max32
Definition: bmserial.h:2411
const unsigned sblock_flag_min32
Definition: bmserial.h:2406
const unsigned set_total_blocks
Definition: bmconst.h:111
const unsigned char set_nb_sync_mark48
Definition: bmserial.h:1154
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
Definition: bmserial.h:1125
ByteOrder
Byte orders recognized by the library.
Definition: bmconst.h:451
@ LittleEndian
Definition: bmconst.h:453
@ BigEndian
Definition: bmconst.h:452
const unsigned char set_block_8one
Up to 256 all-set blocks.
Definition: bmserial.h:1100
const unsigned char set_nb_sync_mark16
Definition: bmserial.h:1151
const unsigned char set_block_32one
UP to 4G all-set blocks.
Definition: bmserial.h:1104
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
Definition: bmserial.h:1118
const unsigned char set_block_xor_ref8_um
block is un-masked XOR of a reference block (8-bit)
Definition: bmserial.h:1160
const unsigned char set_block_64one
lots of all-set blocks
Definition: bmserial.h:1122
const unsigned char set_block_gap_bienc
Interpolated GAP block (legacy)
Definition: bmserial.h:1124
const unsigned char set_block_xor_gap_ref16
..... 16-bit
Definition: bmserial.h:1138
const unsigned set_compression_default
Default compression level.
Definition: bmserial.h:60
const unsigned char set_block_arrbit
List of bits ON.
Definition: bmserial.h:1112
const unsigned char set_block_1one
One block all-set (1111...)
Definition: bmserial.h:1098
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
Definition: bmserial.h:1120
void convert_sub_to_arr(const BV &bv, unsigned sb, VECT &vect)
convert sub-blocks to an array of set 1s (32-bit)
Definition: bmalgo_impl.h:2016
const unsigned gap_levels
Definition: bmconst.h:85
const unsigned char set_block_gapbit
GAP compressed bitblock.
Definition: bmserial.h:1111
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size) BMNOEXCEPT
Definition: bmfunc.h:9124
const unsigned char set_block_arr_bienc_8bh
BIC block 8bit header.
Definition: bmserial.h:1158
bm::operation setop2op(bm::set_operation op) BMNOEXCEPT
Convert set operation to operation.
Definition: bmfunc.h:1311
const unsigned char set_sblock_bienc
super-block interpolated list
Definition: bmserial.h:1157
const unsigned char set_block_xor_chain
XOR chain (composit of sub-blocks)
Definition: bmserial.h:1140
const unsigned char set_nb_bookmark32
jump ahead mark (32-bit)
Definition: bmserial.h:1149
const unsigned sblock_flag_max24
Definition: bmserial.h:2410
const unsigned set_sub_total_bits
Definition: bmconst.h:100
const unsigned set_block_size
Definition: bmconst.h:55
unsigned long long int id64_t
Definition: bmconst.h:35
const unsigned block_waves
Definition: bmconst.h:66
const unsigned char set_block_xor_ref16
block is masked XOR of a reference block (16-bit)
Definition: bmserial.h:1135
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted)
Definition: bmserial.h:1126
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
Definition: bmserial.h:1117
const unsigned gap_equiv_len
Definition: bmconst.h:82
const unsigned char set_block_1zero
One all-zero block.
Definition: bmserial.h:1097
const unsigned sblock_flag_min24
24-bit minv
Definition: bmserial.h:2405
const unsigned char set_block_end
End of serialization.
Definition: bmserial.h:1096
unsigned int id_t
Definition: bmconst.h:38
const unsigned gap_max_buff_len
Definition: bmconst.h:80
const unsigned char set_block_arrgap_bienc_v2
//!< Interpolated GAP array (v2)
Definition: bmserial.h:1143
const unsigned char set_block_xor_gap_ref32
..... 32-bit (should never happen)
Definition: bmserial.h:1139
const unsigned char set_block_arrgap
List of bits ON (GAP block)
Definition: bmserial.h:1114
bit_representation
Possible representations for bit sets.
Definition: bmfunc.h:9855
@ e_bit_INT
Int list.
Definition: bmfunc.h:9857
@ e_bit_GAP
GAPs.
Definition: bmfunc.h:9856
@ e_bit_bit
uncompressed bit-block
Definition: bmfunc.h:9861
@ e_bit_IINT
Inverted int list.
Definition: bmfunc.h:9858
@ e_bit_end
Definition: bmfunc.h:9862
@ e_bit_1
all 1s
Definition: bmfunc.h:9859
@ e_bit_0
all 0s (empty block)
Definition: bmfunc.h:9860
const unsigned char set_nb_sync_mark64
..... 64-bit (should never happen)
Definition: bmserial.h:1155
const unsigned char set_nb_sync_mark32
Definition: bmserial.h:1153
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
Definition: bmserial.h:1109
const unsigned sparse_max_l5
Definition: bmserial.h:1166
serialization_header_mask
Definition: bmserial.h:1080
@ BM_HM_NO_GAPL
no GAP levels
Definition: bmserial.h:1085
@ BM_HM_ID_LIST
id list stored
Definition: bmserial.h:1083
@ BM_HM_NO_BO
no byte-order
Definition: bmserial.h:1084
@ BM_HM_SPARSE
very sparse vector
Definition: bmserial.h:1088
@ BM_HM_DEFAULT
Definition: bmserial.h:1081
@ BM_HM_64_BIT
64-bit vector
Definition: bmserial.h:1086
@ BM_HM_RESIZE
resized vector
Definition: bmserial.h:1082
@ BM_HM_HXOR
horizontal XOR compression turned ON
Definition: bmserial.h:1087
BMFORCEINLINE unsigned long long bmi_bslr_u64(unsigned long long w) BMNOEXCEPT
Definition: bmutil.h:335
const unsigned char set_block_arr_bienc
Interpolated block as int array.
Definition: bmserial.h:1128
const unsigned char set_block_64zero
lots of zero blocks
Definition: bmserial.h:1121
const unsigned char set_block_gap_bienc_v2
Interpolated GAP block (v2)
Definition: bmserial.h:1142
const unsigned char set_block_xor_ref8
block is masked XOR of a reference block (8-bit)
Definition: bmserial.h:1134
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
Definition: bmserial.h:1116
unsigned short gap_word_t
Definition: bmconst.h:78
const unsigned char set_block_32zero
Up to 4G zero blocks.
Definition: bmserial.h:1103
const unsigned char set_block_8zero
Up to 256 zero blocks.
Definition: bmserial.h:1099
const unsigned char set_block_xor_ref16_um
block is un-masked XOR of a reference block (16-bit)
Definition: bmserial.h:1161
const unsigned gap_max_bits
Definition: bmconst.h:81
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
Definition: bmserial.h:1115
const unsigned char set_block_aone
All other blocks one.
Definition: bmserial.h:1106
const unsigned char set_nb_bookmark16
jump ahead mark (16-bit)
Definition: bmserial.h:1147
const unsigned set_block_shift
Definition: bmconst.h:56
const unsigned sblock_flag_len16
16-bit len (8-bit by default)
Definition: bmserial.h:2408
const unsigned set_compression_max
Maximum supported compression level.
Definition: bmserial.h:59
BMFORCEINLINE unsigned long long bmi_blsi_u64(unsigned long long w)
Definition: bmutil.h:345
const unsigned char set_nb_sync_mark24
Definition: bmserial.h:1152
const unsigned char set_block_xor_gap_ref8
..... 8-bit
Definition: bmserial.h:1137
unsigned short short_t
Definition: bmconst.h:40
const unsigned sparse_max_l6
Definition: bmserial.h:1167
const unsigned char set_block_azero
All other blocks zero.
Definition: bmserial.h:1105
const unsigned bits_in_block
Definition: bmconst.h:114
const unsigned char set_block_16zero
Up to 65536 zero blocks.
Definition: bmserial.h:1101
const unsigned char set_block_bit
Plain bit block.
Definition: bmserial.h:1107
const unsigned char set_block_bitgap_bienc_v2
Interpolated bit-block as GAPs (v2 - reseved)
Definition: bmserial.h:1145
const unsigned char set_nb_bookmark24
jump ahead mark (24-bit)
Definition: bmserial.h:1148
const unsigned sblock_flag_min16
16-bit minv
Definition: bmserial.h:2404
bool is_const_set_operation(set_operation op) BMNOEXCEPT
Returns true if set operation is constant (bitcount)
Definition: bmfunc.h:1302
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
Definition: bmserial.h:1108
Bit COUNT OR functor.
Definition: bmfunc.h:9192
Bit COUNT SUB AB functor.
Definition: bmfunc.h:9199
Bit SUB BA functor.
Definition: bmfunc.h:9205
Bit COUNT XOR functor.
Definition: bmfunc.h:9186
XOR match chain.
Definition: bmxor.h:290
BLOCK_IDX nb
Definition: bmxor.h:291
unsigned chain_size
Definition: bmxor.h:292
bm::id64_t xor_d64[64]
Definition: bmxor.h:294
unsigned ref_idx[64]
Definition: bmxor.h:293
bm::xor_complement_match match
Definition: bmxor.h:295
Structure to compute XOR gap-count profile by sub-block waves.
Definition: bmxor.h:230
unsigned short sb_bc[bm::block_waves]
BIT counts.
Definition: bmxor.h:233
unsigned short sb_gc[bm::block_waves]
GAP counts.
Definition: bmxor.h:232
size_t max_serialize_mem
estimated maximum memory for serialization
Definition: bmfunc.h:61
Statistical information about bitset's memory allocation details.
Definition: bm.h:125
static ByteOrder byte_order()
Definition: bmconst.h:486
XOR match pair.
Definition: bmxor.h:274
Bookmark state structure.
Definition: bmserial.h:389
unsigned bm_type_
0:32-bit, 1: 24-bit, 2: 16-bit
Definition: bmserial.h:408
bookmark_state(block_idx_type nb_range) BMNOEXCEPT
Definition: bmserial.h:390
size_t min_bytes_range_
minumal distance (bytes) between marks
Definition: bmserial.h:409
block_idx_type nb_
bookmark block idx
Definition: bmserial.h:406
unsigned char * ptr_
bookmark pointer
Definition: bmserial.h:405
block_idx_type nb_range_
target bookmark range in blocks
Definition: bmserial.h:407
XOR similarity model.
Definition: bmxor.h:791
Parameters for XOR similarity search.
Definition: bmxor.h:59