BitMagic-C++
bmsparsevec_serial.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_SERIAL__H__INCLUDED__
2#define BMSPARSEVEC_SERIAL__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmsparsevec_serial.h
22 \brief Serialization for sparse_vector<>
23*/
24
25
26#ifndef BM__H__INCLUDED__
27// BitMagic utility headers do not include main "bm.h" declaration
28// #include "bm.h" or "bm64.h" explicitly
29# error missing include (bm.h or bm64.h)
30#endif
31
32#include "bmsparsevec.h"
33#include "bmserial.h"
34#include "bmbuffer.h"
35#include "bmdef.h"
36
37namespace bm
38{
39
40/** \defgroup svserial Sparse vector serialization
41 Sparse vector serialization
42 \ingroup svector
43 */
44
45
46/*!
47 \brief layout class for serialization buffer structure
48
49 Class keeps a memory block sized for the target sparse vector BLOB.
50 This class also provides access to bit-plane memory, so it becomes possible
51 to use parallel storage methods to save bit-planes into
52 different storage shards.
53
54 \ingroup svserial
55*/
56template<class SV>
58{
59 typedef typename SV::value_type value_type;
63
65
67
68 /*!
69 \brief resize capacity
70 \param capacity - new capacity
71 \return new buffer or 0 if failed
72 */
73 unsigned char* reserve(size_t capacity)
74 {
75 if (capacity == 0)
76 {
77 freemem();
78 return 0;
79 }
80 buf_.reinit(capacity);
81 return buf_.data();
82 }
83
84 /// return current serialized size
85 size_t size() const BMNOEXCEPT { return buf_.size(); }
86
87 /// Set new serialized size
88 void resize(size_t ssize) { buf_.resize(ssize); }
89
90 /// return serialization buffer capacity
91 size_t capacity() const BMNOEXCEPT { return buf_.capacity(); }
92
93 /// free memory
94 void freemem() BMNOEXCEPT { buf_.release(); }
95
96 /// Set plane output pointer and size
97 void set_plane(unsigned i, unsigned char* ptr, size_t buf_size) BMNOEXCEPT
98 {
99 plane_ptrs_[i] = ptr;
100 plane_size_[i] = buf_size;
101 }
102
103 /// Get plane pointer
104 const unsigned char* get_plane(unsigned i) const BMNOEXCEPT
105 { return plane_ptrs_[i]; }
106
107 /// Return serialization buffer pointer
108 const unsigned char* buf() const BMNOEXCEPT { return buf_.buf(); }
109 /// Return serialization buffer pointer
110 const unsigned char* data() const BMNOEXCEPT { return buf_.buf(); }
111
112 /// Resize for the target number of plains / bit-slices
113 void resize_slices(unsigned new_slices_size)
114 {
115 plane_ptrs_.resize(new_slices_size);
116 plane_size_.resize(new_slices_size);
117 }
118
119private:
121 void operator=(const sparse_vector_serial_layout&);
122protected:
123 typedef bm::heap_vector<unsigned char*, allocator_type, true> ptr_vector_type;
124 typedef bm::heap_vector<size_t, allocator_type, true> sizet_vector_type;
125
126
127 buffer_type buf_; ///< serialization buffer
128 ptr_vector_type plane_ptrs_; ///< pointers on serialized bit-planes
129 sizet_vector_type plane_size_; ///< serialized plane size
130// unsigned char* plane_ptrs_[SV::sv_slices]; ///< pointers on serialized bit-planes
131// size_t plane_size_[SV::sv_slices]; ///< serialized plane size
132};
133
134// -------------------------------------------------------------------------
135
136/*!
137 \brief Serialize sparse vector into a memory buffer(s) structure
138
139 Serialization format:
140
141 | HEADER | BIT-VECTORS ... | REMAP_MATRIX
142
143 Header structure:
144 -----------------
145 BYTE+BYTE: Magic-signature 'BM' or 'BC' (c-compressed)
146 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
147 {
148 BYTE : Number of Bit-vector planes (total) (non-zero when < 255 planes)
149 |
150 BYTE: zero - flag of large plane matrix
151 INT64: Number of bit-vector planes
152 }
153 INT64: Vector size
154 INT64: Offset of plane 0 from the header start (value 0 means plane is empty)
155 INT64: Offset of plane 1 from
156 ...
157 INT32: reserved
158
159Bit-vectors:
160------------
161 Based on current bit-vector serialization
162
163Remap Matrix:
164 SubHeader | Matrix BLOB
165
166 sub-header:
167 BYTE: 'R' (remapping) or 'N' (no remapping)
168 N - means no other info is saved on the stream
169 INT64: remap matrix size
170
171 \ingroup svector
172 \ingroup svserial
173*/
174template<typename SV>
176{
177public:
181 typedef typename SV::value_type value_type;
182 typedef typename SV::size_type size_type;
184 typedef typename alloc_type::allocator_pool_type allocator_pool_type;
185 typedef typename
187 typedef typename
189
190
191public:
193
194
195 /*! @name Compression settings */
196 ///@{
197
198 /**
199 Add skip-markers for faster range deserialization
200
201 @param enable - TRUE searilization will add bookmark codes
202 @param bm_interval - bookmark interval in (number of blocks)
203 (suggested between 4 and 512)
204 smaller interval means more bookmarks added to the skip list thus
205 more increasing the BLOB size
206 */
207 void set_bookmarks(bool enable, unsigned bm_interval = 256) BMNOEXCEPT
208 { bvs_.set_bookmarks(enable, bm_interval); }
209
210 /**
211 Enable XOR compression on vector serialization
212 @sa set_xor_ref
213 @sa disable_xor_compression
214 */
216 { set_xor_ref(true); }
217
218 /**
219 Disable XOR compression on serialization
220 */
222 { set_xor_ref((const bv_ref_vector_type*)0); }
223
224 /** Turn ON and OFF XOR compression of sparse vectors
225 Enables XOR reference compression for the sparse vector.
226 Default: disabled
227 Reference bit-vectors from the sparse vector itself
228 */
229 void set_xor_ref(bool is_enabled) BMNOEXCEPT;
230
231 /** Enable external XOR serialization via external reference vectors
232 (data frame ref. vector).
233 This method is useful when we serialize a group of related
234 sparse vectors which benefit from XOR referential compression
235
236 @param bv_ref_ptr - external reference vector
237 if NULL - resets the use of reference vector
238 */
239 void set_xor_ref(const bv_ref_vector_type* bv_ref_ptr) BMNOEXCEPT;
240
241 /**
242 Calculate XOR similarity model for ref_vector
243 reference vector must be associated before
244 @sa set_ref_vectors, set_sim_model
245 @internal
246 */
248 const bv_ref_vector_type& ref_vect,
249 const bm::xor_sim_params& params);
250
251 /**
252 Attach serializer to a pre-computed similarity model
253 @param sim_model - pointer to external computed model
254 */
255 void set_sim_model(const xor_sim_model_type* sim_model) BMNOEXCEPT;
256
257 /**
258 Returns the XOR reference compression status (enabled/disabled)
259 */
260 bool is_xor_ref() const BMNOEXCEPT { return is_xor_ref_; }
261
262 ///@}
263
264 /*! @name Serialization */
265 ///@{
266
267 /*!
268 \brief Serialize sparse vector into a memory buffer(s) structure
269
270 \param sv - sparse vector to serialize
271 \param sv_layout - buffer structure to keep the result
272 as defined in bm::serialization_flags
273 */
274 void serialize(const SV& sv,
276
277 /** Get access to the underlying bit-vector serializer
278 This access can be used to fine tune compression settings
279 @sa bm::serializer::set_compression_level
280 */
282 { return bvs_; }
283
284 ///@}
285
286
287protected:
288 void build_xor_ref_vector(const SV& sv);
289
290 static
291 void build_plane_digest(bvector_type& digest_bv, const SV& sv);
292
293 typedef typename SV::remap_matrix_type remap_matrix_type;
294
295 /// serialize the remap matrix used for SV encoding
296 void encode_remap_matrix(bm::encoder& enc, const SV& sv);
297
298 typedef bm::heap_vector<unsigned, alloc_type, true> u32_vector_type;
301private:
303 sparse_vector_serializer& operator=(const sparse_vector_serializer&) = delete;
304
305protected:
307
308 bvector_type plane_digest_bv_; ///< bv.digest of bit-planes
309 buffer_type plane_digest_buf_; ///< serialization buf
311
313 // XOR compression member vars
319};
320
321/**
322 sparse vector de-serializer
323
324*/
325template<typename SV>
327{
328public:
332 typedef typename SV::value_type value_type;
333 typedef typename SV::size_type size_type;
336
337public:
340
341 /** Set external XOR reference vectors
342 (data frame reference vectors)
343
344 @param bv_ref_ptr - external reference vector
345 if NULL - resets the use of reference
346 */
347 void set_xor_ref(bv_ref_vector_type* bv_ref_ptr);
348
349 /*!
350 Deserialize sparse vector
351
352 @param sv - [out] target sparse vector to populate
353 @param buf - input BLOB source memory pointer
354 @param clear_sv - if true clears the target vector
355
356 @sa deserialize_range
357 */
358 void deserialize(SV& sv,
359 const unsigned char* buf,
360 bool clear_sv = true);
361
362 /*!
363 Deserialize sparse vector for the range [from, to]
364
365 @param sv - [out] target sparse vector to populate
366 @param buf - input BLOB source memory pointer
367 @param from - start vector index for deserialization range
368 @param to - end vector index for deserialization range
369 @param clear_sv - if true clears the target vector
370
371 */
372 void deserialize_range(SV& sv, const unsigned char* buf,
373 size_type from, size_type to,
374 bool clear_sv = true);
375
376 /*!
377 Better use deserialize_range()
378 @sa deserialize_range
379 */
380 void deserialize(SV& sv, const unsigned char* buf,
381 size_type from, size_type to)
382 {
383 deserialize_range(sv, buf, from, to);
384 }
385
386
387
388 /*!
389 Deserialize sparse vector using address mask vector
390 Address mask defines (by set bits) which vector elements to be extracted
391 from the compressed BLOB
392
393 @param sv - [out] target sparse vector to populate
394 @param buf - source memory pointer
395 @param mask_bv - AND mask bit-vector (address gather vector)
396 */
397 void deserialize(SV& sv,
398 const unsigned char* buf,
399 const bvector_type& mask_bv)
400 { idx_range_set_ = false;
401 deserialize_sv(sv, buf, &mask_bv, true);
402 }
403
404
405 /*!
406 Load serialization descriptor, create vectors
407 but DO NOT perform full deserialization
408 @param sv - [out] target sparse vector to populate
409 @param buf - source memory pointer
410 */
411 void deserialize_structure(SV& sv,
412 const unsigned char* buf);
413
414
415protected:
417
418
419 /// Deserialize header/version and other common info
420 ///
421 /// @return number of bit-planes
422 ///
423 unsigned load_header(bm::decoder& dec, SV& sv, unsigned char& matr_s_ser);
424
425 void deserialize_sv(SV& sv, const unsigned char* buf,
426 const bvector_type* mask_bv,
427 bool clear_sv);
428
429
430 /// deserialize bit-vector planes
431 void deserialize_planes(SV& sv, unsigned planes,
432 const unsigned char* buf,
433 const bvector_type* mask_bv = 0);
434
435 /// load offset table
436 void load_planes_off_table(const unsigned char* buf, bm::decoder& dec, unsigned planes);
437
438 /// load NULL bit-plane (returns new planes count)
439 int load_null_plane(SV& sv,
440 int planes,
441 const unsigned char* buf,
442 const bvector_type* mask_bv);
443
444 /// load string remap dict
445 void load_remap(SV& sv, const unsigned char* remap_buf_ptr);
446
447 /// throw error on incorrect deserialization
448 static void raise_invalid_header();
449 /// throw error on incorrect deserialization
450 static void raise_invalid_64bit();
451 /// throw error on incorrect deserialization
452 static void raise_invalid_bitdepth();
453 /// throw error on incorrect deserialization
454 static void raise_invalid_format();
455 /// throw error on incorrect deserialization
456 static void raise_missing_remap_matrix();
457 /// setup deserializers
459
460 /// unset XOR compression vectors
462
463private:
465 sparse_vector_deserializer& operator=(const sparse_vector_deserializer&) = delete;
466
467 typedef bm::heap_vector<unsigned, alloc_type, true> rlen_vector_type;
468
469protected:
470 const unsigned char* remap_buf_ptr_;
474
475 bvector_type plane_digest_bv_; // digest of bit-planes
478
484 bm::heap_vector<size_t, alloc_type, true> off_vect_;
485 bm::heap_vector<unsigned, alloc_type, true> off32_vect_;
486 rlen_vector_type remap_rlen_vect_;
487
488 // XOR compression variables
489 bv_ref_vector_type bv_ref_; ///< reference vector
490 bv_ref_vector_type* bv_ref_ptr_; ///< external ref
491
492 // Range deserialization parameters
496};
497
498
499
500/*!
501 \brief Serialize sparse vector into a memory buffer(s) structure
502
503 \param sv - sparse vector to serialize
504 \param sv_layout - buffer structure to keep the result
505 \param temp_block - temporary buffer
506 (allocate with BM_DECLARE_TEMP_BLOCK(x) for speed)
507
508 \ingroup svserial
509
510 @sa serialization_flags
511 @sa sparse_vector_deserializer
512*/
513template<class SV>
515 const SV& sv,
517 bm::word_t* temp_block = 0)
518{
519 (void)temp_block;
521// sv_serializer.enable_xor_compression();
522 sv_serializer.serialize(sv, sv_layout);
523}
524
525// -------------------------------------------------------------------------
526
527/*!
528 \brief Deserialize sparse vector
529 \param sv - target sparse vector
530 \param buf - source memory buffer
531 \param temp_block - temporary block buffer to avoid re-allocations
532
533 \return 0 (error processing via std::logic_error)
534
535 \ingroup svserial
536 @sa sparse_vector_deserializer
537*/
538template<class SV>
540 const unsigned char* buf,
541 bm::word_t* temp_block=0)
542{
543 (void)temp_block;
545 sv_deserializer.deserialize(sv, buf);
546 return 0;
547}
548
549// -------------------------------------------------------------------------
550
551/**
552 Serializer for compressed collections
553*/
554template<class CBC>
556{
557public:
560 typedef typename CBC::buffer_type buffer_type;
561 typedef typename CBC::statistics statistics_type;
562 typedef typename CBC::address_resolver_type address_resolver_type;
563
564public:
565 void serialize(const CBC& buffer_coll,
566 buffer_type& buf,
567 bm::word_t* temp_block = 0);
568};
569
570/**
571 Deserializer for compressed collections
572*/
573template<class CBC>
575{
576public:
580 typedef typename CBC::buffer_type buffer_type;
581 typedef typename CBC::statistics statistics_type;
582 typedef typename CBC::address_resolver_type address_resolver_type;
583 typedef typename CBC::container_type container_type;
584
585public:
586 int deserialize(CBC& buffer_coll,
587 const unsigned char* buf,
588 bm::word_t* temp_block=0);
589};
590
591
592// -------------------------------------------------------------------------
593
594/**
595 \brief Serialize compressed collection into memory buffer
596
597Serialization format:
598
599
600<pre>
601 | MAGIC_HEADER | ADDRESS_BITVECTOR | LIST_OF_BUFFER_SIZES | BUFFER(s)
602
603 MAGIC_HEADER:
604 BYTE+BYTE: Magic-signature 'BM' or 'BC'
605 BYTE : Byte order ( 0 - Big Endian, 1 - Little Endian)
606
607 ADDRESS_BITVECTOR:
608 INT64: address bit-vector size
609 [memblock]: serialized address bit-vector
610
611 LIST_OF_BUFFER_SIZES:
612 INT64 - buffer sizes count
613 INT64 - buffer size 0
614 INT64 - buffer size 1
615 ...
616
617 BUFFERS:
618 [memblock]: block0
619 [memblock]: block1
620 ...
621
622</pre>
623*/
624
625template<class CBC>
627 buffer_type& buf,
628 bm::word_t* temp_block)
629{
631 buffer_coll.calc_stat(&st);
632
633 buf.resize(st.max_serialize_mem);
634
635 // ptr where bit-planes start
636 unsigned char* buf_ptr = buf.data();
637
638 bm::encoder enc(buf.data(), buf.capacity());
640 enc.put_8('B');
641 enc.put_8('C');
642 enc.put_8((unsigned char)bo);
643
644 unsigned char* mbuf1 = enc.get_pos(); // bookmark position
645 enc.put_64(0); // address vector size (reservation)
646
647 buf_ptr = enc.get_pos();
648
649 const address_resolver_type& addr_res = buffer_coll.resolver();
650 const bvector_type& bv = addr_res.get_bvector();
651 {
652 bm::serializer<bvector_type > bvs(temp_block);
653 bvs.gap_length_serialization(false);
654
655 size_t addr_bv_size = bvs.serialize(bv, buf_ptr, buf.size());
656 buf_ptr += addr_bv_size;
657
658 enc.set_pos(mbuf1); // rewind to bookmark
659 enc.put_64(addr_bv_size); // save the address vector size
660 }
661 enc.set_pos(buf_ptr); // restore stream position
662 size_t coll_size = buffer_coll.size();
663
664 enc.put_64(coll_size);
665
666 // pass 1 (save buffer sizes)
667 {
668 for (unsigned i = 0; i < buffer_coll.size(); ++i)
669 {
670 const buffer_type& cbuf = buffer_coll.get(i);
671 size_t sz = cbuf.size();
672 enc.put_64(sz);
673 } // for i
674 }
675 // pass 2 (save buffers)
676 {
677 for (unsigned i = 0; i < buffer_coll.size(); ++i)
678 {
679 const buffer_type& cbuf = buffer_coll.get(i);
680 size_t sz = cbuf.size();
681 enc.memcpy(cbuf.buf(), sz);
682 } // for i
683 }
684 buf.resize(enc.size());
685}
686
687// -------------------------------------------------------------------------
688template<class CBC>
690 CBC& buffer_coll,
691 const unsigned char* buf,
692 bm::word_t* temp_block)
693{
694 // TODO: implement correct processing of byte-order corect deserialization
695 // ByteOrder bo_current = globals<true>::byte_order();
696
697 bm::decoder dec(buf);
698 unsigned char h1 = dec.get_8();
699 unsigned char h2 = dec.get_8();
700
701 BM_ASSERT(h1 == 'B' && h2 == 'C');
702 if (h1 != 'B' && h2 != 'C') // no magic header? issue...
703 {
704 return -1;
705 }
706 //unsigned char bv_bo =
707 dec.get_8();
708
709 // -----------------------------------------
710 // restore address resolver
711 //
712 bm::id64_t addr_bv_size = dec.get_64();
713
714 const unsigned char* bv_buf_ptr = dec.get_pos();
715
716 address_resolver_type& addr_res = buffer_coll.resolver();
717 bvector_type& bv = addr_res.get_bvector();
718 bv.clear();
719
720 bm::deserialize(bv, bv_buf_ptr, temp_block);
721 addr_res.sync();
722
723 typename bvector_type::size_type addr_cnt = bv.count();
724 dec.seek((int)addr_bv_size);
725
726 // -----------------------------------------
727 // read buffer sizes
728 //
729 bm::id64_t coll_size = dec.get_64();
730 if (coll_size != addr_cnt)
731 {
732 return -2; // buffer size collection does not match address vector
733 }
734
735 typedef size_t vect_size_type;
736 bm::heap_vector<bm::id64_t, allocator_type, true> buf_size_vec;
737
738 buf_size_vec.resize(vect_size_type(coll_size));
739 {
740 for (unsigned i = 0; i < coll_size; ++i)
741 {
742 bm::id64_t sz = dec.get_64();
743 buf_size_vec[i] = sz;
744 } // for i
745 }
746
747 {
748 container_type& buf_vect = buffer_coll.container();
749 buf_vect.resize(vect_size_type(coll_size));
750 for (unsigned i = 0; i < coll_size; ++i)
751 {
752 bm::id64_t sz = buf_size_vec[i];
753 buffer_type& b = buf_vect.at(i);
754 b.resize(sz);
755 dec.memcpy(b.data(), size_t(sz));
756 } // for i
757 }
758 buffer_coll.sync();
759 return 0;
760}
761
762// -------------------------------------------------------------------------
763//
764// -------------------------------------------------------------------------
765
766template<typename SV>
768: bv_ref_ptr_(0)
769{
771 #ifdef BMXORCOMP
772 is_xor_ref_ = true;
773 #else
774 is_xor_ref_ = false;
775 #endif
776}
777
778// -------------------------------------------------------------------------
779
780template<typename SV>
782 const bv_ref_vector_type* bv_ref_ptr) BMNOEXCEPT
783{
784 bv_ref_ptr_ = bv_ref_ptr;
785 is_xor_ref_ = bool(bv_ref_ptr);
786 sim_model_ptr_ = 0;
787}
788
789// -------------------------------------------------------------------------
790
791template<typename SV>
793{
794 bv_ref_ptr_ = 0; // reset external ref.vector
795 is_xor_ref_ = is_enabled;
796}
797
798// -------------------------------------------------------------------------
799
800template<typename SV>
802 xor_sim_model_type& sim_model,
803 const bv_ref_vector_type& ref_vect,
804 const xor_sim_params& params)
805{
806 bvs_.compute_sim_model(sim_model, ref_vect, params);
807}
808
809// -------------------------------------------------------------------------
810
811template<typename SV>
813 const xor_sim_model_type* sim_model) BMNOEXCEPT
814{
815 sim_model_ptr_ = sim_model;
816}
817
818// -------------------------------------------------------------------------
819
820template<typename SV>
822{
823 //bv_ref_.reset();
824 bv_ref_.build(sv.get_bmatrix());
825}
826
827// -------------------------------------------------------------------------
828
829template<typename SV>
831 const SV& sv)
832{
833 const typename SV::remap_matrix_type* rm = sv.get_remap_matrix();
834 BM_ASSERT(rm);
835
836 const remap_matrix_type& rmatr = *rm;
837
838 size_t rows = rmatr.rows();
839 size_t cols = rmatr.cols();
840
841 BM_ASSERT(cols <= 256);
842 BM_ASSERT(rows <= ~0u);
843
844 // compute CSR capacity vector
845 remap_rlen_vect_.resize(0);
846 for (size_t r = 0; r < rows; ++r)
847 {
848 const unsigned char* BMRESTRICT remap_row = rmatr.row(r);
849 size_t cnt = bm::count_nz(remap_row, cols);
850 if (!cnt)
851 break;
852 remap_rlen_vect_.push_back(unsigned(cnt));
853 } // for r
854
855 rows = remap_rlen_vect_.size(); // effective rows in the remap table
856
857 size_t csr_size_max = rows * sizeof(bm::gap_word_t);
858 for (size_t r = 0; r < rows; ++r)
859 {
860 unsigned rl = remap_rlen_vect_[r];
861 csr_size_max += rl * 2;
862 } // for r
863
864 size_t remap_size = sv.remap_size();
865
866 if (remap_size < csr_size_max)
867 {
868 const unsigned char* matrix_buf = sv.get_remap_buffer();
869 BM_ASSERT(matrix_buf);
870 BM_ASSERT(remap_size);
871
872 enc.put_8('R');
873 enc.put_64(remap_size);
874 enc.memcpy(matrix_buf, size_t(remap_size));
875 }
876 else
877 {
878 enc.put_8('C'); // Compressed sparse row (CSR)
879 enc.put_32(unsigned(rows));
880 enc.put_16(bm::gap_word_t(cols)); // <= 255 chars
881
882 {
884 for (size_t r = 0; r < rows; ++r)
885 {
886 unsigned rl = remap_rlen_vect_[r];
887 bo.gamma(rl);
888 } // for r
889 }
890
891 for (size_t r = 0; r < rows; ++r)
892 {
893 const unsigned char* BMRESTRICT row = rmatr.row(r);
894 for (size_t j = 0; j < cols; ++j)
895 {
896 unsigned char v = row[j];
897 if (v)
898 {
899 enc.put_8((unsigned char)j);
900 enc.put_8(v);
901 }
902 } // for j
903 } // for r
904 }
905
906 enc.put_8('E'); // end of matrix (integrity check token)
907}
908
909// -------------------------------------------------------------------------
910
911template<typename SV>
913 const SV& sv)
914{
915 digest_bv.init();
916 digest_bv.clear(false);
917 unsigned planes = (unsigned)sv.get_bmatrix().rows();
918 for (unsigned i = 0; i < planes; ++i)
919 {
920 typename SV::bvector_type_const_ptr bv = sv.get_slice(i);
921 if (bv)
922 digest_bv.set_bit_no_check(i);
923 } // for i
924}
925
926// -------------------------------------------------------------------------
927
928template<typename SV>
931{
932 bvs_.allow_stat_reset(false); // stats accumulate mode for all bit-slices
933 bvs_.reset_compression_stats();
934
935 if (!sv.size()) // special case of an empty vector
936 {
937 unsigned char* buf = sv_layout.reserve(4);
938 buf[0]='B'; buf[1] = 'Z';
939 sv_layout.resize(2);
940 return;
941 }
942
943 build_plane_digest(plane_digest_bv_, sv);
944 bvs_.set_ref_vectors(0); // disable possible XOR compression for offs.bv
945 bvs_.serialize(plane_digest_bv_, plane_digest_buf_);
946
947 unsigned planes = (unsigned)sv.get_bmatrix().rows();
948 sv_layout.resize_slices(planes);
949
950 // ----------------------------------------------------
951 // memory pre-reservation
952 //
953 typename SV::statistics sv_stat;
954 sv.calc_stat(&sv_stat);
955 sv_stat.max_serialize_mem += plane_digest_buf_.size() + (8 * planes);
956 unsigned char* buf = sv_layout.reserve(sv_stat.max_serialize_mem);
957
958 // ----------------------------------------------------
959 //
960 bm::encoder enc(buf, sv_layout.capacity());
961
962 // header size in bytes
963 unsigned h_size = 1 + 1 + // "BM" or "BC" (magic header)
964 1 + // byte-order
965 1 + // number of bit-planes (for vector)
966 8 + // size (internal 64-bit)
967 8 + // offset to digest (64-bit)
968 4; // reserve
969 // for large plane matrixes
970 {
971 h_size += 1 + // version number
972 8; // number of planes (64-bit)
973 }
974
975 // ----------------------------------------------------
976 // Setup XOR reference compression
977 //
978 if (is_xor_ref())
979 {
980 if (bv_ref_ptr_) // use external reference
981 {
982 // ref vector and similarity model, both must(!) be set
983 BM_ASSERT(sim_model_ptr_);
984 bvs_.set_ref_vectors(bv_ref_ptr_);
985 bvs_.set_sim_model(sim_model_ptr_);
986 }
987 else
988 {
989 bm::xor_sim_params xs_params;
990 build_xor_ref_vector(sv);
991 bvs_.set_ref_vectors(&bv_ref_);
992 if (bvs_.compute_sim_model(sim_model_, bv_ref_, xs_params))
993 bvs_.set_sim_model(&sim_model_);
994 }
995 }
996
997 // ----------------------------------------------------
998 // Serialize all bvector planes
999 //
1000
1001 ::memset(buf, 0, h_size);
1002 unsigned char* buf_ptr = buf + h_size; // ptr where planes start (start+hdr)
1003
1004 for (unsigned i = 0; i < planes; ++i)
1005 {
1006 typename SV::bvector_type_const_ptr bv = sv.get_slice(i);
1007 if (!bv) // empty plane
1008 {
1009 sv_layout.set_plane(i, 0, 0);
1010 continue;
1011 }
1012 if (is_xor_ref())
1013 {
1014 unsigned idx;
1015 if (bv_ref_ptr_) // use external reference
1016 idx = (unsigned)bv_ref_ptr_->find_bv(bv);
1017 else
1018 idx = (unsigned)bv_ref_.find_bv(bv);
1019 BM_ASSERT(idx != bv_ref_.not_found());
1020 bvs_.set_curr_ref_idx(idx);
1021 }
1022 size_t buf_size = (size_t)
1023 bvs_.serialize(*bv, buf_ptr, sv_stat.max_serialize_mem);
1024
1025 sv_layout.set_plane(i, buf_ptr, buf_size);
1026 buf_ptr += buf_size;
1027 if (sv_stat.max_serialize_mem > buf_size)
1028 {
1029 sv_stat.max_serialize_mem -= buf_size;
1030 continue;
1031 }
1032 BM_ASSERT(0); // TODO: throw an exception here
1033 } // for i
1034
1035 bvs_.set_ref_vectors(0); // dis-engage XOR ref vector
1036
1037 // -----------------------------------------------------
1038 // serialize the re-map matrix
1039 //
1040 if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
1041 {
1042 bm::encoder enc_m(buf_ptr, sv_stat.max_serialize_mem);
1043 if (sv.is_remap())
1044 encode_remap_matrix(enc_m, sv);
1045 else
1046 enc_m.put_8('N');
1047 buf_ptr += enc_m.size(); // add encoded data size
1048 }
1049
1050 // ------------------------------------------------------
1051 // save the digest vector
1052 //
1053 size_t digest_offset = size_t(buf_ptr - buf); // digest position from the start
1054 ::memcpy(buf_ptr, plane_digest_buf_.buf(), plane_digest_buf_.size());
1055 buf_ptr += plane_digest_buf_.size();
1056 {
1057 bool use_64bit = false;
1058 plane_off_vect_.resize(0);
1059 for (unsigned i = 0; i < planes; ++i)
1060 {
1061 const unsigned char* p = sv_layout.get_plane(i);
1062 if (p)
1063 {
1064 size_t offset = size_t(p - buf);
1065 if (offset > bm::id_max32)
1066 {
1067 use_64bit = true;
1068 break;
1069 }
1070 plane_off_vect_.push_back(unsigned(offset)); // cast is not a bug
1071 }
1072 } // for i
1073 bm::encoder enc_o(buf_ptr, sv_stat.max_serialize_mem);
1074 if (use_64bit || (plane_off_vect_.size() < 4))
1075 {
1076 enc_o.put_8('6');
1077 // save the offset table as a list of 64-bit values
1078 //
1079 for (unsigned i = 0; i < planes; ++i)
1080 {
1081 const unsigned char* p = sv_layout.get_plane(i);
1082 if (p)
1083 {
1084 size_t offset = size_t(p - buf);
1085 enc_o.put_64(offset);
1086 }
1087 } // for
1088 }
1089 else // searialize 32-bit offset table using BIC
1090 {
1091 BM_ASSERT(plane_off_vect_.size() == plane_digest_bv_.count());
1092 unsigned min_v = plane_off_vect_[0];
1093 unsigned max_v = plane_off_vect_[plane_off_vect_.size()-1];
1094
1095 enc_o.put_8('3');
1096 enc_o.put_32(min_v);
1097 enc_o.put_32(max_v);
1098
1099 bm::bit_out<bm::encoder> bo(enc_o);
1100 bo.bic_encode_u32_cm(plane_off_vect_.data()+1,
1101 unsigned(plane_off_vect_.size()-2),
1102 min_v, max_v);
1103 }
1104 buf_ptr += enc_o.size();
1105 }
1106
1107
1108
1109 sv_layout.resize(size_t(buf_ptr - buf)); // set the true occupied size
1110
1111 // -----------------------------------------------------
1112 // save the header
1113 //
1115
1116 enc.put_8('B'); // magic header 'BM' - bit matrix 'BC' - bit compressed
1117 if (sv.is_compressed())
1118 enc.put_8('C');
1119 else
1120 enc.put_8('M');
1121
1122 enc.put_8((unsigned char)bo); // byte order
1123
1124 unsigned char matr_s_ser = 1;
1125#ifdef BM64ADDR
1126 matr_s_ser = 2;
1127#endif
1128
1129 enc.put_8(0); // number of planes == 0 (legacy magic number)
1130 enc.put_8(matr_s_ser); // matrix serialization version
1131 {
1132 bm::id64_t planes_code = planes | (1ull << 63);
1133 enc.put_64(planes_code); // number of rows in the bit-matrix
1134 }
1135 enc.put_64(sv.size_internal());
1136 enc.put_64(bm::id64_t(digest_offset));
1137}
1138
1139// -------------------------------------------------------------------------
1140//
1141// -------------------------------------------------------------------------
1142
1143template<typename SV>
1145 : remap_buf_ptr_(0), bv_ref_ptr_(0), idx_range_set_(false)
1146{
1147 temp_block_ = alloc_.alloc_bit_block();
1148 not_null_mask_bv_.set_allocator_pool(&pool_);
1149 rsc_mask_bv_.set_allocator_pool(&pool_);
1150}
1151
1152// -------------------------------------------------------------------------
1153
1154template<typename SV>
1155void
1157{
1158 bv_ref_ptr_ = bv_ref_ptr;
1159 if (!bv_ref_ptr_)
1160 clear_xor_compression();
1161}
1162
1163
1164// -------------------------------------------------------------------------
1165
1166template<typename SV>
1168{
1169 if (temp_block_)
1170 alloc_.free_bit_block(temp_block_);
1171}
1172
1173// -------------------------------------------------------------------------
1174
1175template<typename SV>
1177{
1178 op_deserial_.set_ref_vectors(0);
1179 deserial_.set_ref_vectors(0);
1180 bv_ref_.reset();
1181}
1182
1183// -------------------------------------------------------------------------
1184
1185template<typename SV>
1187{
1188 if (bv_ref_ptr_)
1189 {
1190 op_deserial_.set_ref_vectors(bv_ref_ptr_);
1191 deserial_.set_ref_vectors(bv_ref_ptr_);
1192 }
1193 else
1194 {
1195 op_deserial_.set_ref_vectors(&bv_ref_);
1196 deserial_.set_ref_vectors(&bv_ref_);
1197 }
1198}
1199
1200// -------------------------------------------------------------------------
1201
1202template<typename SV>
1204 const unsigned char* buf,
1205 bool clear_sv)
1206{
1207 idx_range_set_ = false;
1208 deserialize_sv(sv, buf, 0, clear_sv);
1209}
1210
1211// -------------------------------------------------------------------------
1212
1213template<typename SV>
1215 const unsigned char* buf)
1216{
1217 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1218
1219 unsigned char matr_s_ser = 0;
1220 unsigned planes = load_header(dec, sv, matr_s_ser);
1221 if (planes == 0)
1222 return;
1223
1224 // bm::id64_t sv_size = dec.get_64();
1225 load_planes_off_table(buf, dec, planes); // read the offset vector of bit-planes
1226
1227 for (unsigned i = 0; i < planes; ++i)
1228 {
1229 if (!off_vect_[i]) // empty vector
1230 continue;
1231
1232 bvector_type* bv = sv.get_create_slice(i);
1233 BM_ASSERT(bv); (void)bv;
1234
1235 } // for
1236}
1237
1238// -------------------------------------------------------------------------
1239
// Deserialize only the elements of the index range [from, to].
//   buf      - serialized BLOB
//   from, to - requested index range
//   clear_sv - when true the target vector is cleared (clear_all(true)) first
// Steps: read header -> resize -> load plane offsets -> load the NULL plane ->
// (optionally) translate the range via sv.resolve_range() -> decode planes ->
// load the remap table -> sync.
// NOTE(review): the signature line and two guard-condition lines are missing
// from this listing (see the notes in the body).
1240template<typename SV>
1242 const unsigned char* buf,
1243 size_type from,
1244 size_type to,
1245 bool clear_sv)
1246{
1247 if (clear_sv)
1248 sv.clear_all(true);
1249
1250 idx_range_set_ = true; idx_range_from_ = from; idx_range_to_ = to;
1251
1252 remap_buf_ptr_ = 0;
1253 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1254
1255 unsigned char matr_s_ser = 0;
1256 unsigned planes = load_header(dec, sv, matr_s_ser);
1257
 // NOTE(review): this early return leaves idx_range_set_ == true, while the
 // other exits of this function reset it to false - confirm this is intended
1258 if (!sv_size_) // empty vector
1259 return;
1260
1261 sv.resize_internal(size_type(sv_size_));
1262 bv_ref_.reset();
1263
1264 load_planes_off_table(buf, dec, planes); // read the offset vector of bit-planes
1265
1266 setup_xor_compression();
1267
1268 sv.get_bmatrix().allocate_rows(planes);
1269
1270 // TODO: add range for not NULL plane
 // the NULL plane is loaded with no mask; the call returns the updated
 // plane count
1271 planes = (unsigned)load_null_plane(sv, int(planes), buf, 0);
1272
1273 // check if mask needs to be recalculated using the NULL (index) vector
 // NOTE(review): the condition guarding the next block is missing from this
 // listing
1275 {
1276 // recalculate planes range
1277 size_type sv_left, sv_right;
1278 bool range_valid = sv.resolve_range(from, to, &sv_left, &sv_right);
1279 if (!range_valid)
1280 {
 // the requested range cannot be resolved: return an empty vector
1281 sv.clear();
1282 idx_range_set_ = false;
1283 return;
1284 }
1285 else
1286 {
1287 idx_range_set_ = true; idx_range_from_ = sv_left; idx_range_to_ = sv_right;
1288 }
1289 }
1290
1291 deserialize_planes(sv, planes, buf, 0);
1292
1293 clear_xor_compression();
1294
1295 // load the remap matrix
1296 //
 // NOTE(review): the condition guarding the next block is missing from this
 // listing
1298 {
1299 if (matr_s_ser)
1300 load_remap(sv, remap_buf_ptr_);
1301 } // if remap traits
1302
1303 sv.sync(true); // force sync, recalculate RS index, remap tables, etc
1304
1305 remap_buf_ptr_ = 0;
1306
1307 idx_range_set_ = false;
1308}
1309
1310// -------------------------------------------------------------------------
1311
// Core deserialization routine with an optional gather mask.
//   buf      - serialized BLOB
//   mask_bv  - optional mask of elements to keep (may be null: the code
//              checks the pointer before every use)
//   clear_sv - when true the target vector is cleared (clear_all(true)) first
// Steps: read header -> resize -> load plane offsets -> load the NULL plane ->
// (optionally) re-map the mask through the NULL vector -> decode planes ->
// mark the NULL slice index -> load the remap table -> sync.
// NOTE(review): the signature line and two guard-condition lines are missing
// from this listing (see the notes in the body).
1312template<typename SV>
1314 const unsigned char* buf,
1315 const bvector_type* mask_bv,
1316 bool clear_sv)
1317{
1318 if (clear_sv)
1319 sv.clear_all(true);
1320
1321 remap_buf_ptr_ = 0;
1322 bm::decoder dec(buf); // TODO: implement correct processing of byte-order
1323
1324 unsigned char matr_s_ser = 0;
1325 unsigned planes = load_header(dec, sv, matr_s_ser);
1326 if (!sv_size_)
1327 return; // empty vector
1328
1329 sv.resize_internal(size_type(sv_size_));
1330 bv_ref_.reset();
1331
1332 load_planes_off_table(buf, dec, planes); // read the offset vector of bit-planes
1333
1334 setup_xor_compression();
1335
1336 sv.get_bmatrix().allocate_rows(planes);
1337 planes = (unsigned)load_null_plane(sv, int(planes), buf, mask_bv);
1338
1339
1340 // check if mask needs to be recalculated using the NULL (index) vector
 // NOTE(review): the condition guarding the next block is missing from this
 // listing
1342 {
1343 if (mask_bv)
1344 {
 // restrict the mask to not-NULL elements, then run it through the
 // rank compressor against the NULL vector; from here on mask_bv
 // points to the member rsc_mask_bv_
1345 const bvector_type* bv_null = sv.get_null_bvector();
1346 BM_ASSERT(bv_null);
1347 rsc_mask_bv_.clear(true);
1348 not_null_mask_bv_.bit_and(*bv_null, *mask_bv, bvector_type::opt_compress);
1349 rsc_compressor_.compress(rsc_mask_bv_, *bv_null, not_null_mask_bv_);
1350 mask_bv = &rsc_mask_bv_;
1351
1352 // if it needs range recalculation
1353 if (idx_range_set_) // range setting is in effect
1354 {
 // overwrite the range with the one found in the new mask;
 // the boolean result of find_range() is ignored
1355 //bool rf =
1356 rsc_mask_bv_.find_range(idx_range_from_, idx_range_to_);
1357 }
1358 }
1359 }
1360
1361 deserialize_planes(sv, planes, buf, mask_bv);
1362
1363 // restore NULL slice index
1364#ifdef _MSC_VER
1365#pragma warning( push )
1366#pragma warning( disable : 4127)
1367#endif
1368 if (sv.max_vector_size == 1)
1369 {
1370 // NULL vector at: (sv.max_vector_size * sizeof(value_type) * 8 + 1)
1371 const bvector_type* bv_null = sv.get_slice(sv.sv_value_slices);
1372 if (bv_null)
1373 sv.mark_null_idx(sv.sv_value_slices); // last slice is NULL
1374 }
1375#ifdef _MSC_VER
1376#pragma warning( pop )
1377#endif
1378
1379
1380 clear_xor_compression();
1381
1382 // load the remap matrix
1383 //
 // NOTE(review): the condition guarding the next block is missing from this
 // listing
1385 {
1386 if (matr_s_ser)
1387 load_remap(sv, remap_buf_ptr_);
1388 } // if remap traits
1389
1390 sv.sync(true); // force sync, recalculate RS index, remap tables, etc
1391 remap_buf_ptr_ = 0;
1392}
1393
1394// -------------------------------------------------------------------------
1395
// Parse the BLOB header and return the number of stored planes.
//   dec        - decoder positioned at the start of the BLOB; advanced past
//                the header fields that are read here
//   sv         - target vector, used only for the bit-depth check
//   matr_s_ser - [out] matrix serialization version; written only when the
//                plane-count byte is 0 (bit-matrix format)
// Side effects: sets sv_size_ and digest_offset_.
// Returns 0 for an empty package (second signature byte 'Z').
// Raises via raise_invalid_header(), raise_invalid_64bit() or
// raise_invalid_bitdepth() on the corresponding header errors.
// NOTE(review): the signature line is missing from this listing.
1396template<typename SV>
1398 bm::decoder& dec, SV& sv, unsigned char& matr_s_ser)
1399{
1400 (void)sv;
1401 bm::id64_t planes_code = 0;
1402 unsigned char h1 = dec.get_8();
1403 unsigned char h2 = dec.get_8();
1404
1405 BM_ASSERT(h1 == 'B' && (h2 == 'M' || h2 == 'C' || h2 == 'Z'));
1406
 // signature is 'B' followed by one of 'M', 'C', 'Z'
1407 bool sig2_ok = (h2 == 'M' || h2 == 'C' || h2 == 'Z');
1408 if (h1 != 'B' || !sig2_ok) //&& (h2 != 'M' || h2 != 'C')) // no magic header?
1409 raise_invalid_header();
1410 unsigned planes = 0;
1411 if (h2 == 'Z') // empty serialization package
1412 {
1413 sv_size_ = 0;
1414 return planes;
1415 }
1416
 // byte-order byte is read to advance the decoder but is not used
1417 unsigned char bv_bo = dec.get_8(); (void) bv_bo;
1418 planes = dec.get_8();
1419 if (planes == 0) // bit-matrix
1420 {
1421 matr_s_ser = dec.get_8(); // matrix serialization version
1422 planes_code = dec.get_64();
 // the cast truncates the 64-bit code; its bit 63 is tested separately
 // below as the "digest present" flag
1423 planes = (unsigned) planes_code; // number of rows in the bit-matrix
1424 }
1425 #ifdef BM64ADDR
1426 #else
1427 if (matr_s_ser == 2) // 64-bit matrix
1428 raise_invalid_64bit();
1429 #endif
1430
 // vectors without dynamic slices: the stored plane count must be non-zero
 // and must not exceed what the target reports via stored_slices()
1431 if constexpr (SV::is_dynamic_splices::value == false)
1432 {
1433 unsigned sv_planes = sv.stored_slices();
1434 if (!planes || planes > sv_planes)
1435 raise_invalid_bitdepth();
1436 }
1437
1438 sv_size_ = dec.get_64();
1439
 // the offset of the plane digest follows only when bit 63 of the code is set
1440 digest_offset_ = 0;
1441 if (planes_code & (1ull << 63))
1442 {
1443 digest_offset_ = dec.get_64();
1444 }
1445
1446 return planes;
1447}
1448
1449// -------------------------------------------------------------------------
1450
// Decode the bit-planes [0, planes) from the BLOB into the slices of sv.
//   planes  - number of planes to process
//   buf     - start of the BLOB; plane positions are buf + off_vect_[i]
//   mask_bv - optional gather mask; when given, every decoded plane is
//             AND-ed with it
// Planes with a zero offset are skipped. When idx_range_set_ is in effect the
// decode is limited to [idx_range_from_, idx_range_to_].
// NOTE(review): the signature line and the first line of two conditions in
// the body are missing from this listing (see the notes below).
1451template<typename SV>
1453 SV& sv,
1454 unsigned planes,
1455 const unsigned char* buf,
1456 const bvector_type* mask_bv)
1457{
 // no explicit range: derive one from the mask; find_range() supplies both
 // the flag and the range bounds
1458 if (mask_bv && !idx_range_set_)
1459 idx_range_set_ = mask_bv->find_range(idx_range_from_, idx_range_to_);
1460
1461 // read-deserialize the planes based on offsets
1462 // backward order to bring the NULL vector first
1463 //
1464 for (int i = int(planes-1); i >= 0; --i)
1465 {
1466 size_t offset = off_vect_[unsigned(i)];
1467 if (!offset) // empty vector
1468 continue;
1469 const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1470 bvector_type* bv = sv.get_create_slice(unsigned(i));
1471 BM_ASSERT(bv);
1472
1473 // add the vector into the XOR reference list
1474 if (!bv_ref_ptr_)
1475 bv_ref_.add(bv, unsigned(i));
1476 if (mask_bv) // gather mask set, use AND operation deserializer
1477 {
1478 typename bvector_type::mem_pool_guard mp_g_z(pool_, *bv);
1479
 // NOTE(review): the first line of this condition is missing from
 // this listing. The branch decodes the plane without a range so
 // that the returned byte count can locate the remap data.
1481 && !remap_buf_ptr_) // last plane vector (special case)
1482 {
1483 size_t read_bytes =
1484 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1485 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1486 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1487 continue;
1488 }
1489 if (idx_range_set_)
1490 deserial_.set_range(idx_range_from_, idx_range_to_);
1491
1492 deserial_.deserialize(*bv, bv_buf_ptr);
1493
1494 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1495 }
1496 else
1497 {
 // NOTE(review): the first line of this condition is missing from
 // this listing. Same idea as in the masked branch: decode without
 // a range to obtain the remap data position, then trim to the range.
1499 !remap_buf_ptr_)
1500 {
1501 size_t read_bytes =
1502 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1503 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1504 if (idx_range_set_)
1505 bv->keep_range(idx_range_from_, idx_range_to_);
1506 continue;
1507 }
1508 if (idx_range_set_)
1509 {
1510 deserial_.set_range(idx_range_from_, idx_range_to_);
1511 deserial_.deserialize(*bv, bv_buf_ptr);
1512 bv->keep_range(idx_range_from_, idx_range_to_);
1513 }
1514 else
1515 {
1516 //size_t read_bytes =
1517 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1518 }
1519 }
1520
1521 } // for i
1522
 // drop the range setting of the deserializer once all planes are done
1523 deserial_.unset_range();
1524
1525}
1526
1527// -------------------------------------------------------------------------
1528
// Load the NULL plane, i.e. the last plane (index planes-1), ahead of the
// value planes.
//   planes  - total plane count (must be > 0)
//   buf     - start of the BLOB
//   mask_bv - optional mask, AND-ed into the plane in the non-compressed case
// Returns the number of planes still to be decoded: `planes` unchanged when
// sv is not nullable, otherwise planes-1 (also when the NULL plane has a zero
// offset and nothing was decoded).
// NOTE(review): the signature line and two condition lines in the body are
// missing from this listing (see the notes below).
1529template<typename SV>
1531 int planes,
1532 const unsigned char* buf,
1533 const bvector_type* mask_bv)
1534{
1535 BM_ASSERT(planes > 0);
1536 if (!sv.is_nullable())
1537 return planes;
1538 int i = planes - 1;
1539 size_t offset = off_vect_[unsigned(i)];
1540 if (offset)
1541 {
1542 // TODO: improve serialization format to avoid non-range decode of
1543 // the NULL vector just to get to the offset of remap table
1544
1545 const unsigned char* bv_buf_ptr = buf + offset; // seek to position
1546 bvector_type* bv = sv.get_create_slice(unsigned(i));
1547
 // register the plane as an XOR reference unless an external list is used
1548 if (!bv_ref_ptr_)
1549 bv_ref_.add(bv, unsigned(i));
1550
 // NOTE(review): the condition selecting this branch is missing from
 // this listing
1552 {
1553 // load the whole not-NULL vector regardless of range
1554 // TODO: load [0, idx_range_to_]
1555 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1556 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1557 }
1558 else // non-compressed SV
1559 {
1560 // NULL plane in string vector with substitute re-map
1561 //
 // NOTE(review): the condition selecting this branch is missing from
 // this listing
1563 {
1564 BM_ASSERT(!remap_buf_ptr_);
1565 size_t read_bytes = deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1566 remap_buf_ptr_ = bv_buf_ptr + read_bytes;
1567 if (idx_range_set_)
1568 bv->keep_range(idx_range_from_, idx_range_to_);
1569 }
1570 else
1571 if (idx_range_set_)
1572 {
1573 deserial_.set_range(idx_range_from_, idx_range_to_);
1574 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1575 bv->keep_range(idx_range_from_, idx_range_to_);
1576 deserial_.unset_range();
1577 }
1578 else
1579 {
1580 deserial_.deserialize(*bv, bv_buf_ptr, temp_block_);
1581 }
1582 if (mask_bv)
1583 bv->bit_and(*mask_bv, bvector_type::opt_compress);
1584 }
1585 }
1586 return planes-1;
1587}
1588
1589// -------------------------------------------------------------------------
1590
// Fill off_vect_ with the BLOB offset of every plane (0 == empty plane).
//   buf    - start of the BLOB (the digest is located at buf + digest_offset_)
//   dec    - header decoder, used only for the plain (digest-less) table
//   planes - number of planes; off_vect_ is resized to this value
// Two layouts are handled:
//   * digest layout (digest_offset_ != 0): a bit-vector marks the non-empty
//     planes and is followed by a type byte - '6' for raw 64-bit offsets,
//     '3' for 32-bit offsets stored as min, max and BIC-coded interior values;
//   * plain layout: one 64-bit offset per plane read from dec.
// Raises via raise_invalid_format() when the digest data is inconsistent
// (unknown type byte, or an empty digest in the '3' layout).
// NOTE(review): the signature line is missing from this listing.
1591template<typename SV>
1593 const unsigned char* buf, bm::decoder& dec, unsigned planes)
1594{
1595 off_vect_.resize(planes);
1596 if (digest_offset_)
1597 {
1598 plane_digest_bv_.clear(false);
1599 const unsigned char* buf_ptr = buf + digest_offset_;
1600 size_t read_bytes =
1601 deserial_.deserialize(plane_digest_bv_, buf_ptr, temp_block_);
1602 buf_ptr += read_bytes;
1603
 // the offsets follow the serialized digest bit-vector
1604 bm::decoder dec_o(buf_ptr);
1605
1606 unsigned char dtype = dec_o.get_8();
1607 switch (dtype)
1608 {
1609 case '6':
1610 for (unsigned i = 0; i < planes; ++i)
1611 {
1612 size_t offset = 0;
1613 if (plane_digest_bv_.test(i))
1614 offset = (size_t) dec_o.get_64();
1615 off_vect_[i] = offset;
1616 } // for i
1617 break;
1618 case '3':
1619 {
1620 unsigned osize = (unsigned)plane_digest_bv_.count();
1621 BM_ASSERT(osize);
 // an empty digest cannot carry min/max offsets: indexing [0] and
 // [osize-1] below would run out of bounds, so reject the BLOB
 if (!osize)
 raise_invalid_format();
1622 off32_vect_.resize(osize);
1623
1624 unsigned min_v = dec_o.get_32();
1625 unsigned max_v = dec_o.get_32();
1626
1627 off32_vect_[0] = min_v;
1628 off32_vect_[osize-1] = max_v;
1629
1630 bm::bit_in<bm::decoder> bi(dec_o);
 // with a single offset (osize-2) would wrap around as unsigned and
 // the decoder would be asked to write far past the vector
 if (osize > 1)
1631 bi.bic_decode_u32_cm(off32_vect_.data()+1, osize-2, min_v, max_v);
1632
 // scatter the compact offset list back to the plane positions
1633 unsigned k = 0;
1634 for (unsigned i = 0; i < planes; ++i)
1635 {
1636 if (plane_digest_bv_.test(i))
1637 {
1638 off_vect_[i] = off32_vect_[k];
1639 ++k;
1640 }
1641 else
1642 off_vect_[i] = 0;
1643 }
1644 }
1645 break;
1646 default:
1647 // unknown offset table type: off_vect_ cannot be filled, BLOB is corrupted
1648 raise_invalid_format();
1649 } // switch
1650 }
1651 else
1652 {
1653 for (unsigned i = 0; i < planes; ++i)
1654 {
1655 size_t offset = (size_t) dec.get_64();
1656 off_vect_[i] = offset;
1657 } // for i
1658 }
1659}
1660
1661// -------------------------------------------------------------------------
1662
// Load the remap table stored at remap_buf_ptr into sv.
//   remap_buf_ptr - position of the remap data; a null pointer means there
//                   is nothing to load and the function returns at once
// The first byte selects the layout:
//   'N' - no remap data: return without calling sv.set_remap();
//   'R' - plain buffer: 64-bit size followed by the raw bytes; the size must
//         be non-zero and equal to sv.remap_size();
//   'C' - CSR layout: 32-bit row count, 16-bit column count (at most 256),
//         gamma-coded row lengths, then (index, value) byte pairs per row.
// The data must end with the token 'E'; sv.set_remap() is called after it.
// Raises via raise_invalid_format() / raise_missing_remap_matrix() on
// inconsistent data.
// NOTE(review): the signature line is missing from this listing.
1663template<typename SV>
1665 const unsigned char* remap_buf_ptr)
1666{
1667 if (!remap_buf_ptr)
1668 return;
1669
1670 bm::decoder dec_m(remap_buf_ptr);
1671
1672 unsigned char rh = dec_m.get_8();
1673 switch (rh)
1674 {
1675 case 'N':
1676 return;
1677 case 'R':
1678 {
1679 size_t remap_size = (size_t) dec_m.get_64();
1680 unsigned char* remap_buf = sv.init_remap_buffer();
1681 BM_ASSERT(remap_buf);
1682 size_t target_remap_size = sv.remap_size();
 // the stored size has to match the target buffer exactly
1683 if (!remap_size || !remap_buf || remap_size != target_remap_size)
1684 {
1685 raise_invalid_format();
1686 }
1687 dec_m.memcpy(remap_buf, remap_size);
1688 }
1689 break;
1690
1691 case 'C': // CSR remap
1692 {
1693 //sv.init_remap_buffer();
1694 typename SV::remap_matrix_type* rmatr = sv.get_remap_matrix();
1695 if (!rmatr)
1696 {
1697 raise_missing_remap_matrix();
1698 }
1699 size_t rows = (size_t) dec_m.get_32();
1700 size_t cols = dec_m.get_16();
1701 if (cols > 256)
1702 {
1703 raise_invalid_format();
1704 }
1705 rmatr->resize(rows, cols, false);
1706 if (rows)
1707 {
1708 rmatr->set_zero();
1709
1710 // read gamma encoded row lens
1711 remap_rlen_vect_.resize(0);
1712 {
1713 bm::bit_in<bm::decoder> bi(dec_m);
1714 for (size_t r = 0; r < rows; ++r)
1715 {
1716 unsigned rl = bi.gamma();
1717 remap_rlen_vect_.push_back(rl);
1718 } // for r
1719 }
1720
 // each row holds `cnt` (index, value) byte pairs
1721 for (size_t r = 0; r < rows; ++r)
1722 {
1723 unsigned char* BMRESTRICT row = rmatr->row(r);
1724 size_t cnt = remap_rlen_vect_[r];
1725 if (!cnt || cnt > 256)
1726 {
1727 raise_invalid_format(); // format corruption!
1728 }
1729 for (size_t j = 0; j < cnt; ++j)
1730 {
 // NOTE(review): idx is not checked against cols here;
 // presumably a row is cols bytes wide - confirm that a
 // corrupted index cannot write past the row
1731 unsigned idx = dec_m.get_8();
1732 unsigned char v = dec_m.get_8();
1733 row[idx] = v;
1734 } // for j
1735 } // for r
1736 }
1737 }
1738 break;
1739 default:
1740 // re-map matrix code error
1741 raise_invalid_format();
1742 } // switch
1743
1744 // finalize the remap matrix read
1745 //
1746 unsigned char end_tok = dec_m.get_8();
1747 if (end_tok != 'E')
1748 {
1749 raise_invalid_format();
1750 }
1751 sv.set_remap();
1752}
1753
1754// -------------------------------------------------------------------------
1755
// Report a BLOB whose signature header is not recognised.
// Throws std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when the
// library is built with BM_NO_STL.
// NOTE(review): the signature line is missing from this listing; judging by
// the message this is presumably raise_invalid_header() - confirm.
1756template<typename SV>
1758{
1759#ifndef BM_NO_STL
1760 throw std::logic_error("BitMagic: Invalid serialization signature header");
1761#else
1762 BM_THROW(BM_ERR_SERIALFORMAT);
1763#endif
1764}
1765
1766// -------------------------------------------------------------------------
1767
// Report a 64-bit BLOB that the current build cannot accept as a target.
// Throws std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when the
// library is built with BM_NO_STL.
// NOTE(review): the signature line is missing from this listing; judging by
// the message this is presumably raise_invalid_64bit() - confirm.
1768template<typename SV>
1770{
1771#ifndef BM_NO_STL
1772 throw std::logic_error("BitMagic: Invalid serialization target (64-bit BLOB)");
1773#else
1774 BM_THROW(BM_ERR_SERIALFORMAT);
1775#endif
1776}
1777
1778// -------------------------------------------------------------------------
1779
// Report a BLOB whose plane count does not fit the target vector.
// Throws std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when the
// library is built with BM_NO_STL.
// NOTE(review): the signature line is missing from this listing; judging by
// the message this is presumably raise_invalid_bitdepth() - confirm.
1780template<typename SV>
1782{
1783#ifndef BM_NO_STL
1784 throw std::logic_error("BitMagic: Invalid serialization target (bit depth)");
1785#else
1786 BM_THROW(BM_ERR_SERIALFORMAT);
1787#endif
1788}
1789
1790// -------------------------------------------------------------------------
1791
// Report a malformed (possibly corrupted) serialization BLOB.
// Throws std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when the
// library is built with BM_NO_STL.
// NOTE(review): the signature line is missing from this listing; judging by
// the message this is presumably raise_invalid_format() - confirm.
1792template<typename SV>
1794{
1795#ifndef BM_NO_STL
1796 throw std::logic_error("BitMagic: Invalid serialization format (BLOB corruption?)");
1797#else
1798 BM_THROW(BM_ERR_SERIALFORMAT);
1799#endif
1800}
1801
1802// -------------------------------------------------------------------------
1803
// Report remap data that cannot be loaded because the target vector provides
// no remap matrix.
// Throws std::logic_error, or uses BM_THROW(BM_ERR_SERIALFORMAT) when the
// library is built with BM_NO_STL.
// NOTE(review): the signature line is missing from this listing; judging by
// the message this is presumably raise_missing_remap_matrix() - confirm.
1804template<typename SV>
1806{
1807#ifndef BM_NO_STL
1808 throw std::logic_error("BitMagic: Invalid serialization format (remap matrix)");
1809#else
1810 BM_THROW(BM_ERR_SERIALFORMAT);
1811#endif
1812}
1813
1814// -------------------------------------------------------------------------
1815
1816} // namespace bm
1817
1818#endif
Definitions(internal)
#define BMRESTRICT
Definition: bmdef.h:203
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BM_ASSERT
Definition: bmdef.h:139
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Sparse container sparse_vector<> for integer types using bit-transposition transform.
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:257
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1795
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1514
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:183
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1294
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition: encoding.h:1187
List of reference bit-vectors with their true index associations.
Definition: bmxor.h:624
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:137
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:118
bvector_size_type size_type
Definition: bm.h:121
Alloc allocator_type
Definition: bm.h:117
Deserializer for compressed collections.
bvector_type::allocator_type allocator_type
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
Serializer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
CBC::address_resolver_type address_resolver_type
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition: encoding.h:105
void seek(int delta) BMNOEXCEPT
change current position
Definition: encoding.h:99
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition: encoding.h:93
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition: encoding.h:679
Class for decoding data from memory buffer.
Definition: encoding.h:126
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:751
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:786
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:722
Deserializer for bit-vector.
Definition: bmserial.h:570
Memory encoding.
Definition: encoding.h:50
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition: encoding.h:529
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition: encoding.h:537
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition: encoding.h:606
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition: encoding.h:434
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition: encoding.h:545
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition: encoding.h:516
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition: encoding.h:571
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:444
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
Definition: bmserial.h:930
Algorithms for rank compression of bit-vector.
Definition: bmalgo.h:453
Bit-vector serialization class.
Definition: bmserial.h:76
void gap_length_serialization(bool value) BMNOEXCEPT
Set GAP length serialization (serializes GAP levels of the original vector)
Definition: bmserial.h:1275
byte_buffer< allocator_type > buffer
Definition: bmserial.h:85
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size incre...
Definition: bmserial.h:1287
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition: bmserial.h:2706
sparse vector de-serializer
void deserialize_planes(SV &sv, unsigned planes, const unsigned char *buf, const bvector_type *mask_bv=0)
deserialize bit-vector planes
void setup_xor_compression()
setup deserializers
bm::operation_deserializer< bvector_type > op_deserial_
void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
void deserialize(SV &sv, const unsigned char *buf, const bvector_type &mask_bv)
bm::rank_compressor< bvector_type > rsc_compressor_
void deserialize_structure(SV &sv, const unsigned char *buf)
void deserialize_sv(SV &sv, const unsigned char *buf, const bvector_type *mask_bv, bool clear_sv)
static void raise_invalid_bitdepth()
throw error on incorrect deserialization
static void raise_invalid_header()
throw error on incorrect deserialization
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type alloc_type
static void raise_invalid_format()
throw error on incorrect deserialization
unsigned load_header(bm::decoder &dec, SV &sv, unsigned char &matr_s_ser)
Deserialize header/version and other common info.
void deserialize(SV &sv, const unsigned char *buf, size_type from, size_type to)
bm::heap_vector< size_t, alloc_type, true > off_vect_
bm::heap_vector< unsigned, alloc_type, true > off32_vect_
static void raise_invalid_64bit()
throw error on incorrect deserialization
static void raise_missing_remap_matrix()
throw error on incorrect deserialization
const bvector_type * bvector_type_const_ptr
void set_xor_ref(bv_ref_vector_type *bv_ref_ptr)
Set external XOR reference vectors (data frame reference vectors)
bv_ref_vector_type * bv_ref_ptr_
external ref
bm::deserializer< bvector_type, bm::decoder > deserial_
const unsigned char * remap_buf_ptr_
bv_ref_vector_type bv_ref_
reference vector
int load_null_plane(SV &sv, int planes, const unsigned char *buf, const bvector_type *mask_bv)
load NULL bit-plane (returns new planes count)
void deserialize_range(SV &sv, const unsigned char *buf, size_type from, size_type to, bool clear_sv=true)
void load_planes_off_table(const unsigned char *buf, bm::decoder &dec, unsigned planes)
load offset table
void clear_xor_compression()
unset XOR compression vectors
void load_remap(SV &sv, const unsigned char *remap_buf_ptr)
load string remap dict
bvector_type plane_digest_bv_
bv.digest of bit-planes
void encode_remap_matrix(bm::encoder &enc, const SV &sv)
serialize the remap matrix used for SV encoding
serializer_type::buffer buffer_type
bm::serializer< bvector_type >::bv_ref_vector_type bv_ref_vector_type
static void build_plane_digest(bvector_type &digest_bv, const SV &sv)
void set_bookmarks(bool enable, unsigned bm_interval=256) BMNOEXCEPT
Add skip-markers for faster range deserialization.
const bv_ref_vector_type * bv_ref_ptr_
void set_xor_ref(bool is_enabled) BMNOEXCEPT
Turn ON and OFF XOR compression of sparse vectors Enables XOR reference compression for the sparse ve...
void build_xor_ref_vector(const SV &sv)
const bvector_type * bvector_type_const_ptr
bm::serializer< bvector_type > & get_bv_serializer() BMNOEXCEPT
Get access to the underlying bit-vector serializer This access can be used to fine tune compression s...
void compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
Calculate XOR similarity model for ref_vector; the reference vector must be associated beforehand.
alloc_type::allocator_pool_type allocator_pool_type
buffer_type plane_digest_buf_
serialization buf
void set_sim_model(const xor_sim_model_type *sim_model) BMNOEXCEPT
Attach serializer to a pre-computed similarity model.
bm::serializer< bvector_type > serializer_type
SV::remap_matrix_type remap_matrix_type
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
bool is_xor_ref() const BMNOEXCEPT
Returns the XOR reference compression status (enabled/disabled)
void disable_xor_compression() BMNOEXCEPT
Disable XOR compression on serialization.
void enable_xor_compression() BMNOEXCEPT
Enable XOR compression on vector serialization.
const xor_sim_model_type * sim_model_ptr_
bm::heap_vector< unsigned, alloc_type, true > u32_vector_type
bm::serializer< bvector_type > bvs_
bvector_type::allocator_type alloc_type
bm::serializer< bvector_type >::xor_sim_model_type xor_sim_model_type
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
Definition: bm.h:790
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition: bmserial.h:3140
void sparse_vector_serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout, bm::word_t *temp_block=0)
Serialize sparse vector into a memory buffer(s) structure.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
Definition: bm.h:78
unsigned int word_t
Definition: bmconst.h:39
SZ count_nz(const VT *arr, SZ arr_size) BMNOEXCEPT
Find count of non-zero elements in the array.
Definition: bmfunc.h:9839
const unsigned id_max32
Definition: bmconst.h:50
ByteOrder
Byte orders recognized by the library.
Definition: bmconst.h:451
unsigned long long int id64_t
Definition: bmconst.h:35
unsigned short gap_word_t
Definition: bmconst.h:78
ad-hoc conditional expressions
Definition: bmutil.h:113
static ByteOrder byte_order()
Definition: bmconst.h:486
layout class for serialization buffer structure
sizet_vector_type plane_size_
serialized plane size
serializer< bvector_type >::buffer buffer_type
void resize(size_t ssize)
Set new serialized size.
size_t capacity() const BMNOEXCEPT
return serialization buffer capacity
const unsigned char * get_plane(unsigned i) const BMNOEXCEPT
Get plane pointer.
unsigned char * reserve(size_t capacity)
resize capacity
size_t size() const BMNOEXCEPT
return current serialized size
void resize_slices(unsigned new_slices_size)
Resize for the target number of plains / bit-slices.
bm::heap_vector< unsigned char *, allocator_type, true > ptr_vector_type
const unsigned char * data() const BMNOEXCEPT
Return serialization buffer pointer.
ptr_vector_type plane_ptrs_
pointers on serialized bit-planes
buffer_type buf_
serialization buffer
void set_plane(unsigned i, unsigned char *ptr, size_t buf_size) BMNOEXCEPT
Set plane output pointer and size.
bvector_type::allocator_type allocator_type
bm::heap_vector< size_t, allocator_type, true > sizet_vector_type
void freemem() BMNOEXCEPT
free memory
const unsigned char * buf() const BMNOEXCEPT
Return serialization buffer pointer.
XOR similarity model.
Definition: bmxor.h:791
Parameters for XOR similarity search.
Definition: bmxor.h:59
bm::bvector bvector_type
Definition: xsample07a.cpp:94