BitMagic-C++
encoding.h
Go to the documentation of this file.
1#ifndef BMENCODING_H__INCLUDED__
2#define BMENCODING_H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file encoding.h
22 \brief Encoding utilities for serialization (internal)
23*/
24
25
26#include <memory.h>
27#include "bmutil.h"
28
29
30#ifdef _MSC_VER
31#pragma warning (push)
32#pragma warning (disable : 4702)
33#endif
34
35namespace bm
36{
37
38
39// ----------------------------------------------------------------
40
41/*!
42 \brief Memory encoding.
43
44 Class for encoding data into memory.
45 Class handles aligment issues with the integer data types.
46
47 \ingroup gammacode
48*/
50{
51public:
52 typedef unsigned char* position_type;
53public:
54 encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
55 void put_8(unsigned char c) BMNOEXCEPT;
57 void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
60 void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
64
65 void put_8_16_32(unsigned w,
66 unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT;
67 void put_prefixed_array_32(unsigned char c,
68 const bm::word_t* w, unsigned count) BMNOEXCEPT;
69 void put_prefixed_array_16(unsigned char c,
70 const bm::short_t* s, unsigned count,
71 bool encode_count) BMNOEXCEPT;
72 void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
73 size_t size() const BMNOEXCEPT;
74 unsigned char* get_pos() const BMNOEXCEPT;
75 void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
76private:
77 unsigned char* buf_;
78 unsigned char* start_;
79 size_t size_;
80};
81
82// ----------------------------------------------------------------
83/**
84 Base class for all decoding functionality
85 \ingroup gammacode
86*/
88{
89public:
90 decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
91
92 /// Reads character from the decoding buffer.
93 unsigned char get_8() BMNOEXCEPT { return *buf_++; }
94
95 /// Returns size of the current decoding stream.
96 size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
97
98 /// change current position
99 void seek(int delta) BMNOEXCEPT { buf_ += delta; }
100
101 /// read bytes from the decode buffer
102 void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
103
104 /// Return current buffer pointer
105 const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
106
107 /// Set current buffer pointer
108 void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
109
110 /// Read h-64-bit
111 bm::id64_t get_h64() BMNOEXCEPT;
112
113protected:
114 const unsigned char* buf_;
115 const unsigned char* start_;
116};
117
118
119// ----------------------------------------------------------------
120/**
121 Class for decoding data from memory buffer.
122 Properly handles aligment issues with integer data types.
123 \ingroup gammacode
124*/
125class decoder : public decoder_base
126{
127public:
128 decoder(const unsigned char* buf) BMNOEXCEPT;
129 bm::short_t get_16() BMNOEXCEPT;
130 bm::word_t get_24() BMNOEXCEPT;
131 bm::word_t get_32() BMNOEXCEPT;
132 bm::id64_t get_48() BMNOEXCEPT;
133 bm::id64_t get_64() BMNOEXCEPT;
134 void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
135 bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
136 void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
137 void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
138};
139
140// ----------------------------------------------------------------
141/**
142 Class for decoding data from memory buffer.
143 Properly handles aligment issues with integer data types.
144 Converts data to big endian architecture
145 (presumed it was encoded as little endian)
146 \ingroup gammacode
147*/
149
150
151// ----------------------------------------------------------------
152/**
153 Class for decoding data from memory buffer.
154 Properly handles aligment issues with integer data types.
155 Converts data to little endian architecture
156 (presumed it was encoded as big endian)
157 \ingroup gammacode
158*/
160{
161public:
162 decoder_little_endian(const unsigned char* buf);
163 bm::short_t get_16();
164 bm::word_t get_24();
165 bm::word_t get_32();
166 bm::id64_t get_48();
167 bm::id64_t get_64();
168 void get_32(bm::word_t* w, unsigned count);
169 bool get_32_OR(bm::word_t* w, unsigned count);
170 void get_32_AND(bm::word_t* w, unsigned count);
171 void get_16(bm::short_t* s, unsigned count);
172};
173
174
175/**
176 Byte based writer for un-aligned bit streaming
177
178 @ingroup gammacode
179 @sa encoder
180*/
181template<class TEncoder>
183{
184public:
185 bit_out(TEncoder& dest)
186 : dest_(dest), used_bits_(0), accum_(0)
187 {}
188
189 ~bit_out() { flush(); }
190
191 /// issue single bit into encode bit-stream
192 void put_bit(unsigned value) BMNOEXCEPT;
193
194 /// issue count bits out of value
195 void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
196
197 /// issue 0 into output stream
198 void put_zero_bit() BMNOEXCEPT;
199
200 /// issue specified number of 0s
201 void put_zero_bits(unsigned count) BMNOEXCEPT;
202
203 /// Elias Gamma encode the specified value
204 void gamma(unsigned value) BMNOEXCEPT;
205
206 /// Binary Interpolative array decode
207 void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
209 {
210 bic_encode_u16_cm(arr, sz, lo, hi);
211 }
212
213 /// Binary Interpolative encoding (array of 16-bit ints)
214 void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
217
218 /// Binary Interpolative encoding (array of 16-bit ints)
219 /// cm - "center-minimal"
220 void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
223
224 /// Binary Interpolative encoding (array of 32-bit ints)
225 /// cm - "center-minimal"
226 void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
228
229 /// Flush the incomplete 32-bit accumulator word
230 void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
231
232private:
233 void flush_accum() BMNOEXCEPT
234 {
235 dest_.put_32(accum_);
236 used_bits_ = accum_ = 0;
237 }
238private:
239 bit_out(const bit_out&);
240 bit_out& operator=(const bit_out&);
241
242private:
243 TEncoder& dest_; ///< Bit stream target
244 unsigned used_bits_; ///< Bits used in the accumulator
245 unsigned accum_; ///< write bit accumulator
246};
247
248
249/**
250 Byte based reader for un-aligned bit streaming
251
252 @ingroup gammacode
253 @sa encoder
254*/
255template<class TDecoder>
257{
258public:
260 : src_(decoder),
261 used_bits_(unsigned(sizeof(accum_) * 8)),
262 accum_(0)
263 {}
264
265 /// decode unsigned value using Elias Gamma coding
266 unsigned gamma() BMNOEXCEPT;
267
268 /// read number of bits out of the stream
269 unsigned get_bits(unsigned count) BMNOEXCEPT;
270
271 /// read 1 bit
272 unsigned get_bit() BMNOEXCEPT;
273
274 /// Binary Interpolative array decode
275 void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
277 {
278 if (sz)
279 bic_decode_u16_cm(arr, sz, lo, hi);
280 }
281
282 void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
284 {
285 if (sz)
286 bic_decode_u16_cm_bitset(block, sz, lo, hi);
287 }
288 void bic_decode_u16_dry(unsigned sz,
290 {
291 if (sz)
292 bic_decode_u16_cm_dry(sz, lo, hi);
293 }
294
295
296 /// Binary Interpolative array decode
297 void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
299 /// Binary Interpolative array decode
300 void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
302
303 /// Binary Interpolative array decode (32-bit)
304 void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
306
307
308 /// Binary Interpolative array decode into bitset (32-bit based)
309 void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
311
312 /// Binary Interpolative array decode into /dev/null
313 void bic_decode_u16_rg_dry(unsigned sz,
315
316 /// Binary Interpolative array decode into bitset (32-bit based)
317 void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
320
321 /// Binary Interpolative array decode into /dev/null
322 void bic_decode_u16_cm_dry(unsigned sz,
324
325private:
326 bit_in(const bit_in&);
327 bit_in& operator=(const bit_in&);
328private:
329 TDecoder& src_; ///< Source of bytes
330 unsigned used_bits_; ///< Bits used in the accumulator
331 unsigned accum_; ///< read bit accumulator
332};
333
334
335/**
336 Functor for Elias Gamma encoding
337 @ingroup gammacode
338*/
339template<typename T, typename TBitIO>
341{
342public:
343 gamma_encoder(TBitIO& bout) : bout_(bout)
344 {}
345
346 /** Encode word */
347 void operator()(T value) { bout_.gamma(value); }
348private:
350 gamma_encoder& operator=(const gamma_encoder&);
351private:
352 TBitIO& bout_;
353};
354
355
356/**
357 Elias Gamma decoder
358 @ingroup gammacode
359*/
360template<typename T, typename TBitIO>
362{
363public:
364 gamma_decoder(TBitIO& bin) : bin_(bin) {}
365
366 /**
367 Start encoding sequence
368 */
369 void start() {}
370
371 /**
372 Stop decoding sequence
373 */
374 void stop() {}
375
376 /**
377 Decode word
378 */
379 T operator()(void) { return (T)bin_.gamma(); }
380private:
382 gamma_decoder& operator=(const gamma_decoder&);
383private:
384 TBitIO& bin_;
385};
386
387
388// ----------------------------------------------------------------
389// Implementation details.
390// ----------------------------------------------------------------
391
392/*!
393 \fn encoder::encoder(unsigned char* buf, unsigned size)
394 \brief Construction.
395 \param buf - memory buffer pointer.
396 \param size - size of the buffer
397*/
398inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
399: buf_(buf), start_(buf)
400{
401 size_ = a_size;
402}
403/*!
404 \brief Encode 8-bit prefix + an array
405*/
406inline void encoder::put_prefixed_array_32(unsigned char c,
407 const bm::word_t* w,
408 unsigned count) BMNOEXCEPT
409{
410 put_8(c);
411 put_32(w, count);
412}
413
414/*!
415 \brief Encode 8-bit prefix + an array
416*/
417inline void encoder::put_prefixed_array_16(unsigned char c,
418 const bm::short_t* s,
419 unsigned count,
420 bool encode_count) BMNOEXCEPT
421{
422 put_8(c);
423 if (encode_count)
424 put_16((bm::short_t) count);
425 put_16(s, count);
426}
427
428
429/*!
430 \fn void encoder::put_8(unsigned char c)
431 \brief Puts one character into the encoding buffer.
432 \param c - character to encode
433*/
435{
436 *buf_++ = c;
437}
438
439/*!
440 \fn encoder::put_16(bm::short_t s)
441 \brief Puts short word (16 bits) into the encoding buffer.
442 \param s - short word to encode
443*/
445{
446#if (BM_UNALIGNED_ACCESS_OK == 1)
447 ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
448 buf_ += sizeof(bm::short_t);
449#else
450 *buf_++ = (unsigned char) s;
451 s >>= 8;
452 *buf_++ = (unsigned char) s;
453#endif
454}
455
456/*!
457 \brief Method puts array of short words (16 bits) into the encoding buffer.
458*/
459inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
460{
461 BM_ASSERT(count);
462#if (BM_UNALIGNED_ACCESS_OK == 1)
463 ::memcpy(buf_, s, sizeof(bm::short_t)*count);
464 buf_ += sizeof(bm::short_t) * count;
465#else
466 unsigned char* buf = buf_;
467 const bm::short_t* s_end = s + count;
468 do
469 {
470 bm::short_t w16 = *s++;
471 unsigned char a = (unsigned char) w16;
472 unsigned char b = (unsigned char) (w16 >> 8);
473
474 *buf++ = a;
475 *buf++ = b;
476
477 } while (s < s_end);
478
479 buf_ = (unsigned char*)buf;
480#endif
481}
482
483/*!
484 \brief but gat plus value based on its VBR evaluation
485*/
486inline
487void encoder::put_8_16_32(unsigned w,
488 unsigned char c8,
489 unsigned char c16,
490 unsigned char c32) BMNOEXCEPT
491{
492 if (w < 256)
493 {
494 put_8(c8);
495 put_8((unsigned char)w);
496 }
497 else
498 {
499 if (w < 65536)
500 {
501 put_8(c16);
502 put_16((unsigned short) w);
503 }
504 else
505 {
506 put_8(c32);
507 put_32(w);
508 }
509 }
510}
511
512/*!
513 \brief copy bytes into target buffer or just rewind if src is NULL
514*/
515inline
516void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
517{
518 BM_ASSERT((buf_ + count) < (start_ + size_));
519 if (src)
520 ::memcpy(buf_, src, count);
521 buf_ += count;
522}
523
524
525/*!
526 \fn unsigned encoder::size() const
527 \brief Returns size of the current encoding stream.
528*/
529inline size_t encoder::size() const BMNOEXCEPT
530{
531 return size_t(buf_ - start_);
532}
533
534/**
535 \brief Get current memory stream position
536*/
538{
539 return buf_;
540}
541
542/**
543 \brief Set current memory stream position
544*/
546{
547 buf_ = buf_pos;
548}
549
550/*!
551 \fn void encoder::put_24(bm::word_t w)
552 \brief Puts 24 bits word into encoding buffer.
553 \param w - word to encode.
554*/
556{
557 BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
558
559 buf_[0] = (unsigned char)w;
560 buf_[1] = (unsigned char)(w >> 8);
561 buf_[2] = (unsigned char)(w >> 16);
562 buf_ += 3;
563}
564
565
566/*!
567 \fn void encoder::put_32(bm::word_t w)
568 \brief Puts 32 bits word into encoding buffer.
569 \param w - word to encode.
570*/
572{
573#if (BM_UNALIGNED_ACCESS_OK == 1)
574 ::memcpy(buf_, &w, sizeof(bm::word_t));
575 buf_ += sizeof(bm::word_t);
576#else
577 *buf_++ = (unsigned char) w;
578 *buf_++ = (unsigned char) (w >> 8);
579 *buf_++ = (unsigned char) (w >> 16);
580 *buf_++ = (unsigned char) (w >> 24);
581#endif
582}
583
584/*!
585 \fn void encoder::put_48(bm::id64_t w)
586 \brief Puts 48 bits word into encoding buffer.
587 \param w - word to encode.
588*/
590{
591 BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
592 *buf_++ = (unsigned char)w;
593 *buf_++ = (unsigned char)(w >> 8);
594 *buf_++ = (unsigned char)(w >> 16);
595 *buf_++ = (unsigned char)(w >> 24);
596 *buf_++ = (unsigned char)(w >> 32);
597 *buf_++ = (unsigned char)(w >> 40);
598}
599
600
601/*!
602 \fn void encoder::put_64(bm::id64_t w)
603 \brief Puts 64 bits word into encoding buffer.
604 \param w - word to encode.
605*/
607{
608#if (BM_UNALIGNED_ACCESS_OK == 1)
609 ::memcpy(buf_, &w, sizeof(bm::id64_t));
610 buf_ += sizeof(bm::id64_t);
611#else
612 *buf_++ = (unsigned char) w;
613 *buf_++ = (unsigned char) (w >> 8);
614 *buf_++ = (unsigned char) (w >> 16);
615 *buf_++ = (unsigned char) (w >> 24);
616 *buf_++ = (unsigned char) (w >> 32);
617 *buf_++ = (unsigned char) (w >> 40);
618 *buf_++ = (unsigned char) (w >> 48);
619 *buf_++ = (unsigned char) (w >> 56);
620#endif
621}
622
623/*!
624 \fn void encoder::put_h64(bm::id64_t w)
625 \brief Puts 64 bits word into encoding buffer with h-compression
626 \param w - word to encode.
627*/
629{
630 unsigned h_mask = bm::compute_h64_mask(w);
631 put_8((unsigned char) h_mask);
632 for (unsigned i = 0; w && (i < 8); ++i, w >>= 8)
633 {
634 if ((unsigned char) w)
635 put_8((unsigned char) w);
636 } // for i
637}
638
639
640
641/*!
642 \brief Encodes array of 32-bit words
643*/
644inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
645{
646#if (BM_UNALIGNED_ACCESS_OK == 1)
647 // use memcpy() because compilers now understand it as an idiom and inline
648 ::memcpy(buf_, w, sizeof(bm::word_t) * count);
649 buf_ += sizeof(bm::word_t) * count;
650#else
651 unsigned char* buf = buf_;
652 const bm::word_t* w_end = w + count;
653 do
654 {
655 bm::word_t w32 = *w++;
656 unsigned char a = (unsigned char) w32;
657 unsigned char b = (unsigned char) (w32 >> 8);
658 unsigned char c = (unsigned char) (w32 >> 16);
659 unsigned char d = (unsigned char) (w32 >> 24);
660
661 *buf++ = a;
662 *buf++ = b;
663 *buf++ = c;
664 *buf++ = d;
665 } while (w < w_end);
666
667 buf_ = (unsigned char*)buf;
668#endif
669}
670
671
672// ---------------------------------------------------------------------
673
674
675/*!
676 Load bytes from the decode buffer
677*/
678inline
679void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
680{
681 if (dst)
682 ::memcpy(dst, buf_, count);
683 buf_ += count;
684}
685
686/*!
687 \fn bm::id64_t decoder_base::get_h64()
688 \brief Reads 64-bit word from the decoding buffer.
689*/
690inline
692{
693 bm::id64_t w = 0;
694 unsigned h_mask = (unsigned char) *buf_++;
695 for (unsigned i = 0; h_mask && (i < 8); ++i)
696 {
697 if (h_mask & (1u<<i))
698 {
699 h_mask &= ~(1u<<i);
700 unsigned char a = (unsigned char) *buf_++;
701 w |= (bm::id64_t(a) << (i*8));
702 }
703 } // for i
704 return w;
705}
706
707
708/*!
709 \fn decoder::decoder(const unsigned char* buf)
710 \brief Construction
711 \param buf - pointer to the decoding memory.
712*/
713inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
714: decoder_base(buf)
715{
716}
717
718/*!
719 \fn bm::short_t decoder::get_16()
720 \brief Reads 16-bit word from the decoding buffer.
721*/
723{
724#if (BM_UNALIGNED_ACCESS_OK == 1)
725 bm::short_t a;
726 ::memcpy(&a, buf_, sizeof(bm::short_t));
727#else
728 bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
729#endif
730 buf_ += sizeof(a);
731 return a;
732}
733
734/*!
735 \fn bm::word_t decoder::get_24()
736 \brief Reads 32-bit word from the decoding buffer.
737*/
739{
740 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
741 ((unsigned)buf_[2] << 16);
742 buf_ += 3;
743 return a;
744}
745
746
747/*!
748 \fn bm::word_t decoder::get_32()
749 \brief Reads 32-bit word from the decoding buffer.
750*/
752{
753#if (BM_UNALIGNED_ACCESS_OK == 1)
754 bm::word_t a;
755 ::memcpy(&a, buf_, sizeof(bm::word_t));
756#else
757 bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
758 ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
759#endif
760 buf_+=sizeof(a);
761 return a;
762}
763
764/*!
765 \fn bm::word_t decoder::get_48()
766 \brief Reads 64-bit word from the decoding buffer.
767*/
768inline
770{
771 bm::id64_t a = buf_[0] +
772 ((bm::id64_t)buf_[1] << 8) +
773 ((bm::id64_t)buf_[2] << 16) +
774 ((bm::id64_t)buf_[3] << 24) +
775 ((bm::id64_t)buf_[4] << 32) +
776 ((bm::id64_t)buf_[5] << 40);
777 buf_ += 6;
778 return a;
779}
780
781/*!
782 \fn bm::id64_t decoder::get_64()
783 \brief Reads 64-bit word from the decoding buffer.
784*/
785inline
787{
788#if (BM_UNALIGNED_ACCESS_OK == 1)
789 bm::id64_t a;
790 ::memcpy(&a, buf_, sizeof(bm::id64_t));
791#else
792 bm::id64_t a = buf_[0]+
793 ((bm::id64_t)buf_[1] << 8) +
794 ((bm::id64_t)buf_[2] << 16) +
795 ((bm::id64_t)buf_[3] << 24) +
796 ((bm::id64_t)buf_[4] << 32) +
797 ((bm::id64_t)buf_[5] << 40) +
798 ((bm::id64_t)buf_[6] << 48) +
799 ((bm::id64_t)buf_[7] << 56);
800#endif
801 buf_ += sizeof(a);
802 return a;
803}
804
805
806/*!
807 \fn void decoder::get_32(bm::word_t* w, unsigned count)
808 \brief Reads block of 32-bit words from the decoding buffer.
809 \param w - pointer on memory block to read into.
810 \param count - size of memory block in words.
811*/
812inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
813{
814 if (!w)
815 {
816 seek(int(count * sizeof(bm::word_t)));
817 return;
818 }
819#if (BM_UNALIGNED_ACCESS_OK == 1)
820 ::memcpy(w, buf_, count * sizeof(bm::word_t));
821 seek(int(count * sizeof(bm::word_t)));
822 return;
823#else
824 const unsigned char* buf = buf_;
825 const bm::word_t* w_end = w + count;
826 do
827 {
828 bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
829 ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
830 *w++ = a;
831 buf += sizeof(a);
832 } while (w < w_end);
833 buf_ = (unsigned char*)buf;
834#endif
835}
836
837/*!
838 \brief Reads block of 32-bit words from the decoding buffer and ORs
839 to the destination
840 \param w - pointer on memory block to read into
841 \param count - should match bm::set_block_size
842*/
843inline
845{
846 if (!w)
847 {
848 seek(int(count * sizeof(bm::word_t)));
849 return false;
850 }
851#if defined(BMAVX2OPT)
852 __m256i* buf_start = (__m256i*)buf_;
853 seek(int(count * sizeof(bm::word_t)));
854 __m256i* buf_end = (__m256i*)buf_;
855
856 return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
857#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
858 __m128i* buf_start = (__m128i*)buf_;
859 seek(int(count * sizeof(bm::word_t)));
860 __m128i* buf_end = (__m128i*)buf_;
861
862 return bm::sse2_or_arr_unal((__m128i*)w, buf_start, buf_end);
863#else
864 bm::word_t acc = 0;
865 const bm::word_t not_acc = acc = ~~acc;
866
867 for (unsigned i = 0; i < count; i+=4)
868 {
869 acc &= (w[i+0] |= get_32());
870 acc &= (w[i+1] |= get_32());
871 acc &= (w[i+2] |= get_32());
872 acc &= (w[i+3] |= get_32());
873 }
874 return acc == not_acc;
875#endif
876}
877
878/*!
879 \brief Reads block of 32-bit words from the decoding buffer and ANDs
880 to the destination
881 \param w - pointer on memory block to read into
882 \param count - should match bm::set_block_size
883*/
884inline
886{
887 if (!w)
888 {
889 seek(int(count * sizeof(bm::word_t)));
890 return;
891 }
892#if defined(BMAVX2OPT)
893 __m256i* buf_start = (__m256i*)buf_;
894 seek(int(count * sizeof(bm::word_t)));
895 __m256i* buf_end = (__m256i*)buf_;
896
897 bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
898#elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
899 __m128i* buf_start = (__m128i*)buf_;
900 seek(int(count * sizeof(bm::word_t)));
901 __m128i* buf_end = (__m128i*)buf_;
902
903 bm::sse2_and_arr_unal((__m128i*)w, buf_start, buf_end);
904#else
905 for (unsigned i = 0; i < count; i+=4)
906 {
907 w[i+0] &= get_32();
908 w[i+1] &= get_32();
909 w[i+2] &= get_32();
910 w[i+3] &= get_32();
911 }
912#endif
913}
914
915
916
917/*!
918 \fn void decoder::get_16(bm::short_t* s, unsigned count)
919 \brief Reads block of 32-bit words from the decoding buffer.
920 \param s - pointer on memory block to read into.
921 \param count - size of memory block in words.
922*/
923inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
924{
925 BM_ASSERT(count);
926 if (!s)
927 {
928 seek(int(count * sizeof(bm::short_t)));
929 return;
930 }
931#if (BM_UNALIGNED_ACCESS_OK == 1)
932 ::memcpy(s, buf_, sizeof(bm::short_t) * count);
933 buf_ += sizeof(bm::short_t) * count;
934#else
935 const unsigned char* buf = buf_;
936 const bm::short_t* s_end = s + count;
937 do
938 {
939 bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
940 *s++ = a;
941 buf += sizeof(a);
942 } while (s < s_end);
943 buf_ = (unsigned char*)buf;
944#endif
945
946}
947
948
949
950// ---------------------------------------------------------------------
951
952inline decoder_little_endian::decoder_little_endian(const unsigned char* buf)
953: decoder_base(buf)
954{
955}
956
957inline
959{
962 bm::short_t a = bm::short_t((v1 << 8) + v2);
963 buf_ += sizeof(a);
964 return a;
965}
966
968{
969 // TODO: validate if this is a correct for cross endian opts
970 bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
971 ((unsigned)buf_[2] << 16);
972 buf_ += 3;
973 return a;
974}
975
976inline
978{
979 bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
980 ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
981 buf_+=sizeof(a);
982 return a;
983}
984
985inline
987{
988 bm::id64_t a = buf_[0] +
989 ((bm::id64_t)buf_[1] << 8) +
990 ((bm::id64_t)buf_[2] << 16) +
991 ((bm::id64_t)buf_[3] << 24) +
992 ((bm::id64_t)buf_[4] << 32) +
993 ((bm::id64_t)buf_[5] << 40);
994 buf_ += 6;
995 return a;
996}
997
998inline
1000{
1001 bm::id64_t a = buf_[0]+
1002 ((bm::id64_t)buf_[1] << 56) +
1003 ((bm::id64_t)buf_[2] << 48) +
1004 ((bm::id64_t)buf_[3] << 40) +
1005 ((bm::id64_t)buf_[4] << 32) +
1006 ((bm::id64_t)buf_[5] << 24) +
1007 ((bm::id64_t)buf_[6] << 16) +
1008 ((bm::id64_t)buf_[7] << 8);
1009 buf_+=sizeof(a);
1010 return a;
1011}
1012
1013inline
1015{
1016 if (!w)
1017 {
1018 seek(int(count * sizeof(bm::word_t)));
1019 return;
1020 }
1021
1022 const unsigned char* buf = buf_;
1023 const bm::word_t* w_end = w + count;
1024 do
1025 {
1026 bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
1027 ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
1028 *w++ = a;
1029 buf += sizeof(a);
1030 } while (w < w_end);
1031 buf_ = (unsigned char*)buf;
1032}
1033
1034inline
1036{
1037 if (!w)
1038 {
1039 seek(int(count * sizeof(bm::word_t)));
1040 return false;
1041 }
1042
1043 bm::word_t acc = 0;
1044 const bm::word_t not_acc = acc = ~~acc;
1045
1046 for (unsigned i = 0; i < count; i+=4)
1047 {
1048 acc &= (w[i+0] |= get_32());
1049 acc &= (w[i+1] |= get_32());
1050 acc &= (w[i+2] |= get_32());
1051 acc &= (w[i+3] |= get_32());
1052 }
1053 return acc == not_acc;
1054}
1055
1056inline
1058{
1059 for (unsigned i = 0; i < count; i+=4)
1060 {
1061 w[i+0] &= get_32();
1062 w[i+1] &= get_32();
1063 w[i+2] &= get_32();
1064 w[i+3] &= get_32();
1065 }
1066}
1067
1068
1069inline
1071{
1072 if (!s)
1073 {
1074 seek(int(count * sizeof(bm::short_t)));
1075 return;
1076 }
1077
1078 const unsigned char* buf = buf_;
1079 const bm::short_t* s_end = s + count;
1080 do
1081 {
1082 bm::short_t v1 = bm::short_t(buf_[0]);
1083 bm::short_t v2 = bm::short_t(buf_[1]);
1084 bm::short_t a = bm::short_t((v1 << 8) + v2);
1085 *s++ = a;
1086 buf += sizeof(a);
1087 } while (s < s_end);
1088 buf_ = (unsigned char*)buf;
1089}
1090
1091// ----------------------------------------------------------------------
1092//
1093
1094template<typename TEncoder>
1096{
1097 BM_ASSERT(value <= 1);
1098 accum_ |= (value << used_bits_);
1099 if (++used_bits_ == (sizeof(accum_) * 8))
1100 flush_accum();
1101}
1102
1103// ----------------------------------------------------------------------
1104
1105template<typename TEncoder>
1106void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
1107{
1108 unsigned used = used_bits_;
1109 unsigned acc = accum_;
1110
1111 {
1112 unsigned mask = ~0u;
1113 mask >>= (sizeof(accum_) * 8) - count;
1114 value &= mask;
1115 }
1116 for (;count;)
1117 {
1118 unsigned free_bits = unsigned(sizeof(accum_) * 8) - used;
1119 BM_ASSERT(free_bits);
1120 acc |= value << used;
1121
1122 if (count <= free_bits)
1123 {
1124 used += count;
1125 break;
1126 }
1127 else
1128 {
1129 value >>= free_bits;
1130 count -= free_bits;
1131 dest_.put_32(acc);
1132 acc = used = 0;
1133 continue;
1134 }
1135 }
1136 if (used == (sizeof(accum_) * 8))
1137 {
1138 dest_.put_32(acc);
1139 acc = used = 0;
1140 }
1141 used_bits_ = used;
1142 accum_ = acc;
1143}
1144
1145// ----------------------------------------------------------------------
1146
1147template<typename TEncoder>
1149{
1150 if (++used_bits_ == (sizeof(accum_) * 8))
1151 flush_accum();
1152}
1153
1154// ----------------------------------------------------------------------
1155
1156template<typename TEncoder>
1158{
1159 unsigned used = used_bits_;
1160 unsigned free_bits = (sizeof(accum_) * 8) - used;
1161 if (count >= free_bits)
1162 {
1163 flush_accum();
1164 count -= free_bits;
1165 used = 0;
1166
1167 for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
1168 {
1169 dest_.put_32(0);
1170 }
1171 used += count;
1172 }
1173 else
1174 {
1175 used += count;
1176 }
1177 accum_ |= (1u << used);
1178 if (++used == (sizeof(accum_) * 8))
1179 flush_accum();
1180 else
1181 used_bits_ = used;
1182}
1183
1184// ----------------------------------------------------------------------
1185
1186template<typename TEncoder>
1188{
1189 BM_ASSERT(value);
1190
1191 unsigned logv = bm::bit_scan_reverse32(value);
1192
1193 // Put zeroes + 1 bit
1194
1195 unsigned used = used_bits_;
1196 unsigned acc = accum_;
1197 const unsigned acc_bits = (sizeof(acc) * 8);
1198 unsigned free_bits = acc_bits - used;
1199
1200 {
1201 unsigned count = logv;
1202 if (count >= free_bits)
1203 {
1204 dest_.put_32(acc);
1205 acc = used ^= used;
1206 count -= free_bits;
1207
1208 for ( ;count >= acc_bits; count -= acc_bits)
1209 {
1210 dest_.put_32(0);
1211 }
1212 used += count;
1213 }
1214 else
1215 {
1216 used += count;
1217 }
1218 acc |= (1 << used);
1219 if (++used == acc_bits)
1220 {
1221 dest_.put_32(acc);
1222 acc = used ^= used;
1223 }
1224 }
1225
1226 // Put the value bits
1227 //
1228 {
1229 unsigned mask = (~0u);
1230 mask >>= acc_bits - logv;
1231 value &= mask;
1232 }
1233 for (;logv;)
1234 {
1235 acc |= value << used;
1236 free_bits = acc_bits - used;
1237 if (logv <= free_bits)
1238 {
1239 used += logv;
1240 break;
1241 }
1242 else
1243 {
1244 value >>= free_bits;
1245 logv -= free_bits;
1246 dest_.put_32(acc);
1247 acc = used ^= used;
1248 continue;
1249 }
1250 } // for
1251
1252 used_bits_ = used;
1253 accum_ = acc;
1254}
1255
1256// ----------------------------------------------------------------------
1257
1258template<typename TEncoder>
1260 const bm::gap_word_t* arr,
1261 unsigned sz,
1263{
1264 for (;sz;)
1265 {
1266 BM_ASSERT(lo <= hi);
1267 unsigned mid_idx = sz >> 1;
1268 bm::gap_word_t val = arr[mid_idx];
1269
1270 // write the interpolated value
1271 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1272 {
1273 unsigned r = hi - lo - sz + 1;
1274 if (r)
1275 {
1276 unsigned value = val - lo - mid_idx;
1277 unsigned logv = bm::bit_scan_reverse32(r);
1278 put_bits(value, logv+1);
1279 }
1280 }
1281
1282 bic_encode_u16_rg(arr, mid_idx, lo, gap_word_t(val-1));
1283 // tail recursion
1284 // bic_encode_u16(arr + mid_idx + 1, sz - mid_idx - 1, gap_word_t(val+1), hi);
1285 arr += mid_idx + 1;
1286 sz -= mid_idx + 1;
1287 lo = gap_word_t(val + 1);
1288 } // for sz
1289}
1290
1291// ----------------------------------------------------------------------
1292
1293template<typename TEncoder>
1295 unsigned sz,
1296 bm::word_t lo,
1298{
1299 for (;sz;)
1300 {
1301 BM_ASSERT(lo <= hi);
1302 unsigned mid_idx = sz >> 1;
1303 bm::word_t val = arr[mid_idx];
1304
1305 // write the interpolated value
1306 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1307 {
1308 unsigned r = hi - lo - sz + 1;
1309 if (r)
1310 {
1311 unsigned value = val - lo - mid_idx;
1312
1313 unsigned n = r + 1;
1314 unsigned logv = bm::bit_scan_reverse32(n);
1315 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1316 int64_t half_c = c >> 1; // c / 2;
1317 int64_t half_r = r >> 1; // r / 2;
1318 int64_t lo1 = half_r - half_c;
1319 int64_t hi1 = half_r + half_c + 1;
1320 lo1 -= (n & 1);
1321 logv += (value <= lo1 || value >= hi1);
1322
1323 put_bits(value, logv);
1324 }
1325 }
1326
1327 bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1328 // tail recursive call:
1329 // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1330 arr += mid_idx + 1;
1331 sz -= mid_idx + 1;
1332 lo = val + 1;
1333 } // for sz
1334}
1335
1336// ----------------------------------------------------------------------
1337
1338#if 0
1339/**
1340 Shuffle structure for BIC
1341 @internal
1342*/
1343template<unsigned N>
1344struct bic_encode_stack_u16
1345{
1346 bm::gap_word_t val_[N];
1347 bm::gap_word_t mid_[N];
1348 bm::gap_word_t lo_[N];
1349 bm::gap_word_t r_[N];
1350
1351 unsigned stack_size_ = 0;
1352
1353 void bic_shuffle(const bm::gap_word_t* arr,
1355 {
1356 for (;sz;)
1357 {
1358 BM_ASSERT(lo <= hi);
1359 bm::gap_word_t mid_idx = sz >> 1;
1360 bm::gap_word_t val = arr[mid_idx];
1361 unsigned r = hi - lo - sz + 1;
1362 if (r)
1363 {
1364 unsigned s = stack_size_++;
1365 r_[s] = (bm::gap_word_t)r;
1366 val_[s] = val;
1367 mid_[s] = mid_idx;
1368 lo_[s] = lo;
1369 }
1370
1371 bic_shuffle(arr, mid_idx, lo, bm::gap_word_t(val-1));
1372 // tail recursive call:
1373 // bic_shuffle(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1374 arr += mid_idx + 1;
1375 sz -= mid_idx + 1;
1376 lo = bm::gap_word_t(val + 1);
1377 } // for sz
1378 }
1379};
1380
1381template<typename TEncoder>
1383 unsigned sz_i,
1384 bm::gap_word_t lo_i,
1386{
1387 BM_ASSERT(sz_i <= 65535);
1388
1389 bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1390 // BIC re-ordering
1391 u16_stack.bic_shuffle(arr, bm::gap_word_t(sz_i), lo_i, hi_i);
1392
1393 BM_ASSERT(sz_i == u16_stack.stack_size_);
1394 for (unsigned i = 0; i < sz_i; ++i)
1395 {
1396 bm::gap_word_t val = u16_stack.val_[i];
1397 bm::gap_word_t mid_idx = u16_stack.mid_[i];
1398 bm::gap_word_t lo = u16_stack.lo_[i];
1399 unsigned r = u16_stack.r_[i];
1400
1401 unsigned value = val - lo - mid_idx;
1402 unsigned n = r + 1;
1403 unsigned logv = bm::bit_scan_reverse32(n);
1404 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1405
1406 int64_t half_c = c >> 1; // c / 2;
1407 int64_t half_r = r >> 1; // r / 2;
1408 int64_t lo1 = half_r - half_c;
1409 int64_t hi1 = half_r + half_c + 1;
1410 lo1 -= (n & 1);
1411 logv += (value <= lo1 || value >= hi1);
1412
1413 put_bits(value, logv);
1414 } // for i
1415}
1416#endif
1417
1418
1419template<typename TEncoder>
1421 unsigned sz,
1422 bm::gap_word_t lo,
1424{
1425 for (;sz;)
1426 {
1427 BM_ASSERT(lo <= hi);
1428 unsigned mid_idx = sz >> 1;
1429 bm::gap_word_t val = arr[mid_idx];
1430
1431 // write the interpolated value
1432 // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1433 {
1434 unsigned r = hi - lo - sz + 1;
1435 if (r)
1436 {
1437 unsigned value = val - lo - mid_idx;
1438
1439 unsigned n = r + 1;
1440 unsigned logv = bm::bit_scan_reverse32(n);
1441 unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1442 unsigned half_c = c >> 1; // c / 2;
1443 unsigned half_r = r >> 1; // r / 2;
1444 int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1445 unsigned hi1 = (half_r + half_c);
1446 logv += (value <= lo1 || value > hi1);
1447
1448 put_bits(value, logv);
1449 }
1450 }
1451
1452 bic_encode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1453 // tail recursive call:
1454 // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1455 arr += ++mid_idx;
1456 sz -= mid_idx;
1457 lo = bm::gap_word_t(val + 1);
1458 } // for sz
1459}
1460
1461
1462
1463
1464
1465
1466// ----------------------------------------------------------------------
1467//
1468// ----------------------------------------------------------------------
1469
1470
1471template<class TDecoder>
1473 bm::gap_word_t lo,
1475{
1476 for (;sz;)
1477 {
1478 BM_ASSERT(lo <= hi);
1479
1480 unsigned val;
1481 // read the value
1482 {
1483 unsigned r = hi - lo - sz + 1;
1484 if (r)
1485 {
1486 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1487 val = get_bits(logv);
1488 BM_ASSERT(val <= r);
1489 }
1490 else
1491 {
1492 val = 0;
1493 }
1494 }
1495 unsigned mid_idx = sz >> 1;
1496 val += lo + mid_idx;
1497
1498 BM_ASSERT(val < 65536);
1499 BM_ASSERT(mid_idx < 65536);
1500
1501 arr[mid_idx] = bm::gap_word_t(val);
1502 if (sz == 1)
1503 return;
1504 bic_decode_u16_rg(arr, mid_idx, lo, bm::gap_word_t(val - 1));
1505 arr += mid_idx + 1;
1506 sz -= mid_idx + 1;
1507 lo = bm::gap_word_t(val + 1);
1508 } // for sz
1509}
1510
1511// ----------------------------------------------------------------------
1512
1513template<class TDecoder>
1515 bm::word_t lo,
1517{
1518 BM_ASSERT(sz);
1519 do //for (;sz;)
1520 {
1521 BM_ASSERT(lo <= hi);
1522
1523 unsigned val;
1524
1525 // read the interpolated value
1526 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1527 val = hi - lo - sz + 1;
1528 if (val)
1529 {
1530 unsigned logv = bm::bit_scan_reverse32(val+1);
1531
1532 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1533 int64_t half_c = c >> 1;
1534 int64_t half_r = val >> 1;
1535 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1536 int64_t hi1 = half_r + half_c + 1;
1537
1538 val = get_bits(logv);
1539 if (val <= lo1 || val >= hi1)
1540 val += (get_bit() << logv);
1541 }
1542
1543 unsigned mid_idx = sz >> 1;
1544 val += lo + mid_idx;
1545 arr[mid_idx] = val;
1546 if (sz == 1)
1547 return;
1548
1549 bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1550 // tail recursive call:
1551 // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1552 arr += ++mid_idx;
1553 sz -= mid_idx;
1554 lo = val + 1;
1555 } while (sz);// for sz
1556}
1557
1558// ----------------------------------------------------------------------
1559
1560template<class TDecoder>
1562 bm::gap_word_t lo,
1564{
1565 BM_ASSERT(sz);
1566 //for (;sz;)
1567 do
1568 {
1569 BM_ASSERT(lo <= hi);
1570
1571 unsigned val;
1572
1573 // read the interpolated value
1574 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1575 val = hi - lo - sz + 1;
1576 if (val)
1577 {
1578 unsigned logv = bm::bit_scan_reverse32(val+1);
1579
1580 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1581 int64_t half_c = c >> 1; // c / 2;
1582 int64_t half_r = val >> 1; // r / 2;
1583 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1584 int64_t hi1 = half_r + half_c + 1;
1585 val = get_bits(logv);
1586 if (val <= lo1 || val >= hi1)
1587 val += (get_bit() << logv);
1588 }
1589
1590 unsigned mid_idx = sz >> 1;
1591 val += lo + mid_idx;
1592 arr[mid_idx] = bm::gap_word_t(val);
1593 if (sz == 1)
1594 return;
1595
1596 bic_decode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1597 // tail recursive call:
1598 // bic_decode_u16_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1599 arr += ++mid_idx;
1600 sz -= mid_idx;
1601 lo = bm::gap_word_t(val + 1);
1602 } while (sz);// for sz
1603}
1604
1605// ----------------------------------------------------------------------
1606
1607template<class TDecoder>
1609 bm::gap_word_t lo,
1611{
1612 for (;sz;)
1613 {
1614 BM_ASSERT(lo <= hi);
1615
1616 unsigned val;
1617
1618 // read the interpolated value
1619 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1620 val = hi - lo - sz + 1;
1621 if (val)
1622 {
1623 unsigned logv = bm::bit_scan_reverse32(val+1);
1624
1625 unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1626 int64_t half_c = c >> 1; // c / 2;
1627 int64_t half_r = val >> 1; // r / 2;
1628 int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1629 int64_t hi1 = half_r + half_c + 1;
1630
1631 val = get_bits(logv);
1632 if (val <= lo1 || val >= hi1)
1633 val += (get_bit() << logv);
1634 }
1635
1636 unsigned mid_idx = sz >> 1;
1637 val += lo + mid_idx;
1638
1639 // set bit in the target block
1640 {
1641 unsigned nword = (val >> bm::set_word_shift);
1642 block[nword] |= (1u << (val & bm::set_word_mask));
1643 }
1644
1645 if (sz == 1)
1646 return;
1647
1648 bic_decode_u16_cm_bitset(block, mid_idx, lo, bm::gap_word_t(val-1));
1649 // tail recursive call:
1650 // bic_decode_u32_cm(block, sz - mid_idx - 1, val + 1, hi);
1651 sz -= ++mid_idx;// +1;
1652 lo = bm::gap_word_t(val + 1);
1653 } // for sz
1654}
1655
1656// ----------------------------------------------------------------------
1657
1658template<class TDecoder>
1660 bm::gap_word_t lo,
1662{
1663 for (;sz;)
1664 {
1665 BM_ASSERT(lo <= hi);
1666
1667 unsigned val;
1668
1669 // read the interpolated value
1670 // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1671 {
1672 unsigned r = hi - lo - sz + 1;
1673 if (r)
1674 {
1675 unsigned logv = bm::bit_scan_reverse32(r+1);
1676
1677 unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1678 int64_t half_c = c >> 1; // c / 2;
1679 int64_t half_r = r >> 1; // r / 2;
1680 int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1681 int64_t hi1 = half_r + half_c + 1;
1682 r = get_bits(logv);
1683 if (r <= lo1 || r >= hi1)
1684 r += (get_bits(1) << logv);
1685 }
1686 val = r;
1687 }
1688
1689 unsigned mid_idx = sz >> 1;
1690 val += lo + mid_idx;
1691
1692 if (sz == 1)
1693 return;
1694
1695 bic_decode_u16_cm_dry(mid_idx, lo, bm::gap_word_t(val-1));
1696 // tail recursive call:
1697 // bic_decode_u32_cm_dry(sz - mid_idx - 1, val + 1, hi);
1698 sz -= mid_idx + 1;
1699 lo = bm::gap_word_t(val + 1);
1700 } // for sz
1701}
1702
1703
1704// ----------------------------------------------------------------------
1705
1706template<class TDecoder>
1708 bm::gap_word_t lo,
1710{
1711 for (;sz;)
1712 {
1713 BM_ASSERT(lo <= hi);
1714
1715 unsigned val;
1716 // read the value
1717 {
1718 unsigned r = hi - lo - sz + 1;
1719 if (r)
1720 {
1721 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1722 val = get_bits(logv);
1723 BM_ASSERT(val <= r);
1724 }
1725 else
1726 {
1727 val = 0;
1728 }
1729 }
1730 unsigned mid_idx = sz >> 1;
1731 val += lo + mid_idx;
1732 BM_ASSERT(val < 65536);
1733 BM_ASSERT(mid_idx < 65536);
1734
1735 // set bit in the target block
1736 {
1737 unsigned nword = (val >> bm::set_word_shift);
1738 block[nword] |= (1u << (val & bm::set_word_mask));
1739 }
1740
1741 if (sz == 1)
1742 return;
1743 bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
1744 // tail recursion of:
1745 //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1746 sz -= mid_idx + 1;
1747 lo = bm::gap_word_t(val + 1);
1748 } // for sz
1749}
1750
1751// ----------------------------------------------------------------------
1752
1753template<class TDecoder>
1755 bm::gap_word_t lo,
1757{
1758 for (;sz;)
1759 {
1760 BM_ASSERT(lo <= hi);
1761
1762 unsigned val;
1763 // read the value
1764 {
1765 unsigned r = hi - lo - sz + 1;
1766 if (r)
1767 {
1768 unsigned logv = bm::bit_scan_reverse32(r) + 1;
1769 val = get_bits(logv);
1770 BM_ASSERT(val <= r);
1771 }
1772 else
1773 {
1774 val = 0;
1775 }
1776 }
1777 unsigned mid_idx = sz >> 1;
1778 val += lo + mid_idx;
1779 BM_ASSERT(val < 65536);
1780 BM_ASSERT(mid_idx < 65536);
1781
1782 if (sz == 1)
1783 return;
1784 bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
1785 sz -= mid_idx + 1;
1786 lo = bm::gap_word_t(val + 1);
1787 } // for sz
1788}
1789
1790
1791
1792// ----------------------------------------------------------------------
1793
1794template<class TDecoder>
1796{
1797 unsigned acc = accum_;
1798 unsigned used = used_bits_;
1799
1800 if (used == (sizeof(acc) * 8))
1801 {
1802 acc = src_.get_32();
1803 used ^= used;
1804 }
1805 unsigned zero_bits = 0;
1806 while (true)
1807 {
1808 if (acc == 0)
1809 {
1810 zero_bits = unsigned(zero_bits +(sizeof(acc) * 8) - used);
1811 used = 0;
1812 acc = src_.get_32();
1813 continue;
1814 }
1815 unsigned first_bit_idx =
1816 #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER)) && !(defined(__arm64__) || defined(__arm__))
1817 bm::bsf_asm32(acc);
1818 #else
1819 bm::bit_scan_fwd(acc);
1820 #endif
1821 acc >>= first_bit_idx;
1822 zero_bits += first_bit_idx;
1823 used += first_bit_idx;
1824 break;
1825 } // while
1826
1827 // eat the border bit
1828 //
1829 if (used == (sizeof(acc) * 8))
1830 {
1831 acc = src_.get_32();
1832 used = 1;
1833 }
1834 else
1835 {
1836 ++used;
1837 }
1838 acc >>= 1;
1839
1840 // get the value
1841 unsigned current;
1842
1843 unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1844 if (zero_bits <= free_bits)
1845 {
1846 take_accum:
1847 current =
1848 (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
1849 acc >>= zero_bits;
1850 used += zero_bits;
1851 goto ret;
1852 }
1853
1854 if (used == (sizeof(acc) * 8))
1855 {
1856 acc = src_.get_32();
1857 used ^= used;
1858 goto take_accum;
1859 }
1860
1861 // take the part
1862 current = acc;
1863 // read the next word
1864 acc = src_.get_32();
1865 used = zero_bits - free_bits;
1866 current |=
1867 ((acc & block_set_table<true>::_left[used]) << free_bits) |
1868 (1 << zero_bits);
1869
1870 acc >>= used;
1871ret:
1872 accum_ = acc;
1873 used_bits_ = used;
1874 return current;
1875}
1876
1877// ----------------------------------------------------------------------
1878
1879template<class TDecoder>
1881{
1882 BM_ASSERT(count);
1883 const unsigned maskFF = ~0u;
1884 unsigned acc = accum_;
1885 unsigned used = used_bits_;
1886
1887 unsigned value = 0;
1888 unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1889 if (count <= free_bits)
1890 {
1891 take_accum:
1892 value = acc & (maskFF >> (32 - count));
1893 acc >>= count;
1894 used += count;
1895 goto ret;
1896 }
1897 if (used == (sizeof(acc) * 8))
1898 {
1899 acc = src_.get_32();
1900 used = 0;
1901 goto take_accum;
1902 }
1903 value = acc;
1904 acc = src_.get_32();
1905 used = count - free_bits;
1906 value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1907 acc >>= used;
1908ret:
1909 accum_ = acc;
1910 used_bits_ = used;
1911 return value;
1912}
1913
1914// ----------------------------------------------------------------------
1915
1916template<class TDecoder>
1918{
1919 const unsigned maskFF = ~0u;
1920 unsigned value = 0;
1921 unsigned free_bits = unsigned((sizeof(accum_) * 8) - used_bits_);
1922 if (1 <= free_bits)
1923 {
1924 take_accum:
1925 value = accum_ & (maskFF >> (32 - 1));
1926 accum_ >>= 1;
1927 used_bits_ += 1;
1928 return value;
1929 }
1930 if (used_bits_ == (sizeof(accum_) * 8))
1931 {
1932 accum_ = src_.get_32();
1933 used_bits_ = 0;
1934 goto take_accum;
1935 }
1936 value = accum_;
1937 accum_ = src_.get_32();
1938 used_bits_ = 1 - free_bits;
1939 value |= ((accum_ & (maskFF >> (32 - used_bits_))) << free_bits);
1940 accum_ >>= used_bits_;
1941 return value;
1942}
1943
1944// ----------------------------------------------------------------------
1945
1946} // namespace bm
1947
1948#ifdef _MSC_VER
1949#pragma warning(pop)
1950#endif
1951
1952#endif
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BMFORCEINLINE
Definition: bmdef.h:213
#define BM_ASSERT
Definition: bmdef.h:139
Bit manipulation primitives (internal)
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:257
unsigned gamma() BMNOEXCEPT
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1795
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition: encoding.h:1561
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition: encoding.h:282
unsigned get_bit() BMNOEXCEPT
read 1 bit
Definition: encoding.h:1917
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1659
bit_in(TDecoder &decoder) BMNOEXCEPT
Definition: encoding.h:259
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1514
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode.
Definition: encoding.h:1472
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Definition: encoding.h:288
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1754
unsigned get_bits(unsigned count) BMNOEXCEPT
read number of bits out of the stream
Definition: encoding.h:1880
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1608
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1707
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:183
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints)
Definition: encoding.h:1259
void flush() BMNOEXCEPT
Flush the incomplete 32-bit accumulator word.
Definition: encoding.h:230
bit_out(TEncoder &dest)
Definition: encoding.h:185
void put_zero_bits(unsigned count) BMNOEXCEPT
issue specified number of 0s
Definition: encoding.h:1157
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1294
void put_bit(unsigned value) BMNOEXCEPT
issue single bit into encode bit-stream
Definition: encoding.h:1095
void put_bits(unsigned value, unsigned count) BMNOEXCEPT
issue count bits out of value
Definition: encoding.h:1106
void gamma(unsigned value) BMNOEXCEPT
Elias Gamma encode the specified value.
Definition: encoding.h:1187
void put_zero_bit() BMNOEXCEPT
issue 0 into output stream
Definition: encoding.h:1148
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) BMNOEXCEPT
Binary Interpolative encoding (array of 16-bit ints) cm - "center-minimal".
Definition: encoding.h:1420
Base class for all decoding functionality.
Definition: encoding.h:88
const unsigned char * get_pos() const BMNOEXCEPT
Return current buffer pointer.
Definition: encoding.h:105
decoder_base(const unsigned char *buf) BMNOEXCEPT
Definition: encoding.h:90
const unsigned char * buf_
Definition: encoding.h:114
bm::id64_t get_h64() BMNOEXCEPT
Read h-64-bit.
Definition: encoding.h:691
void seek(int delta) BMNOEXCEPT
change current position
Definition: encoding.h:99
size_t size() const BMNOEXCEPT
Returns size of the current decoding stream.
Definition: encoding.h:96
unsigned char get_8() BMNOEXCEPT
Reads character from the decoding buffer.
Definition: encoding.h:93
void memcpy(unsigned char *dst, size_t count) BMNOEXCEPT
read bytes from the decode buffer
Definition: encoding.h:679
void set_pos(const unsigned char *pos) BMNOEXCEPT
Set current buffer pointer.
Definition: encoding.h:108
Class for decoding data from memory buffer.
Definition: encoding.h:160
decoder_little_endian(const unsigned char *buf)
Definition: encoding.h:952
bm::short_t get_16()
Definition: encoding.h:958
bool get_32_OR(bm::word_t *w, unsigned count)
Definition: encoding.h:1035
void get_32_AND(bm::word_t *w, unsigned count)
Definition: encoding.h:1057
Class for decoding data from memory buffer.
Definition: encoding.h:126
bm::word_t get_32() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:751
bm::word_t get_24() BMNOEXCEPT
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:738
bool get_32_OR(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
Definition: encoding.h:844
bm::id64_t get_64() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:786
void get_32_AND(bm::word_t *w, unsigned count) BMNOEXCEPT
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
Definition: encoding.h:885
bm::short_t get_16() BMNOEXCEPT
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:722
decoder(const unsigned char *buf) BMNOEXCEPT
Construction.
Definition: encoding.h:713
bm::id64_t get_48() BMNOEXCEPT
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:769
Memory encoding.
Definition: encoding.h:50
unsigned char * position_type
Definition: encoding.h:52
size_t size() const BMNOEXCEPT
Returns size of the current encoding stream.
Definition: encoding.h:529
void put_48(bm::id64_t w) BMNOEXCEPT
Puts 48 bits word into encoding buffer.
Definition: encoding.h:589
unsigned char * get_pos() const BMNOEXCEPT
Get current memory stream position.
Definition: encoding.h:537
void put_64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer.
Definition: encoding.h:606
encoder(unsigned char *buf, size_t size) BMNOEXCEPT
Construction.
Definition: encoding.h:398
void put_8(unsigned char c) BMNOEXCEPT
Puts one character into the encoding buffer.
Definition: encoding.h:434
void set_pos(unsigned char *buf_pos) BMNOEXCEPT
Set current memory stream position.
Definition: encoding.h:545
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition: encoding.h:417
void put_h64(bm::id64_t w) BMNOEXCEPT
Puts 64 bits word into encoding buffer with h-compression.
Definition: encoding.h:628
void memcpy(const unsigned char *src, size_t count) BMNOEXCEPT
copy bytes into target buffer or just rewind if src is NULL
Definition: encoding.h:516
void put_32(bm::word_t w) BMNOEXCEPT
Puts 32 bits word into encoding buffer.
Definition: encoding.h:571
void put_24(bm::word_t w) BMNOEXCEPT
Puts 24 bits word into encoding buffer.
Definition: encoding.h:555
void put_16(bm::short_t s) BMNOEXCEPT
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:444
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) BMNOEXCEPT
Encode 8-bit prefix + an array.
Definition: encoding.h:406
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT
but gat plus value based on its VBR evaluation
Definition: encoding.h:487
Elias Gamma decoder.
Definition: encoding.h:362
void stop()
Stop decoding sequence.
Definition: encoding.h:374
void start()
Start encoding sequence.
Definition: encoding.h:369
T operator()(void)
Decode word.
Definition: encoding.h:379
gamma_decoder(TBitIO &bin)
Definition: encoding.h:364
Functor for Elias Gamma encoding.
Definition: encoding.h:341
gamma_encoder(TBitIO &bout)
Definition: encoding.h:343
void operator()(T value)
Encode word.
Definition: encoding.h:347
bool avx2_or_arr_unal(__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
OR array elements against another unaligned array dst |= *src.
Definition: bmavx2.h:840
unsigned avx2_and_arr_unal(__m256i *BMRESTRICT dst, const __m256i *BMRESTRICT src, const __m256i *BMRESTRICT src_end)
AND array elements against another array (unaligned) dst &= *src.
Definition: bmavx2.h:729
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
OR array elements against another array (unaligned) dst |= *src.
Definition: bmsse_util.h:426
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
AND array elements against another array (unaligned) dst &= *src.
Definition: bmsse_util.h:259
decoder decoder_big_endian
Class for decoding data from memory buffer.
Definition: encoding.h:148
Definition: bm.h:78
unsigned int word_t
Definition: bmconst.h:39
BMFORCEINLINE T bit_scan_fwd(T v) BMNOEXCEPT
Definition: bmutil.h:297
BMFORCEINLINE unsigned bit_scan_reverse32(unsigned w) BMNOEXCEPT
Definition: bmutil.h:304
const unsigned set_word_shift
Definition: bmconst.h:72
unsigned long long int id64_t
Definition: bmconst.h:35
unsigned compute_h64_mask(unsigned long long w)
Сompute mask of bytes presense in 64-bit word.
Definition: bmutil.h:556
unsigned short gap_word_t
Definition: bmconst.h:78
const unsigned set_word_mask
Definition: bmconst.h:73
unsigned short short_t
Definition: bmconst.h:40
Structure keeps all-left/right ON bits masks.
Definition: bmconst.h:363