BitMagic-C++
bmstrsparsevec.h
Go to the documentation of this file.
1#ifndef BMSTRSPARSEVEC__H__INCLUDED__
2#define BMSTRSPARSEVEC__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmstrsparsevec.h
22 \brief string sparse vector based on bit-transposed matrix
23*/
24
25#include <stddef.h>
26#include "bmconst.h"
27
28#ifndef BM_NO_STL
29#include <stdexcept>
30#include <string_view>
31#endif
32
33#ifndef BM__H__INCLUDED__
34// BitMagic utility headers do not include main "bm.h" declaration
35// #include "bm.h" or "bm64.h" explicitly
36# error missing include (bm.h or bm64.h)
37#endif
38
39#include "bmtrans.h"
40#include "bmalgo.h"
41#include "bmbuffer.h"
42#include "bmbmatrix.h"
43#include "bmdef.h"
44
45namespace bm
46{
47
48enum class remap_setup
49{
50 COPY_RTABLES, //!< copy remap tables only (without data)
51};
52
53
54/*!
55 \brief succinct sparse vector for strings with compression using bit-slicing ( transposition) method
56
57 Initial string is bit-transposed into bit-slices so collection may use less
58 memory due to prefix sum (GAP) compression in bit-slices. In addition, the container
59 can use chracter re-mapping using char freaquencies to compute the minimal codes.
60 Re-mapping can reduce memory footprint, get better search performance and improve storage
61 compression.
62
63 Template parameters:
64 CharType - type of character (char or unsigned char) (wchar not tested)
65 BV - bit-vector for bit-slicing
66 STR_SIZE - initial string size (can dynamically increase on usage)
67
68 @ingroup sv
69*/
70template<typename CharType, typename BV, unsigned STR_SIZE>
71class str_sparse_vector : public base_sparse_vector<CharType, BV, STR_SIZE>
72{
73public:
74 typedef BV bvector_type;
77 typedef CharType value_type;
78 typedef CharType* value_type_prt;
80 typedef typename BV::allocator_type allocator_type;
83 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
87
88 /*! Statistical information about memory allocation details. */
89 struct statistics : public bv_statistics
90 {};
91
93 {
94 sv_octet_slices = STR_SIZE
95 };
96
97 /** Matrix of character remappings
98 @internal
99 */
100 typedef bm::dynamic_heap_matrix<unsigned char, allocator_type>
103
104 /** Matrix of character frequencies (for optimal code remap)
105 @internal
106 */
107 typedef
108 bm::dynamic_heap_matrix<size_t, allocator_type> octet_freq_matrix_type;
109
110 struct is_remap_support { enum trait { value = true }; };
111 struct is_rsc_support { enum trait { value = false }; };
112 struct is_dynamic_splices { enum trait { value = true }; };
113
115 {
116 public:
117 typedef
118 bm::heap_vector<CharType, typename bvector_type::allocator_type, true>
120 protected:
122 };
123
124 /**
125 Reference class to access elements via common [] operator
126 @ingroup sv
127 */
129 {
130 public:
133 size_type idx)
134 : str_sv_(str_sv), idx_(idx)
135 {
136 this->buf_.resize(str_sv.effective_max_str());
137 }
138
139 operator const value_type*() const BMNOEXCEPT
140 {
141 return get();
142 }
143
144 const value_type* get() const BMNOEXCEPT
145 {
146 str_sv_.get(idx_, this->buf_.data(), str_sv_.effective_max_str());
147 return this->buf_.data();
148 }
149
150 bool operator==(const const_reference& ref) const BMNOEXCEPT
151 { return bool(*this) == bool(ref); }
152 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
153 private:
155 size_type idx_;
156 };
157
158 /**
159 Reference class to access elements via common [] operator
160 @ingroup sv
161 */
162 class reference : protected reference_base
163 {
164 public:
166 size_type idx)
167 : str_sv_(str_sv), idx_(idx)
168 {
169 this->buf_.resize(str_sv.effective_max_str());
170 }
171
172 operator const value_type*() const BMNOEXCEPT
173 {
174 return get();
175 }
176
177 const value_type* get() const BMNOEXCEPT
178 {
179 str_sv_.get(idx_, this->buf_.data(), str_sv_.effective_max_str());
180 return this->buf_.data();
181 }
182
184 {
185 // TO DO: implement element copy bit by bit
186 str_sv_.set(idx_, (const value_type*)ref);
187 return *this;
188 }
189
191 {
192 str_sv_.set(idx_, str);
193 return *this;
194 }
195 bool operator==(const reference& ref) const BMNOEXCEPT
196 { return bool(*this) == bool(ref); }
197 bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
198 private:
200 size_type idx_;
201 };
202
203 /**
204 Const iterator to do quick traverse of the sparse vector.
205
206 Implementation uses buffer for decoding so, competing changes
207 to the original vector may not match the iterator returned values.
208
209 This iterator keeps an operational buffer of decoded elements, so memory footprint is NOT negligable.
210
211 @ingroup sv
212 */
214 {
215 public:
216 friend class str_sparse_vector;
217#ifndef BM_NO_STL
218 typedef std::input_iterator_tag iterator_category;
219 typedef std::basic_string_view<CharType> string_view_type;
220#endif
227 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
228
229 typedef long long difference_type;
230 typedef CharType* pointer;
231 typedef CharType*& reference;
232 public:
233 /**
234 Construct iterator (not attached to any particular vector)
235 */
237 /**
238 Construct iterator (attached to sparse vector)
239 @param sv - pointer to sparse vector
240 */
242 /**
243 Construct iterator (attached to sparse vector) and positioned
244 @param sv - reference to sparse vector
245 @param pos - position in the vector to start
246 */
248
250
251 /**
252 setup iterator to retrieve a sub-string of a string
253 @param from - Position of the first character to be copied
254 @param len - length of a substring (defult: 0 read to the available end)
255 */
256 void set_substr(unsigned from, unsigned len = 0) BMNOEXCEPT;
257
258
259 bool operator==(const const_iterator& it) const BMNOEXCEPT
260 { return (pos_ == it.pos_) && (sv_ == it.sv_); }
262 { return ! operator==(it); }
264 { return pos_ < it.pos_; }
266 { return pos_ <= it.pos_; }
268 { return pos_ > it.pos_; }
270 { return pos_ >= it.pos_; }
271
272 /// \brief Get current position (value)
273 const value_type* operator*() const
274 { return this->value(); }
275
276 /// \brief Advance to the next available value
278 { this->advance(); return *this; }
279
280 /// \brief Advance to the next available value
282 { const_iterator tmp(*this);this->advance(); return tmp; }
283
284
285 /// \brief Get zero terminated string value at the current position
286 const value_type* value() const;
287
288 /// \brief Get current string as string_view
290
291 /// \brief Get NULL status
292 bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
293
294 /// Returns true if iterator is at a valid position
295 bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
296
297 /// Invalidate current iterator
299
300 /// Current position (index) in the vector
301 size_type pos() const BMNOEXCEPT { return pos_; }
302
303 /// re-position to a specified position
305
306 /// advance iterator forward by one
307 void advance() BMNOEXCEPT;
308
309 protected:
311 {
312 n_rows = 1024
313 };
314 typedef dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
315
316 private:
317 const str_sparse_vector_type* sv_; ///!< ptr to parent
318 unsigned substr_from_; ///!< substring from
319 unsigned substr_to_; ///!< substring to
320 mutable size_type pos_; ///!< Position
321 mutable buffer_matrix_type buf_matrix_; ///!< decode value buffer
322 mutable size_type pos_in_buf_; ///!< buffer position
323 };
324
325
326 /**
327 Back insert iterator implements buffered insert, faster than generic
328 access assignment.
329
330 Limitations for buffered inserter:
331 1. Do not use more than one inserter (into one vector) at the same time
332 2. Use method flush() at the end to send the rest of accumulated buffer
333 flush is happening automatically on destruction, but if flush produces an
334 exception (for whatever reason) it will be an exception in destructor.
335 As such, explicit flush() is safer way to finilize the sparse vector load.
336
337 @ingroup sv
338 */
340 {
341 public:
342#ifndef BM_NO_STL
343 typedef std::output_iterator_tag iterator_category;
344#endif
351 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
352
353 typedef void difference_type;
354 typedef void pointer;
355 typedef void reference;
356
357 public:
361
363 {
364 BM_ASSERT(bi.empty());
365 buf_matrix_.init_resize(
366 bi.buf_matrix_.rows(), bi.buf_matrix_.cols());
367 this->flush_impl(); sv_ = bi.sv_;
368 return *this;
369 }
370
372
373 /**
374 Set optimization on load option (deafult: false)
375 */
377 { opt_mode_ = opt_mode; }
378
379 /**
380 Method to configure back inserter to collect statistics on optimal character codes.
381 This methos makes back inserter slower, but can be used to accelerate later remap() of
382 the sparse vector. Use flush at the end to apply the remapping.
383 By default inserter does not collect additional statistics.
384
385 Important! You should NOT use intermediate flush if you set remapping!
386
387 @sa flush
388 */
389 void set_remap(bool flag) BMNOEXCEPT { remap_flags_ = flag; }
390
391 /// Get curent remap state flags
392 unsigned get_remap() const BMNOEXCEPT { return remap_flags_; }
393
394 /** push value to the vector */
396 { this->add(v); return *this; }
397
398
399 /** push value to the vector */
400 template<typename StrType>
402 {
403 this->add(v.c_str()); return *this; // TODO: avoid c_str()
404 }
405
406 /** noop */
407 back_insert_iterator& operator*() { return *this; }
408 /** noop */
409 back_insert_iterator& operator++() { return *this; }
410 /** noop */
411 back_insert_iterator& operator++( int ) { return *this; }
412
413 /** add value to the container*/
414 void add(const value_type* v);
415
416 /** add NULL (no-value) to the container */
417 void add_null();
418
419 /** add a series of consequitve NULLs (no-value) to the container */
420 void add_null(size_type count);
421
422 /** flush the accumulated buffer. It is important to call flush at the end, before destruction of the
423 inserter. Flush may throw exceptions, which will be possible to intercept.
424 Otherwise inserter is flushed in the destructor.
425 */
426 void flush();
427
428 // access to internals
429 //
430
431 /// Get octet frequence matrix
432 /// @internal
434 { return omatrix_; }
435
436 protected:
437 /** return true if insertion buffer is empty */
438 bool empty() const BMNOEXCEPT;
439
441
442 /** add value to the buffer without changing the NULL vector
443 @param v - value to push back
444 @internal
445 */
446 void add_value(const value_type* v);
447
448 /**
449 account new value as remap statistics
450 */
451 void add_remap_stat(const value_type* v);
452
453 void flush_impl();
454
455 private:
456 enum buf_size_e
457 {
458 n_buf_size = str_sparse_vector_type::ins_buf_size // 1024 * 8
459 };
460 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
461 friend class str_sparse_vector;
462
463 protected:
464 str_sparse_vector_type* sv_; ///!< pointer on the parent vector
465 bvector_type* bv_null_; ///!< not NULL vector pointer
466 buffer_matrix_type buf_matrix_; ///!< value buffer
467 size_type pos_in_buf_; ///!< buffer position
468 block_idx_type prev_nb_ = 0;///!< previous block added
469 typename
471 ///
472 unsigned remap_flags_ = 0; ///< target remapping
473 octet_freq_matrix_type omatrix_; ///< octet frequency matrix
474 };
475
476
477public:
478
479 /*!
480 \brief Sparse vector constructor
481
482 \param null_able - defines if vector supports NULL values flag
483 by default it is OFF, use bm::use_null to enable it
484 \param ap - allocation strategy for underlying bit-vectors
485 Default allocation policy uses BM_BIT setting (fastest access)
486 \param bv_max_size - maximum possible size of underlying bit-vectors
487 Please note, this is NOT size of svector itself, it is dynamic upper limit
488 which should be used very carefully if we surely know the ultimate size
489 \param alloc - allocator for bit-vectors
490
491 \sa bvector<>
492 \sa bm::bvector<>::allocation_policy
493 \sa bm::startegy
494 */
497 size_type bv_max_size = bm::id_max,
498 const allocator_type& alloc = allocator_type());
499
500 /*! copy-ctor */
502
503 /*! construct empty sparse vector, copying the remap tables from another vector
504 \param str_sv - source vector to take the remap tables from (assumed to be remaped)
505 \param remap_mode - remap table copy param
506 */
507 str_sparse_vector(const str_sparse_vector& str_sv, bm::remap_setup remap_mode);
508
509
510 /*! copy assignmment operator */
513 {
514 if (this != &str_sv)
516 remap_flags_ = str_sv.remap_flags_;
519 return *this;
520 }
521#ifndef BM_NO_CXX11
522 /*! move-ctor */
524 {
525 parent_type::swap(str_sv);
526 remap_flags_ = str_sv.remap_flags_;
527 remap_matrix1_.swap(str_sv.remap_matrix1_);
528 remap_matrix2_.swap(str_sv.remap_matrix2_);
529 }
530
531 /*! move assignmment operator */
534 {
535 if (this != &str_sv)
536 this->swap(str_sv);
537 return *this;
538 }
539#endif
540
541public:
542
543 // ------------------------------------------------------------
544 /*! @name String element access */
545 ///@{
546
547 /** \brief Operator to get read access to an element */
549 { return const_reference(*this, idx); }
550
551 /** \brief Operator to get write access to an element */
552 reference operator[](size_type idx) { return reference(*this, idx); }
553
554 /*!
555 \brief set specified element with bounds checking and automatic resize
556 \param idx - element index (vector auto-resized if needs to)
557 \param str - string to set (zero terminated)
558 */
559 void set(size_type idx, const value_type* str);
560
561 /*!
562 \brief set NULL status for the specified element
563 Vector is resized automatically
564 \param idx - element index (vector auto-resized if needs to)
565 */
566 void set_null(size_type idx);
567
568 /**
569 Set NULL all elements set as 1 in the argument vector
570 \param bv_idx - index bit-vector for elements which needs to be turned to NULL
571 */
572 void set_null(const bvector_type& bv_idx)
573 { this->bit_sub_rows(bv_idx, true); }
574
575 /**
576 Set vector elements spcified by argument bit-vector to empty
577 Note that set to empty elements are NOT going to tuned to NULL (NULL qualifier is preserved)
578 \param bv_idx - index bit-vector for elements which to be set to 0
579 */
580 void clear(const bvector_type& bv_idx)
581 { this->bit_sub_rows(bv_idx, false); }
582
583
584 /**
585 Set NULL all elements NOT set as 1 in the argument vector
586 \param bv_idx - index bit-vector for elements which needs to be kept
587 */
588 void keep(const bvector_type& bv_idx) { this->bit_and_rows(bv_idx); }
589
590
591 /*!
592 \brief insert the specified element
593 \param idx - element index (vector auto-resized if needs to)
594 \param str - string to set (zero terminated)
595 */
596 void insert(size_type idx, const value_type* str);
597
598
599 /*!
600 \brief insert STL string
601 \param idx - element index (vector auto-resized if needs to)
602 \param str - STL string to set
603 */
604 template<typename StrType>
605 void insert(size_type idx, const StrType& str)
606 {
607 this->insert(idx, str.c_str()); // TODO: avoid c_str()
608 }
609
610 /*!
611 \brief erase the specified element
612 \param idx - element index
613 */
614 void erase(size_type idx);
615
616 /*!
617 \brief get specified element
618
619 \param idx - element index
620 \param str - string buffer
621 \param buf_size - string buffer size
622
623 @return string length
624 */
626 value_type* str, size_type buf_size) const BMNOEXCEPT;
627
628 /*!
629 \brief set specified element with bounds checking and automatic resize
630
631 This is an equivalent of set() method, but templetized to be
632 more compatible with the STL std::string and the likes
633
634 \param idx - element index (vector auto-resized if needs to)
635 \param str - input string
636 expected an STL class with size() support,
637 like basic_string<> or vector<char>
638 */
639 template<typename StrType>
640 void assign(size_type idx, const StrType& str)
641 {
642 if (idx >= this->size())
643 this->size_ = idx+1;
644
645 size_type str_size = size_type(str.size());
646 if (!str_size)
647 {
648 this->clear_value_planes_from(0, idx);
649 return;
650 }
651 for (size_type i=0; i < str_size; ++i)
652 {
653 CharType ch = str[i];
654 if (remap_flags_) // compressional re-mapping is in effect
655 {
656 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
657 BM_ASSERT(remap_value);
658 ch = CharType(remap_value);
659 }
660 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
661 if (!ch)
662 break;
663 } // for i
664 this->clear_value_planes_from(unsigned(str_size*8), idx);
665 if (bvector_type* bv_null = this->get_null_bvect())
666 bv_null->set_bit_no_check(idx);
667 }
668
669 /*!
670 \brief push back a string
671 \param str - string to set
672 (STL class with size() support, like basic_string)
673 */
674 template<typename StrType>
675 void push_back(const StrType& str) { assign(this->size_, str); }
676
677 /*!
678 \brief push back a string (zero terminated)
679 \param str - string to set
680 */
681 void push_back(const value_type* str) { set(this->size_, str); }
682
683 /*!
684 \brief push back specified amount of NULL values
685 \param count - number of NULLs to push back
686 */
687 void push_back_null(size_type count);
688
689 /*!
690 \brief push back NULL value
691 */
693
694 /*!
695 \brief get specified string element if NOT NULL
696 Template method expects an STL-compatible type basic_string<>
697 \param idx - element index (vector auto-resized if needs to)
698 \param str - string to get [out]
699 \return true if element is not null and try-get successfull
700 */
701 template<typename StrType>
702 bool try_get(size_type idx, StrType& str) const
703 {
704 if (this->is_null(idx))
705 return false;
706 get(idx, str);
707 return true;
708 }
709
710
711 /*!
712 \brief get specified string element
713 Template method expects an STL-compatible type basic_string<>
714 \param idx - element index (vector auto-resized if needs to)
715 \param str - string to get [out]
716 */
717 template<typename StrType>
718 void get(size_type idx, StrType& str) const
719 {
720 str.clear();
721 for (unsigned i = 0; true; ++i)
722 {
723 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
724 if (!ch)
725 break;
726 if (remap_flags_)
727 {
728 const unsigned char* remap_row = remap_matrix1_.row(i);
729 unsigned char remap_value = remap_row[unsigned(ch)];
730 BM_ASSERT(remap_value);
731 if (!remap_value) // unknown dictionary element
732 {
734 break;
735 }
736 ch = CharType(remap_value);
737 }
738 str.push_back(ch);
739 } // for i
740 }
741
742 /*! Swap content */
743 void swap(str_sparse_vector& str_sv) BMNOEXCEPT;
744
745 ///@}
746
747 // ------------------------------------------------------------
748 /*! @name Element comparison functions */
749 ///@{
750
751 /**
752 \brief Compare vector element with argument lexicographically
753
754 The function does not account for NULL values, NULL element is treated as an empty string
755
756 NOTE: for a re-mapped container, input string may have no correct
757 remapping, in this case we have an ambiguity
758 (we know it is not equal (0) but LT or GT?).
759 Behavior is undefined.
760
761 \param idx - vactor element index
762 \param str - argument to compare with
763
764 \return 0 - equal, < 0 - vect[idx] < str, >0 otherwise
765 */
766 int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
767
768 /**
769 \brief Compare two vector elements
770
771 The function does not account for NULL values, NULL element is treated as an empty string
772 \param idx1 - vactor element index 1
773 \param idx2 - vactor element index 2
774
775 \return 0 - equal, < 0 - vect[idx1] < vect[idx2], >0 otherwise
776 */
777 int compare(size_type idx1, size_type idx2) const BMNOEXCEPT;
778
779
780 /**
781 \brief Find size of common prefix between two vector elements in octets
782 \return size of common prefix
783 */
784 unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT;
785
786 /**
787 Variant of compare for remapped vectors. Caller MUST guarantee vector is remapped.
788 */
789 int compare_remap(size_type idx, const value_type* str) const BMNOEXCEPT;
790
791 /**
792 Variant of compare for non-mapped vectors. Caller MUST guarantee vector is not remapped.
793 */
794 int compare_nomap(size_type idx, const value_type* str) const BMNOEXCEPT;
795
796 ///@}
797
798
799 // ------------------------------------------------------------
800 /*! @name Clear */
801 ///@{
802
803 /*! \brief resize to zero, free memory */
804 void clear_all(bool free_mem) BMNOEXCEPT;
805
806 /*! \brief resize to zero, free memory */
807 void clear() BMNOEXCEPT { clear_all(true); }
808
809 /*!
810 \brief clear range (assign bit 0 for all planes)
811 \param left - interval start
812 \param right - interval end (closed interval)
813 \param set_null - set cleared values to unassigned (NULL)
814 */
816 clear_range(size_type left, size_type right, bool set_null = false)
817 {
819 return *this;
820 }
821
822
823 ///@}
824
825
826 // ------------------------------------------------------------
827 /*! @name Size, etc */
828 ///@{
829
830 /*! \brief return size of the vector
831 \return size of sparse vector
832 */
833 size_type size() const { return this->size_; }
834
835 /*! \brief return true if vector is empty
836 \return true if empty
837 */
838 bool empty() const { return (size() == 0); }
839
840 /*! \brief resize vector
841 \param sz - new size
842 */
843 void resize(size_type sz) { parent_type::resize(sz, true); }
844
845 /*! \brief get maximum string length capacity
846 \return maximum string length sparse vector can take
847 */
848 static size_type max_str() { return sv_octet_slices; }
849
850 /*! \brief get effective string length used in vector
851 Calculate and returns efficiency, how close are we
852 to the reserved maximum.
853 \return current string length maximum
854 */
856
857 /*! \brief get effective string length used in vector
858 \return current string length maximum
859 */
861
862 /**
863 \brief recalculate size to exclude tail NULL elements
864 After this call size() will return the true size of the vector
865 */
866 void sync_size() BMNOEXCEPT;
867
868 ///@}
869
870
871 // ------------------------------------------------------------
872 /*! @name Memory optimization/compression */
873 ///@{
874
875 /*!
876 \brief run memory optimization for all vector planes
877 \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
878 \param opt_mode - requested compression depth
879 \param stat - memory allocation statistics after optimization
880 */
881 void optimize(
882 bm::word_t* temp_block = 0,
883 typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
884 typename str_sparse_vector<CharType, BV, STR_SIZE>::statistics* stat = 0);
885
886 /*!
887 @brief Calculates memory statistics.
888
889 Function fills statistics structure containing information about how
890 this vector uses memory and estimation of max. amount of memory
891 bvector needs to serialize itself.
892
893 @param st - pointer on statistics structure to be filled in.
894
895 @sa statistics
896 */
897 void calc_stat(
898 struct str_sparse_vector<CharType, BV, STR_SIZE>::statistics* st
899 ) const BMNOEXCEPT;
900
901 /**
902 @brief Turn sparse vector into immutable mode
903 Read-only (immutable) vector uses less memory and allows faster searches.
904 Before freezing it is recommenede to call optimize() to get full memory saving effect
905 @sa optimize, remap
906 */
907 void freeze() { this->freeze_matr(); }
908
909 /** Returns true if vector is read-only */
910 bool is_ro() const BMNOEXCEPT { return this->is_ro_; }
911
912 ///@}
913
914 // ------------------------------------------------------------
915 /*! @name Iterator access */
916 ///@{
917
918 /** Provide const iterator access to container content */
919 const_iterator begin() const BMNOEXCEPT;
920
921 /** Provide const iterator access to the end */
923
924 /** Get const_itertor re-positioned to specific element
925 @param idx - position in the sparse vector
926 */
928 { return const_iterator(this, idx); }
929
930 /** Provide back insert iterator
931 Back insert iterator implements buffered insertion, which is faster, than random access
932 or push_back
933 */
935 { return back_insert_iterator(this); }
936
937 ///@}
938
939
940
941 // ------------------------------------------------------------
942 /*! @name Various traits */
943 ///@{
944
945 /** \brief various type traits
946 */
947 static constexpr
948 bool is_compressed() BMNOEXCEPT { return false; }
949
950 static constexpr
951 bool is_str() BMNOEXCEPT { return true; }
952
953 ///@}
954
955 // ------------------------------------------------------------
956 /*! @name Char remapping, succinct utilities
957
958 Remapping runs character usage analysis (frequency analysis)
959 based on that implements reduction of dit-depth thus improves
960 search performance and memory usage (both RAM and serialized).
961
962 Remapping limits farther modifications of sparse vector.
963 (Use remapped vector as read-only).
964 */
965
966 ///@{
967
968 /**
969 Get character remapping status (true | false)
970 */
971 bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
972
973 /**
974 Build remapping profile and load content from another sparse vector
975 Remapped vector likely saves memory (both RAM and disk) but
976 should not be modified (should be read-only).
977
978 \param str_sv - source sparse vector (assumed it is not remapped)
979 \param omatrix - pointer to externall computed char freaquency matrix (optional)
980 \so remap, freeze
981 */
982 void remap_from(const str_sparse_vector& str_sv,
983 octet_freq_matrix_type* omatrix = 0);
984
985 /**
986 Build remapping profile and re-load content to save memory
987 */
988 void remap();
989
990 /*!
991 Calculate flags which octets are present on each byte-plane.
992 @internal
993 */
994 void calc_octet_stat(octet_freq_matrix_type& octet_matrix) const;
995
996 /*!
997 Compute optimal remap codes
998 @internal
999 */
1000 void build_octet_remap(
1001 slice_octet_matrix_type& octet_remap_matrix1,
1002 slice_octet_matrix_type& octet_remap_matrix2,
1003 octet_freq_matrix_type& octet_occupancy_matrix) const;
1004 /*!
1005 remap string from external (ASCII) system to matrix internal code
1006 @return true if remapping was ok, false if found incorrect value
1007 for the plane
1008 @internal
1009 */
1010 static
1011 bool remap_tosv(value_type* BMRESTRICT sv_str,
1012 size_type buf_size,
1013 const value_type* BMRESTRICT str,
1014 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix2
1015 ) BMNOEXCEPT;
1016
1017 /*!
1018 remap string from external (ASCII) system to matrix internal code
1019 @internal
1020 */
1022 size_type buf_size,
1023 const value_type* str) const BMNOEXCEPT
1024 {
1025 return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
1026 }
1027 /*!
1028 remap string from internal code to external (ASCII) system
1029 @return true if remapping was ok, false if found incorrect value
1030 for the plane
1031 @internal
1032 */
1033 static
1034 bool remap_fromsv(
1036 size_type buf_size,
1037 const value_type* BMRESTRICT sv_str,
1038 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix1
1039 ) BMNOEXCEPT;
1040 /*!
1041 re-calculate remap matrix2 based on matrix1
1042 @internal
1043 */
1044 void recalc_remap_matrix2();
1045
1046 ///@}
1047
1048 // ------------------------------------------------------------
1049 /*! @name Export content to C-style */
1050 ///@{
1051
1052 /**
1053 \brief Bulk export strings to a C-style matrix of chars
1054
1055 \param cmatr - dest matrix (bm::heap_matrix)
1056 \param idx_from - index in the sparse vector to export from
1057 \param dec_size - decoding size (matrix column allocation should match)
1058 \param zero_mem - set to false if target array is pre-initialized
1059 with 0s to avoid performance penalty
1060
1061 \return number of actually exported elements (can be less than requested)
1062 */
1063 template<typename CharMatrix>
1064 size_type decode(CharMatrix& cmatr,
1065 size_type idx_from,
1066 size_type dec_size,
1067 bool zero_mem = true) const
1068 {
1069 size_type str_len = effective_max_str();
1070 return decode_substr(cmatr, idx_from, dec_size,
1071 0, unsigned(str_len-1), zero_mem);
1072 }
1073
1074 /**
1075 \brief Bulk export strings to a C-style matrix of chars
1076
1077 \param cmatr - dest matrix (bm::heap_matrix)
1078 \param idx_from - index in the sparse vector to export from
1079 \param dec_size - decoding size (matrix column allocation should match)
1080 \param substr_from - sub-string position from
1081 \param substr_to - sub-string position to
1082 \param zero_mem - set to false if target array is pre-initialized
1083 with 0s to avoid performance penalty
1084
1085 \return number of actually exported elements (can be less than requested)
1086 */
1087 template<typename CharMatrix>
1088 size_type decode_substr(CharMatrix& cmatr,
1089 size_type idx_from,
1090 size_type dec_size,
1091 unsigned substr_from,
1092 unsigned substr_to,
1093 bool zero_mem = true) const
1094 {
1095
1096 /// Decoder functor
1097 /// @internal
1098 ///
1099 struct sv_decode_visitor_func
1100 {
1101 sv_decode_visitor_func(CharMatrix& cmatr) BMNOEXCEPT2
1102 : cmatr_(cmatr)
1103 {}
1104
1105 int add_bits(size_type bv_offset,
1106 const unsigned char* BMRESTRICT bits,
1107 unsigned bits_size) BMNOEXCEPT
1108 {
1109 BM_ASSERT(bits_size);
1110
1111 // can be negative (-1) when bv base offset = 0 and sv = 1,2..
1112 size_type base = bv_offset - sv_off_;
1113 const unsigned_value_type m = mask_;
1114 const unsigned i = substr_i_;
1115 unsigned j = 0;
1116 do
1117 {
1118 size_type idx = bits[j] + base;
1119 value_type* BMRESTRICT str = cmatr_.row(idx);
1120 str[i] |= m;
1121 } while (++j < bits_size);
1122/*
1123 for (unsigned j = 0; j < bits_size; ++j)
1124 {
1125 size_type idx = bits[j] + base;
1126 value_type* BMRESTRICT str = cmatr_.row(idx);
1127 str[i] |= m;
1128 } // for j
1129*/
1130 return 0;
1131 }
1132
1133 int add_range(size_type bv_offset, size_type sz) BMNOEXCEPT
1134 {
1135 BM_ASSERT(sz);
1136
1137 auto base = bv_offset - sv_off_;
1138 const unsigned_value_type m = mask_;
1139 const unsigned i = substr_i_;
1140 size_type j = 0;
1141 do
1142 {
1143 size_type idx = j + base;
1144 value_type* BMRESTRICT str = cmatr_.row(idx);
1145 str[i] |= m;
1146 } while(++j < sz);
1147/*
1148 for (size_type j = 0; j < sz; ++j)
1149 {
1150 size_type idx = j + base;
1151 value_type* BMRESTRICT str = cmatr_.row(idx);
1152 str[i] |= m;
1153 } // for j
1154*/
1155 return 0;
1156 }
1157
1158 CharMatrix& cmatr_; ///< target array for reverse transpose
1159 unsigned_value_type mask_ = 0; ///< bit-plane mask
1160 unsigned substr_i_= 0; ///< i
1161 size_type sv_off_ = 0; ///< SV read offset
1162 };
1163
1164
1165 BM_ASSERT(substr_from <= substr_to);
1166 BM_ASSERT(cmatr.is_init());
1167
1168 if (zero_mem)
1169 cmatr.set_zero(); // TODO: set zero based on requested capacity
1170
1171 size_type rows = size_type(cmatr.rows());
1172 size_type max_sz = this->size() - idx_from;
1173 if (max_sz < dec_size)
1174 dec_size = max_sz;
1175 if (rows < dec_size)
1176 dec_size = rows;
1177 if (!dec_size)
1178 return dec_size;
1179
1180 sv_decode_visitor_func func(cmatr);
1181
1182 for (unsigned i = substr_from; i <= substr_to; ++i)
1183 {
1184 unsigned bi = 0;
1185 func.substr_i_ = i - substr_from; // to put substr at the str[0]
1186
1187 auto rsize = this->bmatr_.rows_not_null();
1188 for (unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
1189 {
1190 if (k >= rsize)
1191 goto break2;
1192 const bvector_type* bv = this->bmatr_.get_row(k);
1193 if (!bv)
1194 continue;
1195 BM_ASSERT (bv != this->get_null_bvector());
1196
1197 func.mask_ = unsigned_value_type(1u << bi);
1198 func.sv_off_ = idx_from;
1199
1200 size_type end = idx_from + dec_size;
1201 bm::for_each_bit_range_no_check(*bv, idx_from, end-1, func);
1202
1203 } // for k
1204 } // for i
1205 break2:
1206
1207 if (remap_flags_)
1208 {
1209 for (unsigned i = 0; i < dec_size; ++i)
1210 {
1211 typename CharMatrix::value_type* str = cmatr.row(i);
1212 remap_matrix1_.remapz(str);
1213 } // for i
1214 }
1215 return dec_size;
1216 }
1217
1218 /**
1219 \brief Bulk import of strings from a C-style matrix of chars
1220
1221 \param cmatr - source matrix (bm::heap_matrix)
1222 [in/out] parameter gets modified(corrupted)
1223 in the process
1224 \param idx_from - destination index in the sparse vector
1225 \param imp_size - import size (number or rows to import)
1226 */
1227 template<typename CharMatrix>
1228 void import(CharMatrix& cmatr, size_type idx_from, size_type imp_size)
1229 {
1230 if (!imp_size)
1231 return;
1232 if (idx_from < this->size_) // in case it touches existing elements
1233 {
1234 // clear all planes in the range to provide corrrect import of 0 values
1235 this->clear_range(idx_from, idx_from + imp_size - 1);
1236 }
1237 import_no_check(cmatr, idx_from, imp_size);
1238 }
1239
1240 /**
1241 \brief Bulk push-back import of strings from a C-style matrix of chars
1242
1243 \param cmatr - source matrix (bm::heap_matrix)
1244 [in/out] parameter gets modified(corrupted)
1245 in the process
1246 \param imp_size - import size (number or rows to import)
1247 */
1248 template<typename CharMatrix>
1249 void import_back(CharMatrix& cmatr, size_type imp_size)
1250 {
1251 if (!imp_size)
1252 return;
1253 import_no_check(cmatr, this->size(), imp_size);
1254 }
1255
1256
1257 ///@}
1258
1259 // ------------------------------------------------------------
1260 /*! @name Merge, split, partition data */
1261 ///@{
1262
1263 /**
1264 @brief copy range of values from another sparse vector
1265
1266 Copy [left..right] values from the source vector,
1267 clear everything outside the range.
1268
1269 \param sv - source vector
1270 \param left - index from in losed diapason of [left..right]
1271 \param right - index to in losed diapason of [left..right]
1272 \param slice_null - "use_null" copy range for NULL vector or
1273 do not copy it
1274 */
1276 size_type left, size_type right,
1277 bm::null_support slice_null = bm::use_null);
1278
1279 /**
1280 \brief merge with another sparse vector using OR operation
1281 Merge is different from join(), because it borrows data from the source
1282 vector, so it gets modified (destructive join)
1283
1284 \param tr_sv - [in, out]argument vector to join with (vector mutates)
1285
1286 \return self reference
1287 */
1290
1291 /**
1292 Keep only specified interval in the sparse vector, clear all other
1293 elements.
1294
1295 \param left - interval start
1296 \param right - interval end (closed interval)
1297 \param slice_null - "use_null" copy range for NULL vector or not
1298 */
1299 void keep_range(size_type left, size_type right,
1300 bm::null_support slice_null = bm::use_null);
1301
1302 ///@}
1303
1304 // ------------------------------------------------------------
1305
1306 /*! \brief syncronize internal structures */
1307 void sync(bool force);
1308
1309 /*!
1310 \brief check if another sparse vector has the same content and size
1311
1312 \param sv - sparse vector for comparison
1313 \param null_able - flag to consider NULL vector in comparison (default)
1314 or compare only value content planes
1315
1316 \return true, if it is the same
1317 */
1319 bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
1320
1321 /**
1322 \brief find position of compressed element by its rank
1323 */
1324 static
1325 bool find_rank(size_type rank, size_type& pos) BMNOEXCEPT;
1326
1327 /**
1328 \brief size of sparse vector (may be different for RSC)
1329 */
1331
1332protected:
1334 {
1335 ins_buf_size = bm::gap_max_bits // 1024 * 8
1337
1338 /// @internal
1339 template<typename CharMatrix, size_t BufSize = ins_buf_size>
1340 void import_no_check(CharMatrix& cmatr,
1341 size_type idx_from, size_type imp_size,
1342 bool set_not_null = true)
1343 {
1344 BM_ASSERT (cmatr.is_init());
1345
1346 unsigned max_str_size = 0;
1347 {
1348 for (unsigned j = 0; j < imp_size; ++j)
1349 {
1350 typename CharMatrix::value_type* str = cmatr.row(j);
1351 typename CharMatrix::size_type i;
1352 typename CharMatrix::size_type cols = cmatr.cols();
1353 for (i = 0; i < cols; ++i)
1354 {
1355 value_type ch = str[i];
1356 if (!ch)
1357 {
1358 max_str_size =
1359 (unsigned)((i > max_str_size) ? i : max_str_size);
1360 break;
1361 }
1362 if (remap_flags_) // re-mapping is in effect
1363 {
1364 unsigned char remap_value =
1365 remap_matrix2_.get(i, (unsigned char)(ch));
1366 BM_ASSERT(remap_value); // unknown ?!
1367 /*
1368 if (!remap_value) // unknown dictionary element
1369 throw_bad_value(0); */
1370 str[i] = CharType(remap_value);
1371 }
1372 } // for i
1373 } // for j
1374 }
1375
1376 this->bmatr_.allocate_rows((1+max_str_size) * 8 + this->is_nullable());
1377
1378 unsigned_value_type ch_slice[BufSize];
1379 for (unsigned i = 0; i < max_str_size; ++i)
1380 {
1381 unsigned ch_acc = 0;
1382#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1383 if (imp_size == ins_buf_size) /// full buffer import can use loop unrolling
1384 {
1385 for (size_type j = 0; j < imp_size; j+=4)
1386 {
1387 unsigned_value_type ch0 = (unsigned_value_type)cmatr.row(j)[i];
1388 unsigned_value_type ch1 = (unsigned_value_type)cmatr.row(j+1)[i];
1389 unsigned_value_type ch2 = (unsigned_value_type)cmatr.row(j+2)[i];
1390 unsigned_value_type ch3 = (unsigned_value_type)cmatr.row(j+3)[i];
1391
1392 ch_acc |= ch0 | ch1 | ch2 | ch3;
1393 ch_slice[j] = ch0; ch_slice[j+1] = ch1;
1394 ch_slice[j+2] = ch2; ch_slice[j+3] = ch3;
1395 }
1396 }
1397 else
1398#endif
1399 {
1400 for (size_type j = 0; j < imp_size; ++j)
1401 {
1402 unsigned_value_type ch = (unsigned_value_type)cmatr.row(j)[i];
1403 ch_acc |= ch;
1404 ch_slice[j] = ch;
1405 }
1406 }
1407 import_char_slice(ch_slice, ch_acc, i, idx_from, imp_size);
1408 }
1409
1410 size_type idx_to = idx_from + imp_size - 1;
1411 if (set_not_null)
1412 {
1413 if (bvector_type* bv_null = this->get_null_bvect())
1414 bv_null->set_range(idx_from, idx_to);
1415 }
1416 if (idx_to >= this->size())
1417 this->size_ = idx_to+1;
1418
1419 }
1420#ifdef _MSC_VER
1421#pragma warning( push )
1422#pragma warning( disable : 4146 )
1423#endif
1424 /// @internal
1425 template<size_t BufSize = ins_buf_size>
1427 unsigned ch_acc,
1428 size_type char_slice_idx,
1429 size_type idx_from, size_type imp_size)
1430 {
1431 size_type bit_list[BufSize];
1432 for ( ;ch_acc; ch_acc &= ch_acc - 1) // bit-scan
1433 {
1434 unsigned n_bits = 0;
1435 const unsigned bi = (bm::word_bitcount((ch_acc & -ch_acc) - 1));
1436 unsigned mask = 1u << bi;
1437#if defined(BMVECTOPT) || defined(BM_USE_GCC_BUILD)
1438 if (imp_size == ins_buf_size) /// full buffer import can use loop unrolling
1439 {
1440 mask |= (mask << 8) | (mask << 16) | (mask << 24);
1441 for (size_type j = 0; j < imp_size; j+=4)
1442 {
1443 unsigned ch0 = ((unsigned)ch_slice[j+0]) |
1444 ((unsigned)ch_slice[j+1] << 8) |
1445 ((unsigned)ch_slice[j+2] << 16) |
1446 ((unsigned)ch_slice[j+3] << 24);
1447 ch0 &= mask;
1448 ch0 = (ch0 >> bi) | (ch0 >> (bi+7)) |
1449 (ch0 >> (bi+14)) | (ch0 >> (bi+21));
1450 ch0 &= 15u;
1451 BM_ASSERT(bm::word_bitcount(ch0) <= 4);
1452 for (size_type base_idx = idx_from + j ;ch0; ch0 &= ch0 - 1) // bit-scan
1453 {
1454 const unsigned bit_idx =
1455 (bm::word_bitcount((ch0 & -ch0) - 1));
1456 bit_list[n_bits++] = base_idx + bit_idx;
1457 } // for ch0
1458 } // for j
1459 }
1460 else
1461#endif
1462 {
1463 for (size_type j = 0; j < imp_size; ++j)
1464 {
1465 unsigned ch = unsigned(ch_slice[j]);
1466 if (ch & mask)
1467 bit_list[n_bits++] = idx_from + j;
1468 } // for j
1469 }
1470
1471 if (n_bits) // set transposed bits to the target plane
1472 {
1473 bvector_type* bv =
1474 this->get_create_slice((unsigned)(char_slice_idx * 8) + bi);
1475 bv->import_sorted(&bit_list[0], n_bits, false);
1476 }
1477 } // for ch_acc
1478 }
1479#ifdef _MSC_VER
1480#pragma warning( pop )
1481#endif
1482 // ------------------------------------------------------------
1483 /*! @name Errors and exceptions */
1484 ///@{
1485
1486 /**
1487 \brief throw range error
1488 \internal
1489 */
1490 static
1491 void throw_range_error(const char* err_msg);
1492
1493 /**
1494 \brief throw domain error
1495 \internal
1496 */
1497 static
1498 void throw_bad_value(const char* err_msg);
1499
1500 ///@}
1501
1502 /*! \brief set value without checking boundaries */
1503 void set_value(size_type idx, const value_type* str);
1504
1505 /*! \brief set value without checking boundaries or support of NULL */
1506 void set_value_no_null(size_type idx, const value_type* str);
1507
1508 /*! \brief insert value without checking boundaries */
1509 void insert_value(size_type idx, const value_type* str);
1510
1511 /*! \brief insert value without checking boundaries or support of NULL */
1512 void insert_value_no_null(size_type idx, const value_type* str);
1513
1514
1515 size_type size_internal() const { return size(); }
1517
1518 size_t remap_size() const { return remap_matrix1_.get_buffer().size(); }
1519 const unsigned char* get_remap_buffer() const
1520 { return remap_matrix1_.get_buffer().buf(); }
1521 unsigned char* init_remap_buffer()
1522 {
1523 remap_matrix1_.init(true);
1524 return remap_matrix1_.get_buffer().data();
1525 }
1526 void set_remap() { remap_flags_ = 1; }
1527
1528protected:
1529
1531 size_type* idx_from, size_type* idx_to) const
1532 {
1533 *idx_from = from; *idx_to = to; return true;
1534 }
1535
1537 { return &remap_matrix1_; }
1539 { return &remap_matrix1_; }
1540
1541 /**
1542 reamp using statistics table from inserter
1543 */
1544 void remap(back_insert_iterator& iit);
1545
1546 /**
1547 Remap from implementation, please note that move_data flag can violate cosnt-ness
1548 */
1549 void remap_from_impl(const str_sparse_vector& str_sv,
1550 octet_freq_matrix_type* omatrix,
1551 bool move_data);
1552
1553protected:
1554 template<class SVect> friend class sparse_vector_serializer;
1555 template<class SVect> friend class sparse_vector_deserializer;
1556
1557protected:
1558 unsigned remap_flags_; ///< remapping status
1559 slice_octet_matrix_type remap_matrix1_; ///< octet remap table 1
1560 slice_octet_matrix_type remap_matrix2_; ///< octet remap table 2
1561};
1562
1563//---------------------------------------------------------------------
1564//---------------------------------------------------------------------
1565
1566
1567template<class CharType, class BV, unsigned STR_SIZE>
1569 bm::null_support null_able,
1571 size_type bv_max_size,
1572 const allocator_type& alloc)
1573: parent_type(null_able, ap, bv_max_size, alloc),
1574 remap_flags_(0)
1575{
1576 static_assert(STR_SIZE > 1,
1577 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1578}
1579
1580
1581//---------------------------------------------------------------------
1582
1583template<class CharType, class BV, unsigned STR_SIZE>
1585 const str_sparse_vector& str_sv)
1586: parent_type(str_sv),
1587 remap_flags_(str_sv.remap_flags_),
1588 remap_matrix1_(str_sv.remap_matrix1_),
1589 remap_matrix2_(str_sv.remap_matrix2_)
1590{
1591 static_assert(STR_SIZE > 1,
1592 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1593}
1594
1595//---------------------------------------------------------------------
1596
1597template<class CharType, class BV, unsigned STR_SIZE>
1599 const str_sparse_vector& str_sv, bm::remap_setup remap_mode)
1600: parent_type(str_sv.get_null_support()),
1601 remap_flags_(str_sv.remap_flags_),
1602 remap_matrix1_(str_sv.remap_matrix1_),
1603 remap_matrix2_(str_sv.remap_matrix2_)
1604{
1605 BM_ASSERT(str_sv.remap_flags_); // source vector should be remapped
1607 static_assert(STR_SIZE > 1,
1608 "BM:: String vector size must be > 1 (to accomodate 0 terminator)");
1609 (void) remap_mode;
1610}
1611
1612//---------------------------------------------------------------------
1613
1614template<class CharType, class BV, unsigned STR_SIZE>
1617{
1618 parent_type::swap(str_sv);
1619 bm::xor_swap(remap_flags_, str_sv.remap_flags_);
1620 remap_matrix1_.swap(str_sv.remap_matrix1_);
1621 remap_matrix2_.swap(str_sv.remap_matrix2_);
1622}
1623
1624//---------------------------------------------------------------------
1625
1626template<class CharType, class BV, unsigned STR_SIZE>
1628 size_type idx, const value_type* str)
1629{
1630 if (idx >= this->size())
1631 this->size_ = idx+1;
1632 set_value(idx, str);
1633}
1634
1635//---------------------------------------------------------------------
1636
1637template<class CharType, class BV, unsigned STR_SIZE>
1639 size_type idx, const value_type* str)
1640{
1641 if (idx >= this->size())
1642 {
1643 this->size_ = idx+1;
1644 set_value(idx, str);
1645 return;
1646 }
1647 insert_value(idx, str);
1648 this->size_++;
1649}
1650
1651//---------------------------------------------------------------------
1652
1653template<class CharType, class BV, unsigned STR_SIZE>
1655{
1656 BM_ASSERT(idx < this->size_);
1657 if (idx >= this->size_)
1658 return;
1659 this->erase_column(idx, true);
1660 this->size_--;
1661}
1662
1663//---------------------------------------------------------------------
1664
1665template<class CharType, class BV, unsigned STR_SIZE>
1667{
1668 if (idx >= this->size_)
1669 this->size_ = idx + 1; // assumed nothing todo outside current size
1670 else
1671 this->bmatr_.clear_column(idx, 0);
1672}
1673//---------------------------------------------------------------------
1674
1675template<class CharType, class BV, unsigned STR_SIZE>
1677{
1678 BM_ASSERT(count);
1679 BM_ASSERT(bm::id_max - count > this->size_);
1680 BM_ASSERT(this->is_nullable());
1681
1682 this->size_ += count;
1683}
1684
1685//---------------------------------------------------------------------
1686
1687template<class CharType, class BV, unsigned STR_SIZE>
1689 size_type idx, const value_type* str)
1690{
1691 set_value_no_null(idx, str);
1692 if (bvector_type* bv_null = this->get_null_bvect())
1693 bv_null->set_bit_no_check(idx);
1694}
1695
1696//---------------------------------------------------------------------
1697
1698template<class CharType, class BV, unsigned STR_SIZE>
1700 size_type idx, const value_type* str)
1701{
1702 for (unsigned i = 0; true; ++i)
1703 {
1704 CharType ch = str[i];
1705 if (!ch)
1706 {
1707 this->clear_value_planes_from(i*8, idx);
1708 return;
1709 }
1710 if (remap_flags_) // compressional re-mapping is in effect
1711 {
1712 auto r = remap_matrix2_.rows();
1713 if (i >= r)
1714 {
1715 remap_matrix1_.resize(i + 1, remap_matrix1_.cols(), true);
1716 remap_matrix2_.resize(i + 1, remap_matrix2_.cols(), true);
1717 }
1718 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1719 BM_ASSERT(remap_value);
1720 if (!remap_value) // unknown dictionary element
1721 {
1722 this->clear_value_planes_from(i*8, idx);
1723 return;
1724 }
1725 ch = CharType(remap_value);
1726 }
1727 this->bmatr_.set_octet(idx, i, (unsigned char)ch);
1728 } // for i
1729}
1730
1731//---------------------------------------------------------------------
1732
1733template<class CharType, class BV, unsigned STR_SIZE>
1735 size_type idx, const value_type* str)
1736{
1737 insert_value_no_null(idx, str);
1738 this->insert_null(idx, true);
1739}
1740
1741//---------------------------------------------------------------------
1742
1743template<class CharType, class BV, unsigned STR_SIZE>
1745 size_type idx, const value_type* str)
1746{
1747 for (unsigned i = 0; true; ++i)
1748 {
1749 CharType ch = str[i];
1750 if (!ch)
1751 {
1752 this->insert_clear_value_planes_from(i*8, idx);
1753 return;
1754 }
1755
1756 if (remap_flags_) // compressional re-mapping is in effect
1757 {
1758 unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1759 BM_ASSERT(remap_value);
1760 if (!remap_value) // unknown dictionary element
1761 {
1762 this->insert_clear_value_planes_from(i*8, idx);
1763 return;
1764 }
1765 ch = CharType(remap_value);
1766 }
1767 this->bmatr_.insert_octet(idx, i, (unsigned char)ch);
1768 } // for i
1769}
1770
1771
1772//---------------------------------------------------------------------
1773
1774template<class CharType, class BV, unsigned STR_SIZE>
1777 size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
1778{
1779 size_type i = 0;
1780 for (; true; ++i)
1781 {
1782 if (i >= buf_size)
1783 break;
1784 CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1785 str[i] = ch;
1786 if (!ch)
1787 break;
1788 }
1789 if (remap_flags_)
1790 remap_matrix1_.remap(str, i);
1791 return i;
1792}
1793
1794//---------------------------------------------------------------------
1795
1796template<class CharType, class BV, unsigned STR_SIZE>
1798 bm::word_t* temp_block,
1799 typename bvector_type::optmode opt_mode,
1801{
1802 typename bvector_type::statistics stbv;
1803 parent_type::optimize(temp_block, opt_mode, &stbv);
1804
1805 if (st)
1806 st->add(stbv);
1807}
1808
1809//---------------------------------------------------------------------
1810
1811template<class CharType, class BV, unsigned STR_SIZE>
1814 ) const BMNOEXCEPT
1815{
1816 BM_ASSERT(st);
1817 typename bvector_type::statistics stbv;
1818 parent_type::calc_stat(&stbv);
1819
1820 st->reset();
1821
1822 st->bit_blocks += stbv.bit_blocks;
1823 st->gap_blocks += stbv.gap_blocks;
1824 st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1825 st->bv_count += stbv.bv_count;
1826 st->max_serialize_mem += stbv.max_serialize_mem + 8;
1827 st->memory_used += stbv.memory_used;
1828 st->gap_cap_overhead += stbv.gap_cap_overhead;
1829
1830 size_t remap_mem_usage = sizeof(remap_flags_);
1831 remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1832 remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1833
1834 st->memory_used += remap_mem_usage;
1835 if (remap_flags_) // use of remapping requires some extra storage
1836 {
1837 st->max_serialize_mem += (remap_mem_usage * 2);
1838 }
1839}
1840
1841//---------------------------------------------------------------------
1842
1843template<class CharType, class BV, unsigned STR_SIZE>
1845 size_type idx, const value_type* str) const BMNOEXCEPT
1846{
1847 BM_ASSERT(str);
1848 BM_ASSERT(is_remap()); // MUST guarantee remapping
1849
1850 int res = 0;
1851 for (unsigned i = 0; true; ++i)
1852 {
1853 CharType octet2 = str[i];
1854 CharType octet1 = (CharType)this->bmatr_.get_octet(idx, i);
1855 if (!octet1)
1856 {
1857 res = -octet2; // -1 || 0
1858 break;
1859 }
1860 const unsigned char* remap_row = remap_matrix1_.row(i);
1861 unsigned char remap_value1 = remap_row[unsigned(octet1)];
1862 BM_ASSERT(remap_value1);
1863 res = (remap_value1 > octet2) - (remap_value1 < octet2);
1864 if (res || !octet2)
1865 break;
1866 } // for i
1867 return res;
1868}
1869
1870//---------------------------------------------------------------------
1871
1872template<class CharType, class BV, unsigned STR_SIZE>
1874 const value_type* str) const BMNOEXCEPT
1875{
1876 BM_ASSERT(str);
1877 BM_ASSERT(!is_remap()); // MUST guarantee remapping
1878
1879 int res = 0;
1880 for (unsigned i = 0; true; ++i)
1881 {
1882 CharType octet2 = str[i];
1883 CharType octet1 = (CharType)this->bmatr_.get_octet(idx, i);
1884 if (!octet1)
1885 {
1886 res = -octet2; // -1 || 0
1887 break;
1888 }
1889 res = (octet1 > octet2) - (octet1 < octet2);
1890 if (res || !octet2)
1891 break;
1892 } // for i
1893 return res;
1894}
1895
1896
1897//---------------------------------------------------------------------
1898
1899template<class CharType, class BV, unsigned STR_SIZE>
1901 size_type idx,
1902 const value_type* str) const BMNOEXCEPT
1903{
1904 BM_ASSERT(str);
1905 int res = remap_flags_ ? compare_remap(idx, str)
1906 : compare_nomap(idx, str);
1907 return res;
1908}
1909
1910//---------------------------------------------------------------------
1911
1912template<class CharType, class BV, unsigned STR_SIZE>
1914 size_type idx1,
1915 size_type idx2) const BMNOEXCEPT
1916{
1917 int res = 0;
1918 if (idx1 == idx2)
1919 return 0;
1920 if (remap_flags_)
1921 {
1922 for (unsigned i = 0; true; ++i)
1923 {
1924 // TODO: implement function to return two octets at once
1925 CharType octet2 = (CharType)this->bmatr_.get_octet(idx2, i);
1926 CharType octet1 = (CharType)this->bmatr_.get_octet(idx1, i);
1927 if (!octet1)
1928 {
1929 res = -octet2; // -1 || 0
1930 break;
1931 }
1932 const unsigned char* remap_row = remap_matrix1_.row(i);
1933 unsigned char remap_value1 = remap_row[unsigned(octet1)];
1934 BM_ASSERT(remap_value1);
1935 unsigned char remap_value2 = remap_row[unsigned(octet2)];
1936 BM_ASSERT(remap_value2);
1937 res = (remap_value1 > remap_value2) - (remap_value1 < remap_value2);
1938 if (res || !octet2)
1939 break;
1940 } // for i
1941 }
1942 else
1943 {
1944 for (unsigned i = 0; true; ++i)
1945 {
1946 CharType octet2 = (CharType)this->bmatr_.get_octet(idx2, i);
1947 CharType octet1 = (CharType)this->bmatr_.get_octet(idx1, i);
1948 if (!octet1)
1949 {
1950 res = -octet2; // -1 || 0
1951 break;
1952 }
1953 res = (octet1 > octet2) - (octet1 < octet2);
1954 if (res || !octet2)
1955 break;
1956 } // for i
1957 }
1958 return res;
1959
1960}
1961
1962//---------------------------------------------------------------------
1963
1964template<class CharType, class BV, unsigned STR_SIZE>
1966 size_type idx1, size_type idx2) const BMNOEXCEPT
1967{
1968 unsigned i = 0;
1969 for (; true; ++i)
1970 {
1971 CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
1972 CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
1973 if (!ch1 || !ch2)
1974 {
1975 if (i)
1976 --i;
1977 break;
1978 }
1979 if (ch1 != ch2)
1980 break;
1981 } // for
1982
1983 return i;
1984}
1985
1986//---------------------------------------------------------------------
1987
1988template<class CharType, class BV, unsigned STR_SIZE>
1989bool
1991 size_type rank,
1992 size_type& pos) BMNOEXCEPT
1993{
1994 BM_ASSERT(rank);
1995 pos = rank - 1;
1996 return true;
1997}
1998
1999//---------------------------------------------------------------------
2000
2001template<class CharType, class BV, unsigned STR_SIZE>
2004 const BMNOEXCEPT
2005{
2006 return this->bmatr_.octet_size();
2007}
2008
2009//---------------------------------------------------------------------
2010
2011template<class CharType, class BV, unsigned STR_SIZE>
2013 octet_freq_matrix_type& octet_matrix) const
2014{
2015 size_type max_str_len = effective_max_str();
2016 octet_matrix.resize(max_str_len, 256, false);
2017 octet_matrix.set_zero(); //init(true);
2018 {
2019 const_iterator it(this);
2020 for(; it.valid(); ++it)
2021 {
2022 const value_type* s = *it; // get asciiz char*
2023 if(!s)
2024 continue;
2025 for (unsigned i = 0; true; ++i) // for each char in str
2026 {
2027 value_type ch = s[i];
2028 if (!ch)
2029 break;
2030 typename
2031 octet_freq_matrix_type::value_type* row = octet_matrix.row(i);
2032 unsigned ch_idx = (unsigned char)ch;
2033 row[ch_idx] += 1;
2034 } // for i
2035 } // for it
2036 }
2037}
2038
2039//---------------------------------------------------------------------
2040
2041template<class CharType, class BV, unsigned STR_SIZE>
2043 slice_octet_matrix_type& octet_remap_matrix1,
2044 slice_octet_matrix_type& octet_remap_matrix2,
2045 octet_freq_matrix_type& octet_occupancy_matrix) const
2046{
2047 size_type max_str_len = effective_max_str();
2048 octet_remap_matrix1.resize(max_str_len, 256, false);
2049 octet_remap_matrix1.set_zero();
2050 octet_remap_matrix2.resize(max_str_len, 256, false);
2051 octet_remap_matrix2.set_zero();
2052
2053 for (unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
2054 {
2055 typename octet_freq_matrix_type::value_type* frq_row =
2056 octet_occupancy_matrix.row(i);
2057
2058 unsigned char* remap_row1 = octet_remap_matrix1.row(i);
2059 unsigned char* remap_row2 = octet_remap_matrix2.row(i);
2060
2061 const typename slice_octet_matrix_type::size_type row_size =
2062 octet_occupancy_matrix.cols();
2063 for (unsigned remap_code = 1; true; ++remap_code)
2064 {
2065 typename octet_freq_matrix_type::size_type char_idx;
2066 bool found = bm::find_max_nz(frq_row, row_size, &char_idx);
2067 #if 0
2068 bool found = bm::find_first_nz(frq_row, row_size, &char_idx);
2069 #endif
2070 if (!found)
2071 break;
2072 BM_ASSERT(char_idx);
2073 unsigned char ch = (unsigned char)char_idx;
2074 remap_row1[remap_code] = ch;
2075 remap_row2[ch] = (unsigned char)remap_code;
2076 frq_row[char_idx] = 0; // clear the picked char
2077 } // for
2078 } // for i
2079}
2080
2081//---------------------------------------------------------------------
2082
2083template<class CharType, class BV, unsigned STR_SIZE>
2085{
2086 BM_ASSERT(remap_flags_);
2087
2088 auto rows = remap_matrix1_.rows();
2089 remap_matrix2_.resize(rows, remap_matrix1_.cols(), false);
2090 if (rows)
2091 {
2092 remap_matrix2_.set_zero();
2093 //remap_matrix2_.init(true);
2094 for (unsigned i = 0; i < remap_matrix1_.rows(); ++i)
2095 {
2096 const unsigned char* remap_row1 = remap_matrix1_.row(i);
2097 unsigned char* remap_row2 = remap_matrix2_.row(i);
2098 for (unsigned j = 1; j < remap_matrix1_.cols(); ++j)
2099 {
2100 if (remap_row1[j])
2101 {
2102 unsigned ch_code = remap_row1[j];
2103 remap_row2[ch_code] = (unsigned char)j;
2104 BM_ASSERT(ch_code < 256);
2105 }
2106 } // for j
2107 } // for i
2108 } // if rows
2109}
2110
2111//---------------------------------------------------------------------
2112
2113template<class CharType, class BV, unsigned STR_SIZE>
2115 value_type* BMRESTRICT sv_str,
2116 size_type buf_size,
2117 const value_type* BMRESTRICT str,
2118 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
2119{
2120 for (unsigned i = 0; i < buf_size; ++i)
2121 {
2122 CharType ch = str[i];
2123 if (!ch)
2124 {
2125 sv_str[i] = ch;
2126 break;
2127 }
2128 const unsigned char* remap_row = octet_remap_matrix2.row(i);
2129 unsigned char remap_value = remap_row[unsigned(ch)];
2130 if (!remap_value) // unknown dictionary element
2131 return false;
2132 sv_str[i] = CharType(remap_value);
2133 } // for i
2134 return true;
2135}
2136
2137//---------------------------------------------------------------------
2138
2139template<class CharType, class BV, unsigned MAX_STR_SIZE>
2142 size_type buf_size,
2143 const value_type* BMRESTRICT sv_str,
2144 const slice_octet_matrix_type& BMRESTRICT octet_remap_matrix1
2145 ) BMNOEXCEPT
2146{
2147 for (unsigned i = 0; i < buf_size; ++i)
2148 {
2149 CharType ch = sv_str[i];
2150 if (!ch)
2151 {
2152 str[i] = ch;
2153 break;
2154 }
2155 const unsigned char* remap_row = octet_remap_matrix1.row(i);
2156 unsigned char remap_value = remap_row[unsigned(ch)];
2157 if (!remap_value) // unknown dictionary element
2158 return false;
2159 str[i] = CharType(remap_value);
2160 } // for i
2161 return true;
2162}
2163
2164//---------------------------------------------------------------------
2165
2166template<class CharType, class BV, unsigned MAX_STR_SIZE>
2168{
2170 sv_tmp(this->get_null_support());
2171 sv_tmp.remap_from_impl(*this, 0, true /*move data*/);
2172 sv_tmp.swap(*this);
2173}
2174
2175//---------------------------------------------------------------------
2176
2177template<class CharType, class BV, unsigned MAX_STR_SIZE>
2180{
2181 if (iit.remap_flags_ && iit.omatrix_.rows())
2182 {
2184 sv_tmp(this->get_null_support());
2185 sv_tmp.remap_from_impl(*this, &iit.omatrix_, true /*move data*/);
2186 sv_tmp.swap(*this);
2187 }
2188 else
2189 remap();
2190}
2191
2192//---------------------------------------------------------------------
2193
2194template<class CharType, class BV, unsigned STR_SIZE>
2195void
2197 const str_sparse_vector& str_sv,
2198 octet_freq_matrix_type* omatrix)
2199{
2200 remap_from_impl(str_sv, omatrix, false);
2201}
2202
2203//---------------------------------------------------------------------
2204
2205template<class CharType, class BV, unsigned STR_SIZE>
2206void
2208 const str_sparse_vector& str_sv,
2209 octet_freq_matrix_type* omatrix,
2210 bool move_data)
2211{
2212 const unsigned buffer_size = ins_buf_size; // bm::gap_max_bits; // 65536;
2213
2214 if (str_sv.is_remap())
2215 {
2216 *this = str_sv;
2217 return;
2218 }
2219
2221 typename
2222 bm::alloc_pool_guard<typename bvector_type::allocator_pool_type, str_sparse_vector> g1, g2;
2223 if (move_data)
2224 {
2225 str_sparse_vector& sv = const_cast<str_sparse_vector&>(str_sv);
2226 g1.assign_if_not_set(pool, *this);
2227 g2.assign_if_not_set(pool, sv);
2228
2229 auto r = sv.get_bmatrix().rows();
2230 pool.set_block_limit(r + 10);
2231 }
2232
2233 this->clear_all(true);
2234 if (str_sv.empty()) // no content to remap
2235 return;
2236
2237 octet_freq_matrix_type occ_matrix; // occupancy map
2238 if (!omatrix)
2239 {
2240 str_sv.calc_octet_stat(occ_matrix);
2241 omatrix = &occ_matrix;
2242 }
2243 str_sv.build_octet_remap(remap_matrix1_, remap_matrix2_, *omatrix);
2244 remap_flags_ = 1; // turn ON remapped mode
2245
2246 typedef bm::dynamic_heap_matrix<CharType, allocator_type> buffer_matrix_type;
2247
2248 size_type str_len = str_sv.effective_max_str()+1;
2249 buffer_matrix_type cmatr(buffer_size, str_len);
2250 cmatr.init(true); // init and set zero
2251
2252 for (size_type i{0}, dsize; true; i += dsize)
2253 {
2254 dsize = str_sv.decode(cmatr, i, buffer_size, true);
2255 if (!dsize)
2256 break;
2257 if (move_data && (dsize == ins_buf_size)) // free the src.vect blocks
2258 {
2259 // here const_cast is OK, because we violate cosnt-ness only
2260 // in internal safe cases controlled by the upper level call
2261 //
2262 str_sparse_vector& sv = const_cast<str_sparse_vector&>(str_sv);
2263 sv.clear_range(i, i+dsize-1, false);
2264 }
2265
2266 this->import(cmatr, i, dsize);
2267 } // for i
2268
2269 if (bvector_type* bv_null = this->get_null_bvect())
2270 {
2271 if (const bvector_type* bv_null_arg = str_sv.get_null_bvector())
2272 if (move_data)
2273 {
2274 bvector_type* bv = const_cast<bvector_type*>(bv_null_arg);
2275 bv_null->swap(*bv);
2276 }
2277 else
2278 *bv_null = *bv_null_arg;
2279 else
2280 {
2281 // TODO: exception? assert? maybe it is OK...
2282 }
2283 }
2284
2285}
2286
2287//---------------------------------------------------------------------
2288
2289template<class CharType, class BV, unsigned STR_SIZE>
2291{
2292 if (remap_flags_)
2293 recalc_remap_matrix2();
2294}
2295
2296//---------------------------------------------------------------------
2297
2298template<class CharType, class BV, unsigned STR_SIZE>
2301 bm::null_support null_able) const BMNOEXCEPT
2302{
2303 // at this point both vectors should have the same remap settings
2304 // to be considered "equal".
2305 // Strictly speaking this is incorrect, because re-map and non-remap
2306 // vectors may have the same content
2307
2308 if (remap_flags_ != sv.remap_flags_)
2309 return false;
2310 if (remap_flags_)
2311 {
2312 // TODO: equal matrix dimention overlap may be not enough
2313 // (check the non-overlap to be zero)
2314 // dimentionality shrink is a result of de-serialization
2315 bool b;
2316 b = remap_matrix1_.equal_overlap(sv.remap_matrix1_);
2317 if (!b)
2318 return b;
2319 b = remap_matrix2_.equal_overlap(sv.remap_matrix2_);
2320 if (!b)
2321 return b;
2322 }
2323 return parent_type::equal(sv, null_able);
2324}
2325
2326//---------------------------------------------------------------------
2327
2328template<class CharType, class BV, unsigned STR_SIZE>
2331 size_type left, size_type right,
2332 bm::null_support slice_null)
2333{
2334 if (left > right)
2335 bm::xor_swap(left, right);
2336 this->clear_all(true);
2337
2338 remap_flags_ = sv.remap_flags_;
2339 remap_matrix1_ = sv.remap_matrix1_;
2340 remap_matrix2_ = sv.remap_matrix2_;
2341
2342 this->copy_range_slices(sv, left, right, slice_null);
2343 this->resize(sv.size());
2344}
2345
2346//---------------------------------------------------------------------
2347
2348template<class CharType, class BV, unsigned STR_SIZE>
2352{
2353 size_type arg_size = str_sv.size();
2354 if (this->size_ < arg_size)
2355 resize(arg_size);
2356
2357 // there is an assumption here that we only need to copy remap flags once
2358 // because we merge matrices with the same remaps
2359 // otherwise - undefined behavior
2360 //
2361 if (remap_flags_ != str_sv.remap_flags_)
2362 {
2363 remap_flags_ = str_sv.remap_flags_;
2364 remap_matrix1_ = str_sv.remap_matrix1_;
2365 remap_matrix2_ = str_sv.remap_matrix2_;
2366 }
2367 bvector_type* bv_null = this->get_null_bvect();
2368
2369 this->merge_matr(str_sv.bmatr_);
2370
2371 // our vector is NULL-able but argument is not (assumed all values are real)
2372 if (bv_null && !str_sv.is_nullable())
2373 bv_null->set_range(0, arg_size-1);
2374
2375 return *this;
2376
2377}
2378
2379//---------------------------------------------------------------------
2380
2381template<class CharType, class BV, unsigned STR_SIZE>
2383 size_type left, size_type right,
2384 bm::null_support slice_null)
2385{
2386 if (right < left)
2387 bm::xor_swap(left, right);
2388 this->keep_range_no_check(left, right, slice_null);
2389}
2390
2391//---------------------------------------------------------------------
2392
2393template<class CharType, class BV, unsigned STR_SIZE>
2396{
2397 typedef typename
2399 return it_type(this);
2400}
2401
2402//---------------------------------------------------------------------
2403
2404template<class CharType, class BV, unsigned STR_SIZE>
2406 bool free_mem) BMNOEXCEPT
2407{
2408 parent_type::clear_all(free_mem);
2409}
2410
2411//---------------------------------------------------------------------
2412
2413template<class CharType, class BV, unsigned STR_SIZE>
2415 const char* err_msg)
2416{
2417#ifndef BM_NO_STL
2418 throw std::range_error(err_msg);
2419#else
2420 BM_ASSERT_THROW(false, BM_ERR_RANGE);
2421#endif
2422}
2423
2424//---------------------------------------------------------------------
2425
2426template<class CharType, class BV, unsigned STR_SIZE>
2428 const char* err_msg)
2429{
2430#ifndef BM_NO_STL
2431 if (!err_msg)
2432 err_msg = "Unknown/incomparable dictionary character";
2433 throw std::domain_error(err_msg);
2434#else
2435 BM_ASSERT_THROW(false, BM_BAD_VALUE);
2436#endif
2437}
2438
2439
2440//---------------------------------------------------------------------
2441//
2442//---------------------------------------------------------------------
2443
2444
2445template<class CharType, class BV, unsigned STR_SIZE>
2447: sv_(0), substr_from_(0), substr_to_(STR_SIZE),
2448 pos_(bm::id_max), pos_in_buf_(~size_type(0))
2449{
2450}
2451
2452//---------------------------------------------------------------------
2453
2454template<class CharType, class BV, unsigned STR_SIZE>
2457: sv_(it.sv_),
2458 substr_from_(it.substr_from_), substr_to_(it.substr_to_),
2459 pos_(it.pos_), pos_in_buf_(~size_type(0))
2460{
2461}
2462
2463//---------------------------------------------------------------------
2464
2465template<class CharType, class BV, unsigned STR_SIZE>
2468: sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
2469{
2470 substr_from_ = 0;
2471 substr_to_ = (unsigned) sv_->effective_max_str();
2472 buf_matrix_.resize(n_rows, substr_to_+1);
2473}
2474
2475//---------------------------------------------------------------------
2476
2477template<class CharType, class BV, unsigned STR_SIZE>
2481: sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
2482{
2483 substr_from_ = 0;
2484 substr_to_ = (unsigned) sv_->effective_max_str();
2485 buf_matrix_.resize(n_rows, substr_to_+1);
2486}
2487
2488//---------------------------------------------------------------------
2489
2490template<class CharType, class BV, unsigned STR_SIZE>
2492 unsigned from, unsigned len) BMNOEXCEPT
2493{
2494 unsigned max_str = sv_->effective_max_str();
2495 substr_from_ = from;
2496 if (!len)
2497 {
2498 len = 1 + max_str - from;
2499 substr_to_ = from + len;
2500 }
2501 else
2502 {
2503 // TODO: check for overflow
2504 substr_to_ = substr_from_ + (len - 1);
2505 }
2506 if (max_str < substr_to_)
2507 substr_to_ = max_str;
2508 buf_matrix_.resize(n_rows, len+1, false/*no content copy*/);
2509}
2510
2511//---------------------------------------------------------------------
2512
2513template<class CharType, class BV, unsigned STR_SIZE>
2516{
2517 BM_ASSERT(sv_);
2518 BM_ASSERT(this->valid());
2519
2520 if (pos_in_buf_ == ~size_type(0))
2521 {
2522 if (!buf_matrix_.is_init())
2523 buf_matrix_.init();
2524 pos_in_buf_ = 0;
2525 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2526 substr_from_, substr_to_);
2527 if (!d)
2528 {
2529 pos_ = bm::id_max;
2530 return 0;
2531 }
2532 }
2533 if (is_null())
2534 return 0;
2535 return buf_matrix_.row(pos_in_buf_);
2536}
2537
2538//---------------------------------------------------------------------
2539
2540template<class CharType, class BV, unsigned STR_SIZE>
2543{
2544 BM_ASSERT(sv_);
2545 BM_ASSERT(this->valid());
2546
2547 if (pos_in_buf_ == ~size_type(0))
2548 {
2549 if (!buf_matrix_.is_init())
2550 buf_matrix_.init();
2551 pos_in_buf_ = 0;
2552 size_type d = sv_->decode_substr(buf_matrix_, pos_, n_rows,
2553 substr_from_, substr_to_);
2554 if (!d)
2555 {
2556 pos_ = bm::id_max;
2557 return string_view_type();
2558 }
2559 }
2560 if (is_null())
2561 return string_view_type();
2562 return string_view_type(buf_matrix_.row(pos_in_buf_));
2563}
2564
2565
2566//---------------------------------------------------------------------
2567
2568template<class CharType, class BV, unsigned STR_SIZE>
2569void
2572 ) BMNOEXCEPT
2573{
2574 pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
2575 pos_in_buf_ = ~~size_type(0);
2576}
2577
2578//---------------------------------------------------------------------
2579
2580template<class CharType, class BV, unsigned STR_SIZE>
2581void
2583{
2584 if (pos_ == bm::id_max) // nothing to do, we are at the end
2585 return;
2586 ++pos_;
2587
2588 if (pos_ >= sv_->size())
2589 this->invalidate();
2590 else
2591 {
2592 if (pos_in_buf_ != ~size_type(0))
2593 {
2594 ++pos_in_buf_;
2595 if (pos_in_buf_ >= n_rows)
2596 pos_in_buf_ = ~~size_type(0);
2597 }
2598 }
2599}
2600
2601//---------------------------------------------------------------------
2602//
2603//---------------------------------------------------------------------
2604
2605template<class CharType, class BV, unsigned STR_SIZE>
2607: sv_(0), bv_null_(0), pos_in_buf_(~size_type(0))
2608{}
2609
2610//---------------------------------------------------------------------
2611
2612template<class CharType, class BV, unsigned STR_SIZE>
2615: sv_(sv), pos_in_buf_(~size_type(0))
2616{
2617 if (sv)
2618 {
2619 prev_nb_ = sv_->size() >> bm::set_block_shift;
2620 bv_null_ = sv_->get_null_bvect();
2621 unsigned esize = (unsigned) sv_->effective_max_str();
2622 if (esize < STR_SIZE)
2623 esize = STR_SIZE;
2624 buf_matrix_.init_resize(n_buf_size, esize);
2625 }
2626 else
2627 {
2628 bv_null_ = 0; prev_nb_ = 0;
2629 }
2630}
2631
2632//---------------------------------------------------------------------
2633
2634template<class CharType, class BV, unsigned STR_SIZE>
2637: sv_(bi.sv_), bv_null_(bi.bv_null_), buf_matrix_(bi.buf_matrix_.rows(), bi.buf_matrix_.cols()),
2638 pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_), opt_mode_(bi.opt_mode_),
2639 remap_flags_(bi.remap_flags_), omatrix_(bi.omatrix_)
2640{
2641 BM_ASSERT(bi.empty());
2642}
2643
2644//---------------------------------------------------------------------
2645
2646template<class CharType, class BV, unsigned STR_SIZE>
2648{
2649 this->flush();
2650}
2651
2652//---------------------------------------------------------------------
2653
2654template<class CharType, class BV, unsigned STR_SIZE>
2655bool
2657 const BMNOEXCEPT
2658{
2659 return (pos_in_buf_ == ~size_type(0) || !sv_);
2660}
2661
2662//---------------------------------------------------------------------
2663
2664template<class CharType, class BV, unsigned STR_SIZE>
2666{
2667 flush_impl();
2668 if (remap_flags_)
2669 {
2670 buf_matrix_.free();
2671 sv_->remap(*this);
2672 remap_flags_ = 0;
2673 }
2674}
2675
2676//---------------------------------------------------------------------
2677
2678template<class CharType, class BV, unsigned STR_SIZE>
2680{
2681 if (this->empty())
2682 return;
2683
2684 size_type imp_idx = sv_->size();
2685 sv_->import_no_check(buf_matrix_, imp_idx, pos_in_buf_+1, false);
2686 pos_in_buf_ = ~~size_type(0);
2687 block_idx_type nb = sv_->size() >> bm::set_block_shift;
2688 if (nb != prev_nb_)
2689 {
2690 sv_->optimize_block(prev_nb_, opt_mode_);
2691 prev_nb_ = nb;
2692 }
2693}
2694
2695
2696//---------------------------------------------------------------------
2697
2698template<class CharType, class BV, unsigned STR_SIZE>
2701{
2702 if (!v)
2703 {
2704 this->add_null();
2705 return;
2706 }
2707 size_type buf_idx = this->pos_in_buf_; // offset in
2708 size_type sz = sv_->size();
2709
2710 this->add_value(v);
2711 if (bv_null_)
2712 bv_null_->set_bit_no_check(sz + buf_idx + 1);
2713}
2714
2715//---------------------------------------------------------------------
2716
2717template<class CharType, class BV, unsigned STR_SIZE>
2719{
2720 /*size_type buf_idx = */this->add_value("");
2721}
2722
2723//---------------------------------------------------------------------
2724
2725template<class CharType, class BV, unsigned STR_SIZE>
2728{
2729 for (size_type i = 0; i < count; ++i) // TODO: optimization
2730 this->add_value("");
2731}
2732
2733//---------------------------------------------------------------------
2734
2735
2736template<class CharType, class BV, unsigned STR_SIZE>
2737void
2740{
2742
2743 size_t slen = ::strlen(v);
2744
2745 auto orows = omatrix_.rows();
2746 if (slen > orows)
2747 {
2748 if (!orows)
2749 {
2750 omatrix_.resize(slen, 256, false);
2751 omatrix_.set_zero();
2752 }
2753 else
2754 {
2755 omatrix_.resize(slen, 256, true);
2756 for (; orows < omatrix_.rows(); ++orows)
2757 {
2758 typename
2759 octet_freq_matrix_type::value_type* r = omatrix_.row(orows);
2760 ::memset(r, 0, 256 * sizeof(r[0]));
2761 } // for orows
2762 }
2763 }
2764 for (size_t i = 0; i < slen; ++i)
2765 {
2766 value_type ch = v[i];
2767 typename
2768 octet_freq_matrix_type::value_type* row = omatrix_.row(i);
2769 unsigned ch_idx = (unsigned char)ch;
2770 row[ch_idx] += 1;
2771 } // for i
2772}
2773
2774//---------------------------------------------------------------------
2775
2776template<class CharType, class BV, unsigned STR_SIZE>
2777void
2780{
2781 BM_ASSERT(sv_);
2782 BM_ASSERT(v);
2783 BM_ASSERT(buf_matrix_.rows()>0);
2784
2785 if (pos_in_buf_ >= buf_matrix_.rows()-1)
2786 {
2787 if (pos_in_buf_ == ~size_type(0) && (!buf_matrix_.is_init()))
2788 buf_matrix_.init();
2789 else
2790 this->flush_impl();
2791 pos_in_buf_ = 0; buf_matrix_.set_zero();
2792 }
2793 else
2794 {
2795 ++pos_in_buf_;
2796 }
2797
2798 if (remap_flags_)
2799 add_remap_stat(v);
2800
2801 value_type* r = buf_matrix_.row(pos_in_buf_);
2802
2803 typename buffer_matrix_type::size_type i;
2804 typename buffer_matrix_type::size_type cols = buf_matrix_.cols();
2805 for (i = 0; i < cols; ++i)
2806 {
2807 r[i] = v[i];
2808 if (!r[i])
2809 return;
2810 } // for i
2811
2812 // string is longer than the initial size, matrix resize is needed
2813 for (cols = i; true; ++cols) // find the new length
2814 {
2815 if (!v[cols])
2816 break;
2817 } // for cols
2818
2819 // cols is now string length and the new mattrix size parameter
2820 buf_matrix_.resize(buf_matrix_.rows(), cols + 1);
2821
2822 r = buf_matrix_.row(pos_in_buf_);
2823 cols = buf_matrix_.cols();
2824 for (; i < cols; ++i)
2825 {
2826 r[i] = v[i];
2827 if (!r[i])
2828 return;
2829 } // for i
2830 BM_ASSERT(0);
2831}
2832
2833//---------------------------------------------------------------------
2834
2835template<class CharType, class BV, unsigned STR_SIZE>
2837{
2838 const bvector_type* bv_null = this->get_null_bvector();
2839 if (!bv_null)
2840 return;
2841 bool found = bv_null->find_reverse(this->size_);
2842 this->size_ += found;
2843}
2844
2845//---------------------------------------------------------------------
2846
2847} // namespace
2848
2849#endif
Algorithms for bvector<> (main include)
basic bit-matrix class and utilities
Constants, lookup tables and typedefs.
Definitions(internal)
#define BMRESTRICT
Definition: bmdef.h:203
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BM_ASSERT
Definition: bmdef.h:139
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:338
#define BMNOEXCEPT2
Definition: bmdef.h:108
Utilities for bit transposition (internal) (experimental!)
Base class for bit-transposed(bit-sliced) sparse vector construction.
Definition: bmbmatrix.h:336
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
Definition: bmbmatrix.h:1624
void resize(size_type new_size, bool set_null)
Definition: bmbmatrix.h:1691
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
Definition: bmbmatrix.h:1543
void clear_range(size_type left, size_type right, bool set_null)
Definition: bmbmatrix.h:1655
bvector_type_ptr get_create_slice(unsigned i)
get access to bit-plain, function checks and creates a plane
Definition: bmbmatrix.h:1730
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
Definition: bmbmatrix.h:532
bmatrix_type bmatr_
bit-transposed matrix
Definition: bmbmatrix.h:677
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
Definition: bmbmatrix.h:1709
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way)
Definition: bmbmatrix.h:430
void bit_and_rows(const bvector_type &bv)
Set AND (intersect) operation on all existing bit-slices.
Definition: bmbmatrix.h:651
std::make_unsigned< value_type >::type unsigned_value_type
Definition: bmbmatrix.h:360
void bit_sub_rows(const bvector_type &bv, bool use_null)
Set SUB (MINUS) operation on all existing bit-slices.
Definition: bmbmatrix.h:644
bvector_type * get_null_bvect() BMNOEXCEPT
Definition: bmbmatrix.h:504
void clear_value_planes_from(unsigned plane_idx, size_type idx)
Definition: bmbmatrix.h:1813
bool is_nullable() const BMNOEXCEPT
check if container supports NULL(unassigned) values
Definition: bmbmatrix.h:418
Basic dense bit-matrix class.
Definition: bmbmatrix.h:56
size_type rows_not_null() const BMNOEXCEPT
Definition: bmbmatrix.h:142
void allocate_rows(size_type rsize)
allocate matrix rows of bit-vectors (new rows are NULLs)
Definition: bmbmatrix.h:854
size_type rows() const BMNOEXCEPT
Definition: bmbmatrix.h:139
void set_octet(size_type pos, size_type octet_idx, unsigned char octet)
Definition: bmbmatrix.h:1139
unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT
Definition: bmbmatrix.h:1225
bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT
Definition: bmbmatrix.h:761
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:603
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
optmode
Optimization mode Every next level means additional checks (better compression vs time)
Definition: bm.h:133
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:137
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:118
bvector_size_type size_type
Definition: bm.h:121
Alloc allocator_type
Definition: bm.h:117
sparse vector de-serializer
Back insert iterator implements buffered insert, faster than generic access assignment.
bvector_type * bv_null_
!< pointer on the parent vector
back_insert_iterator & operator*()
noop
void add_remap_stat(const value_type *v)
account new value as remap statistics
void add(const value_type *v)
add value to the container
unsigned get_remap() const BMNOEXCEPT
Get curent remap state flags.
octet_freq_matrix_type omatrix_
octet frequency matrix
void add_value(const value_type *v)
add value to the buffer without changing the NULL vector
back_insert_iterator & operator++()
noop
void flush()
flush the accumulated buffer.
buffer_matrix_type buf_matrix_
!< not NULL vector pointer
back_insert_iterator & operator++(int)
noop
str_sparse_vector_type * str_sparse_vector_type_ptr
bvector_type::block_idx_type block_idx_type
str_sparse_vector_type::value_type value_type
str_sparse_vector_type::size_type size_type
void add_null()
add NULL (no-value) to the container
void set_optimize(typename bvector_type::optmode opt_mode) BMNOEXCEPT
Set optimization on load option (deafult: false)
bvector_type::allocator_type allocator_type
back_insert_iterator & operator=(const value_type *v)
push value to the vector
bvector_type::optmode opt_mode_
!< previous block added
void set_remap(bool flag) BMNOEXCEPT
Method to configure back inserter to collect statistics on optimal character codes.
block_idx_type prev_nb_
!< buffer position
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
bool empty() const BMNOEXCEPT
return true if insertion buffer is empty
const octet_freq_matrix_type & get_octet_matrix() const noexcept
Get octet frequence matrix.
void add_null(size_type count)
add a series of consequitve NULLs (no-value) to the container
allocator_type::allocator_pool_type allocator_pool_type
back_insert_iterator & operator=(const StrType &v)
push value to the vector
str_sparse_vector_type::bvector_type bvector_type
Const iterator to do quick traverse of the sparse vector.
str_sparse_vector_type * str_sparse_vector_type_ptr
std::input_iterator_tag iterator_category
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator<(const const_iterator &it) const BMNOEXCEPT
bool operator!=(const const_iterator &it) const BMNOEXCEPT
dynamic_heap_matrix< CharType, allocator_type > buffer_matrix_type
bool is_null() const BMNOEXCEPT
Get NULL status.
const value_type * operator*() const
Get current position (value)
const value_type * value() const
Get zero terminated string value at the current position.
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::allocator_type allocator_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
void advance() BMNOEXCEPT
advance iterator forward by one
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
str_sparse_vector< CharType, BV, STR_SIZE > str_sparse_vector_type
const_iterator() BMNOEXCEPT
Construct iterator (not attached to any particular vector)
bool operator<=(const const_iterator &it) const BMNOEXCEPT
str_sparse_vector_type::bvector_type bvector_type
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
str_sparse_vector_type::size_type size_type
str_sparse_vector_type::value_type value_type
string_view_type get_string_view() const
Get current string as string_view.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
const_iterator & operator++(int) BMNOEXCEPT
Advance to the next available value.
void set_substr(unsigned from, unsigned len=0) BMNOEXCEPT
setup iterator to retrieve a sub-string of a string
std::basic_string_view< CharType > string_view_type
bool operator==(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
Reference class to access elements via common [] operator.
const_reference(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
const value_type * get() const BMNOEXCEPT
bool operator==(const const_reference &ref) const BMNOEXCEPT
bm::heap_vector< CharType, typename bvector_type::allocator_type, true > bufffer_type
Reference class to access elements via common [] operator.
const value_type * get() const BMNOEXCEPT
reference(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv, size_type idx)
reference & operator=(const value_type *str)
bool is_null() const BMNOEXCEPT
reference & operator=(const reference &ref)
bool operator==(const reference &ref) const BMNOEXCEPT
succinct sparse vector for strings with compression using bit-slicing ( transposition) method
void insert(size_type idx, const value_type *str)
insert the specified element
void swap(str_sparse_vector &str_sv) BMNOEXCEPT
void clear_all(bool free_mem) BMNOEXCEPT
resize to zero, free memory
void calc_octet_stat(octet_freq_matrix_type &octet_matrix) const
bool is_ro() const BMNOEXCEPT
Returns true if vector is read-only.
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
void resize(size_type sz)
resize vector
void calc_stat(struct str_sparse_vector< CharType, BV, STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
bool empty() const
return true if vector is empty
static size_type max_str()
get maximum string length capacity
unsigned char * init_remap_buffer()
void set_null(size_type idx)
set NULL status for the specified element Vector is resized automatically
unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT
Find size of common prefix between two vector elements in octets.
void set_null(const bvector_type &bv_idx)
Set NULL all elements set as 1 in the argument vector.
void remap_from_impl(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix, bool move_data)
Remap from implementation, please note that move_data flag can violate cosnt-ness.
void sync(bool force)
syncronize internal structures
slice_octet_matrix_type remap_matrix1_
octet remap table 1
const bvector_type * bvector_type_const_ptr
str_sparse_vector< CharType, BV, STR_SIZE > & merge(str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
merge with another sparse vector using OR operation Merge is different from join(),...
static constexpr bool is_compressed() BMNOEXCEPT
various type traits
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
bm::dynamic_heap_matrix< unsigned char, allocator_type > slice_octet_matrix_type
Matrix of character remappings.
bool equal(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
void remap_from(const str_sparse_vector &str_sv, octet_freq_matrix_type *omatrix=0)
Build remapping profile and load content from another sparse vector Remapped vector likely saves memo...
size_type size() const
return size of the vector
int compare_nomap(size_type idx, const value_type *str) const BMNOEXCEPT
Variant of compare for non-mapped vectors.
void build_octet_remap(slice_octet_matrix_type &octet_remap_matrix1, slice_octet_matrix_type &octet_remap_matrix2, octet_freq_matrix_type &octet_occupancy_matrix) const
void keep(const bvector_type &bv_idx)
Set NULL all elements NOT set as 1 in the argument vector.
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
void import_char_slice(const unsigned_value_type *ch_slice, unsigned ch_acc, size_type char_slice_idx, size_type idx_from, size_type imp_size)
bm::basic_bmatrix< BV > bmatrix_type
bool is_remap() const BMNOEXCEPT
Get character remapping status (true | false)
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
void push_back(const StrType &str)
push back a string
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
bvector_type::allocation_policy allocation_policy_type
void clear(const bvector_type &bv_idx)
Set vector elements spcified by argument bit-vector to empty Note that set to empty elements are NOT ...
void freeze()
Turn sparse vector into immutable mode Read-only (immutable) vector uses less memory and allows faste...
BV::allocator_type allocator_type
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
bm::dynamic_heap_matrix< size_t, allocator_type > octet_freq_matrix_type
Matrix of character frequencies (for optimal code remap)
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
slice_octet_matrix_type remap_matrix2_
octet remap table 2
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
parent_type::unsigned_value_type unsigned_value_type
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
void resize_internal(size_type sz)
static bool find_rank(size_type rank, size_type &pos) BMNOEXCEPT
find position of compressed element by its rank
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
bool try_get(size_type idx, StrType &str) const
get specified string element if NOT NULL Template method expects an STL-compatible type basic_string<...
void erase(size_type idx)
erase the specified element
int compare_remap(size_type idx, const value_type *str) const BMNOEXCEPT
Variant of compare for remapped vectors.
size_type effective_size() const BMNOEXCEPT
size of sparse vector (may be different for RSC)
const unsigned char * get_remap_buffer() const
bvector_type * bvector_type_ptr
str_sparse_vector(str_sparse_vector< CharType, BV, STR_SIZE > &&str_sv) BMNOEXCEPT
void insert(size_type idx, const StrType &str)
insert STL string
static void throw_bad_value(const char *err_msg)
throw domain error
void get(size_type idx, StrType &str) const
get specified string element Template method expects an STL-compatible type basic_string<>
reference operator[](size_type idx)
Operator to get write access to an element
static constexpr bool is_str() BMNOEXCEPT
void push_back(const value_type *str)
push back a string (zero terminated)
str_sparse_vector< CharType, BV, STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, STR_SIZE > &str_sv)
size_type decode_substr(CharMatrix &cmatr, size_type idx_from, size_type dec_size, unsigned substr_from, unsigned substr_to, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
const remap_matrix_type * get_remap_matrix() const
base_sparse_vector< CharType, BV, STR_SIZE > parent_type
void push_back_null()
push back NULL value
size_type effective_vector_max() const
get effective string length used in vector
void remap()
Build remapping profile and re-load content to save memory.
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
size_type effective_max_str() const BMNOEXCEPT
get effective string length used in vector Calculate and returns efficiency, how close are we to the ...
void sync_size() BMNOEXCEPT
recalculate size to exclude tail NULL elements After this call size() will return the true size of th...
slice_octet_matrix_type remap_matrix_type
bvector_type::enumerator bvector_enumerator_type
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_itertor re-positioned to specific element.
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
void copy_range(const str_sparse_vector< CharType, BV, STR_SIZE > &sv, size_type left, size_type right, bm::null_support slice_null=bm::use_null)
copy range of values from another sparse vector
size_t remap_size() const
static void throw_range_error(const char *err_msg)
throw range error
str_sparse_vector< CharType, BV, STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all planes)
void clear() BMNOEXCEPT
resize to zero, free memory
size_type size_internal() const
static bool remap_fromsv(value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
static bool remap_tosv(value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const slice_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
unsigned remap_flags_
remapping status
remap_matrix_type * get_remap_matrix()
const const_reference operator[](size_type idx) const
Operator to get read access to an element
void keep_range(size_type left, size_type right, bm::null_support slice_null=bm::use_null)
Keep only specified interval in the sparse vector, clear all other elements.
allocator_type::allocator_pool_type allocator_pool_type
bvector_type::size_type size_type
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition: bmutil.h:573
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
Definition: bmfunc.h:587
null_support
NULL-able value support.
Definition: bmconst.h:228
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:229
@ no_null
do not support NULL values
Definition: bmconst.h:230
Definition: bm.h:78
const unsigned id_max
Definition: bmconst.h:109
bool find_first_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
Definition: bmfunc.h:9820
unsigned int word_t
Definition: bmconst.h:39
int for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplave checks.
Definition: bmalgo_impl.h:1925
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
Definition: bmutil.h:534
remap_setup
@ COPY_RTABLES
copy remap tables only (without data)
bool find_max_nz(const VT *arr, SZ arr_size, SZ *found_idx) BMNOEXCEPT
Find max non-zero value in an array.
Definition: bmfunc.h:9799
const unsigned gap_max_bits
Definition: bmconst.h:81
const unsigned set_block_shift
Definition: bmconst.h:56
Structure with statistical information about memory allocation footprint, serialization projection,...
Definition: bmfunc.h:56
size_t gap_cap_overhead
gap memory overhead between length and capacity
Definition: bmfunc.h:63
size_t ptr_sub_blocks
Number of sub-blocks.
Definition: bmfunc.h:59
size_t gap_blocks
Number of GAP blocks.
Definition: bmfunc.h:58
size_t bit_blocks
Number of bit blocks.
Definition: bmfunc.h:57
size_t bv_count
Number of bit-vectors.
Definition: bmfunc.h:60
size_t max_serialize_mem
estimated maximum memory for serialization
Definition: bmfunc.h:61
size_t memory_used
memory usage for all blocks and service tables
Definition: bmfunc.h:62
void add(const bv_statistics &st) BMNOEXCEPT
Sum data from another sttructure.
Definition: bmfunc.h:101
memory allocation policy
Definition: bm.h:805
Statistical information about bitset's memory allocation details.
Definition: bm.h:125
bool is_remap
Definition: xsample05.cpp:93