BitMagic-C++
bmstrsparsevec.h
Go to the documentation of this file.
1 #ifndef BMSTRSPARSEVEC__H__INCLUDED__
2 #define BMSTRSPARSEVEC__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmstrsparsevec.h
22  \brief string sparse vector based on bit-transposed matrix
23 */
24 
25 #include <stddef.h>
26 #include "bmconst.h"
27 
28 #ifndef BM_NO_STL
29 #include <stdexcept>
30 #endif
31 
32 #ifndef BM__H__INCLUDED__
33 // BitMagic utility headers do not include main "bm.h" declaration
34 // #include "bm.h" or "bm64.h" explicitly
35 # error missing include (bm.h or bm64.h)
36 #endif
37 
38 #include "bmtrans.h"
39 #include "bmalgo.h"
40 #include "bmbuffer.h"
41 #include "bmbmatrix.h"
42 #include "bmdef.h"
43 
44 namespace bm
45 {
46 
47 /*!
48  \brief sparse vector for strings with compression using bit transposition method
49 
50  Initial string is bit-transposed into bit-planes so collection may use less
51  memory due to prefix sum (GAP) compression in bit-plains.
52 
53  @ingroup sv
54 */
55 template<typename CharType, typename BV, unsigned MAX_STR_SIZE>
56 class str_sparse_vector : public base_sparse_vector<CharType, BV, MAX_STR_SIZE>
57 {
58 public:
59  typedef BV bvector_type;
62  typedef CharType value_type;
64  typedef typename BV::allocator_type allocator_type;
67  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
70 
71  /*! Statistical information about memory allocation details. */
72  struct statistics : public bv_statistics
73  {};
74 
76  {
77  sv_octet_plains = MAX_STR_SIZE
78  };
79 
80  typedef
81  bm::heap_matrix<unsigned char,
82  MAX_STR_SIZE, // ROWS
83  256, // COLS
86 
87  struct is_remap_support { enum trait { value = true }; };
88  struct is_rsc_support { enum trait { value = false }; };
89 
90  /**
91  Reference class to access elements via common [] operator
92  @ingroup sv
93  */
95  {
96  public:
99  : str_sv_(str_sv), idx_(idx)
100  {}
101 
102  operator const value_type*() const BMNOEXCEPT
103  {
104  str_sv_.get(idx_, buf_, MAX_STR_SIZE);
105  return &(buf_[0]);
106  }
107 
108  bool operator==(const const_reference& ref) const BMNOEXCEPT
109  { return bool(*this) == bool(ref); }
110  bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
111  private:
113  size_type idx_;
114  mutable CharType buf_[MAX_STR_SIZE];
115  };
116 
117  /**
118  Reference class to access elements via common [] operator
119  @ingroup sv
120  */
121  class reference
122  {
123  public:
125  size_type idx) BMNOEXCEPT
126  : str_sv_(str_sv), idx_(idx)
127  {}
128 
129  operator const value_type*() const BMNOEXCEPT
130  {
131  str_sv_.get(idx_, buf_, MAX_STR_SIZE);
132  return &(buf_[0]);
133  }
134 
136  {
137  // TO DO: implement element copy bit by bit
138  str_sv_.set(idx_, (const value_type*)ref);
139  return *this;
140  }
141 
143  {
144  str_sv_.set(idx_, str);
145  return *this;
146  }
147  bool operator==(const reference& ref) const BMNOEXCEPT
148  { return bool(*this) == bool(ref); }
149  bool is_null() const BMNOEXCEPT { return str_sv_.is_null(idx_); }
150  private:
152  size_type idx_;
153  mutable CharType buf_[MAX_STR_SIZE];
154  };
155 
156  /**
157  Const iterator to do quick traverse of the sparse vector.
158 
159  Implementation uses buffer for decoding so, competing changes
160  to the original vector may not match the iterator returned values.
161 
162  This iterator keeps an operational buffer of transposed elements,
163  so memory footprint is not negligable.
164 
165  @ingroup sv
166  */
168  {
169  public:
170  friend class str_sparse_vector;
171 #ifndef BM_NO_STL
172  typedef std::input_iterator_tag iterator_category;
173 #endif
180  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
181 
182  typedef long long difference_type;
183  typedef CharType* pointer;
184  typedef CharType*& reference;
185  public:
190 
191  bool operator==(const const_iterator& it) const BMNOEXCEPT
192  { return (pos_ == it.pos_) && (sv_ == it.sv_); }
193  bool operator!=(const const_iterator& it) const BMNOEXCEPT
194  { return ! operator==(it); }
195  bool operator < (const const_iterator& it) const BMNOEXCEPT
196  { return pos_ < it.pos_; }
197  bool operator <= (const const_iterator& it) const BMNOEXCEPT
198  { return pos_ <= it.pos_; }
199  bool operator > (const const_iterator& it) const BMNOEXCEPT
200  { return pos_ > it.pos_; }
201  bool operator >= (const const_iterator& it) const BMNOEXCEPT
202  { return pos_ >= it.pos_; }
203 
204  /// \brief Get current position (value)
205  const value_type* operator*() const BMNOEXCEPT { return this->value(); }
206 
207  /// \brief Advance to the next available value
209  { this->advance(); return *this; }
210 
211  /// \brief Advance to the next available value
213  { const_iterator tmp(*this);this->advance(); return tmp; }
214 
215 
216  /// \brief Get current position (value)
217  const value_type* value() const BMNOEXCEPT;
218 
219  /// \brief Get NULL status
220  bool is_null() const BMNOEXCEPT { return sv_->is_null(this->pos_); }
221 
222  /// Returns true if iterator is at a valid position
223  bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
224 
225  /// Invalidate current iterator
226  void invalidate() BMNOEXCEPT { pos_ = bm::id_max; }
227 
228  /// Current position (index) in the vector
229  size_type pos() const BMNOEXCEPT { return pos_; }
230 
231  /// re-position to a specified position
233 
234  /// advance iterator forward by one
235  void advance() BMNOEXCEPT;
236 
237  protected:
238  typedef bm::heap_matrix<CharType,
239  1024, // ROWS: number of strings in one batch
240  MAX_STR_SIZE, // COLS
242 
243  private:
244  const str_sparse_vector_type* sv_; ///!< ptr to parent
245  mutable size_type pos_; ///!< Position
246  mutable buffer_matrix_type buf_matrix_; ///!< decode value buffer
247  mutable size_type pos_in_buf_; ///!< buffer position
248  };
249 
250 
251  /**
252  Back insert iterator implements buffered insert, faster than generic
253  access assignment.
254 
255  Limitations for buffered inserter:
256  1. Do not use more than one inserter (into one vector) at the same time
257  2. Use method flush() at the end to send the rest of accumulated buffer
258  flush is happening automatically on destruction, but if flush produces an
259  exception (for whatever reason) it will be an exception in destructor.
260  As such, explicit flush() is safer way to finilize the sparse vector load.
261 
262  @ingroup sv
263  */
265  {
266  public:
267 #ifndef BM_NO_STL
268  typedef std::output_iterator_tag iterator_category;
269 #endif
276  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
277 
278  typedef void difference_type;
279  typedef void pointer;
280  typedef void reference;
281 
282  public:
286 
288  {
289  BM_ASSERT(bi.empty());
290  this->flush(); sv_ = bi.sv_;
291  return *this;
292  }
293 
295 
296  /** push value to the vector */
298  { this->add(v); return *this; }
299 
300 
301  /** push value to the vector */
302  template<typename StrType>
303  back_insert_iterator& operator=(const StrType& v)
304  {
305  this->add(v.c_str()); return *this; // TODO: avoid c_str()
306  }
307 
308  /** noop */
309  back_insert_iterator& operator*() { return *this; }
310  /** noop */
311  back_insert_iterator& operator++() { return *this; }
312  /** noop */
313  back_insert_iterator& operator++( int ) { return *this; }
314 
315  /** add value to the container*/
316  void add(const value_type* v);
317 
318  /** add NULL (no-value) to the container */
319  void add_null();
320 
321  /** add a series of consequitve NULLs (no-value) to the container */
322  void add_null(size_type count);
323 
324  /** return true if insertion buffer is empty */
325  bool empty() const BMNOEXCEPT;
326 
327  /** flush the accumulated buffer */
328  void flush();
329  protected:
331 
332  /** add value to the buffer without changing the NULL vector
333  @param v - value to push back
334  @return index of added value in the internal buffer
335  @internal
336  */
337  size_type add_value(const value_type* v);
338 
339  private:
340  enum buf_size_e
341  {
342  n_buf_size = 1024 * 8
343  };
344  typedef bm::heap_matrix<CharType,
345  n_buf_size, // ROWS: number of strings in one batch
346  MAX_STR_SIZE, // COLS
348 
349  private:
350  str_sparse_vector_type* sv_; ///!< pointer on the parent vector
351  bvector_type* bv_null_; ///!< not NULL vector pointer
352  buffer_matrix_type buf_matrix_; ///!< value buffer
353  size_type pos_in_buf_; ///!< buffer position
354  block_idx_type prev_nb_; ///!< previous block added
355  };
356 
357 
358 public:
359 
360  /*!
361  \brief Sparse vector constructor
362 
363  \param null_able - defines if vector supports NULL values flag
364  by default it is OFF, use bm::use_null to enable it
365  \param ap - allocation strategy for underlying bit-vectors
366  Default allocation policy uses BM_BIT setting (fastest access)
367  \param bv_max_size - maximum possible size of underlying bit-vectors
368  Please note, this is NOT size of svector itself, it is dynamic upper limit
369  which should be used very carefully if we surely know the ultimate size
370  \param alloc - allocator for bit-vectors
371 
372  \sa bvector<>
373  \sa bm::bvector<>::allocation_policy
374  \sa bm::startegy
375  */
378  size_type bv_max_size = bm::id_max,
379  const allocator_type& alloc = allocator_type());
380 
381  /*! copy-ctor */
382  str_sparse_vector(const str_sparse_vector& str_sv);
383 
384  /*! copy assignmment operator */
387  {
388  if (this != &str_sv)
389  parent_type::copy_from(str_sv);
390  remap_flags_ = str_sv.remap_flags_;
393  return *this;
394  }
395 #ifndef BM_NO_CXX11
396  /*! move-ctor */
398  {
399  parent_type::swap(str_sv);
400  remap_flags_ = str_sv.remap_flags_;
401  remap_matrix1_.swap(str_sv.remap_matrix1_);
402  remap_matrix2_.swap(str_sv.remap_matrix2_);
403  }
404 
405  /*! move assignmment operator */
408  {
409  if (this != &str_sv)
410  {
411  this->swap(str_sv);
412  }
413  return *this;
414  }
415 #endif
416 
417 public:
418 
419  // ------------------------------------------------------------
420  /*! @name String element access */
421  ///@{
422 
423  /** \brief Operator to get write access to an element */
424  reference operator[](size_type idx) { return reference(*this, idx); }
425 
426  /** \brief Operator to get read access to an element */
428  { return const_reference(*this, idx); }
429 
430  /*!
431  \brief set specified element with bounds checking and automatic resize
432  \param idx - element index (vector auto-resized if needs to)
433  \param str - string to set (zero terminated)
434  */
435  void set(size_type idx, const value_type* str);
436 
437  /*!
438  \brief set NULL status for the specified element
439  Vector is resized automatically
440  \param idx - element index (vector auto-resized if needs to)
441  */
442  void set_null(size_type idx);
443 
444 
445  /*!
446  \brief insert the specified element
447  \param idx - element index (vector auto-resized if needs to)
448  \param str - string to set (zero terminated)
449  */
450  void insert(size_type idx, const value_type* str);
451 
452 
453  /*!
454  \brief insert STL string
455  \param idx - element index (vector auto-resized if needs to)
456  \param str - STL string to set
457  */
458  template<typename StrType>
459  void insert(size_type idx, const StrType& str)
460  {
461  this->insert(idx, str.c_str()); // TODO: avoid c_str()
462  }
463 
464  /*!
465  \brief erase the specified element
466  \param idx - element index
467  */
468  void erase(size_type idx);
469 
470  /*!
471  \brief get specified element
472 
473  \param idx - element index
474  \param str - string buffer
475  \param buf_size - string buffer size
476 
477  @return string length
478  */
479  size_type get(size_type idx,
480  value_type* str, size_type buf_size) const BMNOEXCEPT;
481 
482  /*!
483  \brief set specified element with bounds checking and automatic resize
484 
485  This is an equivalent of set() method, but templetized to be
486  more compatible with the STL std::string and the likes
487 
488  \param idx - element index (vector auto-resized if needs to)
489  \param str - input string
490  expected an STL class with size() support,
491  like basic_string<> or vector<char>
492  */
493  template<typename StrType>
494  void assign(size_type idx, const StrType& str)
495  {
496  if (idx >= this->size())
497  this->size_ = idx+1;
498 
499  size_type str_size = size_type(str.size());
500  size_type sz = size_type((str_size < MAX_STR_SIZE) ? str_size : MAX_STR_SIZE-1);
501  if (!sz)
502  {
503  this->clear_value_plains_from(0, idx);
504  return;
505  }
506  unsigned i = 0;
507  for (; i < sz; ++i)
508  {
509  CharType ch = str[i];
510  if (remap_flags_) // compressional re-mapping is in effect
511  {
512  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
513  BM_ASSERT(remap_value);
514  ch = CharType(remap_value);
515  }
516  this->bmatr_.set_octet(idx, i, (unsigned char)ch);
517  if (!ch)
518  break;
519  } // for i
520  if (idx > sz)
521  return;
522  this->bmatr_.set_octet(idx, sz, 0);
523  this->clear_value_plains_from(unsigned(sz*8+1), idx);
524  }
525 
526  /*!
527  \brief push back a string
528  \param str - string to set
529  (STL class with size() support, like basic_string)
530  */
531  template<typename StrType>
532  void push_back(const StrType& str) { assign(this->size_, str); }
533 
534  /*!
535  \brief push back a string (zero terminated)
536  \param str - string to set
537  */
538  void push_back(const value_type* str) { set(this->size_, str); }
539 
540 
541  /*!
542  \brief get specified string element
543  Template method expects an STL-compatible type basic_string<>
544  \param idx - element index (vector auto-resized if needs to)
545  \param str - string to get [out]
546  */
547  template<typename StrType>
548  void get(size_type idx, StrType& str) const
549  {
550  str.clear();
551  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
552  {
553  CharType ch = CharType(this->bmatr_.get_octet(idx, i));
554  if (ch == 0)
555  break;
556  if (remap_flags_)
557  {
558  const unsigned char* remap_row = remap_matrix1_.row(i);
559  unsigned char remap_value = remap_row[unsigned(ch)];
560  BM_ASSERT(remap_value);
561  if (!remap_value) // unknown dictionary element
562  {
563  throw_bad_value(0);
564  break;
565  }
566  ch = CharType(remap_value);
567  }
568  str.push_back(ch);
569  } // for i
570  }
571 
572  /*! Swap content */
573  void swap(str_sparse_vector& str_sv) BMNOEXCEPT;
574 
575  ///@}
576 
577  // ------------------------------------------------------------
578  /*! @name Element comparison functions */
579  ///@{
580 
581  /**
582  \brief Compare vector element with argument lexicographically
583 
584  NOTE: for a re-mapped container, input string may have no correct
585  remapping, in this case we have an ambiguity
586  (we know it is not equal (0) but LT or GT?).
587  Behavior is undefined.
588 
589  \param idx - vactor element index
590  \param str - argument to compare with
591 
592  \return 0 - equal, < 0 - vect[i] < str, >0 otherwise
593  */
594  int compare(size_type idx, const value_type* str) const BMNOEXCEPT;
595 
596 
597  /**
598  \brief Find size of common prefix between two vector elements in octets
599  \return size of common prefix
600  */
601  unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT;
602 
603  ///@}
604 
605 
606  // ------------------------------------------------------------
607  /*! @name Clear */
608  ///@{
609 
610  /*! \brief resize to zero, free memory */
611  void clear() BMNOEXCEPT;
612 
613  /*!
614  \brief clear range (assign bit 0 for all plains)
615  \param left - interval start
616  \param right - interval end (closed interval)
617  \param set_null - set cleared values to unassigned (NULL)
618  */
619  str_sparse_vector<CharType, BV, MAX_STR_SIZE>&
620  clear_range(size_type left, size_type right, bool set_null = false)
621  {
622  parent_type::clear_range(left, right, set_null);
623  return *this;
624  }
625 
626 
627  ///@}
628 
629 
630  // ------------------------------------------------------------
631  /*! @name Size, etc */
632  ///@{
633 
634  /*! \brief return size of the vector
635  \return size of sparse vector
636  */
637  size_type size() const { return this->size_; }
638 
639  /*! \brief return true if vector is empty
640  \return true if empty
641  */
642  bool empty() const { return (size() == 0); }
643 
644  /*! \brief resize vector
645  \param sz - new size
646  */
647  void resize(size_type sz) { parent_type::resize(sz); }
648 
649  /*! \brief get maximum string length capacity
650  \return maximum string length sparse vector can take
651  */
652  static size_type max_str() { return sv_octet_plains; }
653 
654  /*! \brief get effective string length used in vector
655  Calculate and returns efficiency, how close are we
656  to the reserved maximum.
657  \return current string length maximum
658  */
660 
661  /*! \brief get effective string length used in vector
662  \return current string length maximum
663  */
665  ///@}
666 
667 
668  // ------------------------------------------------------------
669  /*! @name Memory optimization/compression */
670  ///@{
671 
672  /*!
673  \brief run memory optimization for all vector plains
674  \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
675  \param opt_mode - requested compression depth
676  \param stat - memory allocation statistics after optimization
677  */
678  void optimize(
679  bm::word_t* temp_block = 0,
682 
683  /*!
684  @brief Calculates memory statistics.
685 
686  Function fills statistics structure containing information about how
687  this vector uses memory and estimation of max. amount of memory
688  bvector needs to serialize itself.
689 
690  @param st - pointer on statistics structure to be filled in.
691 
692  @sa statistics
693  */
694  void calc_stat(
696  ) const BMNOEXCEPT;
697 
698 
699  ///@}
700 
701  // ------------------------------------------------------------
702  /*! @name Iterator access */
703  //@{
704 
705  /** Provide const iterator access to container content */
707 
708  /** Provide const iterator access to the end */
710 
711  /** Get const_itertor re-positioned to specific element
712  @param idx - position in the sparse vector
713  */
715  { return const_iterator(this, idx); }
716 
717  /** Provide back insert iterator
718  Back insert iterator implements buffered insertion, which is faster, than random access
719  or push_back
720  */
722  { return back_insert_iterator(this); }
723 
724  ///@}
725 
726 
727 
728  // ------------------------------------------------------------
729  /*! @name Various traits */
730  ///@{
731 
732  /** \brief trait if sparse vector is "compressed" (false)
733  */
734  static
735  bool is_compressed() BMNOEXCEPT { return false; }
736 
737  ///@}
738 
739  // ------------------------------------------------------------
740  /*! @name remapping, succinct utilities
741  Remapping implements reduction of dit-depth thus improves
742  search performance. Remapping limits farther modifications
743  of sparse vector.
744  */
745  ///@{
746 
747  /**
748  Get remapping status (true|false)
749  */
750  bool is_remap() const BMNOEXCEPT { return remap_flags_ != 0; }
751 
752  /**
753  Build remapping profile and load content from another sparse vector
754  \param str_sv - source sparse vector (assumed it is not remapped)
755  */
756  void remap_from(const str_sparse_vector& str_sv);
757 
758  /*!
759  Calculate flags which octets are present on each byte-plain.
760  @internal
761  */
762  void calc_octet_stat(plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT;
763 
764  static
765  void build_octet_remap(
766  plain_octet_matrix_type& octet_remap_matrix1,
767  plain_octet_matrix_type& octet_remap_matrix2,
768  const plain_octet_matrix_type& octet_occupancy_matrix);
769  /*!
770  remap string from external (ASCII) system to matrix internal code
771  @return true if remapping was ok, false if found incorrect value
772  for the plain
773  @internal
774  */
775  static
776  bool remap_tosv(value_type* BMRESTRICT sv_str,
777  size_type buf_size,
778  const value_type* BMRESTRICT str,
779  const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2
780  ) BMNOEXCEPT;
781 
782  /*!
783  remap string from external (ASCII) system to matrix internal code
784  @internal
785  */
786  bool remap_tosv(value_type* sv_str,
787  size_type buf_size,
788  const value_type* str) const BMNOEXCEPT
789  {
790  return remap_tosv(sv_str, buf_size, str, remap_matrix2_);
791  }
792  /*!
793  remap string from internal code to external (ASCII) system
794  @return true if remapping was ok, false if found incorrect value
795  for the plain
796  @internal
797  */
798  static
799  bool remap_fromsv(
800  value_type* BMRESTRICT str,
801  size_type buf_size,
802  const value_type* BMRESTRICT sv_str,
803  const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
804  ) BMNOEXCEPT;
805  /*!
806  re-calculate remap matrix2 based on matrix1
807  @internal
808  */
809  void recalc_remap_matrix2();
810 
811  ///@}
812 
813  // ------------------------------------------------------------
814  /*! @name Export content to C-style */
815  ///@{
816 
817  /**
818  \brief Bulk export strings to a C-style matrix of chars
819 
820  \param cmatr - dest matrix (bm::heap_matrix)
821  \param idx_from - index in the sparse vector to export from
822  \param dec_size - decoding size (matrix column allocation should match)
823  \param zero_mem - set to false if target array is pre-initialized
824  with 0s to avoid performance penalty
825 
826  \return number of actually exported elements (can be less than requested)
827  */
828  template<typename CharMatrix>
829  size_type decode(CharMatrix& cmatr,
830  size_type idx_from, size_type dec_size,
831  bool zero_mem = true) const
832  {
833  BM_ASSERT(cmatr.is_init());
834  if (zero_mem)
835  cmatr.set_zero();
836 
837  size_type rows = size_type(cmatr.rows());
838  BM_ASSERT(cmatr.cols() >= MAX_STR_SIZE);
839  size_type max_sz = this->size() - idx_from;
840  if (max_sz < dec_size)
841  dec_size = max_sz;
842  if (rows < dec_size)
843  dec_size = rows;
844  if (!dec_size)
845  return dec_size;
846 
847  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
848  {
849  unsigned bi = 0;
850  for (unsigned k = i * 8; k < (i * 8) + 8; ++k, ++bi)
851  {
852  const bvector_type* bv = this->bmatr_.get_row(k);
853  if (!bv)
854  continue;
855  value_type mask = value_type(1u << bi);
856  typename bvector_type::enumerator en(bv, idx_from);
857  for ( ;en.valid(); ++en )
858  {
859  size_type idx = *en - idx_from;
860  if (idx >= dec_size)
861  break;
862  typename CharMatrix::value_type* str = cmatr.row(idx);
863  str[i] |= mask;
864  } // for en
865  } // for k
866  } // for i
867 
868  if (remap_flags_)
869  {
870  for (unsigned i = 0; i < dec_size; ++i)
871  {
872  typename CharMatrix::value_type* str = cmatr.row(i);
873  remap_matrix1_.remapz(str);
874  } // for i
875  }
876  return dec_size;
877  }
878 
879  /**
880  \brief Bulk import of strings from a C-style matrix of chars
881 
882  \param cmatr - source matrix (bm::heap_matrix)
883  [in/out] parameter gets modified(corrupted)
884  in the process
885  \param idx_from - destination index in the sparse vector
886  \param imp_size - import size (number or rows to import)
887  */
888  template<typename CharMatrix>
889  void import(CharMatrix& cmatr, size_type idx_from, size_type imp_size)
890  {
891  if (!imp_size)
892  return;
893  if (idx_from < this->size_) // in case it touches existing elements
894  {
895  // clear all plains in the range to provide corrrect import of 0 values
896  this->clear_range(idx_from, idx_from + imp_size - 1);
897  }
898  import_no_check(cmatr, idx_from, imp_size);
899  }
900 
901  /**
902  \brief Bulk push-back import of strings from a C-style matrix of chars
903 
904  \param cmatr - source matrix (bm::heap_matrix)
905  [in/out] parameter gets modified(corrupted)
906  in the process
907  \param imp_size - import size (number or rows to import)
908  */
909  template<typename CharMatrix>
910  void import_back(CharMatrix& cmatr, size_type imp_size)
911  {
912  if (!imp_size)
913  return;
914  import_no_check(cmatr, this->size(), imp_size);
915  }
916 
917 
918  ///@}
919 
920  // ------------------------------------------------------------
921  /*! @name Merge, split, partition data */
922  ///@{
923 
924  /**
925  @brief copy range of values from another sparse vector
926 
927  Copy [left..right] values from the source vector,
928  clear everything outside the range.
929 
930  \param sv - source vector
931  \param left - index from in losed diapason of [left..right]
932  \param right - index to in losed diapason of [left..right]
933  \param splice_null - "use_null" copy range for NULL vector or
934  do not copy it
935  */
937  size_type left, size_type right,
938  bm::null_support splice_null = bm::use_null);
939 
940  ///@}
941 
942  // ------------------------------------------------------------
943 
944  /*! \brief syncronize internal structures */
945  void sync(bool force);
946 
947  /*!
948  \brief check if another sparse vector has the same content and size
949 
950  \param sv - sparse vector for comparison
951  \param null_able - flag to consider NULL vector in comparison (default)
952  or compare only value content plains
953 
954  \return true, if it is the same
955  */
957  bm::null_support null_able = bm::use_null) const BMNOEXCEPT;
958 
959  /**
960  \brief find position of compressed element by its rank
961  */
962  static
964 
965  /**
966  \brief size of sparse vector (may be different for RSC)
967  */
968  size_type effective_size() const BMNOEXCEPT { return size(); }
969 
970 protected:
971 
972  /// @internal
973  template<typename CharMatrix>
974  void import_no_check(CharMatrix& cmatr,
975  size_type idx_from, size_type imp_size,
976  bool set_not_null = true)
977  {
978  BM_ASSERT (cmatr.is_init());
979 
980  unsigned max_str_size = 0;
981  {
982  for (unsigned j = 0; j < imp_size; ++j)
983  {
984  typename CharMatrix::value_type* str = cmatr.row(j);
985  unsigned i;
986  for (i = 0; i < MAX_STR_SIZE; ++i)
987  {
988  value_type ch = str[i];
989  if (!ch)
990  {
991  max_str_size = (i > max_str_size) ? i : max_str_size;
992  break;
993  }
994  if (remap_flags_) // re-mapping is in effect
995  {
996  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
997  BM_ASSERT(remap_value);
998  if (!remap_value) // unknown dictionary element
999  throw_bad_value(0);
1000  str[i] = CharType(remap_value);
1001  }
1002  } // for i
1003  if (i == MAX_STR_SIZE)
1004  max_str_size = i;
1005  } // for j
1006  }
1007 
1008  size_type bit_list[CharMatrix::n_rows];
1009  for (unsigned i = 0; i < max_str_size; ++i)
1010  {
1011  for (unsigned bi = 0; bi < 8; ++bi)
1012  {
1013  unsigned n_bits = 0;
1014  value_type mask = value_type(1u << bi);
1015  for (size_type j = 0; j < imp_size; ++j)
1016  {
1017  typename CharMatrix::value_type* str = cmatr.row(j);
1018  value_type ch = str[i];
1019  if (!ch)
1020  continue;
1021  if (ch & mask)
1022  {
1023  bit_list[n_bits++] = idx_from + j;
1024  str[i] ^= mask;
1025  }
1026  } // for j
1027  if (n_bits) // set transposed bits to the target plain
1028  {
1029  unsigned plain = i*8 + bi;
1030  bvector_type* bv = this->bmatr_.get_row(plain);
1031  if (!bv)
1032  {
1033  bv = this->bmatr_.construct_row(plain);
1034  bv->init();
1035  }
1036  bv->set(&bit_list[0], n_bits, BM_SORTED);
1037  }
1038  } // for k
1039  } // for i
1040 
1041  size_type idx_to = idx_from + imp_size - 1;
1042  if (set_not_null)
1043  {
1044  bvector_type* bv_null = this->get_null_bvect();
1045  if (bv_null)
1046  bv_null->set_range(idx_from, idx_to);
1047  }
1048  if (idx_to >= this->size())
1049  this->size_ = idx_to+1;
1050 
1051  }
1052 
1053  // ------------------------------------------------------------
1054  /*! @name Errors and exceptions */
1055  ///@{
1056 
1057  /**
1058  \brief throw range error
1059  \internal
1060  */
1061  static
1062  void throw_range_error(const char* err_msg);
1063 
1064  /**
1065  \brief throw domain error
1066  \internal
1067  */
1068  static
1069  void throw_bad_value(const char* err_msg);
1070 
1071  ///@}
1072 
1073  /*! \brief set value without checking boundaries */
1074  void set_value(size_type idx, const value_type* str);
1075 
1076  /*! \brief set value without checking boundaries or support of NULL */
1077  void set_value_no_null(size_type idx, const value_type* str);
1078 
1079  /*! \brief insert value without checking boundaries */
1080  void insert_value(size_type idx, const value_type* str);
1081 
1082  /*! \brief insert value without checking boundaries or support of NULL */
1083  void insert_value_no_null(size_type idx, const value_type* str);
1084 
1085 
1086  size_type size_internal() const { return size(); }
1088 
1089  size_t remap_size() const { return remap_matrix1_.get_buffer().size(); }
1090  const unsigned char* get_remap_buffer() const
1091  { return remap_matrix1_.get_buffer().buf(); }
1092  unsigned char* init_remap_buffer()
1093  {
1094  remap_matrix1_.init();
1095  return remap_matrix1_.get_buffer().data();
1096  }
1097  void set_remap() { remap_flags_ = 1; }
1098 
1099 protected:
1100 
1102  size_type* idx_from, size_type* idx_to) const
1103  {
1104  *idx_from = from; *idx_to = to; return true;
1105  }
1106 
1107 protected:
1108  template<class SVect> friend class sparse_vector_serializer;
1109  template<class SVect> friend class sparse_vector_deserializer;
1110 
1111 protected:
1112  unsigned remap_flags_; ///< remapping status
1113  plain_octet_matrix_type remap_matrix1_; ///< octet remap table 1
1114  plain_octet_matrix_type remap_matrix2_; ///< octet remap table 2
1115 };
1116 
1117 //---------------------------------------------------------------------
1118 //---------------------------------------------------------------------
1119 
1120 
1121 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1123  bm::null_support null_able,
1125  size_type bv_max_size,
1126  const allocator_type& alloc)
1127 : parent_type(null_able, ap, bv_max_size, alloc),
1128  remap_flags_(0)
1129 {}
1130 
1131 
1132 //---------------------------------------------------------------------
1133 
1134 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1136  const str_sparse_vector& str_sv)
1137 : parent_type(str_sv),
1138  remap_flags_(str_sv.remap_flags_),
1139  remap_matrix1_(str_sv.remap_matrix1_),
1140  remap_matrix2_(str_sv.remap_matrix2_)
1141 {}
1142 
1143 //---------------------------------------------------------------------
1144 
1145 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1147  str_sparse_vector& str_sv) BMNOEXCEPT
1148 {
1149  parent_type::swap(str_sv);
1150  bm::xor_swap(remap_flags_, str_sv.remap_flags_);
1151  remap_matrix1_.swap(str_sv.remap_matrix1_);
1152  remap_matrix2_.swap(str_sv.remap_matrix2_);
1153 }
1154 
1155 //---------------------------------------------------------------------
1156 
1157 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1159  size_type idx, const value_type* str)
1160 {
1161  if (idx >= this->size())
1162  this->size_ = idx+1;
1163  set_value(idx, str);
1164 }
1165 
1166 //---------------------------------------------------------------------
1167 
1168 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1170  size_type idx, const value_type* str)
1171 {
1172  if (idx >= this->size())
1173  {
1174  this->size_ = idx+1;
1175  set_value(idx, str);
1176  return;
1177  }
1178  insert_value(idx, str);
1179  this->size_++;
1180 }
1181 
1182 //---------------------------------------------------------------------
1183 
1184 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1186 {
1187  BM_ASSERT(idx < this->size_);
1188  if (idx >= this->size_)
1189  return;
1190  this->erase_column(idx);
1191  this->size_--;
1192 }
1193 
1194 //---------------------------------------------------------------------
1195 
1196 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1198 {
1199  bvector_type* bv_null = this->get_null_bvect();
1200  if (bv_null)
1201  bv_null->clear_bit_no_check(idx);
1202  if (idx >= this->size_)
1203  {
1204  this->size_ = idx + 1;
1205  }
1206 }
1207 
1208 //---------------------------------------------------------------------
1209 
1210 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1212  size_type idx, const value_type* str)
1213 {
1214  set_value_no_null(idx, str);
1215  bvector_type* bv_null = this->get_null_bvect();
1216  if (bv_null)
1217  bv_null->set_bit_no_check(idx);
1218 }
1219 
1220 //---------------------------------------------------------------------
1221 
1222 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1224  size_type idx, const value_type* str)
1225 {
1226  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1227  {
1228  CharType ch = str[i];
1229  if (!ch)
1230  {
1231  this->clear_value_plains_from(i*8, idx);
1232  return;
1233  }
1234 
1235  if (remap_flags_) // compressional re-mapping is in effect
1236  {
1237  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1238  BM_ASSERT(remap_value);
1239  if (!remap_value) // unknown dictionary element
1240  {
1241  this->clear_value_plains_from(i*8, idx);
1242  return;
1243  }
1244  ch = CharType(remap_value);
1245  }
1246  this->bmatr_.set_octet(idx, i, (unsigned char)ch);
1247  } // for i
1248 }
1249 
1250 //---------------------------------------------------------------------
1251 
1252 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1254  size_type idx, const value_type* str)
1255 {
1256  insert_value_no_null(idx, str);
1257  this->insert_null(idx, true);
1258 }
1259 
1260 //---------------------------------------------------------------------
1261 
1262 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1264  size_type idx, const value_type* str)
1265 {
1266  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1267  {
1268  CharType ch = str[i];
1269  if (!ch)
1270  {
1271  this->insert_clear_value_plains_from(i*8, idx);
1272  return;
1273  }
1274 
1275  if (remap_flags_) // compressional re-mapping is in effect
1276  {
1277  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1278  BM_ASSERT(remap_value);
1279  if (!remap_value) // unknown dictionary element
1280  {
1281  this->insert_clear_value_plains_from(i*8, idx);
1282  return;
1283  }
1284  ch = CharType(remap_value);
1285  }
1286  this->bmatr_.insert_octet(idx, i, (unsigned char)ch);
1287  } // for i
1288 }
1289 
1290 
1291 //---------------------------------------------------------------------
1292 
1293 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1296  size_type idx, value_type* str, size_type buf_size) const BMNOEXCEPT
1297 {
1298  size_type i = 0;
1299  for (; i < MAX_STR_SIZE; ++i)
1300  {
1301  if (i < buf_size)
1302  str[i] = 0;
1303  else
1304  break;
1305  CharType ch = CharType(this->bmatr_.get_octet(idx, i));
1306  if (ch == 0)
1307  {
1308  str[i] = 0;
1309  break;
1310  }
1311  str[i] = ch;
1312  } // for i
1313  if (remap_flags_)
1314  {
1315  remap_matrix1_.remap(str, i);
1316  }
1317  return i;
1318 }
1319 
1320 //---------------------------------------------------------------------
1321 
1322 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1324  bm::word_t* temp_block,
1325  typename bvector_type::optmode opt_mode,
1327 {
1328  typename bvector_type::statistics stbv;
1329  parent_type::optimize(temp_block, opt_mode, &stbv);
1330 
1331  if (st)
1332  st->add(stbv);
1333 }
1334 
1335 //---------------------------------------------------------------------
1336 
1337 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1340  ) const BMNOEXCEPT
1341 {
1342  BM_ASSERT(st);
1343  typename bvector_type::statistics stbv;
1344  parent_type::calc_stat(&stbv);
1345 
1346  st->reset();
1347 
1348  st->bit_blocks += stbv.bit_blocks;
1349  st->gap_blocks += stbv.gap_blocks;
1350  st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1351  st->bv_count += stbv.bv_count;
1352  st->max_serialize_mem += stbv.max_serialize_mem + 8;
1353  st->memory_used += stbv.memory_used;
1354  st->gap_cap_overhead += stbv.gap_cap_overhead;
1355 
1356  size_t remap_mem_usage = sizeof(remap_flags_);
1357  remap_mem_usage += remap_matrix1_.get_buffer().mem_usage();
1358  remap_mem_usage += remap_matrix2_.get_buffer().mem_usage();
1359 
1360  st->memory_used += remap_mem_usage;
1361  if (remap_flags_) // use of remapping requires some extra storage
1362  {
1363  st->max_serialize_mem += remap_mem_usage;
1364  }
1365 }
1366 
1367 //---------------------------------------------------------------------
1368 
1369 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1371  size_type idx,
1372  const value_type* str) const BMNOEXCEPT
1373 {
1374  BM_ASSERT(str);
1375  int res = 0;
1376  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1377  {
1378  CharType ch = str[i];
1379  if (remap_flags_ && ch)
1380  {
1381  unsigned char remap_value = remap_matrix2_.get(i, unsigned(ch));
1382  if (!remap_value) // unknown dictionary element
1383  {
1384  throw_bad_value(0);
1385  }
1386  ch = CharType(remap_value);
1387  }
1388 
1389  res = this->bmatr_.compare_octet(idx, i, ch);
1390  if (res || !ch)
1391  break;
1392  } // for
1393  return res;
1394 }
1395 
1396 //---------------------------------------------------------------------
1397 
1398 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1400  size_type idx1, size_type idx2) const BMNOEXCEPT
1401 {
1402  unsigned i = 0;
1403  for (; i < MAX_STR_SIZE; ++i)
1404  {
1405  CharType ch1 = CharType(this->bmatr_.get_octet(idx1, i));
1406  CharType ch2 = CharType(this->bmatr_.get_octet(idx2, i));
1407  if (!ch1 || !ch2)
1408  {
1409  if (i)
1410  --i;
1411  break;
1412  }
1413  if (ch1 != ch2)
1414  {
1415  break;
1416  }
1417  } // for
1418 
1419  return i;
1420 }
1421 
1422 //---------------------------------------------------------------------
1423 
1424 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1425 bool
1427  size_type rank,
1428  size_type& pos) BMNOEXCEPT
1429 {
1430  BM_ASSERT(rank);
1431  pos = rank - 1;
1432  return true;
1433 }
1434 
1435 //---------------------------------------------------------------------
1436 
1437 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1440  const BMNOEXCEPT
1441 {
1442  for (int i = MAX_STR_SIZE-1; i >= 0; --i)
1443  {
1444  unsigned octet_plain = unsigned(i) * unsigned(sizeof(CharType)) * 8;
1445  for (unsigned j = 0; j < sizeof(CharType) * 8; ++j)
1446  {
1447  if (this->bmatr_.row(octet_plain+j))
1448  return unsigned(i)+1;
1449  } // for j
1450  } // for i
1451  return 0;
1452 }
1453 
1454 //---------------------------------------------------------------------
1455 
1456 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1458  plain_octet_matrix_type& octet_matrix) const BMNOEXCEPT
1459 {
1460  octet_matrix.init();
1461  octet_matrix.set_zero();
1462 
1463  size_type size = this->size();
1464 
1465  for (unsigned i = 0; i < MAX_STR_SIZE; ++i)
1466  {
1467  unsigned char* row = octet_matrix.row(i);
1468 
1469  // TODO: optimize partial transposition
1470  for (size_type j = 0; j < size; ++j)
1471  {
1472  unsigned char ch = this->bmatr_.get_octet(j, i);
1473  unsigned k = ch;
1474  if (k)
1475  row[k] = 1;
1476  } // for j
1477  } // for i
1478 }
1479 
1480 //---------------------------------------------------------------------
1481 
1482 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1484  plain_octet_matrix_type& octet_remap_matrix1,
1485  plain_octet_matrix_type& octet_remap_matrix2,
1486  const plain_octet_matrix_type& octet_occupancy_matrix)
1487 {
1488  octet_remap_matrix1.init();
1489  octet_remap_matrix1.set_zero();
1490  octet_remap_matrix2.init();
1491  octet_remap_matrix2.set_zero();
1492 
1493  for (unsigned i = 0; i < octet_occupancy_matrix.rows(); ++i)
1494  {
1495  const unsigned char* row = octet_occupancy_matrix.row(i);
1496  unsigned char* remap_row1 = octet_remap_matrix1.row(i);
1497  unsigned char* remap_row2 = octet_remap_matrix2.row(i);
1498  unsigned count = 1;
1499  for (unsigned j = 1; j < octet_occupancy_matrix.cols(); ++j)
1500  {
1501  if (row[j]) // octet is present
1502  {
1503  // set two remapping table values
1504  remap_row1[count] = (unsigned char)j;
1505  remap_row2[j] = (unsigned char)count;
1506  ++count;
1507  BM_ASSERT(count < 256);
1508  }
1509  } // for j
1510  } // for i
1511 }
1512 
1513 //---------------------------------------------------------------------
1514 
1515 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1517 {
1518  BM_ASSERT(remap_flags_);
1519 
1520  remap_matrix2_.init();
1521  remap_matrix2_.set_zero();
1522 
1523  for (unsigned i = 0; i < remap_matrix1_.rows(); ++i)
1524  {
1525  const unsigned char* remap_row1 = remap_matrix1_.row(i);
1526  unsigned char* remap_row2 = remap_matrix2_.row(i);
1527  for (unsigned j = 1; j < remap_matrix1_.cols(); ++j)
1528  {
1529  if (remap_row1[j])
1530  {
1531  unsigned count = remap_row1[j];
1532  remap_row2[count] = (unsigned char)j;
1533  BM_ASSERT(count < 256);
1534  }
1535  } // for j
1536  } // for i
1537 }
1538 
1539 //---------------------------------------------------------------------
1540 
1541 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1543  value_type* BMRESTRICT sv_str,
1544  size_type buf_size,
1545  const value_type* BMRESTRICT str,
1546  const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
1547 {
1548  for (unsigned i = 0; i < buf_size; ++i)
1549  {
1550  CharType ch = str[i];
1551  if (ch == 0)
1552  {
1553  sv_str[i] = ch;
1554  break;
1555  }
1556  const unsigned char* remap_row = octet_remap_matrix2.row(i);
1557  unsigned char remap_value = remap_row[unsigned(ch)];
1558  if (!remap_value) // unknown dictionary element
1559  {
1560  return false;
1561  }
1562  sv_str[i] = CharType(remap_value);
1563  } // for i
1564  return true;
1565 }
1566 
1567 //---------------------------------------------------------------------
1568 
1569 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1571  value_type* BMRESTRICT str,
1572  size_type buf_size,
1573  const value_type* BMRESTRICT sv_str,
1574  const plain_octet_matrix_type& BMRESTRICT octet_remap_matrix1
1575  ) BMNOEXCEPT
1576 {
1577  for (unsigned i = 0; i < buf_size; ++i)
1578  {
1579  CharType ch = sv_str[i];
1580  if (ch == 0)
1581  {
1582  str[i] = ch;
1583  break;
1584  }
1585  const unsigned char* remap_row = octet_remap_matrix1.row(i);
1586  unsigned char remap_value = remap_row[unsigned(ch)];
1587  if (!remap_value) // unknown dictionary element
1588  {
1589  return false;
1590  }
1591  str[i] = CharType(remap_value);
1592  } // for i
1593  return true;
1594 }
1595 
1596 //---------------------------------------------------------------------
1597 
1598 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1599 void
1601 {
1602  if (str_sv.is_remap())
1603  {
1604  *this = str_sv;
1605  return;
1606  }
1607  this->clear();
1608  if (str_sv.empty()) // no content to remap
1609  {
1610  return;
1611  }
1612 
1613  plain_octet_matrix_type omatrix; // occupancy map
1614  str_sv.calc_octet_stat(omatrix);
1615 
1616  str_sv.build_octet_remap(remap_matrix1_, remap_matrix2_, omatrix);
1617  remap_flags_ = 1; // turn ON remapped mode
1618 
1619  const unsigned buffer_size = 1024 * 8;
1620 
1621  typedef bm::heap_matrix<CharType,
1622  buffer_size, // ROWS: number of strings in one batch
1623  MAX_STR_SIZE, // COLS
1624  allocator_type> remap_buffer_type;
1625 
1626  remap_buffer_type cmatr(true);
1627  for (size_type i = 0; true; )
1628  {
1629  size_type dsize = str_sv.decode(cmatr, i, buffer_size, true);
1630  if (!dsize)
1631  break;
1632  this->import(cmatr, i, dsize);
1633  i += dsize;
1634  } // for i
1635 }
1636 
1637 //---------------------------------------------------------------------
1638 
1639 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1641 {
1642  if (remap_flags_)
1643  {
1644  recalc_remap_matrix2();
1645  }
1646 }
1647 
1648 //---------------------------------------------------------------------
1649 
1650 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1653  bm::null_support null_able) const BMNOEXCEPT
1654 {
1655  // at this point both vectors should have the same remap settings
1656  // to be considered "equal".
1657  // Strictly speaking this is incorrect, because re-map and non-remap
1658  // vectors may have the same content
1659 
1660  if (remap_flags_ != sv.remap_flags_)
1661  return false;
1662  if (remap_flags_)
1663  {
1664  bool b;
1665  b = remap_matrix1_.get_buffer().equal(sv.remap_matrix1_.get_buffer());
1666  if (!b)
1667  return b;
1668  b = remap_matrix2_.get_buffer().equal(sv.remap_matrix2_.get_buffer());
1669  if (!b)
1670  return b;
1671  }
1672  return parent_type::equal(sv, null_able);
1673 }
1674 
1675 //---------------------------------------------------------------------
1676 
1677 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1680  size_type left, size_type right,
1681  bm::null_support splice_null)
1682 {
1683  if (left > right)
1684  bm::xor_swap(left, right);
1685  this->clear();
1686 
1687  remap_flags_ = sv.remap_flags_;
1688  remap_matrix1_ = sv.remap_matrix1_;
1689  remap_matrix2_ = sv.remap_matrix2_;
1690 
1691  this->copy_range_plains(sv, left, right, splice_null);
1692  this->resize(sv.size());
1693 }
1694 
1695 
1696 //---------------------------------------------------------------------
1697 
1698 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1701 {
1702  typedef typename
1704  return it_type(this);
1705 }
1706 
1707 //---------------------------------------------------------------------
1708 
1709 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1711 {
1712  parent_type::clear();
1713 }
1714 
1715 //---------------------------------------------------------------------
1716 
1717 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1719  const char* err_msg)
1720 {
1721 #ifndef BM_NO_STL
1722  throw std::range_error(err_msg);
1723 #else
1724  BM_ASSERT_THROW(false, BM_ERR_RANGE);
1725 #endif
1726 }
1727 
1728 //---------------------------------------------------------------------
1729 
1730 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1732  const char* err_msg)
1733 {
1734 #ifndef BM_NO_STL
1735  if (!err_msg)
1736  err_msg = "Unknown/incomparable dictionary character";
1737  throw std::domain_error(err_msg);
1738 #else
1739  BM_ASSERT_THROW(false, BM_BAD_VALUE);
1740 #endif
1741 }
1742 
1743 
1744 //---------------------------------------------------------------------
1745 //
1746 //---------------------------------------------------------------------
1747 
1748 
1749 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1751 : sv_(0), pos_(bm::id_max), pos_in_buf_(~size_type(0))
1752 {}
1753 
1754 //---------------------------------------------------------------------
1755 
1756 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1759 : sv_(it.sv_), pos_(it.pos_), pos_in_buf_(~size_type(0))
1760 {}
1761 
1762 //---------------------------------------------------------------------
1763 
1764 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1767 : sv_(sv), pos_(sv->empty() ? bm::id_max : 0), pos_in_buf_(~size_type(0))
1768 {}
1769 
1770 //---------------------------------------------------------------------
1771 
1772 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1776 : sv_(sv), pos_(pos >= sv->size() ? bm::id_max : pos), pos_in_buf_(~size_type(0))
1777 {}
1778 
1779 //---------------------------------------------------------------------
1780 
1781 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1784 {
1785  BM_ASSERT(sv_);
1786  BM_ASSERT(this->valid());
1787  if (pos_in_buf_ == ~size_type(0))
1788  {
1789  if (!buf_matrix_.is_init())
1790  buf_matrix_.init();
1791  pos_in_buf_ = 0;
1792  size_type d = sv_->decode(buf_matrix_, pos_, buffer_matrix_type::n_rows);
1793  if (!d)
1794  {
1795  pos_ = bm::id_max;
1796  return 0;
1797  }
1798  }
1799  return buf_matrix_.row(pos_in_buf_);
1800 }
1801 
1802 //---------------------------------------------------------------------
1803 
1804 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1805 void
1808  ) BMNOEXCEPT
1809 {
1810  pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
1811  pos_in_buf_ = ~size_type(0);
1812 }
1813 
1814 //---------------------------------------------------------------------
1815 
1816 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1817 void
1819 {
1820  if (pos_ == bm::id_max) // nothing to do, we are at the end
1821  return;
1822  ++pos_;
1823 
1824  if (pos_ >= sv_->size())
1825  this->invalidate();
1826  else
1827  {
1828  if (pos_in_buf_ != ~size_type(0))
1829  {
1830  ++pos_in_buf_;
1831  if (pos_in_buf_ >= buffer_matrix_type::n_rows)
1832  pos_in_buf_ = ~size_type(0);
1833  }
1834  }
1835 }
1836 
1837 //---------------------------------------------------------------------
1838 //
1839 //---------------------------------------------------------------------
1840 
1841 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1843 : sv_(0), bv_null_(0), pos_in_buf_(~size_type(0)), prev_nb_(0)
1844 {}
1845 
1846 //---------------------------------------------------------------------
1847 
1848 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1851 : sv_(sv), pos_in_buf_(~size_type(0))
1852 {
1853  if (sv)
1854  {
1855  prev_nb_ = sv_->size() >> bm::set_block_shift;
1856  bv_null_ = sv_->get_null_bvect();
1857  }
1858  else
1859  {
1860  bv_null_ = 0; prev_nb_ = 0;
1861  }
1862 }
1863 
1864 //---------------------------------------------------------------------
1865 
1866 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1869 : sv_(bi.sv_), bv_null_(bi.bv_null_), pos_in_buf_(~size_type(0)), prev_nb_(bi.prev_nb_)
1870 {
1871  BM_ASSERT(bi.empty());
1872 }
1873 
1874 //---------------------------------------------------------------------
1875 
1876 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1878 {
1879  this->flush();
1880 }
1881 
1882 //---------------------------------------------------------------------
1883 
1884 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1885 bool
1887  const BMNOEXCEPT
1888 {
1889  return (pos_in_buf_ == ~size_type(0) || !sv_);
1890 }
1891 
1892 //---------------------------------------------------------------------
1893 
1894 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1896 {
1897  if (this->empty())
1898  return;
1899 
1900  sv_->import_no_check(buf_matrix_, sv_->size(), pos_in_buf_+1, false);
1901  pos_in_buf_ = ~size_type(0);
1902  block_idx_type nb = sv_->size() >> bm::set_block_shift;
1903  if (nb != prev_nb_)
1904  {
1905  // optimize all previous blocks in all planes
1906  sv_->optimize_block(prev_nb_);
1907  prev_nb_ = nb;
1908  }
1909 }
1910 
1911 //---------------------------------------------------------------------
1912 
1913 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1916 {
1917  if (!v)
1918  {
1919  this->add_null();
1920  return;
1921  }
1922  size_type buf_idx = this->add_value(v);
1923  if (bv_null_)
1924  {
1925  size_type sz = sv_->size();
1926  bv_null_->set_bit_no_check(sz + buf_idx);
1927  }
1928 }
1929 
1930 //---------------------------------------------------------------------
1931 
1932 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1934 {
1935  /*size_type buf_idx = */this->add_value("");
1936 }
1937 
1938 //---------------------------------------------------------------------
1939 
1940 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1943 {
1944  for (size_type i = 0; i < count; ++i) // TODO: optimization
1945  this->add_value("");
1946 }
1947 
1948 
1949 //---------------------------------------------------------------------
1950 
1951 template<class CharType, class BV, unsigned MAX_STR_SIZE>
1955 {
1956  BM_ASSERT(sv_);
1957  BM_ASSERT(v);
1958  if (pos_in_buf_ == ~size_type(0))
1959  {
1960  if (!buf_matrix_.is_init())
1961  buf_matrix_.init();
1962  pos_in_buf_ = 0;
1963  buf_matrix_.set_zero();
1964  }
1965  else
1966  if (pos_in_buf_ >= buffer_matrix_type::n_rows-1)
1967  {
1968  this->flush();
1969  pos_in_buf_ = 0;
1970  buf_matrix_.set_zero();
1971  }
1972  else
1973  {
1974  ++pos_in_buf_;
1975  }
1976  value_type* r = buf_matrix_.row(pos_in_buf_);
1977  for (unsigned i = 0; i < buffer_matrix_type::n_columns; ++i)
1978  {
1979  r[i] = v[i];
1980  if (!r[i])
1981  break;
1982  } // for i
1983  return pos_in_buf_;
1984 }
1985 
1986 //---------------------------------------------------------------------
1987 
1988 
1989 } // namespace
1990 
1991 #endif
bm::str_sparse_vector::back_insert_iterator::flush
void flush()
flush the accumulated buffer
Definition: bmstrsparsevec.h:1895
bm::str_sparse_vector::init_remap_buffer
unsigned char * init_remap_buffer()
Definition: bmstrsparsevec.h:1092
bm::str_sparse_vector::is_remap
bool is_remap() const BMNOEXCEPT
Get remapping status (true|false)
Definition: bmstrsparsevec.h:750
bm::str_sparse_vector::insert
void insert(size_type idx, const StrType &str)
insert STL string
Definition: bmstrsparsevec.h:459
bmconst.h
Constants, tables and typedefs.
bm::str_sparse_vector::value_type
CharType value_type
Definition: bmstrsparsevec.h:62
bm::str_sparse_vector::copy_range
void copy_range(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &sv, size_type left, size_type right, bm::null_support splice_null=bm::use_null)
copy range of values from another sparse vector
Definition: bmstrsparsevec.h:1678
bm::str_sparse_vector::const_iterator::str_sparse_vector_type
str_sparse_vector< CharType, BV, MAX_STR_SIZE > str_sparse_vector_type
Definition: bmstrsparsevec.h:174
bm::str_sparse_vector::const_iterator::allocator_type
bvector_type::allocator_type allocator_type
Definition: bmstrsparsevec.h:179
bm::bv_statistics::max_serialize_mem
size_t max_serialize_mem
estimated maximum memory for serialization
Definition: bmfunc.h:60
bm::str_sparse_vector::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmstrsparsevec.h:67
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::bmatr_
bmatrix_type bmatr_
bit-transposed matrix
Definition: bmbmatrix.h:495
bm::sparse_vector_deserializer
sparse vector de-serializer
Definition: bmsparsevec_serial.h:223
bm::str_sparse_vector::const_iterator::size_type
str_sparse_vector_type::size_type size_type
Definition: bmstrsparsevec.h:177
bm::str_sparse_vector::back_insert_iterator::empty
bool empty() const BMNOEXCEPT
return true if insertion buffer is empty
Definition: bmstrsparsevec.h:1886
bm::str_sparse_vector::remap_matrix2_
plain_octet_matrix_type remap_matrix2_
octet remap table 2
Definition: bmstrsparsevec.h:1114
bm::str_sparse_vector::const_iterator::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmstrsparsevec.h:180
bm::str_sparse_vector::octet_plains
octet_plains
Definition: bmstrsparsevec.h:75
bm::str_sparse_vector::back_insert_iterator::bvector_type
str_sparse_vector_type::bvector_type bvector_type
Definition: bmstrsparsevec.h:274
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::clear_value_plains_from
void clear_value_plains_from(unsigned plain_idx, size_type idx)
Definition: bmbmatrix.h:1406
bm::str_sparse_vector::size_internal
size_type size_internal() const
Definition: bmstrsparsevec.h:1086
bm::bvector::optmode
optmode
Optimization mode Every next level means additional checks (better compression vs time)
Definition: bm.h:129
bm::str_sparse_vector::const_iterator::operator*
const value_type * operator*() const BMNOEXCEPT
Get current position (value)
Definition: bmstrsparsevec.h:205
bm::str_sparse_vector::back_insert_iterator::operator++
back_insert_iterator & operator++(int)
noop
Definition: bmstrsparsevec.h:313
bm::str_sparse_vector::const_iterator::buffer_matrix_type
bm::heap_matrix< CharType, 1024, MAX_STR_SIZE, allocator_type > buffer_matrix_type
Definition: bmstrsparsevec.h:241
bm::str_sparse_vector::reference::operator=
reference & operator=(const reference &ref)
Definition: bmstrsparsevec.h:135
bm::str_sparse_vector::get
size_type get(size_type idx, value_type *str, size_type buf_size) const BMNOEXCEPT
get specified element
Definition: bmstrsparsevec.h:1295
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::plain
bvector_type_ptr plain(unsigned i) BMNOEXCEPT
get access to bit-plain as is (can return NULL)
Definition: bmbmatrix.h:376
bm::str_sparse_vector::push_back
void push_back(const StrType &str)
push back a string
Definition: bmstrsparsevec.h:532
bm::basic_bmatrix::get_octet
unsigned char get_octet(size_type pos, size_type octet_idx) const BMNOEXCEPT
Definition: bmbmatrix.h:913
bm::bv_statistics::bit_blocks
size_t bit_blocks
Number of bit blocks.
Definition: bmfunc.h:56
bm::str_sparse_vector::resize_internal
void resize_internal(size_type sz)
Definition: bmstrsparsevec.h:1087
bm::base_sparse_vector
Base class for bit-transposed sparse vector construction.
Definition: bmbmatrix.h:253
bm::str_sparse_vector::const_iterator::invalidate
void invalidate() BMNOEXCEPT
Invalidate current iterator.
Definition: bmstrsparsevec.h:226
bm::no_null
@ no_null
do not support NULL values
Definition: bmconst.h:216
bm::str_sparse_vector::get_remap_buffer
const unsigned char * get_remap_buffer() const
Definition: bmstrsparsevec.h:1090
bm::str_sparse_vector::back_insert_iterator::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmstrsparsevec.h:276
bm::str_sparse_vector::remap_tosv
static bool remap_tosv(value_type *BMRESTRICT sv_str, size_type buf_size, const value_type *BMRESTRICT str, const plain_octet_matrix_type &BMRESTRICT octet_remap_matrix2) BMNOEXCEPT
Definition: bmstrsparsevec.h:1542
bm::str_sparse_vector::const_iterator::value_type
str_sparse_vector_type::value_type value_type
Definition: bmstrsparsevec.h:176
bm::str_sparse_vector::insert_value_no_null
void insert_value_no_null(size_type idx, const value_type *str)
insert value without checking boundaries or support of NULL
Definition: bmstrsparsevec.h:1263
bm::str_sparse_vector::throw_bad_value
static void throw_bad_value(const char *err_msg)
throw domain error
Definition: bmstrsparsevec.h:1731
bm::str_sparse_vector::set_value
void set_value(size_type idx, const value_type *str)
set value without checking boundaries
Definition: bmstrsparsevec.h:1211
bm::str_sparse_vector::get
void get(size_type idx, StrType &str) const
get specified string element Template method expects an STL-compatible type basic_string<>
Definition: bmstrsparsevec.h:548
bm::str_sparse_vector::begin
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
Definition: bmstrsparsevec.h:1700
bm::str_sparse_vector::const_reference::const_reference
const_reference(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv, size_type idx) BMNOEXCEPT
Definition: bmstrsparsevec.h:97
bm::str_sparse_vector::insert_value
void insert_value(size_type idx, const value_type *str)
insert value without checking boundaries
Definition: bmstrsparsevec.h:1253
bm::BM_SORTED
@ BM_SORTED
input set is sorted (ascending order)
Definition: bmconst.h:191
bm::str_sparse_vector::const_iterator::pointer
CharType * pointer
Definition: bmstrsparsevec.h:183
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::block_idx_type
bvector_type::block_idx_type block_idx_type
Definition: bmbmatrix.h:468
bm::str_sparse_vector::get_const_iterator
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_itertor re-positioned to specific element.
Definition: bmstrsparsevec.h:714
bm::str_sparse_vector::calc_stat
void calc_stat(struct str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
Definition: bmstrsparsevec.h:1338
bm::str_sparse_vector::plain_octet_matrix_type
bm::heap_matrix< unsigned char, MAX_STR_SIZE, 256, typename bvector_type::allocator_type > plain_octet_matrix_type
Definition: bmstrsparsevec.h:85
bm::bvector::enumerator
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:599
bm::str_sparse_vector::set_remap
void set_remap()
Definition: bmstrsparsevec.h:1097
BM_ASSERT_THROW
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:401
bm::str_sparse_vector::is_rsc_support
Definition: bmstrsparsevec.h:88
bm::str_sparse_vector::effective_vector_max
size_type effective_vector_max() const
get effective string length used in vector
Definition: bmstrsparsevec.h:664
bm::bvector::allocator_type
Alloc allocator_type
Definition: bm.h:110
bm::str_sparse_vector::back_insert_iterator::block_idx_type
bvector_type::block_idx_type block_idx_type
Definition: bmstrsparsevec.h:330
bm::str_sparse_vector::back_insert_iterator::operator=
back_insert_iterator & operator=(const value_type *v)
push value to the vector
Definition: bmstrsparsevec.h:297
bm::str_sparse_vector::const_iterator::operator++
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
Definition: bmstrsparsevec.h:208
bm::str_sparse_vector::bvector_enumerator_type
bvector_type::enumerator bvector_enumerator_type
Definition: bmstrsparsevec.h:66
bmalgo.h
Algorithms for bvector<> (main include)
bm::bv_statistics::gap_blocks
size_t gap_blocks
Number of GAP blocks.
Definition: bmfunc.h:57
bm::str_sparse_vector::const_reference::operator==
bool operator==(const const_reference &ref) const BMNOEXCEPT
Definition: bmstrsparsevec.h:108
bm::str_sparse_vector::operator[]
reference operator[](size_type idx)
Operator to get write access to an element
Definition: bmstrsparsevec.h:424
bm::str_sparse_vector::parent_type
base_sparse_vector< CharType, BV, MAX_STR_SIZE > parent_type
Definition: bmstrsparsevec.h:69
bm::str_sparse_vector::back_insert_iterator::operator*
back_insert_iterator & operator*()
noop
Definition: bmstrsparsevec.h:309
bm::bv_statistics::gap_cap_overhead
size_t gap_cap_overhead
gap memory overhead between length and capacity
Definition: bmfunc.h:62
bm::str_sparse_vector::back_insert_iterator::~back_insert_iterator
~back_insert_iterator()
Definition: bmstrsparsevec.h:1877
bm::str_sparse_vector::push_back
void push_back(const value_type *str)
push back a string (zero terminated)
Definition: bmstrsparsevec.h:538
bm::str_sparse_vector::empty
bool empty() const
return true if vector is empty
Definition: bmstrsparsevec.h:642
bm::str_sparse_vector::bvector_type_const_ptr
const typedef bvector_type * bvector_type_const_ptr
Definition: bmstrsparsevec.h:61
bm::str_sparse_vector::set_null
void set_null(size_type idx)
set NULL status for the specified element Vector is resized automatically
Definition: bmstrsparsevec.h:1197
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::const_reference
const typedef value_type & const_reference
Definition: bmbmatrix.h:272
bm::str_sparse_vector::allocator_type
BV::allocator_type allocator_type
Definition: bmstrsparsevec.h:64
bm::str_sparse_vector::const_iterator::advance
void advance() BMNOEXCEPT
advance iterator forward by one
Definition: bmstrsparsevec.h:1818
bm::str_sparse_vector::const_iterator::str_sparse_vector
friend class str_sparse_vector
Definition: bmstrsparsevec.h:170
bm::use_null
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:215
bm::str_sparse_vector::size
size_type size() const
return size of the vector
Definition: bmstrsparsevec.h:637
bm::str_sparse_vector::reference
Reference class to access elements via common [] operator.
Definition: bmstrsparsevec.h:121
bm::id_max
const unsigned id_max
Definition: bmconst.h:108
bm::str_sparse_vector
sparse vector for strings with compression using bit transposition method
Definition: bmstrsparsevec.h:56
bm::str_sparse_vector::const_iterator::operator!=
bool operator!=(const const_iterator &it) const BMNOEXCEPT
Definition: bmstrsparsevec.h:193
bm::str_sparse_vector::const_reference::is_null
bool is_null() const BMNOEXCEPT
Definition: bmstrsparsevec.h:110
bm::xor_swap
void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two scalar variables.
Definition: bmfunc.h:911
bm::bvector::iterator_base::valid
bool valid() const BMNOEXCEPT
Checks if iterator is still valid. Analog of != 0 comparison for pointers.
Definition: bm.h:280
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::clear_range
void clear_range(size_type left, size_type right, bool set_null)
Definition: bmbmatrix.h:1239
bm::basic_bmatrix::get_row
bvector_type_const_ptr get_row(size_type i) const BMNOEXCEPT
Definition: bmbmatrix.h:570
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::get_null_bvect
bvector_type * get_null_bvect()
Definition: bmbmatrix.h:380
bm::str_sparse_vector::is_rsc_support::trait
trait
Definition: bmstrsparsevec.h:88
bm::str_sparse_vector::remap_flags_
unsigned remap_flags_
remapping status
Definition: bmstrsparsevec.h:1112
bm::str_sparse_vector::back_insert_iterator::operator=
back_insert_iterator & operator=(const StrType &v)
push value to the vector
Definition: bmstrsparsevec.h:303
bm::basic_bmatrix::set_octet
void set_octet(size_type pos, size_type octet_idx, unsigned char octet)
Definition: bmbmatrix.h:821
bm::str_sparse_vector::operator=
str_sparse_vector< CharType, BV, MAX_STR_SIZE > & operator=(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv)
Definition: bmstrsparsevec.h:385
BMNOEXCEPT
#define BMNOEXCEPT
Definition: bmdef.h:79
bm::str_sparse_vector::get_back_inserter
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
Definition: bmstrsparsevec.h:721
bm::str_sparse_vector::is_remap_support
Definition: bmstrsparsevec.h:87
bm::str_sparse_vector::const_iterator::reference
CharType *& reference
Definition: bmstrsparsevec.h:184
bm::bvector::opt_compress
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:134
bm::str_sparse_vector::compare
int compare(size_type idx, const value_type *str) const BMNOEXCEPT
Compare vector element with argument lexicographically.
Definition: bmstrsparsevec.h:1370
bm::bvector::allocation_policy
memory allocation policy
Definition: bm.h:833
bm::str_sparse_vector::const_iterator::operator++
const_iterator & operator++(int) BMNOEXCEPT
Advance to the next available value.
Definition: bmstrsparsevec.h:212
bm::str_sparse_vector::remap_size
size_t remap_size() const
Definition: bmstrsparsevec.h:1089
bm::str_sparse_vector::is_remap_support::value
@ value
Definition: bmstrsparsevec.h:87
bm::str_sparse_vector::const_reference
Reference class to access elements via common [] operator.
Definition: bmstrsparsevec.h:94
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::size_
size_type size_
array size
Definition: bmbmatrix.h:496
bm::str_sparse_vector::back_insert_iterator::allocator_type
bvector_type::allocator_type allocator_type
Definition: bmstrsparsevec.h:275
bm::str_sparse_vector::back_insert_iterator::size_type
str_sparse_vector_type::size_type size_type
Definition: bmstrsparsevec.h:273
bm::str_sparse_vector::bvector_type_ptr
bvector_type * bvector_type_ptr
Definition: bmstrsparsevec.h:60
bm::str_sparse_vector::decode
size_type decode(CharMatrix &cmatr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export strings to a C-style matrix of chars.
Definition: bmstrsparsevec.h:829
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::resize
void resize(size_type new_size)
Definition: bmbmatrix.h:1267
bm::str_sparse_vector::sync
void sync(bool force)
syncronize internal structures
Definition: bmstrsparsevec.h:1640
bm::str_sparse_vector::is_compressed
static bool is_compressed() BMNOEXCEPT
trait if sparse vector is "compressed" (false)
Definition: bmstrsparsevec.h:735
bm::str_sparse_vector::end
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
Definition: bmstrsparsevec.h:709
bm::bit_list
unsigned bit_list(T w, B *bits) BMNOEXCEPT
Unpacks word into list of ON bit indexes.
Definition: bmfunc.h:440
BM_ASSERT
#define BM_ASSERT
Definition: bmdef.h:130
bm::str_sparse_vector::allocation_policy_type
bvector_type::allocation_policy allocation_policy_type
Definition: bmstrsparsevec.h:65
bm::base_sparse_vector::is_null
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
Definition: bmbmatrix.h:1285
bm::str_sparse_vector::size_type
bvector_type::size_type size_type
Definition: bmstrsparsevec.h:63
bm::str_sparse_vector::const_iterator
Const iterator to do quick traverse of the sparse vector.
Definition: bmstrsparsevec.h:167
bm::str_sparse_vector::swap
void swap(str_sparse_vector &str_sv) BMNOEXCEPT
Definition: bmstrsparsevec.h:1146
bm::str_sparse_vector::build_octet_remap
static void build_octet_remap(plain_octet_matrix_type &octet_remap_matrix1, plain_octet_matrix_type &octet_remap_matrix2, const plain_octet_matrix_type &octet_occupancy_matrix)
Definition: bmstrsparsevec.h:1483
bm::str_sparse_vector::back_insert_iterator::add
void add(const value_type *v)
add value to the container
Definition: bmstrsparsevec.h:1914
bm::str_sparse_vector::remap_matrix1_
plain_octet_matrix_type remap_matrix1_
octet remap table 1
Definition: bmstrsparsevec.h:1113
bm::str_sparse_vector::const_iterator::difference_type
long long difference_type
Definition: bmstrsparsevec.h:182
bm::str_sparse_vector::clear_range
str_sparse_vector< CharType, BV, MAX_STR_SIZE > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all plains)
Definition: bmstrsparsevec.h:620
bm::str_sparse_vector::is_rsc_support::value
@ value
Definition: bmstrsparsevec.h:88
bm::str_sparse_vector::const_iterator::go_to
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
Definition: bmstrsparsevec.h:1806
bm::basic_bmatrix::construct_row
bvector_type_ptr construct_row(size_type row)
Definition: bmbmatrix.h:715
bmdef.h
Definitions(internal)
bm::bv_statistics::ptr_sub_blocks
size_t ptr_sub_blocks
Number of sub-blocks.
Definition: bmfunc.h:58
bm::str_sparse_vector::sv_octet_plains
@ sv_octet_plains
Definition: bmstrsparsevec.h:77
bm::str_sparse_vector::str_sparse_vector
str_sparse_vector(str_sparse_vector< CharType, BV, MAX_STR_SIZE > &&str_sv) BMNOEXCEPT
Definition: bmstrsparsevec.h:397
bm::basic_bmatrix
Basic dense bit-matrix class.
Definition: bmbmatrix.h:53
bm::str_sparse_vector::operator[]
const_reference operator[](size_type idx) const
Operator to get read access to an element
Definition: bmstrsparsevec.h:427
bm::str_sparse_vector::reference::operator==
bool operator==(const reference &ref) const BMNOEXCEPT
Definition: bmstrsparsevec.h:147
bm::str_sparse_vector::const_iterator::operator>
bool operator>(const const_iterator &it) const BMNOEXCEPT
Definition: bmstrsparsevec.h:199
bm::str_sparse_vector::const_iterator::operator>=
bool operator>=(const const_iterator &it) const BMNOEXCEPT
Definition: bmstrsparsevec.h:201
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::copy_from
void copy_from(const base_sparse_vector< CharType, BV, MAX_SIZE > &bsv)
Definition: bmbmatrix.h:1175
bm::str_sparse_vector::remap_fromsv
static bool remap_fromsv(value_type *BMRESTRICT str, size_type buf_size, const value_type *BMRESTRICT sv_str, const plain_octet_matrix_type &BMRESTRICT octet_remap_matrix1) BMNOEXCEPT
Definition: bmstrsparsevec.h:1570
bm::str_sparse_vector::find_rank
static bool find_rank(size_type rank, size_type &pos) BMNOEXCEPT
find position of compressed element by its rank
Definition: bmstrsparsevec.h:1426
bm::str_sparse_vector::const_iterator::operator<
bool operator<(const const_iterator &it) const BMNOEXCEPT
Definition: bmstrsparsevec.h:195
bm::str_sparse_vector::back_insert_iterator::reference
void reference
Definition: bmstrsparsevec.h:280
bm::str_sparse_vector::const_iterator::valid
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
Definition: bmstrsparsevec.h:223
bm::str_sparse_vector::resolve_range
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const
Definition: bmstrsparsevec.h:1101
bm::str_sparse_vector::back_insert_iterator::difference_type
void difference_type
Definition: bmstrsparsevec.h:278
bm::str_sparse_vector::const_iterator::operator==
bool operator==(const const_iterator &it) const BMNOEXCEPT
Definition: bmstrsparsevec.h:191
bm::str_sparse_vector::const_iterator::value
const value_type * value() const BMNOEXCEPT
Get current position (value)
Definition: bmstrsparsevec.h:1783
bm::str_sparse_vector::const_iterator::is_null
bool is_null() const BMNOEXCEPT
Get NULL status.
Definition: bmstrsparsevec.h:220
bm::str_sparse_vector::remap_tosv
bool remap_tosv(value_type *sv_str, size_type buf_size, const value_type *str) const BMNOEXCEPT
Definition: bmstrsparsevec.h:786
bm::str_sparse_vector::reference::operator=
reference & operator=(const value_type *str)
Definition: bmstrsparsevec.h:142
bm::str_sparse_vector::str_sparse_vector
str_sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
Definition: bmstrsparsevec.h:1122
bm::str_sparse_vector::clear
void clear() BMNOEXCEPT
resize to zero, free memory
Definition: bmstrsparsevec.h:1710
bm::bv_statistics::bv_count
size_t bv_count
Number of bit-vectors.
Definition: bmfunc.h:59
bm::str_sparse_vector::import_no_check
void import_no_check(CharMatrix &cmatr, size_type idx_from, size_type imp_size, bool set_not_null=true)
Definition: bmstrsparsevec.h:974
bm::str_sparse_vector::effective_max_str
size_type effective_max_str() const BMNOEXCEPT
get effective string length used in vector Calculate and returns efficiency, how close are we to the ...
Definition: bmstrsparsevec.h:1439
bmbmatrix.h
basic bit-matrix class and utilities
bm::str_sparse_vector::const_iterator::bvector_type
str_sparse_vector_type::bvector_type bvector_type
Definition: bmstrsparsevec.h:178
bm::set_block_shift
const unsigned set_block_shift
Definition: bmconst.h:55
bm::str_sparse_vector::back_insert_iterator
Back insert iterator implements buffered insert, faster than generic access assignment.
Definition: bmstrsparsevec.h:264
bm::str_sparse_vector::equal
bool equal(const str_sparse_vector< CharType, BV, MAX_STR_SIZE > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
Definition: bmstrsparsevec.h:1651
bm::str_sparse_vector::const_iterator::pos
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
Definition: bmstrsparsevec.h:229
bm
Definition: bm.h:76
bm::base_sparse_vector< CharType, BV, MAX_STR_SIZE >::swap
void swap(base_sparse_vector< CharType, BV, MAX_SIZE > &bsv) BMNOEXCEPT
Definition: bmbmatrix.h:1208
bm::str_sparse_vector::insert
void insert(size_type idx, const value_type *str)
insert the specified element
Definition: bmstrsparsevec.h:1169
bm::str_sparse_vector::throw_range_error
static void throw_range_error(const char *err_msg)
throw range error
Definition: bmstrsparsevec.h:1718
bm::str_sparse_vector::back_insert_iterator::iterator_category
std::output_iterator_tag iterator_category
Definition: bmstrsparsevec.h:268
bm::bv_statistics::memory_used
size_t memory_used
memory usage for all blocks and service tables
Definition: bmfunc.h:61
bm::str_sparse_vector::const_iterator::str_sparse_vector_type_ptr
str_sparse_vector_type * str_sparse_vector_type_ptr
Definition: bmstrsparsevec.h:175
bm::str_sparse_vector::reference::reference
reference(str_sparse_vector< CharType, BV, MAX_STR_SIZE > &str_sv, size_type idx) BMNOEXCEPT
Definition: bmstrsparsevec.h:124
bm::null_support
null_support
NULL-able value support.
Definition: bmconst.h:213
bm::str_sparse_vector::resize
void resize(size_type sz)
resize vector
Definition: bmstrsparsevec.h:647
bm::str_sparse_vector::common_prefix_length
unsigned common_prefix_length(size_type idx1, size_type idx2) const BMNOEXCEPT
Find size of common prefix between two vector elements in octets.
Definition: bmstrsparsevec.h:1399
bmtrans.h
Utilities for bit transposition (internal) (experimental!)
bm::str_sparse_vector::const_iterator::const_iterator
const_iterator() BMNOEXCEPT
Definition: bmstrsparsevec.h:1750
bm::word_t
unsigned int word_t
Definition: bmconst.h:38
BMRESTRICT
#define BMRESTRICT
Definition: bmdef.h:193
bm::str_sparse_vector::recalc_remap_matrix2
void recalc_remap_matrix2()
Definition: bmstrsparsevec.h:1516
bm::str_sparse_vector::effective_size
size_type effective_size() const BMNOEXCEPT
size of sparse vector (may be different for RSC)
Definition: bmstrsparsevec.h:968
bm::str_sparse_vector::set_value_no_null
void set_value_no_null(size_type idx, const value_type *str)
set value without checking boundaries or support of NULL
Definition: bmstrsparsevec.h:1223
bm::str_sparse_vector::optimize
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, MAX_STR_SIZE >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmstrsparsevec.h:1323
bm::str_sparse_vector::back_insert_iterator::value_type
str_sparse_vector_type::value_type value_type
Definition: bmstrsparsevec.h:272
bm::str_sparse_vector::bvector_type
BV bvector_type
Definition: bmstrsparsevec.h:59
bm::str_sparse_vector::back_insert_iterator::add_null
void add_null()
add NULL (no-value) to the container
Definition: bmstrsparsevec.h:1933
bm::bvector::size_type
bm::id_t size_type
Definition: bm.h:117
bm::str_sparse_vector::back_insert_iterator::str_sparse_vector_type
str_sparse_vector< CharType, BV, MAX_STR_SIZE > str_sparse_vector_type
Definition: bmstrsparsevec.h:270
bm::str_sparse_vector::const_iterator::iterator_category
std::input_iterator_tag iterator_category
Definition: bmstrsparsevec.h:172
bm::str_sparse_vector::back_insert_iterator::pointer
void pointer
Definition: bmstrsparsevec.h:279
bm::str_sparse_vector::calc_octet_stat
void calc_octet_stat(plain_octet_matrix_type &octet_matrix) const BMNOEXCEPT
Definition: bmstrsparsevec.h:1457
bm::str_sparse_vector::import_back
void import_back(CharMatrix &cmatr, size_type imp_size)
Bulk push-back import of strings from a C-style matrix of chars.
Definition: bmstrsparsevec.h:910
bm::str_sparse_vector::set
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
Definition: bmstrsparsevec.h:1158
bm::str_sparse_vector::is_remap_support::trait
trait
Definition: bmstrsparsevec.h:87
bm::str_sparse_vector::back_insert_iterator::add_value
size_type add_value(const value_type *v)
add value to the buffer without changing the NULL vector
Definition: bmstrsparsevec.h:1953
bm::sparse_vector_serializer
Definition: bmsparsevec_serial.h:160
bm::bv_statistics
Structure with statistical information about memory allocation footprint, serialization projection,...
Definition: bmfunc.h:54
bm::str_sparse_vector::assign
void assign(size_type idx, const StrType &str)
set specified element with bounds checking and automatic resize
Definition: bmstrsparsevec.h:494
bm::str_sparse_vector::reference::is_null
bool is_null() const BMNOEXCEPT
Definition: bmstrsparsevec.h:149
bm::str_sparse_vector::remap_from
void remap_from(const str_sparse_vector &str_sv)
Build remapping profile and load content from another sparse vector.
Definition: bmstrsparsevec.h:1600
bm::bv_statistics::add
void add(const bv_statistics &st) BMNOEXCEPT
Sum data from another sttructure.
Definition: bmfunc.h:105
bm::str_sparse_vector::statistics
Definition: bmstrsparsevec.h:72
bm::str_sparse_vector::max_str
static size_type max_str()
get maximum string length capacity
Definition: bmstrsparsevec.h:652
bm::str_sparse_vector::back_insert_iterator::operator++
back_insert_iterator & operator++()
noop
Definition: bmstrsparsevec.h:311
bm::bvector::statistics
Statistical information about bitset's memory allocation details.
Definition: bm.h:121
bm::str_sparse_vector::erase
void erase(size_type idx)
erase the specified element
Definition: bmstrsparsevec.h:1185
bm::str_sparse_vector::back_insert_iterator::back_insert_iterator
back_insert_iterator() BMNOEXCEPT
Definition: bmstrsparsevec.h:1842
bm::str_sparse_vector::bmatrix_type
bm::basic_bmatrix< BV > bmatrix_type
Definition: bmstrsparsevec.h:68
bm::str_sparse_vector::const_iterator::operator<=
bool operator<=(const const_iterator &it) const BMNOEXCEPT
Definition: bmstrsparsevec.h:197
bm::str_sparse_vector::back_insert_iterator::str_sparse_vector_type_ptr
str_sparse_vector_type * str_sparse_vector_type_ptr
Definition: bmstrsparsevec.h:271