BitMagic-C++
bmsparsevec.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_H__INCLUDED__
2 #define BMSPARSEVEC_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmsparsevec.h
22  \brief Sparse constainer sparse_vector<> for integer types using
23  bit-transposition transform
24 */
25 
26 #include <memory.h>
27 
28 #ifndef BM_NO_STL
29 #include <stdexcept>
30 #endif
31 
32 #include "bm.h"
33 #include "bmtrans.h"
34 #include "bmalgo.h"
35 #include "bmbuffer.h"
36 #include "bmbmatrix.h"
37 #include "bmdef.h"
38 
39 namespace bm
40 {
41 
42 /** \defgroup svector Sparse and compressed vectors
43  Sparse vector for integer types using bit transposition transform
44 
45  @ingroup bmagic
46  */
47 
48 
49 /** \defgroup sv sparse_vector<>
50  Sparse vector for integer types using bit transposition transform
51 
52  @ingroup bmagic
53  */
54 
55 
56 /*!
57  \brief sparse vector with runtime compression using bit transposition method
58 
59  Sparse vector implements variable bit-depth storage model.
60  Initial data is bit-transposed into bit-planes so each element
61  may use less memory than the original native data type prescribes.
62  For example, 32-bit integer may only use 20 bits.
63 
64  Another level of compression is provided by bit-vector (BV template parameter)
65  used for storing bit planes. bvector<> implements varians of on the fly block
66  compression, so if a significant area of a sparse vector uses less bits - it
67  will save memory.
68 
69  Overall it provides variable bit-depth compression, sparse compression in
70  bit-plains.
71 
72  @ingroup sv
73 */
74 template<class Val, class BV>
75 class sparse_vector : public base_sparse_vector<Val, BV, 1>
76 {
77 public:
78  typedef Val value_type;
80  typedef BV bvector_type;
81  typedef bvector_type* bvector_type_ptr;
82  typedef const bvector_type* bvector_type_const_ptr;
83  typedef const value_type& const_reference;
84  typedef typename BV::allocator_type allocator_type;
87  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
90 
91 
92  /*! Statistical information about memory allocation details. */
93  struct statistics : public bv_statistics
94  {};
95 
96  /*! Traits and features used in algorithms to correctly run
97  on a particular type of sparse vector
98  */
99  struct is_remap_support { enum trait { value = false }; };
100  struct is_rsc_support { enum trait { value = false }; };
101 
102  /**
103  Reference class to access elements via common [] operator
104  @ingroup sv
105  */
106  class reference
107  {
108  public:
110  : sv_(sv), idx_(idx)
111  {}
112  operator value_type() const { return sv_.get(idx_); }
114  {
115  sv_.set(idx_, (value_type)ref);
116  return *this;
117  }
118  reference& operator=(value_type val)
119  {
120  sv_.set(idx_, val);
121  return *this;
122  }
123  bool operator==(const reference& ref) const
124  { return bool(*this) == bool(ref); }
125  bool is_null() const { return sv_.is_null(idx_); }
126  private:
128  size_type idx_;
129  };
130 
131 
132  /**
133  Const iterator to traverse the sparse vector.
134 
135  Implementation uses buffer for decoding so, competing changes
136  to the original vector may not match the iterator returned values.
137 
138  This iterator keeps an operational buffer for 8K elements,
139  so memory footprint is not negligable (about 64K for unsigned int)
140 
141  @ingroup sv
142  */
144  {
145  public:
146  friend class sparse_vector;
147 
148 #ifndef BM_NO_STL
149  typedef std::input_iterator_tag iterator_category;
150 #endif
152  typedef sparse_vector_type* sparse_vector_type_ptr;
158  typedef bm::byte_buffer<allocator_type> buffer_type;
159 
160  typedef unsigned difference_type;
161  typedef unsigned* pointer;
162  typedef value_type& reference;
163 
164 
165  public:
166  const_iterator();
167  const_iterator(const sparse_vector_type* sv);
168  const_iterator(const sparse_vector_type* sv, size_type pos);
169  const_iterator(const const_iterator& it);
170 
171  bool operator==(const const_iterator& it) const
172  { return (pos_ == it.pos_) && (sv_ == it.sv_); }
173  bool operator!=(const const_iterator& it) const
174  { return ! operator==(it); }
175  bool operator < (const const_iterator& it) const
176  { return pos_ < it.pos_; }
177  bool operator <= (const const_iterator& it) const
178  { return pos_ <= it.pos_; }
179  bool operator > (const const_iterator& it) const
180  { return pos_ > it.pos_; }
181  bool operator >= (const const_iterator& it) const
182  { return pos_ >= it.pos_; }
183 
184  /// \brief Get current position (value)
185  value_type operator*() const { return this->value(); }
186 
187 
188  /// \brief Advance to the next available value
189  const_iterator& operator++() { this->advance(); return *this; }
190 
191  /// \brief Advance to the next available value
193  { const_iterator tmp(*this);this->advance(); return tmp; }
194 
195 
196  /// \brief Get current position (value)
197  value_type value() const;
198 
199  /// \brief Get NULL status
200  bool is_null() const;
201 
202  /// Returns true if iterator is at a valid position
203  bool valid() const { return pos_ != bm::id_max; }
204 
205  /// Invalidate current iterator
206  void invalidate() { pos_ = bm::id_max; }
207 
208  /// Current position (index) in the vector
209  bm::id_t pos() const { return pos_; }
210 
211  /// re-position to a specified position
212  void go_to(size_type pos);
213 
214  /// advance iterator forward by one
215  void advance();
216 
217  void skip_zero_values();
218  private:
219  enum buf_size_e
220  {
221  n_buf_size = 1024 * 8
222  };
223 
224  private:
225  const sparse_vector_type* sv_; ///!< ptr to parent
226  size_type pos_; ///!< Position
227  mutable buffer_type buffer_; ///!< value buffer
228  mutable value_type* buf_ptr_; ///!< position in the buffer
229  mutable allocator_pool_type pool_;
230  };
231 
232  /**
233  Back insert iterator implements buffered insert, faster than generic
234  access assignment.
235 
236  Limitations for buffered inserter:
237  1. Do not use more than one inserter (into one vector) at the same time
238  2. Use method flush() at the end to send the rest of accumulated buffer
239  flush is happening automatically on destruction, but if flush produces an
240  exception (for whatever reason) it will be an exception in destructor.
241  As such, explicit flush() is safer way to finilize the sparse vector load.
242 
243  @ingroup sv
244  */
246  {
247  public:
248 #ifndef BM_NO_STL
249  typedef std::output_iterator_tag iterator_category;
250 #endif
252  typedef sparse_vector_type* sparse_vector_type_ptr;
258  typedef bm::byte_buffer<allocator_type> buffer_type;
259 
260  typedef void difference_type;
261  typedef void pointer;
262  typedef void reference;
263 
264  public:
266  back_insert_iterator(sparse_vector_type* sv);
268 
270  {
271  BM_ASSERT(bi.empty());
272  this->flush(); sv_ = bi.sv_;
273  return *this;
274  }
275 
277 
278  /** push value to the vector */
279  back_insert_iterator& operator=(value_type v) { this->add(v); return *this; }
280  /** noop */
281  back_insert_iterator& operator*() { return *this; }
282  /** noop */
283  back_insert_iterator& operator++() { return *this; }
284  /** noop */
285  back_insert_iterator& operator++( int ) { return *this; }
286 
287  /** add value to the container*/
288  void add(value_type v);
289 
290  /** add NULL (no-value) to the container */
291  void add_null();
292 
293  /** add a series of consequitve NULLs (no-value) to the container */
294  void add_null(size_type count);
295 
296  /** return true if insertion buffer is empty */
297  bool empty() const;
298 
299  /** flush the accumulated buffer */
300  void flush();
301  protected:
302 
303  /** add value to the buffer without changing the NULL vector
304  @param v - value to push back
305  @return index of added value in the internal buffer
306  @internal
307  */
308  unsigned add_value(value_type v);
309 
310  private:
311  enum buf_size_e
312  {
313  n_buf_size = 1024 * 8
314  };
315 
316  private:
317  bm::sparse_vector<Val, BV>* sv_; ///!< pointer on the parent vector
318  bvector_type* bv_null_; ///!< not NULL vector pointer
319  buffer_type buffer_; ///!< value buffer
320  value_type* buf_ptr_; ///!< position in the buffer
321  };
322 
325 
326 public:
327  // ------------------------------------------------------------
328  /*! @name Construction and assignment */
329  ///@{
330 
331  /*!
332  \brief Sparse vector constructor
333 
334  \param null_able - defines if vector supports NULL values flag
335  by default it is OFF, use bm::use_null to enable it
336  \param ap - allocation strategy for underlying bit-vectors
337  Default allocation policy uses BM_BIT setting (fastest access)
338  \param bv_max_size - maximum possible size of underlying bit-vectors
339  Please note, this is NOT size of svector itself, it is dynamic upper limit
340  which should be used very carefully if we surely know the ultimate size
341  \param alloc - allocator for bit-vectors
342 
343  \sa bvector<>
344  \sa bm::bvector<>::allocation_policy
345  \sa bm::startegy
346  */
348  allocation_policy_type ap = allocation_policy_type(),
349  size_type bv_max_size = bm::id_max,
350  const allocator_type& alloc = allocator_type());
351 
352  /*! copy-ctor */
354 
355  /*! copy assignmment operator */
357  {
358  if (this != &sv)
360  return *this;
361  }
362 
363 #ifndef BM_NO_CXX11
364  /*! move-ctor */
366 
367 
368  /*! move assignmment operator */
370  {
371  if (this != &sv)
372  {
373  clear();
374  swap(sv);
375  }
376  return *this;
377  }
378 #endif
379 
381  ///@}
382 
383 
384  // ------------------------------------------------------------
385  /*! @name Element access */
386  ///@{
387 
388  /** \brief Operator to get write access to an element */
389  reference operator[](size_type idx) { return reference(*this, idx); }
390 
391  /*!
392  \brief get specified element without bounds checking
393  \param idx - element index
394  \return value of the element
395  */
396  value_type operator[](size_type idx) const { return this->get(idx); }
397 
398  /*!
399  \brief access specified element with bounds checking
400  \param idx - element index
401  \return value of the element
402  */
403  value_type at(size_type idx) const;
404  /*!
405  \brief get specified element without bounds checking
406  \param idx - element index
407  \return value of the element
408  */
409  value_type get(bm::id_t idx) const;
410 
411  /*!
412  \brief set specified element with bounds checking and automatic resize
413  \param idx - element index
414  \param v - element value
415  */
416  void set(size_type idx, value_type v);
417 
418  /*!
419  \brief increment specified element by one
420  \param idx - element index
421  */
422  void inc(size_type idx);
423 
424  /*!
425  \brief push value back into vector
426  \param v - element value
427  */
428  void push_back(value_type v);
429 
430  /*!
431  \brief clear specified element with bounds checking and automatic resize
432  \param idx - element index
433  \param set_null - if true the value receives NULL (unassigned) value
434  */
435  void clear(size_type idx, bool set_null = false);
436 
437  ///@}
438 
439  // ------------------------------------------------------------
440  /*! @name Iterator access */
441  //@{
442 
443  /** Provide const iterator access to container content */
444  const_iterator begin() const;
445 
446  /** Provide const iterator access to the end */
447  const_iterator end() const { return const_iterator(this, bm::id_max); }
448 
449  /** Get const_itertor re-positioned to specific element
450  @param idx - position in the sparse vector
451  */
452  const_iterator get_const_iterator(size_type idx) const { return const_iterator(this, idx); }
453 
454  /** Provide back insert iterator
455  Back insert iterator implements buffered insertion,
456  which is faster, than random access or push_back
457  */
459  ///@}
460 
461 
462  // ------------------------------------------------------------
463  /*! @name Various traits */
464  //@{
465 
466  /** \brief set specified element to unassigned value (NULL)
467  \param idx - element index
468  */
469  void set_null(size_type idx);
470 
471  /** \brief trait if sparse vector is "compressed" (false)
472  */
473  static
474  bool is_compressed() { return false; }
475 
476  ///@}
477 
478 
479  // ------------------------------------------------------------
480  /*! @name Loading of sparse vector from C-style array */
481  //@{
482 
483  /*!
484  \brief Import list of elements from a C-style array
485  \param arr - source array
486  \param arr_size - source size
487  \param offset - target index in the sparse vector
488  */
489  void import(const value_type* arr, size_type arr_size, size_type offset = 0);
490 
491  /*!
492  \brief Import list of elements from a C-style array (pushed back)
493  \param arr - source array
494  \param srr_size - source size
495  */
496  void import_back(const value_type* arr, size_type arr_size);
497  ///@}
498 
499  // ------------------------------------------------------------
500  /*! @name Export content to C-style array */
501  ///@{
502 
503  /*!
504  \brief Bulk export list of elements to a C-style array
505 
506  For efficiency, this is left as a low level function,
507  it does not do any bounds checking on the target array, it will
508  override memory and crash if you are not careful with allocation
509  and request size.
510 
511  \param arr - dest array
512  \param idx_from - index in the sparse vector to export from
513  \param dec_size - decoding size (array allocation should match)
514  \param zero_mem - set to false if target array is pre-initialized
515  with 0s to avoid performance penalty
516 
517  \return number of actually exported elements (can be less than requested)
518 
519  \sa gather
520  */
521  size_type decode(value_type* arr,
522  size_type idx_from,
523  size_type dec_size,
524  bool zero_mem = true) const;
525 
526 
527  /*!
528  \brief Gather elements to a C-style array
529 
530  Gather collects values from different locations, for best
531  performance feed it with sorted list of indexes.
532 
533  Faster than one-by-one random access.
534 
535  For efficiency, this is left as a low level function,
536  it does not do any bounds checking on the target array, it will
537  override memory and crash if you are not careful with allocation
538  and request size.
539 
540  \param arr - dest array
541  \param idx - index list to gather elements
542  \param size - decoding index list size (array allocation should match)
543  \param sorted_idx - sort order directive for the idx array
544  (BM_UNSORTED, BM_SORTED, BM_UNKNOWN)
545  Sort order affects both performance and correctness(!), use BM_UNKNOWN
546  if not sure.
547 
548  \return number of actually exported elements (can be less than requested)
549 
550  \sa decode
551  */
552  size_type gather(value_type* arr,
553  const size_type* idx,
554  size_type size,
555  bm::sort_order sorted_idx) const;
556  ///@}
557 
558  /*! \brief content exchange
559  */
561 
562  // ------------------------------------------------------------
563  /*! @name Clear */
564  ///@{
565 
566  /*! \brief resize to zero, free memory */
567  void clear() BMNOEXEPT;
568 
569  /*!
570  \brief clear range (assign bit 0 for all plains)
571  \param left - interval start
572  \param right - interval end (closed interval)
573  \param set_null - set cleared values to unassigned (NULL)
574  */
575  sparse_vector<Val, BV>& clear_range(size_type left,
576  size_type right,
577  bool set_null = false);
578 
579  ///@}
580 
581  // ------------------------------------------------------------
582  /*! @name Size, etc */
583  ///@{
584 
585  /*! \brief return size of the vector
586  \return size of sparse vector
587  */
588  size_type size() const { return this->size_; }
589 
590  /*! \brief return true if vector is empty
591  \return true if empty
592  */
593  bool empty() const { return (size() == 0); }
594 
595  /*! \brief resize vector
596  \param sz - new size
597  */
598  void resize(size_type sz) { parent_type::resize(sz); }
599  ///@}
600 
601  // ------------------------------------------------------------
602  /*! @name Comparison */
603  ///@{
604 
605  /*!
606  \brief check if another sparse vector has the same content and size
607 
608  \param sv - sparse vector for comparison
609  \param null_able - flag to consider NULL vector in comparison (default)
610  or compare only value content plains
611 
612  \return true, if it is the same
613  */
614  bool equal(const sparse_vector<Val, BV>& sv,
615  bm::null_support null_able = bm::use_null) const;
616  ///@}
617 
618  // ------------------------------------------------------------
619  /*! @name Memory optimization */
620  ///@{
621 
622  /*!
623  \brief run memory optimization for all vector plains
624  \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
625  \param opt_mode - requested compression depth
626  \param stat - memory allocation statistics after optimization
627  */
628  void optimize(bm::word_t* temp_block = 0,
630  typename sparse_vector<Val, BV>::statistics* stat = 0);
631  /*!
632  \brief Optimize sizes of GAP blocks
633 
634  This method runs an analysis to find optimal GAP levels for all bit plains
635  of the vector.
636  */
637  void optimize_gap_size();
638 
639  /*!
640  @brief Calculates memory statistics.
641 
642  Function fills statistics structure containing information about how
643  this vector uses memory and estimation of max. amount of memory
644  bvector needs to serialize itself.
645 
646  @param st - pointer on statistics structure to be filled in.
647 
648  @sa statistics
649  */
650  void calc_stat(struct sparse_vector<Val, BV>::statistics* st) const;
651  ///@}
652 
653  // ------------------------------------------------------------
654  /*! @name Merge, split, partition data */
655  ///@{
656 
657  /*!
658  \brief join all with another sparse vector using OR operation
659  \param sv - argument vector to join with
660  \return slf reference
661  @sa merge
662  */
664 
665  /*!
666  \brief merge with another sparse vector using OR operation
667  Merge is different from join(), because it borrows data from the source
668  vector, so it gets modified.
669 
670  \param sv - [in, out]argument vector to join with (vector mutates)
671 
672  \return slf reference
673  @sa join
674  */
676 
677 
678  /**
679  @brief copy range of values from another sparse vector
680 
681  Copy [left..right] values from the source vector,
682  clear everything outside the range.
683 
684  \param sv - source vector
685  \param left - index from in losed diapason of [left..right]
686  \param right - index to in losed diapason of [left..right]
687  */
688  void copy_range(const sparse_vector<Val, BV>& sv,
689  size_type left, size_type right);
690 
691  /**
692  @brief Apply value filter, defined by mask vector
693 
694  All bit-plains are ANDed against the filter mask.
695  */
696  void filter(const bvector_type& bv_mask);
697 
698  ///@}
699 
700 
701  // ------------------------------------------------------------
702  /*! @name Access to internals */
703  ///@{
704 
705 
706  /*! \brief syncronize internal structures */
707  void sync(bool /*force*/) {}
708 
709 
710  /*!
711  \brief Bulk export list of elements to a C-style array
712 
713  Use of all extract() methods is restricted.
714  Please consider decode() for the same purpose.
715 
716  \param arr - dest array
717  \param size - dest size
718  \param offset - target index in the sparse vector to export from
719  \param zero_mem - set to false if target array is pre-initialized
720  with 0s to avoid performance penalty
721 
722  \return number of exported elements
723 
724  \sa decode
725 
726  @internal
727  */
728  size_type extract(value_type* arr,
729  size_type size,
730  size_type offset = 0,
731  bool zero_mem = true,
732  allocator_pool_type* pool_ptr = 0) const;
733 
734  /** \brief extract small window without use of masking vector
735  \sa decode
736  @internal
737  */
738  size_type extract_range(value_type* arr,
739  size_type size,
740  size_type offset,
741  bool zero_mem = true) const;
742 
743  /** \brief extract medium window without use of masking vector
744  \sa decode
745  @internal
746  */
747  size_type extract_plains(value_type* arr,
748  size_type size,
749  size_type offset,
750  bool zero_mem = true) const;
751 
752  /** \brief address translation for this type of container
753  \internal
754  */
755  static
756  size_type translate_address(size_type i) { return i; }
757 
758  /**
759  \brief throw range error
760  \internal
761  */
762  static
763  void throw_range_error(const char* err_msg);
764 
765  /**
766  \brief throw bad alloc
767  \internal
768  */
769  static
770  void throw_bad_alloc();
771 
772 
773  /**
774  \brief find position of compressed element by its rank
775  */
776  static
777  bool find_rank(bm::id_t rank, bm::id_t& pos);
778 
779  /**
780  \brief size of sparse vector (may be different for RSC)
781  */
782  size_type effective_size() const { return size(); }
783 
784  /**
785  \brief Always 1 (non-matrix type)
786  */
787  size_type effective_vector_max() const { return 1; }
788 
789  ///@}
790 
791  /// Set allocator pool for local (non-threaded)
792  /// memory cyclic(lots of alloc-free ops) opertations
793  ///
794  void set_allocator_pool(allocator_pool_type* pool_ptr);
795 
796 protected:
798  {
800  };
801 
802 
803  /*! \brief set value without checking boundaries */
804  void set_value(size_type idx, value_type v);
805 
806  /*! \brief set value without checking boundaries or support of NULL */
807  void set_value_no_null(size_type idx, value_type v);
808 
809  /*! \brief push value back into vector without NULL semantics */
810  void push_back_no_null(value_type v);
811 
812 
813  void resize_internal(size_type sz) { resize(sz); }
814  size_type size_internal() const { return size(); }
815 
816  bool is_remap() const { return false; }
817  size_t remap_size() const { return 0; }
818  const unsigned char* get_remap_buffer() const { return 0; }
819  unsigned char* init_remap_buffer() { return 0; }
820  void set_remap() { }
821 
822 protected:
823  template<class V, class SV> friend class rsc_sparse_vector;
824  template<class SVect> friend class sparse_vector_scanner;
825  template<class SVect> friend class sparse_vector_serializer;
826  template<class SVect> friend class sparse_vector_deserializer;
827 
828 };
829 
830 
831 //---------------------------------------------------------------------
832 //---------------------------------------------------------------------
833 
834 
835 template<class Val, class BV>
837  bm::null_support null_able,
838  allocation_policy_type ap,
839  size_type bv_max_size,
840  const allocator_type& alloc)
841 : parent_type(null_able, ap, bv_max_size, alloc)
842 {}
843 
844 //---------------------------------------------------------------------
845 
846 template<class Val, class BV>
848 : parent_type(sv)
849 {}
850 
851 //---------------------------------------------------------------------
852 #ifndef BM_NO_CXX11
853 
854 template<class Val, class BV>
856 {
857  parent_type::swap(sv);
858 }
859 
860 #endif
861 
862 
863 //---------------------------------------------------------------------
864 
865 template<class Val, class BV>
867 {}
868 
869 //---------------------------------------------------------------------
870 
871 template<class Val, class BV>
873 {
874  parent_type::swap(sv);
875 }
876 
877 //---------------------------------------------------------------------
878 
879 template<class Val, class BV>
881 {
882 #ifndef BM_NO_STL
883  throw std::range_error(err_msg);
884 #else
885  BM_ASSERT_THROW(false, BM_ERR_RANGE);
886 #endif
887 }
888 
889 //---------------------------------------------------------------------
890 
891 template<class Val, class BV>
893 {
894  BV::throw_bad_alloc();
895 }
896 
897 //---------------------------------------------------------------------
898 
899 template<class Val, class BV>
901 {
902  clear(idx, true);
903 }
904 
905 //---------------------------------------------------------------------
906 
907 template<class Val, class BV>
908 void sparse_vector<Val, BV>::import(const value_type* arr,
909  size_type arr_size,
910  size_type offset)
911 {
912  unsigned char b_list[sizeof(Val)*8];
913  unsigned row_len[sizeof(Val)*8] = {0, };
914 
915  const unsigned transpose_window = 256;
917 
918  if (arr_size == 0)
919  throw_range_error("sparse_vector range error (import size 0)");
920 
921  if (offset < this->size_) // in case it touches existing elements
922  {
923  // clear all plains in the range to provide corrrect import of 0 values
924  this->clear_range(offset, offset + arr_size - 1);
925  }
926 
927  // transposition algorithm uses bitscan to find index bits and store it
928  // in temporary matrix (list for each bit plain), matrix here works
929  // when array gets to big - the list gets loaded into bit-vector using
930  // bulk load algorithm, which is faster than single bit access
931  //
932 
933  size_type i;
934  for (i = 0; i < arr_size; ++i)
935  {
936  unsigned bcnt = bm::bitscan(arr[i], b_list);
937  const unsigned bit_idx = i + offset;
938 
939  for (unsigned j = 0; j < bcnt; ++j)
940  {
941  unsigned p = b_list[j];
942  unsigned rl = row_len[p];
943  tm.row(p)[rl] = bit_idx;
944  row_len[p] = ++rl;
945 
946  if (rl == transpose_window)
947  {
948  bvector_type* bv = this->get_plain(p);
949  const bm::id_t* r = tm.row(p);
950  bv->set(r, rl, BM_SORTED);
951  row_len[p] = 0;
952  tm.row(p)[0] = 0;
953  }
954  } // for j
955  } // for i
956 
957  // process incomplete transposition lines
958  //
959  for (unsigned k = 0; k < tm.rows(); ++k)
960  {
961  unsigned rl = row_len[k];
962  if (rl)
963  {
964  bvector_type* bv = this->get_plain(k);
965  const bm::id_t* r = tm.row(k);
966  bv->set(r, rl, BM_SORTED);
967  }
968  } // for k
969 
970 
971  if (i + offset > this->size_)
972  this->size_ = i + offset;
973 
974  bvector_type* bv_null = this->get_null_bvect();
975  if (bv_null) // configured to support NULL assignments
976  {
977  bv_null->set_range(offset, offset + arr_size - 1);
978  }
979 }
980 
981 //---------------------------------------------------------------------
982 
983 template<class Val, class BV>
984 void sparse_vector<Val, BV>::import_back(const value_type* arr,
985  size_type arr_size)
986 {
987  this->import(arr, arr_size, this->size());
988 }
989 
990 //---------------------------------------------------------------------
991 
992 template<class Val, class BV>
995  size_type idx_from,
996  size_type dec_size,
997  bool zero_mem) const
998 {
999  if (dec_size < 32)
1000  {
1001  return extract_range(arr, dec_size, idx_from, zero_mem);
1002  }
1003  if (dec_size < 1024)
1004  {
1005  return extract_plains(arr, dec_size, idx_from, zero_mem);
1006  }
1007  return extract(arr, dec_size, idx_from, zero_mem);
1008 }
1009 
1010 //---------------------------------------------------------------------
1011 
1012 template<class Val, class BV>
1015  const size_type* idx,
1016  size_type size,
1017  bm::sort_order sorted_idx) const
1018 {
1019  BM_ASSERT(arr);
1020  BM_ASSERT(idx);
1021  BM_ASSERT(size);
1022 
1023  if (size == 1) // corner case: get 1 value
1024  {
1025  arr[0] = this->get(idx[0]);
1026  return size;
1027  }
1028  ::memset(arr, 0, sizeof(value_type)*size);
1029 
1030  for (unsigned i = 0; i < size;)
1031  {
1032  bool sorted_block = true;
1033 
1034  // look ahead for the depth of the same block
1035  // (speculate more than one index lookup per block)
1036  //
1037  unsigned nb = unsigned(idx[i] >> bm::set_block_shift);
1038  unsigned r = i;
1039 
1040  switch (sorted_idx)
1041  {
1042  case BM_UNKNOWN:
1043  {
1044  size_type idx_prev = idx[r];
1045  for (; (r < size) && (nb == unsigned(idx[r] >> bm::set_block_shift)); ++r)
1046  {
1047  sorted_block = !(idx[r] < idx_prev); // sorted check
1048  idx_prev = idx[r];
1049  }
1050  }
1051  break;
1052  case BM_UNSORTED:
1053  sorted_block = false;
1054 
1055  for (; r < size; ++r)
1056  if (nb != unsigned(idx[r] >> bm::set_block_shift))
1057  break;
1058  break;
1059  // no break(!) intentional fall through
1060  case BM_SORTED:
1061  r = bm::idx_arr_block_lookup(idx, size, nb, r);
1062  break;
1063  case BM_SORTED_UNIFORM:
1064  r = size;
1065  break;
1066  default:
1067  BM_ASSERT(0);
1068  } // switch
1069 
1070  // single element hit, use plain random access
1071  if (r == i+1)
1072  {
1073  arr[i] = this->get(idx[i]);
1074  ++i;
1075  continue;
1076  }
1077 
1078  // process block co-located elements at ones for best (CPU cache opt)
1079  //
1080  unsigned i0 = nb >> bm::set_array_shift; // top block address
1081  unsigned j0 = nb & bm::set_array_mask; // address in sub-block
1082 
1083  unsigned eff_plains = this->effective_plains();
1084  for (unsigned j = 0; j < eff_plains; ++j)
1085  {
1086  const bm::word_t* blk = this->bmatr_.get_block(j, i0, j0);
1087  if (!blk)
1088  continue;
1089  value_type vm;
1090  if (blk == FULL_BLOCK_FAKE_ADDR)
1091  {
1092  vm = (1u << j);
1093  for (unsigned k = i; k < r; ++k)
1094  arr[k] |= vm;
1095  continue;
1096  }
1097  if (BM_IS_GAP(blk))
1098  {
1099  const bm::gap_word_t* gap_blk = BMGAP_PTR(blk);
1100  unsigned is_set;
1101 
1102  if (sorted_block) // b-search hybrid with scan lookup
1103  {
1104  for (unsigned k = i; k < r; )
1105  {
1106  unsigned nbit = unsigned(idx[k] & bm::set_block_mask);
1107 
1108  unsigned gidx = bm::gap_bfind(gap_blk, nbit, &is_set);
1109  unsigned gap_value = gap_blk[gidx];
1110  if (is_set)
1111  {
1112  arr[k] |= vm = (1u << j);
1113  for (++k; k < r; ++k) // speculative look-up
1114  {
1115  if (unsigned(idx[k] & bm::set_block_mask) <= gap_value)
1116  arr[k] |= vm;
1117  else
1118  break;
1119  }
1120  }
1121  else // 0 GAP - skip. not set
1122  {
1123  for (++k;
1124  (k < r) &&
1125  (unsigned(idx[k] & bm::set_block_mask) <= gap_value);
1126  ++k) {}
1127  }
1128  } // for k
1129  }
1130  else // unsorted block gather request: b-search lookup
1131  {
1132  for (unsigned k = i; k < r; ++k)
1133  {
1134  unsigned nbit = unsigned(idx[k] & bm::set_block_mask);
1135  is_set = bm::gap_test_unr(gap_blk, nbit);
1136  arr[k] |= value_type(bool(is_set) << j);
1137  } // for k
1138  }
1139  continue;
1140  }
1141  bm::bit_block_gather_scatter(arr, blk, idx, r, i, j);
1142  } // for (each plain)
1143 
1144  i = r;
1145 
1146  } // for i
1147 
1148  return size;
1149 }
1150 
1151 //---------------------------------------------------------------------
1152 
1153 template<class Val, class BV>
1156  size_type size,
1157  size_type offset,
1158  bool zero_mem) const
1159 {
1160  if (size == 0)
1161  return 0;
1162  if (zero_mem)
1163  ::memset(arr, 0, sizeof(value_type)*size);
1164 
1165  size_type start = offset;
1166  size_type end = start + size;
1167  if (end > this->size_)
1168  {
1169  end = this->size_;
1170  }
1171 
1172  // calculate logical block coordinates and masks
1173  //
1174  unsigned nb = unsigned(start >> bm::set_block_shift);
1175  unsigned i0 = nb >> bm::set_array_shift; // top block address
1176  unsigned j0 = nb & bm::set_array_mask; // address in sub-block
1177  unsigned nbit = unsigned(start & bm::set_block_mask);
1178  unsigned nword = unsigned(nbit >> bm::set_word_shift);
1179  unsigned mask0 = 1u << (nbit & bm::set_word_mask);
1180  const bm::word_t* blk = 0;
1181  unsigned is_set;
1182 
1183  for (unsigned j = 0; j < sizeof(Val)*8; ++j)
1184  {
1185  blk = this->bmatr_.get_block(j, i0, j0);
1186  bool is_gap = BM_IS_GAP(blk);
1187 
1188  for (unsigned k = start; k < end; ++k)
1189  {
1190  unsigned nb1 = unsigned(k >> bm::set_block_shift);
1191  if (nb1 != nb) // block switch boundaries
1192  {
1193  nb = nb1;
1194  i0 = nb >> bm::set_array_shift;
1195  j0 = nb & bm::set_array_mask;
1196  blk = this->bmatr_.get_block(j, i0, j0);
1197  is_gap = BM_IS_GAP(blk);
1198  }
1199 
1200  if (!blk)
1201  continue;
1202  nbit = unsigned(k & bm::set_block_mask);
1203  if (is_gap)
1204  {
1205  is_set = bm::gap_test_unr(BMGAP_PTR(blk), nbit);
1206  }
1207  else
1208  {
1209  if (blk == FULL_BLOCK_FAKE_ADDR)
1210  {
1211  is_set = 1;
1212  }
1213  else
1214  {
1215  nword = unsigned(nbit >> bm::set_word_shift);
1216  mask0 = 1u << (nbit & bm::set_word_mask);
1217  is_set = (blk[nword] & mask0);
1218  }
1219  }
1220  size_type idx = k - offset;
1221  value_type vm = (bool) is_set;
1222  vm <<= j;
1223  arr[idx] |= vm;
1224 
1225  } // for k
1226 
1227  } // for j
1228  return 0;
1229 }
1230 
1231 //---------------------------------------------------------------------
1232 
1233 template<class Val, class BV>
1236  size_type size,
1237  size_type offset,
1238  bool zero_mem) const
1239 {
1240  if (size == 0)
1241  return 0;
1242 
1243  if (zero_mem)
1244  ::memset(arr, 0, sizeof(value_type)*size);
1245 
1246  size_type start = offset;
1247  size_type end = start + size;
1248  if (end > this->size_)
1249  {
1250  end = this->size_;
1251  }
1252 
1253  for (size_type i = 0; i < parent_type::value_bits(); ++i)
1254  {
1255  const bvector_type* bv = this->bmatr_.get_row(i);
1256  if (!bv)
1257  continue;
1258 
1259  value_type mask = 1;
1260  mask <<= i;
1261  typename BV::enumerator en(bv, offset);
1262  for (;en.valid(); ++en)
1263  {
1264  size_type idx = *en - offset;
1265  if (idx >= size)
1266  break;
1267  arr[idx] |= mask;
1268  } // for
1269 
1270  } // for i
1271 
1272  return 0;
1273 }
1274 
1275 
1276 //---------------------------------------------------------------------
1277 
1278 template<class Val, class BV>
1281  size_type size,
1282  size_type offset,
1283  bool zero_mem,
1284  allocator_pool_type* pool_ptr) const
1285 {
1286  /// Decoder functor
1287  /// @internal
1288  ///
1289  struct sv_decode_visitor_func
1290  {
1291  sv_decode_visitor_func(value_type* varr,
1292  value_type mask,
1293  size_type off)
1294  : arr_(varr), mask_(mask), off_(off)
1295  {}
1296 
1297  void add_bits(bm::id_t arr_offset, const unsigned char* bits, unsigned bits_size)
1298  {
1299  size_type idx_base = arr_offset - off_;
1300 
1301  const value_type m = mask_;
1302  unsigned i = 0;
1303  for (; i < bits_size; ++i)
1304  {
1305  arr_[idx_base + bits[i]] |= m;
1306  }
1307  }
1308 
1309  void add_range(bm::id_t arr_offset, unsigned sz)
1310  {
1311  size_type idx_base = arr_offset - off_;
1312  const value_type m = mask_;
1313  for (unsigned i = 0;i < sz; ++i)
1314  arr_[i + idx_base] |= m;
1315  }
1316  value_type* arr_;
1317  value_type mask_;
1318  size_type off_;
1319  };
1320 
1321 
1322  if (size == 0)
1323  return 0;
1324 
1325  if (zero_mem)
1326  ::memset(arr, 0, sizeof(value_type)*size);
1327 
1328  size_type start = offset;
1329  size_type end = start + size;
1330  if (end > this->size_)
1331  {
1332  end = this->size_;
1333  }
1334 
1335  bool masked_scan = !(offset == 0 && size == this->size());
1336 
1337  if (masked_scan) // use temp vector to decompress the area
1338  {
1339  bvector_type bv_mask;
1340  bv_mask.set_allocator_pool(pool_ptr);
1341 
1342  for (size_type i = 0; i < parent_type::value_bits(); ++i)
1343  {
1344  const bvector_type* bv = this->bmatr_.get_row(i);
1345  if (bv)
1346  {
1347  bv_mask.copy_range(*bv, offset, end - 1);
1348  sv_decode_visitor_func func(arr, (value_type(1) << i), offset);
1349  bm::for_each_bit(bv_mask, func);
1350  }
1351  } // for i
1352  }
1353  else
1354  {
1355  for (size_type i = 0; i < parent_type::value_bits(); ++i)
1356  {
1357  const bvector_type* bv = this->bmatr_.get_row(i);
1358  if (bv)
1359  {
1360  sv_decode_visitor_func func(arr, (value_type(1) << i), 0);
1361  bm::for_each_bit(*bv, func);
1362  }
1363  } // for i
1364  }
1365 
1366  return end - start;
1367 }
1368 
1369 //---------------------------------------------------------------------
1370 
1371 template<class Val, class BV>
1374 {
1375  if (idx >= this->size_)
1376  throw_range_error("sparse vector range error");
1377  return this->get(idx);
1378 }
1379 
1380 //---------------------------------------------------------------------
1381 
1382 template<class Val, class BV>
1385 {
1386  BM_ASSERT(i < size());
1387 
1388  value_type v = 0;
1389  unsigned eff_plains = this->effective_plains();
1390  for (unsigned j = 0; j < eff_plains; j+=4)
1391  {
1392  bool b = this->bmatr_.test_4rows(j);
1393  if (b)
1394  {
1395  value_type vm = this->bmatr_.get_half_octet(i, j);
1396  v |= vm << j;
1397  }
1398  } // for j
1399  return v;
1400 }
1401 
1402 
1403 //---------------------------------------------------------------------
1404 
1405 template<class Val, class BV>
1406 void sparse_vector<Val, BV>::set(size_type idx, value_type v)
1407 {
1408  if (idx >= size())
1409  {
1410  this->size_ = idx+1;
1411  }
1412  set_value(idx, v);
1413 }
1414 
1415 //---------------------------------------------------------------------
1416 
1417 template<class Val, class BV>
1418 void sparse_vector<Val, BV>::clear(size_type idx, bool set_null)
1419 {
1420  if (idx >= size())
1421  this->size_ = idx+1;
1422 
1423  set_value(idx, (value_type)0);
1424  if (set_null)
1425  {
1426  bvector_type* bv_null = this->get_null_bvect();
1427  if (bv_null)
1428  bv_null->set(idx, false);
1429  }
1430 }
1431 
1432 //---------------------------------------------------------------------
1433 
1434 template<class Val, class BV>
1436 {
1437  set_value(this->size_, v);
1438  ++(this->size_);
1439 }
1440 
1441 //---------------------------------------------------------------------
1442 
1443 template<class Val, class BV>
1445 {
1446  set_value_no_null(this->size_, v);
1447  ++(this->size_);
1448 }
1449 
1450 //---------------------------------------------------------------------
1451 
1452 template<class Val, class BV>
1453 void sparse_vector<Val, BV>::set_value(size_type idx, value_type v)
1454 {
1455  set_value_no_null(idx, v);
1456  bvector_type* bv_null = this->get_null_bvect();
1457  if (bv_null)
1458  bv_null->set_bit_no_check(idx);
1459 }
1460 
1461 //---------------------------------------------------------------------
1462 
1463 template<class Val, class BV>
1464 void sparse_vector<Val, BV>::set_value_no_null(size_type idx, value_type v)
1465 {
1466  // calculate logical block coordinates and masks
1467  //
1468  unsigned nb = unsigned(idx >> bm::set_block_shift);
1469  unsigned i0 = nb >> bm::set_array_shift; // top block address
1470  unsigned j0 = nb & bm::set_array_mask; // address in sub-block
1471 
1472  // clear the plains where needed
1473  unsigned eff_plains = this->effective_plains();
1474  unsigned bsr = v ? bm::bit_scan_reverse(v) : 0u;
1475 
1476  for (unsigned i = bsr; i < eff_plains; ++i)
1477  {
1478  const bm::word_t* blk = this->bmatr_.get_block(i, i0, j0);
1479  if (blk)
1480  {
1481  bvector_type* bv = this->bmatr_.get_row(i);
1482  if (bv)
1483  bv->clear_bit_no_check(idx);
1484  }
1485  }
1486  if (v)
1487  {
1488  value_type mask = 1u;
1489  for (unsigned j = 0; j <= bsr; ++j)
1490  {
1491  if (v & mask)
1492  {
1493  bvector_type* bv = this->get_plain(j);
1494  bv->set_bit_no_check(idx);
1495  }
1496  else
1497  {
1498  const bm::word_t* blk = this->bmatr_.get_block(j, i0, j0);
1499  if (blk)
1500  {
1501  bvector_type* bv = this->bmatr_.get_row(j);
1502  bv->clear_bit_no_check(idx);
1503  }
1504  }
1505  mask <<= 1;
1506  }
1507  }
1508 }
1509 
1510 //---------------------------------------------------------------------
1511 
1512 template<class Val, class BV>
1513 void sparse_vector<Val, BV>::inc(size_type idx)
1514 {
1515  if (idx >= this->size_)
1516  this->size_ = idx+1;
1517 
1518  for (unsigned i = 0; i < parent_type::sv_value_plains; ++i)
1519  {
1520  bvector_type* bv = this->get_plain(i);
1521  bool carry_over = bv->inc(idx);
1522  if (!carry_over)
1523  break;
1524  }
1525  bvector_type* bv_null = this->get_null_bvect();
1526  if (bv_null)
1527  bv_null->set_bit_no_check(idx);
1528 }
1529 
1530 //---------------------------------------------------------------------
1531 
1532 template<class Val, class BV>
1534 {
1535  parent_type::clear();
1536 }
1537 
1538 //---------------------------------------------------------------------
1539 
1540 template<class Val, class BV>
1542 {
1543  BM_ASSERT(rank);
1544  pos = rank - 1;
1545  return true;
1546 }
1547 
1548 //---------------------------------------------------------------------
1549 
1550 template<class Val, class BV>
1553  typename sparse_vector<Val, BV>::size_type left,
1554  typename sparse_vector<Val, BV>::size_type right,
1555  bool set_null)
1556 {
1557  parent_type::clear_range(left, right, set_null);
1558  return *this;
1559 }
1560 
1561 //---------------------------------------------------------------------
1562 
1563 template<class Val, class BV>
1565  struct sparse_vector<Val, BV>::statistics* st) const
1566 {
1567  BM_ASSERT(st);
1568  typename bvector_type::statistics stbv;
1569  parent_type::calc_stat(&stbv);
1570 
1571  st->reset();
1572 
1573  st->bit_blocks += stbv.bit_blocks;
1574  st->gap_blocks += stbv.gap_blocks;
1575  st->ptr_sub_blocks += stbv.ptr_sub_blocks;
1576 
1577  st->max_serialize_mem += stbv.max_serialize_mem + 8;
1578  st->memory_used += stbv.memory_used;
1579 }
1580 
1581 //---------------------------------------------------------------------
1582 
1583 template<class Val, class BV>
1585  bm::word_t* temp_block,
1586  typename bvector_type::optmode opt_mode,
1588 {
1589  typename bvector_type::statistics stbv;
1590  parent_type::optimize(temp_block, opt_mode, &stbv);
1591 
1592  if (st)
1593  {
1594  st->bit_blocks += stbv.bit_blocks;
1595  st->gap_blocks += stbv.gap_blocks;
1596  st->max_serialize_mem += stbv.max_serialize_mem + 8;
1597  st->memory_used += stbv.memory_used;
1598  }
1599 }
1600 
1601 //---------------------------------------------------------------------
1602 
1603 template<class Val, class BV>
1605 {
1606  unsigned stored_plains = this->stored_plains();
1607  for (unsigned j = 0; j < stored_plains; ++j)
1608  {
1609  bvector_type* bv = this->bmatr_.get_row(j);
1610  if (bv)
1611  {
1612  bv->optimize_gap_size();
1613  }
1614  }
1615 }
1616 
1617 //---------------------------------------------------------------------
1618 
1619 template<class Val, class BV>
1622 {
1623  size_type arg_size = sv.size();
1624  if (this->size_ < arg_size)
1625  {
1626  resize(arg_size);
1627  }
1628  bvector_type* bv_null = this->get_null_bvect();
1629  unsigned plains;
1630  if (bv_null)
1631  plains = this->stored_plains();
1632  else
1633  plains = this->plains();
1634 
1635  for (unsigned j = 0; j < plains; ++j)
1636  {
1637  const bvector_type* arg_bv = sv.bmatr_.row(j);
1638  if (arg_bv)
1639  {
1640  bvector_type* bv = this->bmatr_.get_row(j);
1641  if (!bv)
1642  bv = this->get_plain(j);
1643  *bv |= *arg_bv;
1644  }
1645  } // for j
1646 
1647  // our vector is NULL-able but argument is not (assumed all values are real)
1648  if (bv_null && !sv.is_nullable())
1649  {
1650  bv_null->set_range(0, arg_size-1);
1651  }
1652 
1653  return *this;
1654 }
1655 
1656 //---------------------------------------------------------------------
1657 
1658 template<class Val, class BV>
1661 {
1662  size_type arg_size = sv.size();
1663  if (this->size_ < arg_size)
1664  {
1665  resize(arg_size);
1666  }
1667  bvector_type* bv_null = this->get_null_bvect();
1668  unsigned plains;
1669  if (bv_null)
1670  plains = this->stored_plains();
1671  else
1672  plains = this->plains();
1673 
1674  for (unsigned j = 0; j < plains; ++j)
1675  {
1676  bvector_type* arg_bv = sv.bmatr_.get_row(j);//sv.plains_[j];
1677  if (arg_bv)
1678  {
1679  bvector_type* bv = this->bmatr_.get_row(j);//this->plains_[j];
1680  if (!bv)
1681  bv = this->get_plain(j);
1682  bv->merge(*arg_bv);
1683  }
1684  } // for j
1685 
1686  // our vector is NULL-able but argument is not (assumed all values are real)
1687  if (bv_null && !sv.is_nullable())
1688  {
1689  bv_null->set_range(0, arg_size-1);
1690  }
1691 
1692  return *this;
1693 }
1694 
1695 //---------------------------------------------------------------------
1696 
1697 template<class Val, class BV>
1699  typename sparse_vector<Val, BV>::size_type left,
1700  typename sparse_vector<Val, BV>::size_type right)
1701 {
1702  if (left > right)
1703  bm::xor_swap(left, right);
1704 
1705  bvector_type* bv_null = this->get_null_bvect();
1706  unsigned plains;
1707  if (bv_null)
1708  {
1709  plains = this->stored_plains();
1710  const bvector_type* bv_null_arg = sv.get_null_bvector();
1711  if (bv_null_arg)
1712  bv_null->copy_range(*bv_null_arg, left, right);
1713  }
1714  else
1715  plains = this->plains();
1716 
1717  for (unsigned j = 0; j < plains; ++j)
1718  {
1719  const bvector_type* arg_bv = sv.bmatr_.row(j);
1720  if (arg_bv)
1721  {
1722  bvector_type* bv = this->bmatr_.get_row(j);
1723  if (!bv)
1724  bv = this->get_plain(j);
1725  bv->copy_range(*arg_bv, left, right);
1726  }
1727  } // for j
1728  this->resize(sv.size());
1729 }
1730 //---------------------------------------------------------------------
1731 
1732 template<class Val, class BV>
1734  const typename sparse_vector<Val, BV>::bvector_type& bv_mask)
1735 {
1736  bvector_type* bv_null = this->get_null_bvect();
1737  unsigned plains;
1738  if (bv_null)
1739  {
1740  plains = this->stored_plains();
1741  bv_null->bit_and(bv_mask);
1742  }
1743  else
1744  plains = this->plains();
1745 
1746  for (unsigned j = 0; j < plains; ++j)
1747  {
1748  bvector_type* bv = this->bmatr_.get_row(j);
1749  if (bv)
1750  bv->bit_and(bv_mask);
1751  }
1752 }
1753 
1754 
1755 //---------------------------------------------------------------------
1756 
1757 template<class Val, class BV>
1759  bm::null_support null_able) const
1760 {
1761  return parent_type::equal(sv, null_able);
1762 }
1763 
1764 //---------------------------------------------------------------------
1765 
1766 template<class Val, class BV>
1769 {
1770  typedef typename sparse_vector<Val, BV>::const_iterator it_type;
1771  return it_type(this);
1772 }
1773 
1774 //---------------------------------------------------------------------
1775 
1776 template<class Val, class BV>
1779 {
1780  this->bmatr_.set_allocator_pool(pool_ptr);
1781 }
1782 
1783 
1784 //---------------------------------------------------------------------
1785 //
1786 //---------------------------------------------------------------------
1787 
1788 
1789 template<class Val, class BV>
1791 : sv_(0), pos_(bm::id_max), buf_ptr_(0)
1792 {}
1793 
1794 //---------------------------------------------------------------------
1795 
1796 template<class Val, class BV>
1798  const typename sparse_vector<Val, BV>::const_iterator& it)
1799 : sv_(it.sv_), pos_(it.pos_), buf_ptr_(0)
1800 {}
1801 
1802 //---------------------------------------------------------------------
1803 
1804 template<class Val, class BV>
1807 : sv_(sv), buf_ptr_(0)
1808 {
1809  BM_ASSERT(sv_);
1810  pos_ = sv_->empty() ? bm::id_max : 0u;
1811 }
1812 
1813 //---------------------------------------------------------------------
1814 
1815 template<class Val, class BV>
1819 : sv_(sv), buf_ptr_(0)
1820 {
1821  BM_ASSERT(sv_);
1822  this->go_to(pos);
1823 }
1824 
1825 //---------------------------------------------------------------------
1826 
1827 template<class Val, class BV>
1829 {
1830  pos_ = (!sv_ || pos >= sv_->size()) ? bm::id_max : pos;
1831  buf_ptr_ = 0;
1832 }
1833 
1834 //---------------------------------------------------------------------
1835 
1836 template<class Val, class BV>
1838 {
1839  if (pos_ == bm::id_max) // nothing to do, we are at the end
1840  return;
1841  ++pos_;
1842  if (pos_ >= sv_->size())
1843  this->invalidate();
1844  else
1845  {
1846  if (buf_ptr_)
1847  {
1848  ++buf_ptr_;
1849  if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size)
1850  buf_ptr_ = 0;
1851  }
1852  }
1853 }
1854 
1855 //---------------------------------------------------------------------
1856 
1857 template<class Val, class BV>
1860 {
1861  BM_ASSERT(this->valid());
1862  value_type v;
1863 
1864  if (!buf_ptr_)
1865  {
1866  buffer_.reserve(n_buf_size * sizeof(value_type));
1867  buf_ptr_ = (value_type*)(buffer_.data());
1868  sv_->extract(buf_ptr_, n_buf_size, pos_, true, &pool_);
1869  }
1870  v = *buf_ptr_;
1871  return v;
1872 }
1873 
1874 //---------------------------------------------------------------------
1875 
1876 template<class Val, class BV>
1878 {
1879  value_type v = value();
1880  if (buf_ptr_)
1881  {
1882  v = *buf_ptr_;
1883  value_type* buf_end = ((value_type*)buffer_.data()) + n_buf_size;
1884  while(!v)
1885  {
1886  ++pos_;
1887  if (++buf_ptr_ < buf_end)
1888  v = *buf_ptr_;
1889  else
1890  break;
1891  }
1892  if (pos_ >= sv_->size())
1893  {
1894  pos_ = bm::id_max;
1895  return;
1896  }
1897  if (buf_ptr_ >= buf_end)
1898  buf_ptr_ = 0;
1899  }
1900 }
1901 
1902 //---------------------------------------------------------------------
1903 
1904 template<class Val, class BV>
1906 {
1907  return sv_->is_null(pos_);
1908 }
1909 
1910 
1911 //---------------------------------------------------------------------
1912 //
1913 //---------------------------------------------------------------------
1914 
1915 
1916 template<class Val, class BV>
1918 : sv_(0), bv_null_(0), buf_ptr_(0)
1919 {}
1920 
1921 //---------------------------------------------------------------------
1922 
1923 template<class Val, class BV>
1926 : sv_(sv), buf_ptr_(0)
1927 {
1928  bv_null_ = sv_? sv_->get_null_bvect() : 0;
1929 }
1930 
1931 //---------------------------------------------------------------------
1932 
1933 template<class Val, class BV>
1936 : sv_(bi.sv_), bv_null_(bi.bv_null_), buf_ptr_(0)
1937 {
1938  BM_ASSERT(bi.empty());
1939 }
1940 
1941 //---------------------------------------------------------------------
1942 
1943 template<class Val, class BV>
1945 {
1946  this->flush();
1947 }
1948 
1949 //---------------------------------------------------------------------
1950 
1951 template<class Val, class BV>
1954 {
1955  unsigned buf_idx = this->add_value(v);
1956  if (bv_null_)
1957  {
1958  typename sparse_vector<Val, BV>::size_type sz = sv_->size();
1959  bv_null_->set_bit_no_check(sz + buf_idx);
1960  }
1961 }
1962 
1963 //---------------------------------------------------------------------
1964 
1965 template<class Val, class BV>
1968 {
1969  BM_ASSERT(sv_);
1970  if (!buf_ptr_) // not allocated (yet)
1971  {
1972  buffer_.reserve(n_buf_size * sizeof(value_type));
1973  buf_ptr_ = (value_type*)(buffer_.data());
1974  *buf_ptr_ = v;
1975  ++buf_ptr_;
1976  return 0;
1977  }
1978  if (buf_ptr_ - ((value_type*)buffer_.data()) >= n_buf_size)
1979  {
1980  this->flush();
1981  buf_ptr_ = (value_type*)(buffer_.data());
1982  }
1983 
1984  *buf_ptr_ = v;
1985  size_type buf_idx = size_type(buf_ptr_ - ((value_type*)buffer_.data()));
1986  ++buf_ptr_;
1987  return buf_idx;
1988 }
1989 
1990 //---------------------------------------------------------------------
1991 
1992 template<class Val, class BV>
1994 {
1995  this->add(value_type(0));
1996 }
1997 
1998 //---------------------------------------------------------------------
1999 
2000 template<class Val, class BV>
2003 {
2004  for (size_type i = 0; i < count; ++i) // TODO: optimization
2005  this->add(value_type(0));
2006 }
2007 
2008 //---------------------------------------------------------------------
2009 
2010 template<class Val, class BV>
2012 {
2013  return (!buf_ptr_ || !sv_);
2014 }
2015 
2016 //---------------------------------------------------------------------
2017 
2018 template<class Val, class BV>
2020 {
2021  if (this->empty())
2022  return;
2023  value_type* d = (value_type*)buffer_.data();
2024  sv_->import_back(d, size_type(buf_ptr_ - d));
2025  buf_ptr_ = 0;
2026 }
2027 
2028 //---------------------------------------------------------------------
2029 
2030 
2031 } // namespace bm
2032 
2033 #include "bmundef.h"
2034 
2035 
2036 #endif
memory allocation policy
Definition: bm.h:1241
std::input_iterator_tag iterator_category
Definition: bmsparsevec.h:149
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
back_insert_iterator & operator++(int)
noop
Definition: bmsparsevec.h:285
value_type at(size_type idx) const
access specified element with bounds checking
Definition: bmsparsevec.h:1373
size_type effective_vector_max() const
Always 1 (non-matrix type)
Definition: bmsparsevec.h:787
void filter(const bvector_type &bv_mask)
Apply value filter, defined by mask vector.
Definition: bmsparsevec.h:1733
bool is_nullable() const
check if container supports NULL(unassigned) values
Definition: bmbmatrix.h:312
bm::basic_bmatrix< BV > bmatrix_type
Definition: bmsparsevec.h:88
back_insert_iterator & operator=(value_type v)
push value to the vector
Definition: bmsparsevec.h:279
bvector_type * bvector_type_ptr
Definition: bmsparsevec.h:81
unsigned char * init_remap_buffer()
Definition: bmsparsevec.h:819
friend back_insert_iterator
Definition: bmsparsevec.h:324
Base class for bit-transposed sparse vector construction.
Definition: bmbmatrix.h:243
const unsigned set_word_shift
Definition: bmconst.h:64
const value_type & const_reference
Definition: bmsparsevec.h:83
unsigned gap_blocks
Number of GAP blocks.
Definition: bmfunc.h:57
sparse_vector_type::value_type value_type
Definition: bmsparsevec.h:153
void optimize_gap_size()
Optimize sizes of GAP blocks.
Definition: bm.h:3011
const unsigned set_array_shift
Definition: bmconst.h:83
back_insert_iterator & operator*()
noop
Definition: bmsparsevec.h:281
bm::byte_buffer< allocator_type > buffer_type
Definition: bmsparsevec.h:158
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmsparsevec.h:1584
bool valid() const
Returns true if iterator is at a valid position.
Definition: bmsparsevec.h:203
bool is_null() const
Get NULL status.
Definition: bmsparsevec.h:1905
static bool find_rank(bm::id_t rank, bm::id_t &pos)
find position of compressed element by its rank
Definition: bmsparsevec.h:1541
void add(value_type v)
add value to the container
Definition: bmsparsevec.h:1952
bm::id_t size_type
Definition: bmsparsevec.h:79
Rank-Select compressed sparse vector.
sparse_vector< Val, BV > & merge(sparse_vector< Val, BV > &sv)
merge with another sparse vector using OR operation Merge is different from join(), because it borrows data from the source vector, so it gets modified.
Definition: bmsparsevec.h:1660
Definition: bm.h:69
basic bit-matrix class and utilities
void add_null()
add NULL (no-value) to the container
Definition: bmsparsevec.h:1993
size_t max_serialize_mem
Estimated maximum of memory required for serialization.
Definition: bmfunc.h:60
pre-processor un-defines to avoid global space pollution (internal)
const bvector_type * row(size_type i) const
Definition: bmbmatrix.h:515
static unsigned rows()
Definition: bmtrans.h:61
void resize_internal(size_type sz)
Definition: bmsparsevec.h:813
#define BMNOEXEPT
Definition: bmdef.h:78
algorithms for sparse_vector scan/seach
size_type decode(value_type *arr, size_type idx_from, size_type dec_size, bool zero_mem=true) const
Bulk export list of elements to a C-style array.
Definition: bmsparsevec.h:994
sparse_vector_type::size_type size_type
Definition: bmsparsevec.h:254
Back insert iterator implements buffered insert, faster than generic access assignment.
Definition: bmsparsevec.h:245
static bool is_compressed()
trait if sparse vector is "compressed" (false)
Definition: bmsparsevec.h:474
void xor_swap(W &x, W &y)
XOR swap two scalar variables.
Definition: bmfunc.h:855
bvector_type::allocator_type allocator_type
Definition: bmsparsevec.h:256
unsigned idx_arr_block_lookup(const unsigned *idx, unsigned size, unsigned nb, unsigned start)
block boundaries look ahead
Definition: bmfunc.h:7136
back_insert_iterator & operator=(const back_insert_iterator &bi)
Definition: bmsparsevec.h:269
void inc(size_type idx)
increment specified element by one
Definition: bmsparsevec.h:1513
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:157
const bvector_type * get_null_bvector() const
Get bit-vector of assigned values or NULL (if not constructed that way)
Definition: bmbmatrix.h:318
void go_to(size_type pos)
re-position to a specified position
Definition: bmsparsevec.h:1828
sparse_vector< Val, BV > & join(const sparse_vector< Val, BV > &sv)
join all with another sparse vector using OR operation
Definition: bmsparsevec.h:1621
do not support NULL values
Definition: bmconst.h:193
const unsigned id_max
Definition: bmconst.h:44
void set_value_no_null(size_type idx, value_type v)
set value without checking boundaries or support of NULL
Definition: bmsparsevec.h:1464
Statistical information about bitset&#39;s memory allocation details.
Definition: bm.h:144
Algorithms for bvector<> (main include)
bool operator!=(const const_iterator &it) const
Definition: bmsparsevec.h:173
void optimize_gap_size()
Optimize sizes of GAP blocks.
Definition: bmsparsevec.h:1604
unsigned add_value(value_type v)
add value to the buffer without changing the NULL vector
Definition: bmsparsevec.h:1966
void set_allocator_pool(allocator_pool_type *pool_ptr)
Set allocator pool for local (non-threaded) memory cyclic(lots of alloc-free ops) opertations...
Definition: bmsparsevec.h:1777
void import_back(const value_type *arr, size_type arr_size)
Import list of elements from a C-style array (pushed back)
Definition: bmsparsevec.h:984
sort order unknown
Definition: bmconst.h:170
bvector_type::enumerator bvector_enumerator_type
Definition: bmsparsevec.h:86
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:75
null_support
NULL-able value support.
Definition: bmconst.h:190
unsigned bit_scan_reverse(T value)
Definition: bmfunc.h:156
size_t memory_used
Memory used by bitvector including temp and service blocks.
Definition: bmfunc.h:62
Alloc allocator_type
Definition: bm.h:138
input set is sorted (ascending order)
Definition: bmconst.h:168
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
Definition: bmsparsevec.h:257
void flush()
flush the accumulated buffer
Definition: bmsparsevec.h:2019
sort_order
Sort order declaration.
Definition: bmconst.h:165
void clear_bit_no_check(bm::id_t n)
Clears bit n without precondiion checks.
Definition: bm.h:1603
unsigned int word_t
Definition: bmconst.h:36
size_type extract(value_type *arr, size_type size, size_type offset=0, bool zero_mem=true, allocator_pool_type *pool_ptr=0) const
Bulk export list of elements to a C-style array.
Definition: bmsparsevec.h:1280
void for_each_bit(const BV &bv, Func &bit_functor)
bit-vector visitor scanner to traverse each 1 bit using C++ visitor
Definition: bmalgo.h:61
const_iterator & operator++(int)
Advance to the next available value.
Definition: bmsparsevec.h:192
void push_back(value_type v)
push value back into vector
Definition: bmsparsevec.h:1435
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
Definition: bmsparsevec.h:157
const_iterator begin() const
Provide const iterator access to container content.
Definition: bmsparsevec.h:1768
bvector< Alloc > & set_range(bm::id_t left, bm::id_t right, bool value=true)
Sets all bits in the specified closed interval [left,right] Interval must be inside the bvector&#39;s siz...
Definition: bm.h:2431
bvector_type::allocation_policy allocation_policy_type
Definition: bmsparsevec.h:85
bvector< Alloc > & set(bm::id_t n, bool val=true)
Sets bit n if val is true, clears bit n if val is false.
Definition: bm.h:3367
support "non-assigned" or "NULL" logic
Definition: bmconst.h:192
Basic dense bit-matrix class.
Definition: bmbmatrix.h:53
void resize(size_type new_size)
Definition: bmbmatrix.h:1199
unsigned gap_bfind(const T *buf, unsigned pos, unsigned *is_set)
Definition: bmfunc.h:1096
sparse_vector< Val, BV > sparse_vector_type
Definition: bmsparsevec.h:251
size_type extract_plains(value_type *arr, size_type size, size_type offset, bool zero_mem=true) const
extract medium window without use of masking vector
Definition: bmsparsevec.h:1235
bool operator==(const const_iterator &it) const
Definition: bmsparsevec.h:171
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition: bmfunc.h:1167
reference operator[](size_type idx)
Operator to get write access to an element.
Definition: bmsparsevec.h:389
bool empty() const
return true if vector is empty
Definition: bmsparsevec.h:593
sparse_vector< Val, BV > & clear_range(size_type left, size_type right, bool set_null=false)
clear range (assign bit 0 for all plains)
Definition: bmsparsevec.h:1552
bool empty() const
return true if insertion buffer is empty
Definition: bmsparsevec.h:2011
sparse_vector_type::bvector_type bvector_type
Definition: bmsparsevec.h:155
void resize(size_type sz)
resize vector
Definition: bmsparsevec.h:598
value_type value() const
Get current position (value)
Definition: bmsparsevec.h:1859
bool is_remap() const
Definition: bmsparsevec.h:816
void sync(bool)
syncronize internal structures
Definition: bmsparsevec.h:707
const unsigned set_array_mask
Definition: bmconst.h:84
unsigned short gap_word_t
Definition: bmconst.h:70
sparse_vector< Val, BV > sparse_vector_type
Definition: bmsparsevec.h:151
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster...
Definition: bmsparsevec.h:458
sparse_vector_type::value_type value_type
Definition: bmsparsevec.h:253
size_type extract_range(value_type *arr, size_type size, size_type offset, bool zero_mem=true) const
extract small window without use of masking vector
Definition: bmsparsevec.h:1155
sparse_vector_type * sparse_vector_type_ptr
Definition: bmsparsevec.h:152
size_t remap_size() const
Definition: bmsparsevec.h:817
BV::allocator_type allocator_type
Definition: bmsparsevec.h:84
unsigned ptr_sub_blocks
Number of sub-blocks.
Definition: bmfunc.h:58
static size_type translate_address(size_type i)
address translation for this type of container
Definition: bmsparsevec.h:756
bool inc(bm::id_t n)
Increment the specified element.
Definition: bm.h:3557
bvector_type::allocator_type allocator_type
Definition: bmsparsevec.h:156
void set_null(size_type idx)
set specified element to unassigned value (NULL)
Definition: bmsparsevec.h:900
void reset()
Reset statisctics.
Definition: bmfunc.h:89
sparse_vector_type::bvector_type bvector_type
Definition: bmsparsevec.h:255
bool is_null(size_type idx) const
test if specified element is NULL
Definition: bmbmatrix.h:1217
size_type gather(value_type *arr, const size_type *idx, size_type size, bm::sort_order sorted_idx) const
Gather elements to a C-style array.
Definition: bmsparsevec.h:1014
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:139
const unsigned char * get_remap_buffer() const
Definition: bmsparsevec.h:818
value_type operator[](size_type idx) const
get specified element without bounds checking
Definition: bmsparsevec.h:396
const_iterator & operator++()
Advance to the next available value.
Definition: bmsparsevec.h:189
Definitions(internal)
void copy_from(const base_sparse_vector< Val, BV, MAX_SIZE > &bsv)
Definition: bmbmatrix.h:1107
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:335
Serialize sparse vector into a memory buffer(s) structure.
reference(sparse_vector< Val, BV > &sv, size_type idx) BMNOEXEPT
Definition: bmsparsevec.h:109
void copy_range(const bvector< Alloc > &bvect, bm::id_t left, bm::id_t right)
Copy all bits in the specified closed interval [left,right].
Definition: bm.h:6127
static void throw_range_error(const char *err_msg)
throw range error
Definition: bmsparsevec.h:880
back_insert_iterator & operator++()
noop
Definition: bmsparsevec.h:283
const bvector_type * bvector_type_const_ptr
Definition: bmsparsevec.h:82
#define BM_IS_GAP(ptr)
Definition: bmdef.h:167
Utilities for bit transposition (internal) (experimental!)
void merge(bm::bvector< Alloc > &bvect)
Merge/move content from another vector.
Definition: bm.h:4312
void copy_range(const sparse_vector< Val, BV > &sv, size_type left, size_type right)
copy range of values from another sparse vector
Definition: bmsparsevec.h:1698
void calc_stat(struct sparse_vector< Val, BV >::statistics *st) const
Calculates memory statistics.
Definition: bmsparsevec.h:1564
size_type size() const
return size of the vector
Definition: bmsparsevec.h:588
std::output_iterator_tag iterator_category
Definition: bmsparsevec.h:249
const unsigned set_block_mask
Definition: bmconst.h:50
unsigned bit_blocks
Number of bit blocks.
Definition: bmfunc.h:56
bm::id_t pos() const
Current position (index) in the vector.
Definition: bmsparsevec.h:209
void clear() BMNOEXEPT
resize to zero, free memory
Definition: bmsparsevec.h:1533
bvector_type_const_ptr get_row(size_type i) const
Definition: bmbmatrix.h:525
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:601
optmode
Optimization mode Every next level means additional checks (better compression vs time) ...
Definition: bm.h:152
void bit_block_gather_scatter(TRGW *arr, const bm::word_t *blk, const IDX *idx, SZ size, unsigned start, unsigned bit_idx)
bit index to word gather-scatter algorithm
Definition: bmfunc.h:7086
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmsparsevec.h:87
#define BMGAP_PTR(ptr)
Definition: bmdef.h:165
Const iterator to traverse the sparse vector.
Definition: bmsparsevec.h:143
bool equal(const sparse_vector< Val, BV > &sv, bm::null_support null_able=bm::use_null) const
check if another sparse vector has the same content and size
Definition: bmsparsevec.h:1758
~sparse_vector() BMNOEXEPT
Definition: bmsparsevec.h:866
base_sparse_vector< Val, BV, 1 > parent_type
Definition: bmsparsevec.h:89
sparse vector de-serializer
reference & operator=(const reference &ref)
Definition: bmsparsevec.h:113
const unsigned set_word_mask
Definition: bmconst.h:65
Reference class to access elements via common [] operator.
Definition: bmsparsevec.h:106
bm::bvector< Alloc > & bit_and(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand AND : this := bv1 AND bv2
Definition: bm.h:4552
bool operator==(const reference &ref) const
Definition: bmsparsevec.h:123
unsigned int id_t
Definition: bmconst.h:35
void invalidate()
Invalidate current iterator.
Definition: bmsparsevec.h:206
input set is NOT sorted
Definition: bmconst.h:167
void advance()
advance iterator forward by one
Definition: bmsparsevec.h:1837
const_iterator get_const_iterator(size_type idx) const
Get const_itertor re-positioned to specific element.
Definition: bmsparsevec.h:452
size_type size_internal() const
Definition: bmsparsevec.h:814
unsigned short bitscan(V w, B *bits)
Definition: bmfunc.h:503
reference & operator=(value_type val)
Definition: bmsparsevec.h:118
#define BM_ASSERT
Definition: bmdef.h:116
void push_back_no_null(value_type v)
push value back into vector without NULL semantics
Definition: bmsparsevec.h:1444
void set_bit_no_check(bm::id_t n)
Set bit without checking preconditions (size, etc)
Definition: bm.h:3254
void set_allocator_pool(allocator_pool_type *pool_ptr)
Set allocator pool for local (non-threaded) memory cyclic(lots of alloc-free ops) opertations...
Definition: bm.h:1448
void set_value(size_type idx, value_type v)
set value without checking boundaries
Definition: bmsparsevec.h:1453
value_type operator*() const
Get current position (value)
Definition: bmsparsevec.h:185
bmatrix_type bmatr_
bit-transposed matrix
Definition: bmbmatrix.h:450
Structure with statistical information about bitset&#39;s memory allocation details.
Definition: bmfunc.h:54
const unsigned set_block_shift
Definition: bmconst.h:49
const_iterator end() const
Provide const iterator access to the end.
Definition: bmsparsevec.h:447
Mini-matrix for bit transposition purposes.
Definition: bmtrans.h:40
size_type effective_size() const
size of sparse vector (may be different for RSC)
Definition: bmsparsevec.h:782
sparse_vector_type * sparse_vector_type_ptr
Definition: bmsparsevec.h:252
const T * row(unsigned row_idx) const
Definition: bmtrans.h:64
static void throw_bad_alloc()
throw bad alloc
Definition: bmsparsevec.h:892
bm::byte_buffer< allocator_type > buffer_type
Definition: bmsparsevec.h:258
#define FULL_BLOCK_FAKE_ADDR
Definition: bmdef.h:136
sorted and in one block (internal!)
Definition: bmconst.h:169
sparse_vector_type::size_type size_type
Definition: bmsparsevec.h:154
sparse_vector(bm::null_support null_able=bm::no_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
Definition: bmsparsevec.h:836
void swap(sparse_vector< Val, BV > &sv) BMNOEXEPT
content exchange
Definition: bmsparsevec.h:872
sparse_vector< Val, BV > & operator=(const sparse_vector< Val, BV > &sv)
Definition: bmsparsevec.h:356