BitMagicC++Library
bmsparsevec.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_H__INCLUDED__
2 #define BMSPARSEVEC_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 #include <memory.h>
22 
23 #ifndef BM_NO_STL
24 #include <stdexcept>
25 #endif
26 
27 #include "bm.h"
28 #include "bmtrans.h"
29 #include "bmalgo.h"
30 #include "bmdef.h"
31 
32 namespace bm
33 {
34 
35 /** \defgroup svector Sparse vector
36  Sparse vector for integer types using bit transposition transform
37  \ingroup bmagic
38  */
39 
40 
41 /*!
42  \brief sparse vector with runtime compression using bit transposition method
43 
44  Sparse vector implements variable bit-depth storage model.
45  Initial data is bit-transposed into bit-planes so initial each element
46  may use less memory than the original native data type prescribes.
47  For example, 32-bit integer may only use 20 bits.
48 
49  Another level of compression is provided by bit-vector (BV template parameter)
50  used for storing bit planes. bvector<> implements varians of on the fly block
51  compression, so if a significant area of a sparse vector uses less bits - it
52  will save memory.
53 
54  Overall it provides variable bit-depth compression, sparse compression in
55  bit-plains.
56 
57  \ingroup svector
58 */
59 template<class Val, class BV>
61 {
62 public:
63  typedef Val value_type;
65  typedef BV bvector_type;
66  typedef bvector_type* bvector_type_ptr;
67  typedef const value_type& const_reference;
68  typedef typename BV::allocator_type allocator_type;
69  typedef typename bvector_type::allocation_policy allocation_policy_type;
70 
71  /*! Statistical information about memory allocation details. */
72  struct statistics : public bv_statistics
73  {};
74 
75  /**
76  Reference class to access elements via common [] operator
77  */
78  class reference
79  {
80  public:
82  : sv_(sv), idx_(idx)
83  {}
84  operator value_type() const { return sv_.get(idx_); }
86  {
87  sv_.set(idx_, (value_type)ref);
88  return *this;
89  }
90  reference& operator=(value_type val)
91  {
92  sv_.set(idx_, val);
93  return *this;
94  }
95  bool operator==(const reference& ref) const
96  { return bool(*this) == bool(ref); }
97  private:
99  size_type idx_;
100  };
101 
102 public:
103  /*!
104  \brief Sparse vector constructor
105  \param ap - allocation strategy for underlying bit-vectors
106  Default allocation policy uses BM_BIT setting (fastest access)
107  \param bv_max_size - maximum possible size of underlying bit-vectors
108  Please note, this is NOT size of svector itself, it is dynamic upper limit
109  which should be used very carefully if we surely know the ultimate size
110  \param alloc - allocator for bit-vectors
111 
112  \sa bvector<>
113  \sa bm::bvector<>::allocation_policy
114  \sa bm::startegy
115  */
116  sparse_vector(allocation_policy_type ap = allocation_policy_type(),
117  size_type bv_max_size = bm::id_max,
118  const allocator_type& alloc = allocator_type());
119 
120  /*! copy-ctor */
122 
123 
124 #ifndef BM_NO_CXX11
125  /*! move-ctor */
127 
128 
129  /*! move assignmment operator */
131  {
132  if (this != &sv)
133  {
134  clear();
135  swap(sv);
136  }
137  return *this;
138  }
139 #endif
140 
141  /*! copy assignmment operator */
143  {
144  if (this != &sv)
145  {
146  clear();
147  resize(sv.size());
148  bv_size_ = sv.bv_size_;
149  alloc_ = sv.alloc_;
150  effective_plains_ = sv.effective_plains_;
151 
152  for (size_type i = 0; i < sv.effective_plains(); ++i)
153  {
154  const bvector_type* bv = sv.plains_[i];
155  if (bv)
156  {
157 #ifdef BM_NO_STL // C compatibility mode
158  void* mem = ::malloc(sizeof(bvector_type));
159  if (mem == 0)
160  {
161  BM_ASSERT_THROW(false, BM_ERR_BADALLOC);
162  }
163  plains[i] = new(mem) bvector_type(*bv);
164 #else
165  plains_[i] = new bvector_type(*bv);
166 #endif
167  }
168  } // for i
169  }
170  return *this;
171  }
172 
173 
175 
176  reference operator[](size_type idx)
177  {
178  BM_ASSERT(idx < size_);
179  return reference(*this, idx);
180  }
181 
182 
183  /*! \brief content exchange
184  */
186 
187  /*!
188  \brief get specified element without bounds checking
189  \param idx - element index
190  \return value of the element
191  */
192  value_type operator[](size_type idx) const { return this->get(idx); }
193 
194  /*!
195  \brief Import list of elements from a C-style array
196  \param arr - source array
197  \param size - source size
198  \param offset - target index in the sparse vector
199  */
200  void import(const value_type* arr, size_type size, size_type offset = 0);
201 
202 
203  /*!
204  \brief Bulk export list of elements to a C-style array
205 
206  For efficiency, this is left as a low level function,
207  it does not do any bounds checking on the target array, it will
208  override memory and crash if you are not careful with allocation
209  and request size.
210 
211  \param arr - dest array
212  \param idx_from - index in the sparse vector to export from
213  \param size - decoding size (array allocation should match)
214  \param zero_mem - set to false if target array is pre-initialized
215  with 0s to avoid performance penalty
216 
217  \return number of actually exported elements (can be less than requested)
218 
219  \sa decode
220 
221  @internal
222  */
223  size_type decode(value_type* arr,
224  size_type idx_from,
225  size_type size,
226  bool zero_mem = true) const;
227 
228 
229 
230  /*! \brief return size of the vector
231  \return size of sparse vector
232  */
233  size_type size() const;
234 
235 
236  /*! \brief return true if vector is empty
237  \return true if empty
238  */
239  bool empty() const;
240 
241 
242  /*! \brief resize vector
243  \param sz - new size
244  */
245  void resize(size_type sz);
246 
247  /*! \brief resize to zero, free memory
248  */
249  void clear() BMNOEXEPT;
250 
251  /*!
252  \brief access specified element with bounds checking
253  \param idx - element index
254  \return value of the element
255  */
256  value_type at(size_type idx) const;
257 
258  /*!
259  \brief get specified element without bounds checking
260  \param idx - element index
261  \return value of the element
262  */
263  value_type get(bm::id_t idx) const;
264 
265  /*!
266  \brief set specified element with bounds checking and automatic resize
267  \param idx - element index
268  \param v - element value
269  */
270  void set(size_type idx, value_type v);
271 
272  /*!
273  \brief push value back into vector
274  \param v - element value
275  */
276  void push_back(value_type v);
277 
278  /*!
279  \brief check if another sparse vector has the same content and size
280  \return true, if it is the same
281  */
282  bool equal(const sparse_vector<Val, BV>& sv) const;
283 
284 
285  /*!
286  \brief run memory optimization for all vector plains
287  \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
288  \param opt_mode - requested compression depth
289  \param stat - memory allocation statistics after optimization
290  */
291  void optimize(bm::word_t* temp_block = 0,
292  typename bvector_type::optmode opt_mode = bvector_type::opt_compress,
293  typename sparse_vector<Val, BV>::statistics* stat = 0);
294  /*!
295  \brief Optimize sizes of GAP blocks
296 
297  This method runs an analysis to find optimal GAP levels for all bit plains
298  of the vector.
299  */
300  void optimize_gap_size();
301 
302  /*!
303  \brief join all with another sparse vector using OR operation
304  \param sv - argument vector to join with
305  \return slf reference
306  */
308 
309 
310  /*!
311  @brief Calculates memory statistics.
312 
313  @param st - pointer on statistics structure to be filled in.
314 
315  Function fills statistics structure containing information about how
316  this vector uses memory and estimation of max. amount of memory
317  bvector needs to serialize itself.
318 
319  @sa statistics
320  */
321  void calc_stat(struct sparse_vector<Val, BV>::statistics* st) const;
322 
323 
324  /*!
325  \brief get access to bit-plain, function checks and creates a plain
326  */
327  bvector_type_ptr get_plain(unsigned i);
328 
329  /*!
330  \brief get total number of bit-plains in the vector
331  */
332  static unsigned plains() { return unsigned(sizeof(Val)*8); }
333 
334  /*!
335  \brief get access to bit-plain as is (can return NULL)
336  */
337  bvector_type_ptr plain(unsigned i) { return plains_[i]; }
338  const bvector_type_ptr plain(unsigned i) const { return plains_[i]; }
339 
340  /*!
341  \brief free memory in bit-plain
342  */
343  void free_plain(unsigned i);
344 
345  /*!
346  \brief clear range (assign bit 0 for all plains)
347  \param left - interval start
348  \param right - interval end (closed interval)
349  */
350  sparse_vector<Val, BV>& clear_range(size_type left, size_type right);
351 
352 
353  // -------------------------- auxiliary methods, for internal use
354 
355  /*!
356  \brief Bulk export list of elements to a C-style array
357 
358  Use of all extract() methods is restricted.
359  Please consider decode() for the same purpose.
360 
361  \param arr - dest array
362  \param size - dest size
363  \param offset - target index in the sparse vector to export from
364  \param zero_mem - set to false if target array is pre-initialized
365  with 0s to avoid performance penalty
366 
367  \return number of exported elements
368 
369  \sa decode
370 
371  @internal
372  */
373  size_type extract(value_type* arr,
374  size_type size,
375  size_type offset = 0,
376  bool zero_mem = true) const;
377 
378  /** \brief extract small window without use of masking vector
379  \sa decode
380  @internal
381  */
382  size_type extract_range(value_type* arr,
383  size_type size,
384  size_type offset,
385  bool zero_mem = true) const;
386 
387  /** \brief extract medium window without use of masking vector
388  \sa decode
389  @internal
390  */
391  size_type extract_plains(value_type* arr,
392  size_type size,
393  size_type offset,
394  bool zero_mem = true) const;
395 
396 private:
397 
398  /*! \brief free all internal vectors
399  */
400  void free_vectors() BMNOEXEPT;
401 
402  unsigned effective_plains() const { return effective_plains_ + 1; }
403 
404 
405 protected:
406  /*! \brief set value without checking boundaries
407  */
408  void set_value(size_type idx, value_type v);
409  const bm::word_t* get_block(unsigned p, unsigned i, unsigned j) const;
410  void throw_range_error(const char* err_msg) const;
411 
412 private:
413 
414  size_type bv_size_;
415  allocator_type alloc_;
416  allocation_policy_type ap_;
417 
418  bvector_type_ptr plains_[sizeof(Val)*8];
419  size_type size_;
420  unsigned effective_plains_;
421 };
422 
423 //---------------------------------------------------------------------
424 
425 
426 template<class Val, class BV>
429  size_type bv_max_size,
430  const allocator_type& alloc)
431 : bv_size_(bv_max_size),
432  alloc_(alloc),
433  ap_(ap),
434  size_(0),
435  effective_plains_(0)
436 {
437  ::memset(plains_, 0, sizeof(plains_));
438 }
439 
440 //---------------------------------------------------------------------
441 
442 template<class Val, class BV>
444 : bv_size_ (sv.bv_size_),
445  alloc_(sv.alloc_),
446  ap_(sv.ap_),
447  size_(sv.size_),
448  effective_plains_(sv.effective_plains_)
449 {
450  if (this != &sv)
451  {
452  for (size_type i = 0; i < sizeof(Val)*8; ++i)
453  {
454  const bvector_type* bv = sv.plains_[i];
455  if (bv)
456  plains_[i] = new bvector_type(*bv);
457  else
458  plains_[i] = 0;
459  } // for i
460  }
461 }
462 
463 //---------------------------------------------------------------------
464 #ifndef BM_NO_CXX11
465 
466 template<class Val, class BV>
468 {
469  bv_size_ = 0;
470  alloc_ = sv.alloc_;
471  ap_ = sv.ap_;
472  size_ = sv.size_;
473  effective_plains_ = sv.effective_plains_;
474 
475  for (size_type i = 0; i < sizeof(Val)*8; ++i)
476  {
477  plains_[i] = sv.plains_[i];
478  sv.plains_[i] = 0;
479  }
480  sv.size_ = 0;
481 }
482 
483 #endif
484 
485 
486 
487 //---------------------------------------------------------------------
488 
489 template<class Val, class BV>
491 {
492  free_vectors();
493 }
494 
495 //---------------------------------------------------------------------
496 
497 template<class Val, class BV>
499 {
500  if (this != &sv)
501  {
502  bm::xor_swap(bv_size_, sv.bv_size_);
503 
504  allocator_type alloc_tmp = alloc_;
505  alloc_ = sv.alloc_;
506  sv.alloc_ = alloc_tmp;
507 
508  allocation_policy_type ap_tmp = ap_;
509  ap_ = sv.ap_;
510  sv.ap_ = ap_tmp;
511 
512  for (size_type i = 0; i < sizeof(Val)*8; ++i)
513  {
514  bvector_type* bv_tmp = plains_[i];
515  plains_[i] = sv.plains_[i];
516  sv.plains_[i] = bv_tmp;
517  } // for i
518 
519  bm::xor_swap(size_, sv.size_);
520  }
521 }
522 
523 //---------------------------------------------------------------------
524 
525 template<class Val, class BV>
526 void sparse_vector<Val, BV>::throw_range_error(const char* err_msg) const
527 {
528 #ifndef BM_NO_STL
529  throw std::range_error(err_msg);
530 #else
531  BM_ASSERT_THROW(false, BM_ERR_RANGE);
532 #endif
533 }
534 
535 //---------------------------------------------------------------------
536 
537 template<class Val, class BV>
539  size_type size,
540  size_type offset)
541 {
542  unsigned char b_list[sizeof(Val)*8];
543  unsigned row_len[sizeof(Val)*8] = {0, };
544 
545  const unsigned transpose_window = 256;
547 
548  if (size == 0)
549  {
550  throw_range_error("sparse_vector range error (import size 0)");
551  }
552 
553  // clear all plains in the range to provide corrrect import of 0 values
554  this->clear_range(offset, offset + size - 1);
555 
556  // transposition algorithm uses bitscen to find index bits and store it
557  // in temporary matrix (list for each bit plain), matrix here works
558  // when array gets to big - the list gets loaded into bit-vector using
559  // bulk load algorithm, which is faster than single bit access
560  //
561 
562  size_type i;
563  for (i = 0; i < size; ++i)
564  {
565  unsigned bcnt = bm::bitscan(arr[i], b_list);
566  const unsigned bit_idx = i + offset;
567 
568  for (unsigned j = 0; j < bcnt; ++j)
569  {
570  unsigned p = b_list[j];
571  unsigned rl = row_len[p];
572  tm.row(p)[rl] = bit_idx;
573  row_len[p] = ++rl;
574 
575  if (rl == transpose_window)
576  {
577  bvector_type* bv = get_plain(p);
578  const bm::id_t* r = tm.row(p);
579  bm::combine_or(*bv, r, r + rl);
580  row_len[p] = 0;
581  tm.row(p)[0] = 0;
582  }
583  } // for j
584  } // for i
585 
586  // process incomplete transposition lines
587  //
588  for (unsigned k = 0; k < tm.rows(); ++k)
589  {
590  unsigned rl = row_len[k];
591  if (rl)
592  {
593  bvector_type* bv = get_plain(k);
594  const bm::id_t* r = tm.row(k);
595  bm::combine_or(*bv, r, r + rl);
596  }
597  } // for k
598 
599 
600  if (i + offset > size_)
601  size_ = i + offset;
602 }
603 
604 //---------------------------------------------------------------------
605 
606 template<class Val, class BV>
609  size_type idx_from,
610  size_type size,
611  bool zero_mem) const
612 {
613  if (size < 32)
614  {
615  return extract_range(arr, size, idx_from, zero_mem);
616  }
617  if (size < 1024)
618  {
619  return extract_plains(arr, size, idx_from, zero_mem);
620  }
621  return extract(arr, size, idx_from, zero_mem);
622 }
623 
624 //---------------------------------------------------------------------
625 
626 template<class Val, class BV>
629  size_type size,
630  size_type offset,
631  bool zero_mem) const
632 {
633  if (size == 0)
634  return 0;
635  if (zero_mem)
636  ::memset(arr, 0, sizeof(value_type)*size);
637 
638  size_type start = offset;
639  size_type end = start + size;
640  if (end > size_)
641  {
642  end = size_;
643  }
644 
645  // calculate logical block coordinates and masks
646  //
647  unsigned nb = unsigned(start >> bm::set_block_shift);
648  unsigned i0 = nb >> bm::set_array_shift; // top block address
649  unsigned j0 = nb & bm::set_array_mask; // address in sub-block
650  unsigned nbit = unsigned(start & bm::set_block_mask);
651  unsigned nword = unsigned(nbit >> bm::set_word_shift);
652  unsigned mask0 = 1u << (nbit & bm::set_word_mask);
653  const bm::word_t* blk = 0;
654  unsigned is_set;
655 
656  for (unsigned j = 0; j < sizeof(Val)*8; ++j)
657  {
658  blk = get_block(j, i0, j0);
659  bool is_gap = BM_IS_GAP(blk);
660 
661  for (unsigned k = start; k < end; ++k)
662  {
663  unsigned nb1 = unsigned(k >> bm::set_block_shift);
664  if (nb1 != nb) // block switch boundaries
665  {
666  nb = nb1;
667  i0 = nb >> bm::set_array_shift;
668  j0 = nb & bm::set_array_mask;
669  blk = get_block(j, i0, j0);
670  is_gap = BM_IS_GAP(blk);
671  }
672 
673  if (!blk)
674  continue;
675  nbit = unsigned(k & bm::set_block_mask);
676  if (is_gap)
677  {
678  is_set = bm::gap_test_unr(BMGAP_PTR(blk), nbit);
679  }
680  else
681  {
682  nword = unsigned(nbit >> bm::set_word_shift);
683  mask0 = 1u << (nbit & bm::set_word_mask);
684  is_set = (blk[nword] & mask0);
685  }
686  size_type idx = k - offset;
687  value_type vm = (bool) is_set;
688  vm <<= j;
689  arr[idx] |= vm;
690 
691  } // for k
692 
693  } // for j
694  return 0;
695 }
696 
697 //---------------------------------------------------------------------
698 
699 template<class Val, class BV>
702  size_type size,
703  size_type offset,
704  bool zero_mem) const
705 {
706  if (size == 0)
707  return 0;
708 
709  if (zero_mem)
710  ::memset(arr, 0, sizeof(value_type)*size);
711 
712  size_type start = offset;
713  size_type end = start + size;
714  if (end > size_)
715  {
716  end = size_;
717  }
718 
719  for (size_type i = 0; i < sizeof(Val) * 8; ++i)
720  {
721  const bvector_type* bv = plains_[i];
722  if (!bv)
723  continue;
724 
725  value_type mask = 1;
726  mask <<= i;
727  typename BV::enumerator en(bv, offset);
728  for (;en.valid(); ++en)
729  {
730  size_type idx = *en - offset;
731  if (idx >= size)
732  break;
733  arr[idx] |= mask;
734  } // for
735 
736  } // for i
737 
738  return 0;
739 }
740 
741 
742 //---------------------------------------------------------------------
743 
744 template<class Val, class BV>
747  size_type size,
748  size_type offset,
749  bool zero_mem) const
750 {
751  /// Decoder functor
752  /// @internal
753  ///
754  struct sv_decode_visitor_func
755  {
756  sv_decode_visitor_func(value_type* arr,
757  value_type mask,
758  size_type offset)
759  : arr_(arr), mask_(mask), off_(offset)
760  {}
761 
762  void add_bits(bm::id_t offset, const unsigned char* bits, unsigned size)
763  {
764  size_type idx_base = offset - off_;
765  for (unsigned i = 0; i < size; ++i)
766  {
767  size_type idx = idx_base + bits[i];
768  arr_[idx] |= mask_;
769  }
770 
771  }
772  void add_range(bm::id_t offset, unsigned size)
773  {
774  size_type idx_base = offset - off_;
775  for (unsigned i = 0; i < size; ++i)
776  {
777  arr_[i + idx_base] |= mask_;
778  }
779  }
780  value_type* arr_;
781  value_type mask_;
782  size_type off_;
783  };
784 
785 
786  if (size == 0)
787  return 0;
788 
789  if (zero_mem)
790  ::memset(arr, 0, sizeof(value_type)*size);
791 
792  size_type start = offset;
793  size_type end = start + size;
794  if (end > size_)
795  {
796  end = size_;
797  }
798 
799  bool masked_scan = !(offset == 0 && size == this->size());
800 
801  if (masked_scan) // use temp vector to decompress the area
802  {
803  // for large array extraction use logical opartions
804  // (faster due to vectorization)
805  bvector_type bv_mask;
806  for (size_type i = 0; i < sizeof(Val) * 8; ++i)
807  {
808  const bvector_type* bv = plains_[i];
809  if (bv)
810  {
811  bv_mask.set_range(offset, end - 1);
812  bv_mask.bit_and(*bv);
813 
814  sv_decode_visitor_func func(arr, (value_type(1) << i), offset);
815  bm::for_each_bit(bv_mask, func);
816  bv_mask.clear();
817  }
818  } // for i
819  }
820  else
821  {
822  for (size_type i = 0; i < sizeof(Val)*8; ++i)
823  {
824  const bvector_type* bv = plains_[i];
825  if (bv)
826  {
827  sv_decode_visitor_func func(arr, (value_type(1) << i), 0);
828  bm::for_each_bit(*bv, func);
829  }
830  } // for i
831  }
832 
833  return 0;
834 }
835 
836 //---------------------------------------------------------------------
837 
838 template<class Val, class BV>
841 {
842  return size_;
843 }
844 
845 //---------------------------------------------------------------------
846 
847 template<class Val, class BV>
849 {
850  return (size_ == 0);
851 }
852 
853 
854 //---------------------------------------------------------------------
855 
856 template<class Val, class BV>
858 {
859  if (sz == size_) // nothing to do
860  return;
861 
862  if (!sz) // resize to zero is an equivalent of non-destructive deallocation
863  {
864  clear();
865  return;
866  }
867 
868  if (sz < size_) // vector shrink
869  {
870  this->clear_range(sz, size_-1); // clear the tails
871  }
872 
873  size_ = sz;
874 }
875 
876 //---------------------------------------------------------------------
877 
878 
879 template<class Val, class BV>
882 {
883  BM_ASSERT(i < (sizeof(Val)*8));
884 
885  bvector_type_ptr bv = plains_[i];
886  if (!bv)
887  {
888 #ifdef BM_NO_STL // C compatibility mode
889  void* mem = ::malloc(sizeof(bvector_type));
890  if (mem == 0)
891  {
892  BM_ASSERT_THROW(false, BM_ERR_BADALLOC);
893  }
894  bv = new(mem) bvector_type(ap_.strat, ap_.glevel_len,
895  bv_size_,
896  alloc_);
897 
898 #else
899  bv = new bvector_type(ap_.strat, ap_.glevel_len,
900  bv_size_,
901  alloc_);
902 #endif
903 
904  bv->init();
905  plains_[i] = bv;
906  if (i > effective_plains_)
907  effective_plains_ = i;
908  }
909  return bv;
910 }
911 
912 //---------------------------------------------------------------------
913 
914 template<class Val, class BV>
917 {
918  if (idx >= size_)
919  throw_range_error("sparse vector range error");
920  return this->get(idx);
921 }
922 
923 //---------------------------------------------------------------------
924 
925 template<class Val, class BV>
926 const bm::word_t* sparse_vector<Val, BV>::get_block(unsigned p, unsigned i, unsigned j) const
927 {
928  const bvector_type* bv = this->plains_[p];
929  if (bv)
930  {
931  const typename bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
932  return bman.get_block(i, j);
933  }
934  return 0;
935 }
936 
937 //---------------------------------------------------------------------
938 
939 template<class Val, class BV>
942 {
943  BM_ASSERT(i < size_);
944 
945  value_type v = 0;
946 
947  // calculate logical block coordinates and masks
948  //
949  unsigned nb = unsigned(i >> bm::set_block_shift);
950  unsigned i0 = nb >> bm::set_array_shift; // top block address
951  unsigned j0 = nb & bm::set_array_mask; // address in sub-block
952  unsigned nbit = unsigned(i & bm::set_block_mask);
953  unsigned nword = unsigned(nbit >> bm::set_word_shift);
954  unsigned mask0 = 1u << (nbit & bm::set_word_mask);
955  const bm::word_t* blk;
956  const bm::word_t* blka[4];
957 
958  unsigned eff_plains = effective_plains();
959  for (unsigned j = 0; j < eff_plains; j+=4)
960  {
961  bool b = plains_[j+0] || plains_[j+1] || plains_[j+2] || plains_[j+3];
962  if (!b)
963  continue;
964 
965  blka[0] = get_block(j+0, i0, j0);
966  blka[1] = get_block(j+1, i0, j0);
967  blka[2] = get_block(j+2, i0, j0);
968  blka[3] = get_block(j+3, i0, j0);
969 
970  if ((blk = blka[0+0])!=0)
971  {
972  unsigned is_set = (BM_IS_GAP(blk)) ? bm::gap_test_unr(BMGAP_PTR(blk), nbit) : (blk[nword] & mask0);
973  value_type vm = (bool) is_set;
974  vm <<= (j+0);
975  v |= vm;
976  }
977  if ((blk = blka[0+1])!=0)
978  {
979  unsigned is_set = (BM_IS_GAP(blk)) ? bm::gap_test_unr(BMGAP_PTR(blk), nbit) : (blk[nword] & mask0);
980  value_type vm = (bool) is_set;
981  vm <<= (j+1);
982  v |= vm;
983  }
984  if ((blk = blka[0+2])!=0)
985  {
986  unsigned is_set = (BM_IS_GAP(blk)) ? bm::gap_test_unr(BMGAP_PTR(blk), nbit) : (blk[nword] & mask0);
987  value_type vm = (bool) is_set;
988  vm <<= (j+2);
989  v |= vm;
990  }
991  if ((blk = blka[0+3])!=0)
992  {
993  unsigned is_set = (BM_IS_GAP(blk)) ? bm::gap_test_unr(BMGAP_PTR(blk), nbit) : (blk[nword] & mask0);
994  value_type vm = (bool) is_set;
995  vm <<= (j+3);
996  v |= vm;
997  }
998 
999  } // for j
1000 
1001  return v;
1002 }
1003 
1004 
1005 //---------------------------------------------------------------------
1006 
1007 template<class Val, class BV>
1009 {
1010  if (idx >= size_)
1011  {
1012  size_ = idx+1;
1013  }
1014  set_value(idx, v);
1015 }
1016 
1017 //---------------------------------------------------------------------
1018 
1019 template<class Val, class BV>
1021 {
1022  set_value(size_, v);
1023  ++size_;
1024 }
1025 
1026 //---------------------------------------------------------------------
1027 
1028 template<class Val, class BV>
1030 {
1031  // calculate logical block coordinates and masks
1032  //
1033  unsigned nb = unsigned(idx >> bm::set_block_shift);
1034  unsigned i0 = nb >> bm::set_array_shift; // top block address
1035  unsigned j0 = nb & bm::set_array_mask; // address in sub-block
1036 
1037  // clear the plains where needed
1038  unsigned eff_plains = effective_plains();
1039  for (unsigned i = 0; i < eff_plains; ++i)
1040  {
1041  const bm::word_t* blk = get_block(i, i0, j0);
1042  if (blk)
1043  {
1044  BM_ASSERT(plains_[i]);
1045  bvector_type* bv = plains_[i];
1046  bv->clear_bit(idx);
1047  }
1048  }
1049 
1050  // set bits in plains
1051  unsigned b_list[sizeof(Val) * 8];
1052  unsigned bcnt = bm::bitscan(v, b_list);
1053 
1054  for (unsigned j = 0; j < bcnt; ++j)
1055  {
1056  unsigned p = b_list[j];
1057  bvector_type* bv = get_plain(p);
1058  bv->set_bit_no_check(idx);
1059  } // for j
1060 }
1061 
1062 //---------------------------------------------------------------------
1063 
1064 template<class Val, class BV>
1066 {
1067  free_vectors();
1068  size_ = 0;
1069  effective_plains_ = 0;
1070  ::memset(plains_, 0, sizeof(plains_));
1071 }
1072 
1073 //---------------------------------------------------------------------
1074 
1075 
1076 template<class Val, class BV>
1078 {
1079  for (size_type i = 0; i < sizeof(Val)*8; ++i)
1080  delete plains_[i];
1081 }
1082 
1083 //---------------------------------------------------------------------
1084 
1085 
1086 template<class Val, class BV>
1088 {
1089  BM_ASSERT(i < sizeof(Val)*8);
1090  bvector_type* bv = plains_[i];
1091  delete bv;
1092  plains_[i] = 0;
1093 }
1094 
1095 //---------------------------------------------------------------------
1096 
1097 template<class Val, class BV>
1100  typename sparse_vector<Val, BV>::size_type left,
1101  typename sparse_vector<Val, BV>::size_type right)
1102 {
1103  if (right < left)
1104  {
1105  return clear_range(right, left);
1106  }
1107  unsigned eff_plains = effective_plains();
1108  for (unsigned i = 0; i < eff_plains; ++i)
1109  {
1110  bvector_type* bv = plains_[i];
1111  if (bv)
1112  {
1113  bv->set_range(left, right, false);
1114  }
1115  } // for i
1116 
1117  return *this;
1118 }
1119 
1120 //---------------------------------------------------------------------
1121 
1122 template<class Val, class BV>
1124  struct sparse_vector<Val, BV>::statistics* st) const
1125 {
1126  BM_ASSERT(st);
1127 
1128  st->bit_blocks = st->gap_blocks = 0;
1129  st->max_serialize_mem = st->memory_used = 0;
1130 
1131  unsigned eff_plains = effective_plains();
1132  for (unsigned j = 0; j < eff_plains; ++j)
1133  {
1134  const bvector_type* bv = this->plains_[j];
1135  if (bv)
1136  {
1137  typename bvector_type::statistics stbv;
1138  bv->calc_stat(&stbv);
1139 
1140  st->bit_blocks += stbv.bit_blocks;
1141  st->gap_blocks += stbv.gap_blocks;
1142  st->max_serialize_mem += stbv.max_serialize_mem + 8;
1143  st->memory_used += stbv.memory_used;
1144  }
1145  } // for j
1146  // header accounting
1147  st->max_serialize_mem += 1 + 1 + 1 + 1 + 8 + (8 * sizeof(Val) * 8);
1148 
1149 }
1150 
1151 //---------------------------------------------------------------------
1152 
1153 template<class Val, class BV>
1155  bm::word_t* temp_block,
1156  typename bvector_type::optmode opt_mode,
1158 {
1159  if (st)
1160  {
1161  st->bit_blocks = st->gap_blocks = 0;
1162  st->max_serialize_mem = st->memory_used = 0;
1163  }
1164  unsigned eff_plains = effective_plains();
1165  for (unsigned j = 0; j < eff_plains; ++j)
1166  {
1167  bvector_type* bv = this->plains_[j];
1168  if (bv)
1169  {
1170  if (!bv->any()) // empty vector?
1171  {
1172  delete this->plains_[j];
1173  this->plains_[j] = 0;
1174  continue;
1175  }
1176 
1177  typename bvector_type::statistics stbv;
1178  bv->optimize(temp_block, opt_mode, &stbv);
1179 
1180  if (st)
1181  {
1182  st->bit_blocks += stbv.bit_blocks;
1183  st->gap_blocks += stbv.gap_blocks;
1184  st->max_serialize_mem += stbv.max_serialize_mem + 8;
1185  st->memory_used += stbv.memory_used;
1186  }
1187 
1188  }
1189  } // for j
1190 
1191 }
1192 
1193 //---------------------------------------------------------------------
1194 
1195 
1196 template<class Val, class BV>
1198 {
1199  unsigned eff_plains = effective_plains();
1200  for (unsigned j = 0; j < eff_plains; ++j)
1201  {
1202  bvector_type* bv = this->plains_[j];
1203  if (bv)
1204  {
1205  bv->optimize_gap_size();
1206  }
1207  }
1208 }
1209 
1210 //---------------------------------------------------------------------
1211 
1212 template<class Val, class BV>
1215 {
1216  size_type arg_size = sv.size();
1217  if (size_ < arg_size)
1218  {
1219  resize(arg_size);
1220  }
1221  unsigned arg_eff_plains = sv.effective_plains();
1222  for (unsigned j = 0; j < arg_eff_plains; ++j)
1223  {
1224  bvector_type* arg_bv = sv.plains_[j];
1225  if (arg_bv)
1226  {
1227  bvector_type* bv = this->plains_[j];
1228  if (!bv)
1229  {
1230  bv = get_plain(j);
1231  }
1232  *bv |= *arg_bv;
1233  }
1234  } // for j
1235  return *this;
1236 }
1237 
1238 
1239 //---------------------------------------------------------------------
1240 
1241 template<class Val, class BV>
1243 {
1244  size_type arg_size = sv.size();
1245  if (size_ != arg_size)
1246  {
1247  return false;
1248  }
1249 
1250  unsigned eff_plains = effective_plains();
1251  unsigned arg_eff_plains = sv.effective_plains();
1252  if (arg_eff_plains > eff_plains)
1253  eff_plains = arg_eff_plains;
1254 
1255  for (unsigned j = 0; j < eff_plains; ++j)
1256  {
1257  const bvector_type* bv = plains_[j];
1258  const bvector_type* arg_bv = sv.plains_[j];
1259  if (!bv && !arg_bv)
1260  continue;
1261  // check if any not NULL and not empty
1262  if (!bv && arg_bv)
1263  {
1264  if (arg_bv->any())
1265  return false;
1266  }
1267  if (bv && !arg_bv)
1268  {
1269  if (bv->any())
1270  return false;
1271  }
1272  // both not NULL
1273  int cmp = bv->compare(*arg_bv);
1274  if (cmp != 0)
1275  return false;
1276  } // for j
1277  return true;
1278 }
1279 
1280 //---------------------------------------------------------------------
1281 
1282 
1283 } // namespace bm
1284 
1285 #include "bmundef.h"
1286 
1287 
1288 #endif
value_type at(size_type idx) const
access specified element with bounds checking
Definition: bmsparsevec.h:916
bvector_type_ptr get_plain(unsigned i)
get access to bit-plain, function checks and creates a plain
Definition: bmsparsevec.h:881
void free_plain(unsigned i)
free memory in bit-plain
Definition: bmsparsevec.h:1087
size_type size() const
return size of the vector
Definition: bmsparsevec.h:840
const bm::word_t * get_block(unsigned p, unsigned i, unsigned j) const
Definition: bmsparsevec.h:926
bvector_type * bvector_type_ptr
Definition: bmsparsevec.h:66
void throw_range_error(const char *err_msg) const
Definition: bmsparsevec.h:526
const unsigned set_word_shift
Definition: bmconst.h:54
const value_type & const_reference
Definition: bmsparsevec.h:67
unsigned gap_blocks
Number of GAP blocks.
Definition: bmfunc.h:55
static unsigned plains()
get total number of bit-plains in the vector
Definition: bmsparsevec.h:332
const unsigned set_array_shift
Definition: bmconst.h:73
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector plains
Definition: bmsparsevec.h:1154
bm::id_t size_type
Definition: bmsparsevec.h:64
Definition: bm.h:59
size_t max_serialize_mem
Estimated maximum of memory required for serialization.
Definition: bmfunc.h:57
static unsigned rows()
Definition: bmtrans.h:56
#define BMNOEXEPT
Definition: bmdef.h:74
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1148
void xor_swap(W &x, W &y)
XOR swap two scalar variables.
Definition: bmfunc.h:324
sparse_vector< Val, BV > & join(const sparse_vector< Val, BV > &sv)
join all with another sparse vector using OR operation
Definition: bmsparsevec.h:1214
const unsigned id_max
Definition: bmconst.h:40
void optimize_gap_size()
Optimize sizes of GAP blocks.
Definition: bmsparsevec.h:1197
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:60
size_t memory_used
Memory used by bitvector including temp and service blocks.
Definition: bmfunc.h:59
unsigned int word_t
Definition: bmconst.h:35
void for_each_bit(const BV &bv, Func &bit_functor)
bit-vector visitor scanner to traverse each 1 bit using C++ visitor
Definition: bmalgo.h:41
bool equal(const sparse_vector< Val, BV > &sv) const
check if another sparse vector has the same content and size
Definition: bmsparsevec.h:1242
void push_back(value_type v)
push value back into vector
Definition: bmsparsevec.h:1020
bvector_type::allocation_policy allocation_policy_type
Definition: bmsparsevec.h:69
size_type extract_plains(value_type *arr, size_type size, size_type offset, bool zero_mem=true) const
extract medium window without use of masking vector
Definition: bmsparsevec.h:701
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition: bmfunc.h:525
reference operator[](size_type idx)
Definition: bmsparsevec.h:176
bool empty() const
return true if vector is empty
Definition: bmsparsevec.h:848
void resize(size_type sz)
resize vector
Definition: bmsparsevec.h:857
const unsigned set_array_mask
Definition: bmconst.h:74
blocks_manager< Alloc > blocks_manager_type
Definition: bm.h:113
size_type extract_range(value_type *arr, size_type size, size_type offset, bool zero_mem=true) const
extract small window without use of masking vector
Definition: bmsparsevec.h:628
sparse_vector(allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
Sparse vector constructor.
Definition: bmsparsevec.h:427
size_type extract(value_type *arr, size_type size, size_type offset=0, bool zero_mem=true) const
Bulk export list of elements to a C-style array.
Definition: bmsparsevec.h:746
BV::allocator_type allocator_type
Definition: bmsparsevec.h:68
sparse_vector< Val, BV > & clear_range(size_type left, size_type right)
clear range (assign bit 0 for all plains)
Definition: bmsparsevec.h:1099
value_type operator[](size_type idx) const
get specified element without bounds checking
Definition: bmsparsevec.h:192
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:289
reference(sparse_vector< Val, BV > &sv, size_type idx) BMNOEXEPT
Definition: bmsparsevec.h:81
bvector_type_ptr plain(unsigned i)
get access to bit-plain as is (can return NULL)
Definition: bmsparsevec.h:337
#define BM_IS_GAP(ptr)
Definition: bmdef.h:161
void calc_stat(struct sparse_vector< Val, BV >::statistics *st) const
Calculates memory statistics.
Definition: bmsparsevec.h:1123
const unsigned set_block_mask
Definition: bmconst.h:46
unsigned bit_blocks
Number of bit blocks.
Definition: bmfunc.h:53
void clear() BMNOEXEPT
resize to zero, free memory
Definition: bmsparsevec.h:1065
optmode
Optimization mode Every next level means additional checks (better compression vs time) ...
Definition: bm.h:1663
#define BMGAP_PTR(ptr)
Definition: bmdef.h:159
~sparse_vector() BMNOEXEPT
Definition: bmsparsevec.h:490
reference & operator=(const reference &ref)
Definition: bmsparsevec.h:85
const unsigned set_word_mask
Definition: bmconst.h:55
Reference class to access elements via common [] operator.
Definition: bmsparsevec.h:78
bool operator==(const reference &ref) const
Definition: bmsparsevec.h:95
unsigned int id_t
Definition: bmconst.h:34
unsigned short bitscan(V w, B *bits)
Definition: bmfunc.h:5000
reference & operator=(value_type val)
Definition: bmsparsevec.h:90
#define BM_ASSERT
Definition: bmdef.h:112
void set_value(size_type idx, value_type v)
set value without checking boundaries
Definition: bmsparsevec.h:1029
Structure with statistical information about bitset&#39;s memory allocation details.
Definition: bmfunc.h:50
const unsigned set_block_shift
Definition: bmconst.h:45
size_type decode(value_type *arr, size_type idx_from, size_type size, bool zero_mem=true) const
Bulk export list of elements to a C-style array.
Definition: bmsparsevec.h:608
Mini-matrix for bit transposition purposes.
Definition: bmtrans.h:35
const T * row(unsigned row_idx) const
Definition: bmtrans.h:59
const bvector_type_ptr plain(unsigned i) const
Definition: bmsparsevec.h:338
sparse_vector< Val, BV > & operator=(sparse_vector< Val, BV > &&sv) BMNOEXEPT
Definition: bmsparsevec.h:130
void swap(sparse_vector< Val, BV > &sv) BMNOEXEPT
content exchange
Definition: bmsparsevec.h:498