BitMagic-C++
bmsparsevec_compr.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_COMPR_H__INCLUDED__
2#define BMSPARSEVEC_COMPR_H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21/*! \file bmsparsevec_compr.h
22 \brief Compressed sparse container rsc_sparse_vector<> for integer types
23*/
24
25#include <memory.h>
26
27#ifndef BM_NO_STL
28#include <stdexcept>
29#endif
30
31#ifndef BM__H__INCLUDED__
32// BitMagic utility headers do not include main "bm.h" declaration
33// #include "bm.h" or "bm64.h" explicitly
34# error missing include (bm.h or bm64.h)
35#endif
36
37
38#include "bmsparsevec.h"
39#include "bmdef.h"
40
41namespace bm
42{
43
44
45/*!
46 \brief Rank-Select compressed sparse vector
47
48 Container uses Rank-Select method of compression, where
49 all NULL columns gets dropped, effective address of columns is computed
50 using address bit-vector.
51
52 Use for cases, where memory efficiency is preferable over
53 single element access latency.
54
55 \ingroup sv
56*/
57template<class Val, class SV>
59{
60public:
62 {
63 sv_slices = (sizeof(Val) * 8 + 1),
64 sv_value_slices = (sizeof(Val) * 8)
65 };
66
67 typedef Val value_type;
69 typedef typename SV::size_type size_type;
71 typedef typename SV::const_iterator sparse_vector_const_iterator;
79 typedef typename SV::bmatrix_type bmatrix_type;
80 typedef typename SV::unsigned_value_type unsigned_value_type;
81
83 {
85 };
86
87 struct is_remap_support { enum trait { value = false }; };
88 struct is_rsc_support { enum trait { value = true }; };
89 struct is_dynamic_splices { enum trait { value = false }; };
90
91public:
92 /*! Statistical information about memory allocation details. */
93 struct statistics : public bv_statistics
94 {};
95
96public:
97 /**
98 Reference class to access elements via common [] operator
99 */
101 {
102 public:
104 : csv_(csv), idx_(idx)
105 {}
106 operator value_type() const BMNOEXCEPT { return csv_.get(idx_); }
107 bool operator==(const reference& ref) const BMNOEXCEPT
108 { return bool(*this) == bool(ref); }
109 bool is_null() const BMNOEXCEPT { return csv_.is_null(idx_); }
110 private:
112 size_type idx_;
113 };
114
115 /**
116 Const iterator to traverse the rsc sparse vector.
117
118 Implementation uses buffer for decoding so, competing changes
119 to the original vector may not match the iterator returned values.
120
121 This iterator keeps an operational buffer, memory footprint is not
122 negligable
123
124 @ingroup sv
125 */
127 {
128 public:
129 friend class rsc_sparse_vector;
130
131#ifndef BM_NO_STL
132 typedef std::input_iterator_tag iterator_category;
133#endif
140 typedef typename
142 typedef bm::byte_buffer<allocator_type> buffer_type;
143
144 typedef unsigned difference_type;
145 typedef unsigned* pointer;
147
148 public:
153
154 bool operator==(const const_iterator& it) const BMNOEXCEPT
155 { return (pos_ == it.pos_) && (csv_ == it.csv_); }
157 { return ! operator==(it); }
159 { return pos_ < it.pos_; }
161 { return pos_ <= it.pos_; }
163 { return pos_ > it.pos_; }
165 { return pos_ >= it.pos_; }
166
167 /// \brief Get current position (value)
168 value_type operator*() const { return this->value(); }
169
170
171 /// \brief Advance to the next available value
172 const_iterator& operator++() BMNOEXCEPT { this->advance(); return *this; }
173
174 /// \brief Advance to the next available value
176 { const_iterator tmp(*this);this->advance(); return tmp; }
177
178
179 /// \brief Get current position (value)
181
182 /// \brief Get NULL status
183 bool is_null() const BMNOEXCEPT;
184
185 /// Returns true if iterator is at a valid position
186 bool valid() const BMNOEXCEPT { return pos_ != bm::id_max; }
187
188 /// Invalidate current iterator
190
191 /// Current position (index) in the vector
192 size_type pos() const BMNOEXCEPT{ return pos_; }
193
194 /// re-position to a specified position
196
197 /// advance iterator forward by one
198 /// @return true if it is still valid
200
202 private:
203 enum buf_size_e
204 {
205 n_buf_size = 1024 * 8
206 };
207
208 private:
209 const rsc_sparse_vector_type* csv_; ///!< ptr to parent
210 size_type pos_; ///!< Position
211 mutable buffer_type vbuffer_; ///!< value buffer
212 mutable buffer_type tbuffer_; ///!< temp buffer
213 mutable value_type* buf_ptr_; ///!< position in the buffer
214 };
215
216
217
218 /**
219 Back insert iterator implements buffered insert, faster than generic
220 access assignment.
221
222 Limitations for buffered inserter:
223 1. Do not use more than one inserter per vector at a time
224 2. Use method flush() at the end to send the rest of accumulated buffer
225 flush is happening automatically on destruction, but if flush produces an
226 exception (for whatever reason) it will be an exception in destructor.
227 As such, explicit flush() is safer way to finilize the sparse vector load.
228
229 @ingroup sv
230 */
232 {
233 public:
234#ifndef BM_NO_STL
235 typedef std::output_iterator_tag iterator_category;
236#endif
242
243 typedef void difference_type;
244 typedef void pointer;
245 typedef void reference;
246
247 public:
248
249 /*! @name Construction and assignment */
250 ///@{
251
254
256 void operator=(const back_insert_iterator& bi)
257 {
258 BM_ASSERT(bi.empty());
259 this->flush(); sv_bi_ = bi.sv_bi_;
260 }
261
263 ///@}
264
265 /** push value to the vector */
267 { this->add(v); return *this; }
268 /** noop */
269 back_insert_iterator& operator*() { return *this; }
270 /** noop */
271 back_insert_iterator& operator++() { return *this; }
272 /** noop */
273 back_insert_iterator& operator++( int ) { return *this; }
274
275 /** add value to the container*/
277
278 /** add NULL (no-value) to the container */
280
281 /** add a series of consequitve NULLs (no-value) to the container */
283
284 /** flush the accumulated buffer */
285 void flush();
286 protected:
287
288 /** add value to the buffer without changing the NULL vector
289 @param v - value to push back
290 @return index of added value in the internal buffer
291 @internal
292 */
293 ///size_type add_value(value_type v);
294
296 typedef
298 private:
299 rsc_sparse_vector_type* csv_; ///!< pointer on the parent vector
300 sparse_vector_bi sv_bi_;
301 };
302
303public:
304 // ------------------------------------------------------------
305 /*! @name Construction and assignment */
306
307 //@{
308
311 size_type bv_max_size = bm::id_max,
312 const allocator_type& alloc = allocator_type());
313
314 /**
315 Contructor to pre-initialize the list of assigned (not NULL) elements.
316
317 If the list of not NULL elements is known upfront it can help to
318 pre-declare it, enable rank-select index and then use set function.
319 This scenario gives significant speed boost, comparing random assignment
320
321 @param bv_null - not NULL vector for the container
322 */
324
326
327 /*! copy-ctor */
329
330
331 /*! copy assignmment operator */
332 rsc_sparse_vector<Val,SV>& operator=(const rsc_sparse_vector<Val, SV>& csv)
333 {
334 if (this != &csv)
335 {
336 sv_ = csv.sv_;
337 size_ = csv.size_; max_id_ = csv.max_id_;
338 in_sync_ = csv.in_sync_;
339 if (in_sync_)
340 {
341 rs_idx_->copy_from(*(csv.rs_idx_));
342 }
343 }
344 return *this;
345 }
346
347#ifndef BM_NO_CXX11
348 /*! move-ctor */
350
351 /*! move assignmment operator */
353 {
354 if (this != &csv)
355 {
356 clear_all(true);
357 sv_.swap(csv.sv_);
358 size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
359 if (in_sync_)
360 rs_idx_->copy_from(*(csv.rs_idx_));
361 }
362 return *this;
363 }
364#endif
365
366 //@}
367 // ------------------------------------------------------------
368 /*! @name Size, etc */
369 ///@{
370
371 /*! \brief return size of the vector
372 \return size of sparse vector
373 */
375
376 /*! \brief return true if vector is empty
377 \return true if empty
378 */
379 bool empty() const BMNOEXCEPT { return sv_.empty(); }
380
381 /**
382 \brief recalculate size to exclude tail NULL elements
383 After this call size() will return the true size of the vector
384 */
386
387 /*! \brief change vector size
388 \param new_size - new vector size
389 */
390 void resize(size_type new_size);
391
392 /*
393 \brief Returns count of not NULL elements (population)
394 in the given range [left..right]
395 Uses rank-select index to accelerate the search (after sync())
396
397 \param left - index of first bit start counting from
398 \param right - index of last bit
399
400 @sa sync
401 */
404
405 ///@}
406
407 // ------------------------------------------------------------
408 /*! @name Element access */
409 //@{
410
411 /*!
412 \brief get specified element without bounds checking
413 \param idx - element index
414 \return value of the element
415 */
416 value_type operator[](size_type idx) const { return this->get(idx); }
417
418 /*!
419 \brief access specified element with bounds checking
420 \param idx - element index
421 \return value of the element
422 */
424
425 /*!
426 \brief get specified element without bounds checking
427 \param idx - element index
428 \return value of the element
429 */
431
432 /**
433 \brief get specified element with NOT NULL check
434 \param idx - element index
435 \param v - [out] value to get
436 \return true if value was aquired (NOT NULL), false otherwise
437 @sa is_null, get
438 */
440
441 /**
442 \brief get specified element with NOT NULL check
443 Caller guarantees that the vector is in sync mode (RS-index access).
444 \param idx - element index
445 \param v - [out] value to get
446 \return true if value was aquired (NOT NULL), false otherwise
447 @sa is_null, get, sync
448 */
450
451
452 /*!
453 \brief set specified element with bounds checking and automatic resize
454
455 Method cannot insert elements, so every new idx has to be greater or equal
456 than what it used before. Elements must be loaded in a sorted order.
457
458 \param idx - element index
459 \param v - element value
460 */
462
463
464 /*!
465 \brief add element with automatic resize
466 \param v - element value
467 */
469 { this->push_back(size_, v); }
470
471 /*!
472 \brief push back specified amount of NULL values
473 \param count - number of NULLs to push back
474 */
476
477 /*!
478 \brief push back NULL value
479 */
481
482 /*!
483 \brief set specified element with bounds checking and automatic resize
484 \param idx - element index
485 \param v - element value
486 */
488
489
490 /*!
491 \brief increment specified element by one
492 \param idx - element index
493 */
494 void inc(size_type idx);
495
496 /*!
497 \brief increment specified element by one
498 \param idx - element index
499 \param v - increment value
500 */
502
503 /*!
504 \brief increment specified element by one, element MUST be NOT NULL
505 Faster than just inc() if element is NULL - behavior is undefined
506 \param idx - element index
507 \param v - increment value
508 @sa inc
509 */
511
512 /*!
513 \brief set specified element to NULL
514 RSC vector actually erases element when it is set to NULL (expensive).
515 \param idx - element index
516 */
518
519 /**
520 Set NULL all elements set as 1 in the argument vector.
521 Function also clears all the values to 0.
522 Note: this can be a very expensive function for an RS container.
523 \param bv_idx - index bit-vector for elements which to be set to NULL
524 */
525 void set_null(const bvector_type& bv_idx);
526
527
528 /**
529 Set vector elements spcified by argument bit-vector to zero
530 Note that set to 0 elements are NOT going to tuned to NULL (NULL qualifier is preserved)
531 \param bv_idx - index bit-vector for elements which to be set to 0
532 */
533 void clear(const bvector_type& bv_idx);
534
535
536
537 /** \brief test if specified element is NULL
538 \param idx - element index
539 \return true if it is NULL false if it was assigned or container
540 is not configured to support assignment flags
541 */
542 bool is_null(size_type idx) const BMNOEXCEPT;
543
544 /**
545 \brief Get bit-vector of assigned values (or NULL)
546 */
548
549 /**
550 \brief find position of compressed element by its rank
551 \param rank - rank (virtual index in sparse vector)
552 \param idx - index (true position)
553 */
554 bool find_rank(size_type rank, size_type& idx) const BMNOEXCEPT;
555
556 //@}
557
558 // ------------------------------------------------------------
559 /*! @name Export content to C-stype array */
560 ///@{
561
562 /**
563 \brief C-style decode
564 \param arr - decode target array (must be properly sized)
565 \param idx_from - start address to decode
566 \param size - number of elements to decode
567 \param zero_mem - flag if array needs to beset to zeros first
568
569 @return actual decoded size
570 @sa decode_buf
571 */
573 size_type idx_from,
575 bool zero_mem = true) const;
576
577
578 /**
579 \brief C-style decode (variant with external memory)
580 Analog of decode, but requires two arrays.
581 Faster than decode in many cases.
582
583 \param arr - decode target array (must be properly sized)
584 \param arr_buf_tmp - decode temp bufer (must be same size of arr)
585 \param idx_from - start address to decode
586 \param size - number of elements to decode
587 \param zero_mem - flag if array needs to beset to zeros first
588
589 @return actual decoded size
590 @sa decode
591 */
593 value_type* arr_buf_tmp,
594 size_type idx_from,
596 bool zero_mem = true) const BMNOEXCEPT;
597
598 /*!
599 \brief Gather elements to a C-style array
600
601 Gather collects values from different locations, for best
602 performance feed it with sorted list of indexes.
603
604 Faster than one-by-one random access.
605
606 For efficiency, this is left as a low level function,
607 it does not do any bounds checking on the target array, it will
608 override memory and crash if you are not careful with allocation
609 and request size.
610
611 \param arr - destination array
612 \param idx - index list to gather elements (read only)
613 \param idx_tmp_buf - temp scratch buffer for index rank-select index translation
614 must be correctly allocated to match size. No value initialization requirement.
615 \param size - decoding index list size (array allocation should match)
616 \param sorted_idx - sort order directive for the idx array
617 (BM_UNSORTED, BM_SORTED, BM_UNKNOWN)
618 Sort order affects both performance and correctness(!), use BM_UNKNOWN
619 if not sure.
620
621 \return number of actually exported elements (can be less than requested)
622
623 \sa decode
624 */
626 const size_type* idx,
627 size_type* idx_tmp_buf,
629 bm::sort_order sorted_idx) const;
630
631
632 ///@}
633
634
635 // ------------------------------------------------------------
636 /*! @name Various traits */
637 ///@{
638 /**
639 \brief if container supports NULL(unassigned) values (true)
640 */
641 bool is_nullable() const { return true; }
642
643 /** \brief various type traits
644 */
645 static constexpr
646 bool is_compressed() BMNOEXCEPT { return true; }
647
648 static constexpr
649 bool is_str() BMNOEXCEPT { return false; }
650
651 ///@}
652
653
654 // ------------------------------------------------------------
655 /*! @name Comparison */
656 ///@{
657
658 /*!
659 \brief check if another vector has the same content
660 \return true, if it is the same
661 */
663 //@}
664
665
666 // ------------------------------------------------------------
667 /*! @name Load-Export compressed vector with data */
668 ///@{
669 /*!
670 \brief Load compressed vector from a sparse vector (with NULLs)
671 \param sv_src - source sparse vector
672 */
673 void load_from(const sparse_vector_type& sv_src);
674
675 /*!
676 \brief Exort compressed vector to a sparse vector (with NULLs)
677 \param sv - target sparse vector
678 */
680
681 ///@}
682
683 // ------------------------------------------------------------
684 /*! @name Iterator access */
685 ///@{
686
687 /** Provide const iterator access to container content */
689 {
690 if (!in_sync_)
691 throw_no_rsc_index(); // call sync() to build RSC fast access index
692 return const_iterator(this);
693 }
694
695 /** Provide const iterator access to the end */
697 { return const_iterator(this, bm::id_max); }
698
699 /** Get const_itertor re-positioned to specific element
700 @param idx - position in the sparse vector
701 */
703 { return const_iterator(this, idx); }
704
706 ///@}
707
708 // ------------------------------------------------------------
709 /*! @name Memory optimization */
710 ///@{
711
712 /*!
713 \brief run memory optimization for all vector slices
714 \param temp_block - pre-allocated memory block to avoid unnecessary re-allocs
715 \param opt_mode - requested compression depth
716 \param stat - memory allocation statistics after optimization
717 */
719 bm::word_t* temp_block = 0,
721 statistics* stat = 0);
722
723 /*! \brief resize to zero, free memory
724 @param free_mem - free bit vector slices if true
725 */
726 void clear_all(bool free_mem) BMNOEXCEPT;
727
728 /*! \brief resize to zero, free memory
729 @param free_mem - free bit vector slices if true
730 */
731 void clear() BMNOEXCEPT { clear_all(true); }
732
733 /*!
734 @brief Calculates memory statistics.
735
736 Function fills statistics structure containing information about how
737 this vector uses memory and estimation of max. amount of memory
738 bvector needs to serialize itself.
739
740 @param st - pointer on statistics structure to be filled in.
741
742 @sa statistics
743 */
746
747 /**
748 @brief Turn sparse vector into immutable mode
749 Read-only (immutable) vector uses less memory and allows faster searches.
750 Before freezing it is recommenede to call optimize() to get full memory saving effect
751 @sa optimize
752 */
753 void freeze() { sv_.freeze(); }
754
755 /** Returns true if vector is read-only */
756 bool is_ro() const BMNOEXCEPT { return sv_.is_ro_; }
757
758
759
760 ///@}
761
762 // ------------------------------------------------------------
763 /*! @name Merge, split, partition data */
764 ///@{
765
766 /**
767 @brief copy range of values from another sparse vector
768
769 Copy [left..right] values from the source vector,
770 clear everything outside the range.
771
772 \param csv - source vector
773 \param left - index from in losed diapason of [left..right]
774 \param right - index to in losed diapason of [left..right]
775 */
777 size_type left, size_type right);
778
779 /**
780 @brief merge two vectors (argument gets destroyed)
781 It is important that both vectors have the same NULL vectors
782 @param csv - [in,out] argumnet vector to merge
783 (works like move so arg should not be used after the merge)
784 */
786
787 ///@}
788
789 // ------------------------------------------------------------
790 /*! @name Fast access structues sync */
791 ///@{
792 /*!
793 \brief Re-calculate rank-select index for faster access to vector
794 \param force - force recalculation even if it is already recalculated
795 */
796 void sync(bool force);
797
798 /*!
799 \brief Re-calculate prefix sum table used for rank search (if necessary)
800 */
801 void sync() { sync(false); }
802
803 /*!
804 \brief returns true if prefix sum table is in sync with the vector
805 */
806 bool in_sync() const BMNOEXCEPT { return in_sync_; }
807 /*!
808 \brief returns true if prefix sum table is in sync with the vector
809 */
810 bool is_sync() const BMNOEXCEPT { return in_sync_; }
811
812 /*!
813 \brief Unsync the prefix sum table
814 */
815 void unsync() BMNOEXCEPT { in_sync_ = is_dense_ = false; }
816 ///@}
817
818 // ------------------------------------------------------------
819 /*! @name Access to internals */
820 ///@{
821
822 /*!
823 \brief get access to bit-plane, function checks and creates a plane
824 \return bit-vector for the bit slice
825 */
827 { return sv_.get_slice(i); }
828
830 { return sv_.get_create_slice(i); }
831
832 /*!
833 Number of effective bit-slices in the value type
834 */
836 { return sv_.effective_slices(); }
837
838 /*!
839 \brief get total number of bit-slices in the vector
840 */
841 static unsigned slices() BMNOEXCEPT
842 { return sparse_vector_type::slices(); }
843
844 /** Number of stored bit-slices (value slices + extra */
845 static unsigned stored_slices()
846 { return sparse_vector_type::stored_slices(); }
847
848 /*!
849 \brief access dense vector
850 */
851 const sparse_vector_type& get_sv() const BMNOEXCEPT { return sv_; }
852
853 /*!
854 \brief size of internal dense vector
855 */
856 size_type effective_size() const BMNOEXCEPT { return sv_.size(); }
857
858 /**
859 \brief Always 1 (non-matrix type)
860 */
862
863 /*!
864 get read-only access to inetrnal bit-matrix
865 */
867 { return sv_.get_bmatrix(); }
868
869 /*!
870 get read-only access to inetrnal bit-matrix
871 */
873 { return sv_.get_bmatrix(); }
874
875
876 /*! Get Rank-Select index pointer
877 @return NULL if sync() was not called to construct the index
878 @sa sync()
879 */
881 { return in_sync_ ? rs_idx_ : 0; }
882
883 void mark_null_idx(unsigned null_idx) BMNOEXCEPT
884 { sv_.mark_null_idx(null_idx); }
885
886 /*!
887 \brief Resolve logical address to access via rank compressed address
888
889 \param idx - input id to resolve
890 \param idx_to - output id
891
892 \return true if id is known and resolved successfully
893 @internal
894 */
895 bool resolve(size_type idx, size_type* idx_to) const BMNOEXCEPT;
896
897 ///@}
898
899protected:
901 {
903 };
904
905
906 bool resolve_sync(size_type idx, size_type* idx_to) const BMNOEXCEPT;
907
909 size_type* idx_from, size_type* idx_to) const BMNOEXCEPT;
910
912 {
913 sv_.resize_internal(sz);
914 }
915 size_type size_internal() const BMNOEXCEPT { return sv_.size(); }
916
917 constexpr bool is_remap() const BMNOEXCEPT { return false; }
918 size_t remap_size() const BMNOEXCEPT { return 0; }
919 const unsigned char* get_remap_buffer() const BMNOEXCEPT { return 0; }
920 unsigned char* init_remap_buffer() BMNOEXCEPT { return 0; }
922
923 /// unused remap matrix type for compatibility with the sparse serializer
924 typedef
925 bm::heap_matrix<unsigned char, 1, 1,
927
928 const remap_matrix_type* get_remap_matrix() const { return 0; }
930
932
933 /**
934 Convert signed value type to unsigned representation
935 */
936 static
938 { return sparse_vector_type::s2u(v); }
939 static
941 { return sparse_vector_type::u2s(v); }
942
943private:
944
945 /// Allocate memory for RS index
946 void construct_rs_index();
947 /// Free rs-index
948 void free_rs_index();
949
950 /**
951 \brief throw error that RSC index not constructed (call sync())
952 \internal
953 @sa sync
954 */
955 static
956 void throw_no_rsc_index();
957
958protected:
959 template<class SVect> friend class sparse_vector_scanner;
960 template<class SVect> friend class sparse_vector_serializer;
961 template<class SVect> friend class sparse_vector_deserializer;
962 template<class SVect> friend class sparse_vector_scanner;
963
964
965private:
966 sparse_vector_type sv_; ///< transpose-sparse vector for "dense" packing
967 size_type size_; ///< vector size (logical)
968 size_type max_id_; ///< control variable for sorted load
969 bool in_sync_; ///< flag if prefix sum is in-sync with vector
970 rs_index_type* rs_idx_ = 0; ///< prefix sum for rank-select translation
971 bool is_dense_ = false; ///< flag if vector is dense
972};
973
974//---------------------------------------------------------------------
975//
976//---------------------------------------------------------------------
977
978template<class Val, class SV>
981 size_type bv_max_size,
982 const allocator_type& alloc)
983: sv_(bm::use_null, ap, bv_max_size, alloc), in_sync_(false)
984{
985 (void) null_able;
986 BM_ASSERT(null_able == bm::use_null);
987 BM_ASSERT(int(sv_value_slices) == int(SV::sv_value_slices));
988 size_ = max_id_ = 0;
989 construct_rs_index();
990}
991
992//---------------------------------------------------------------------
993
994template<class Val, class SV>
996: sv_(bm::use_null), in_sync_(false)
997{
998 construct_rs_index();
999 bvector_type* bv = sv_.get_null_bvect();
1000 BM_ASSERT(bv);
1001 *bv = bv_null;
1002
1003 bool found = bv->find_reverse(max_id_);
1004 if (found)
1005 {
1006 size_ = max_id_ + 1;
1007 size_type sz = bv->count();
1008 sv_.resize(sz);
1009 }
1010 else
1011 {
1012 BM_ASSERT(!bv->any());
1013 size_ = max_id_ = 0;
1014 }
1015}
1016
1017//---------------------------------------------------------------------
1018
1019template<class Val, class SV>
1021{
1022 free_rs_index();
1023}
1024
1025//---------------------------------------------------------------------
1026
1027template<class Val, class SV>
1029 const rsc_sparse_vector<Val, SV>& csv)
1030: sv_(csv.sv_), size_(csv.size_), max_id_(csv.max_id_), in_sync_(csv.in_sync_)
1031{
1032 BM_ASSERT(int(sv_value_slices) == int(SV::sv_value_slices));
1033
1034 construct_rs_index();
1035 if (in_sync_)
1036 rs_idx_->copy_from(*(csv.rs_idx_));
1037}
1038
1039//---------------------------------------------------------------------
1040
1041template<class Val, class SV>
1044: sv_(bm::use_null),
1045 size_(0),
1046 max_id_(0), in_sync_(false)
1047{
1048 if (this != &csv)
1049 {
1050 sv_.swap(csv.sv_);
1051 size_ = csv.size_; max_id_ = csv.max_id_; in_sync_ = csv.in_sync_;
1052 rs_idx_ = csv.rs_idx_; csv.rs_idx_ = 0;
1053 }
1054}
1055
1056//---------------------------------------------------------------------
1057
1058template<class Val, class SV>
1061{
1062 return size_;
1063}
1064
1065//---------------------------------------------------------------------
1066
1067template<class Val, class SV>
1069{
1070 BM_ASSERT(new_size < bm::id_max);
1071 if (!new_size) // clear memory
1072 {
1073 sv_.resize(0);
1074 BM_ASSERT(sv_.get_null_bvect()->none());
1075 in_sync_ = false;
1076 size_ = max_id_ = 0;
1077 return;
1078 }
1079 if (new_size >= size_) // vector grows
1080 {
1081 size_ = new_size;
1082 max_id_ = new_size - 1;
1083 return;
1084 }
1085
1086 // vector shrinks
1087 // compute tail rank
1088 bvector_type* bv_null = sv_.get_null_bvect();
1089 size_type clear_size = bv_null->count_range(new_size, bm::id_max-1);
1090
1091 if (!clear_size) // tail set/rank is empty
1092 {
1093 size_ = new_size;
1094 max_id_ = new_size - 1;
1095 BM_ASSERT(!bv_null->any_range(size_, bm::id_max-1));
1096 return;
1097 }
1098
1099 BM_ASSERT(sv_.size() >= clear_size);
1100 size_type new_sv_size = sv_.size() - clear_size;
1101 sv_.resize_internal(new_sv_size, false); // without touching NULL plane
1102 bv_null->clear_range(new_size, bm::id_max-1);
1103
1104 size_ = new_size;
1105 max_id_ = new_size - 1;
1106
1107 BM_ASSERT(!bv_null->any_range(size_, bm::id_max-1));
1108
1109 in_sync_ = false;
1110}
1111
1112//---------------------------------------------------------------------
1113
1114template<class Val, class SV>
1116{
1117 if (sv_.empty())
1118 {}
1119 else
1120 if (idx <= max_id_ && size_)
1121 {
1122 sv_.throw_range_error("compressed sparse vector push_back() range error");
1123 }
1124 push_back_no_check(idx, v);
1125}
1126
1127//---------------------------------------------------------------------
1128
1129template<class Val, class SV>
1131{
1132 BM_ASSERT(size_ < bm::id_max - count); // overflow assert
1133 size_ += count;
1134 max_id_ = size_-1;
1135}
1136
1137//---------------------------------------------------------------------
1138
1139template<class Val, class SV>
1141{
1142 bvector_type* bv_null = sv_.get_null_bvect();
1143 BM_ASSERT(bv_null);
1144
1145 bv_null->set_bit_no_check(idx);
1146 sv_.push_back_no_null(v);
1147
1148 max_id_ = idx;
1149 size_ = idx + 1;
1150 in_sync_ = false;
1151}
1152
1153//---------------------------------------------------------------------
1154
1155template<class Val, class SV>
1157{
1158 bvector_type* bv_null = sv_.get_null_bvect();
1159 BM_ASSERT(bv_null);
1160
1161 bool found = bv_null->test(idx); // TODO: use extract bit
1162 if (found)
1163 {
1164 // TODO: maybe RS-index is available
1165 size_type sv_idx = bv_null->count_range(0, idx);
1166 bv_null->clear_bit_no_check(idx);
1167 sv_.erase(--sv_idx, false/*not delete NULL vector*/);
1168 in_sync_ = false;
1169 }
1170 else
1171 {
1172 if (idx > max_id_)
1173 {
1174 max_id_ = idx;
1175 size_ = max_id_ + 1;
1176 }
1177 }
1178}
1179
1180//---------------------------------------------------------------------
1181
1182template<class Val, class SV>
1184{
1185 bvector_type* bv_null = sv_.get_null_bvect();
1186 bvector_type bv_sub; // subtraction vector cleared from NOT NULLs
1187 bv_sub.bit_and(bv_idx, *bv_null);
1188 // clear the main matrix to accelerate the erase in all bit-slices
1189 {
1191 bvector_type bv_sub_rsc;
1192 rank_compr.compress(bv_sub_rsc, *bv_null, bv_sub);
1193 sv_.clear(bv_sub_rsc);
1194 }
1195
1196 in_sync_ = false;
1197 typename bvector_type::enumerator en(&bv_sub, 0);
1198 for (size_type cnt = 0; en.valid(); ++en, ++cnt)
1199 {
1200 auto idx = *en;
1201
1202 size_type sv_idx = bv_null->count_range(0, idx);
1203 sv_idx -= cnt; // correct rank for what we deleted previously
1204 sv_.erase(--sv_idx, false/*not delete the NULL vector*/);
1205 }
1206 bv_null->bit_sub(bv_sub);
1207}
1208
1209//---------------------------------------------------------------------
1210
1211template<class Val, class SV>
1213{
1214 const bvector_type* bv_null = sv_.get_null_bvector();
1215
1216 bvector_type bv_sub; // subtraction vector cleared from NOT NULLs
1217 bv_sub.bit_and(bv_idx, *bv_null);
1218
1220 bvector_type bv_sub_rsc;
1221 rank_compr.compress(bv_sub_rsc, *bv_null, bv_sub);
1222
1223 sv_.clear(bv_sub_rsc);
1224}
1225
1226//---------------------------------------------------------------------
1227
1228template<class Val, class SV>
1230{
1231 bvector_type* bv_null = sv_.get_null_bvect();
1232 BM_ASSERT(bv_null);
1233
1234 size_type sv_idx;
1235 bool found = bv_null->test(idx);
1236
1237 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1238 : bv_null->count_range(0, idx); // TODO: make test'n'count
1239
1240 if (found)
1241 {
1242 sv_.inc_no_null(--sv_idx);
1243 }
1244 else
1245 {
1246 sv_.insert_value_no_null(sv_idx, 1);
1247 bv_null->set_bit_no_check(idx);
1248
1249 if (idx > max_id_)
1250 {
1251 max_id_ = idx;
1252 size_ = max_id_ + 1;
1253 }
1254 in_sync_ = false;
1255 }
1256}
1257
1258//---------------------------------------------------------------------
1259
1260template<class Val, class SV>
1262{
1263 bvector_type* bv_null = sv_.get_null_bvect();
1264 BM_ASSERT(bv_null);
1265
1266 size_type sv_idx;
1267 bool found = bv_null->test(idx);
1268
1269 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1270 : bv_null->count_range(0, idx); // TODO: make test'n'count
1271
1272 if (found)
1273 {
1274 sv_.inc_no_null(--sv_idx, v);
1275 }
1276 else
1277 {
1278 sv_.insert_value_no_null(sv_idx, v);
1279 bv_null->set_bit_no_check(idx);
1280
1281 if (idx > max_id_)
1282 {
1283 max_id_ = idx;
1284 size_ = max_id_ + 1;
1285 }
1286 in_sync_ = false;
1287 }
1288}
1289
1290//---------------------------------------------------------------------
1291
1292template<class Val, class SV>
1294{
1295 bvector_type* bv_null = sv_.get_null_bvect();
1296 BM_ASSERT(bv_null->test(idx)); // idx must be NOT NULL
1297
1298 size_type sv_idx;
1299 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1300 : bv_null->count_range(0, idx); // TODO: make test'n'count
1301 --sv_idx;
1302 if (v == 1)
1303 sv_.inc_no_null(sv_idx);
1304 else
1305 sv_.inc_no_null(sv_idx, v);
1306}
1307
1308
1309//---------------------------------------------------------------------
1310
1311template<class Val, class SV>
1313{
1314 bvector_type* bv_null = sv_.get_null_bvect();
1315 BM_ASSERT(bv_null);
1316
1317 size_type sv_idx;
1318 bool found = bv_null->test(idx);
1319
1320 sv_idx = in_sync_ ? bv_null->count_to(idx, *rs_idx_)
1321 : bv_null->count_range(0, idx); // TODO: make test'n'count
1322
1323 if (found)
1324 {
1325 sv_.set_value_no_null(--sv_idx, v, true);
1326 }
1327 else
1328 {
1329 sv_.insert_value_no_null(sv_idx, v);
1330 bv_null->set_bit_no_check(idx);
1331
1332 if (idx > max_id_)
1333 {
1334 max_id_ = idx;
1335 size_ = max_id_ + 1;
1336 }
1337 in_sync_ = false;
1338 }
1339}
1340
1341//---------------------------------------------------------------------
1342
1343template<class Val, class SV>
1345 const rsc_sparse_vector<Val, SV>& csv) const BMNOEXCEPT
1346{
1347 if (this == &csv)
1348 return true;
1349 if (max_id_ != csv.max_id_ || size_ != csv.size_)
1350 return false;
1351 bool same_sv = sv_.equal(csv.sv_);
1352 return same_sv;
1353}
1354
1355//---------------------------------------------------------------------
1356
1357template<class Val, class SV>
1359 const sparse_vector_type& sv_src)
1360{
1361 max_id_ = size_ = 0;
1362
1363 bvector_type* bv_null = sv_.get_null_bvect();
1364 BM_ASSERT(bv_null);
1365
1366 const bvector_type* bv_null_src = sv_src.get_null_bvector();
1367 if (!bv_null_src) // dense vector (no NULL columns)
1368 {
1369 sv_ = sv_src;
1370 BM_ASSERT(sv_.get_null_bvect());
1371 }
1372 else
1373 {
1374 sv_.clear_all(true);
1375 *bv_null = *bv_null_src;
1376
1377 bm::rank_compressor<bvector_type> rank_compr; // re-used for planes
1378
1379 unsigned src_planes = sv_src.slices();
1380 for (unsigned i = 0; i < src_planes; ++i)
1381 {
1382 const bvector_type* bv_src_plane = sv_src.get_slice(i);
1383 if (bv_src_plane)
1384 {
1385 bvector_type* bv_plane = sv_.get_create_slice(i);
1386 rank_compr.compress(*bv_plane, *bv_null, *bv_src_plane);
1387 }
1388 } // for
1389 size_type count = bv_null->count(); // set correct sizes
1390 sv_.resize(count);
1391 }
1392
1393 sync(true);
1394}
1395
1396//---------------------------------------------------------------------
1397
1398template<class Val, class SV>
1400{
1401 sv.clear_all(true);
1402
1403 const bvector_type* bv_null_src = this->get_null_bvector();
1404 if (!bv_null_src)
1405 {
1406 BM_ASSERT(bv_null_src);
1407 return;
1408 }
1409
1410 bvector_type* bv_null = sv.get_null_bvect();
1411 BM_ASSERT(bv_null);
1412 *bv_null = *bv_null_src;
1413
1414 bm::rank_compressor<bvector_type> rank_compr; // re-used for planes
1415
1416 unsigned src_planes = sv_.slices();
1417 for (unsigned i = 0; i < src_planes; ++i)
1418 {
1419 if (const bvector_type* bv_src_plane = sv_.get_slice(i))
1420 {
1421 bvector_type* bv_plane = sv.get_create_slice(i);
1422 rank_compr.decompress(*bv_plane, *bv_null, *bv_src_plane);
1423 }
1424 } // for i
1425 sv.resize(this->size());
1426}
1427
1428//---------------------------------------------------------------------
1429
1430template<class Val, class SV>
1432{
1433 if (in_sync_ && force == false)
1434 return; // nothing to do
1435 const bvector_type* bv_null = sv_.get_null_bvector();
1436 BM_ASSERT(bv_null);
1437 bv_null->build_rs_index(rs_idx_); // compute popcount prefix list
1438
1439 if (force)
1440 sync_size();
1441
1442 size_type cnt = size_ ? bv_null->count_range(0, size_-1, *rs_idx_)
1443 : 0;
1444 is_dense_ = (cnt == size_); // dense vector?
1445 BM_ASSERT(size_ >= bv_null->count());
1446
1447 in_sync_ = true;
1448}
1449
1450//---------------------------------------------------------------------
1451
1452template<class Val, class SV>
1454{
1455 const bvector_type* bv_null = sv_.get_null_bvector();
1456 BM_ASSERT(bv_null);
1457 // sync the max-id
1458 bool found = bv_null->find_reverse(max_id_);
1459 if (!found)
1460 max_id_ = size_ = 0;
1461 else
1462 size_ = max_id_ + 1;
1463 sync(false);
1464}
1465
1466//---------------------------------------------------------------------
1467
1468template<class Val, class SV>
1470 size_type* idx_to) const BMNOEXCEPT
1471{
1472 BM_ASSERT(idx_to);
1473 const bvector_type* bv_null = sv_.get_null_bvector();
1474 if (idx >= size_)
1475 {
1476 BM_ASSERT(bv_null->get_bit(idx) == false);
1477 return false;
1478 }
1479 if (in_sync_)
1480 return resolve_sync(idx, idx_to);
1481
1482 // not in-sync: slow access
1483 bool found = bv_null->test(idx);
1484 if (!found)
1485 return found;
1486 *idx_to = bv_null->count_range_no_check(0, idx);
1487 return found;
1488}
1489
1490//---------------------------------------------------------------------
1491
1492template<class Val, class SV>
1494 size_type idx,
1495 size_type* idx_to) const BMNOEXCEPT
1496{
1497 BM_ASSERT(idx_to);
1498 BM_ASSERT(in_sync_);
1499
1500 const bvector_type* bv_null = sv_.get_null_bvector();
1501 if (is_dense_)
1502 {
1503 *idx_to = idx+1;
1504 if (idx <= size_)
1505 return true;
1506 *idx_to = bm::id_max;
1507 BM_ASSERT(bv_null->get_bit(idx) == false);
1508 return false;
1509 }
1510 *idx_to = bv_null->count_to_test(idx, *rs_idx_);
1511 return bool(*idx_to);
1512}
1513
1514//---------------------------------------------------------------------
1515
1516template<class Val, class SV>
1518 size_type from, size_type to,
1519 size_type* idx_from, size_type* idx_to) const BMNOEXCEPT
1520{
1521 BM_ASSERT(idx_to && idx_from);
1522 const bvector_type* bv_null = sv_.get_null_bvector();
1523 size_type copy_sz, sv_left;
1524 if (in_sync_)
1525 copy_sz = bv_null->count_range(from, to, *rs_idx_);
1526 else // slow access
1527 copy_sz = bv_null->count_range(from, to);
1528 if (!copy_sz)
1529 return false;
1530
1531 if (in_sync_)
1532 sv_left = bv_null->rank_corrected(from, *rs_idx_);
1533 else
1534 {
1535 sv_left = bv_null->count_range(0, from);
1536 bool tl = bv_null->test(from); // TODO: add count and test
1537 sv_left -= tl; // rank correction
1538 }
1539
1540 *idx_from = sv_left; *idx_to = sv_left + copy_sz - 1;
1541 return true;
1542}
1543
1544
1545//---------------------------------------------------------------------
1546
1547template<class Val, class SV>
1550{
1551 size_type sv_idx;
1552 if (idx >= size())
1553 sv_.throw_range_error("compressed collection access error");
1554
1555 bool found = resolve(idx, &sv_idx);
1556 if (!found)
1557 {
1558 sv_.throw_range_error("compressed collection item not found");
1559 }
1560 return sv_.at(--sv_idx);
1561}
1562
1563//---------------------------------------------------------------------
1564
1565template<class Val, class SV>
1568{
1569 size_type sv_idx;
1570 bool found = resolve(idx, &sv_idx);
1571 if (!found)
1572 return value_type(0);
1573 BM_ASSERT(!is_null(idx));
1574 return sv_.get(--sv_idx);
1575}
1576
1577//---------------------------------------------------------------------
1578
1579template<class Val, class SV>
1581 size_type idx, value_type& v) const BMNOEXCEPT
1582{
1583 size_type sv_idx;
1584 if (!resolve(idx, &sv_idx))
1585 return false;
1586 v = sv_.get(--sv_idx);
1587 return true;
1588}
1589
1590//---------------------------------------------------------------------
1591
1592template<class Val, class SV>
1594 size_type idx, value_type& v) const BMNOEXCEPT
1595{
1596 size_type sv_idx;
1597 bool found = resolve_sync(idx, &sv_idx);
1598 if (!found)
1599 return found;
1600 v = sv_.get(--sv_idx);
1601 return true;
1602}
1603
1604//---------------------------------------------------------------------
1605
1606template<class Val, class SV>
1608{
1609 const bvector_type* bv_null = sv_.get_null_bvector();
1610 BM_ASSERT(bv_null);
1611 return !(bv_null->test(idx));
1612}
1613
1614//---------------------------------------------------------------------
1615
1616template<class Val, class SV>
1618 typename bvector_type::optmode opt_mode,
1619 statistics* st)
1620{
1621 sv_.optimize(temp_block, opt_mode, (typename sparse_vector_type::statistics*)st);
1622 if (st)
1623 {
1624 st->memory_used += sizeof(rs_index_type);
1625 st->memory_used += rs_idx_->get_total() *
1626 (sizeof(typename rs_index_type::size_type)
1627 + sizeof(typename rs_index_type::sb_pair_type));
1628 }
1629 if (is_sync()) // must rebuild the sync index after optimization
1630 this->sync(true);
1631}
1632
1633//---------------------------------------------------------------------
1634
1635template<class Val, class SV>
1637{
1638 sv_.clear_all(free_mem);
1639 in_sync_ = false; max_id_ = size_ = 0;
1640}
1641
1642//---------------------------------------------------------------------
1643
1644template<class Val, class SV>
1647{
1648 BM_ASSERT(st);
1649 sv_.calc_stat((typename sparse_vector_type::statistics*)st);
1650 if (st)
1651 {
1652 st->memory_used += sizeof(rs_index_type);
1653 st->memory_used += rs_idx_->get_total() *
1654 (sizeof(typename rs_index_type::size_type)
1655 + sizeof(typename rs_index_type::sb_pair_type));
1656 }
1657}
1658
1659//---------------------------------------------------------------------
1660
1661template<class Val, class SV>
1664{
1665 return sv_.get_null_bvector();
1666}
1667
1668//---------------------------------------------------------------------
1669
1670template<class Val, class SV>
1671bool
1673 size_type& idx) const BMNOEXCEPT
1674{
1675 BM_ASSERT(rank);
1676 bool b;
1677 const bvector_type* bv_null = get_null_bvector();
1678 if (in_sync())
1679 b = bv_null->select(rank, idx, *rs_idx_);
1680 else
1681 b = bv_null->find_rank(rank, 0, idx);
1682 return b;
1683}
1684
1685//---------------------------------------------------------------------
1686
1687template<class Val, class SV>
1689{
1690#ifndef BM_NO_STL
1691 throw std::domain_error("Rank-Select index not constructed, call sync() first");
1692#else
1693 BM_ASSERT_THROW(false, BM_ERR_RANGE);
1694#endif
1695}
1696
1697//---------------------------------------------------------------------
1698
1699
1700template<class Val, class SV>
1703 size_type idx_from,
1704 size_type size,
1705 bool zero_mem) const
1706{
1707 if (size == 0)
1708 return 0;
1709 if (!in_sync_)
1710 throw_no_rsc_index(); // call sync() to build RSC fast access index
1711
1712 BM_ASSERT(arr);
1713 BM_ASSERT(rs_idx_);
1714
1715 if (idx_from >= this->size())
1716 return 0;
1717
1718 if ((bm::id_max - size) <= idx_from)
1719 size = bm::id_max - idx_from;
1720 if ((idx_from + size) > this->size())
1721 size = this->size() - idx_from;
1722
1723 const bvector_type* bv_null = sv_.get_null_bvector();
1724 size_type rank = bv_null->rank_corrected(idx_from, *rs_idx_);
1725
1726 BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
1727
1728 size_type i = 0;
1729
1730 bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
1731 if (!en_i.valid())
1732 return i;
1733
1734 if (zero_mem)
1735 ::memset(arr, 0, sizeof(value_type)*size);
1736
1737 sparse_vector_const_iterator it = sv_.get_const_iterator(rank);
1738 if (it.valid())
1739 {
1740 do
1741 {
1742 size_type en_idx = *en_i;
1743 size_type delta = en_idx - idx_from;
1744 idx_from += delta;
1745 i += delta;
1746 if (i >= size)
1747 return size;
1748 arr[i++] = it.value();
1749 if (!en_i.advance())
1750 break;
1751 if (!it.advance())
1752 break;
1753 ++idx_from;
1754 } while (i < size);
1755 }
1756 return i;
1757}
1758
1759
1760template<class Val, class SV>
1763 value_type* arr_buf_tmp,
1764 size_type idx_from,
1765 size_type size,
1766 bool zero_mem) const BMNOEXCEPT
1767{
1768 if (!size || (idx_from >= this->size()))
1769 return 0;
1770
1771 BM_ASSERT(arr && arr_buf_tmp);
1772 BM_ASSERT(arr != arr_buf_tmp);
1773 BM_ASSERT(in_sync_); // call sync() to build RSC fast access index
1774 BM_ASSERT(rs_idx_);
1775
1776 if ((bm::id_max - size) <= idx_from)
1777 size = bm::id_max - idx_from;
1778 if ((idx_from + size) > this->size())
1779 size = this->size() - idx_from;
1780
1781 if (zero_mem)
1782 ::memset(arr, 0, sizeof(value_type)*size);
1783
1784 const bvector_type* bv_null = sv_.get_null_bvector();
1785 size_type rank = bv_null->rank_corrected(idx_from, *rs_idx_);
1786
1787 BM_ASSERT(rank == bv_null->count_range(0, idx_from) - bv_null->test(idx_from));
1788
1789 bvector_enumerator_type en_i = bv_null->get_enumerator(idx_from);
1790 if (!en_i.valid())
1791 return size;
1792
1793 size_type i = en_i.value();
1794 if (idx_from + size <= i) // empty space (all zeros)
1795 return size;
1796
1797 size_type extract_cnt =
1798 bv_null->count_range_no_check(idx_from, idx_from + size - 1, *rs_idx_);
1799
1800 BM_ASSERT(extract_cnt <= this->size());
1801 auto ex_sz = sv_.decode(arr_buf_tmp, rank, extract_cnt, true);
1802 BM_ASSERT(ex_sz == extract_cnt); (void) ex_sz;
1803
1804 for (i = 0; i < extract_cnt; ++i)
1805 {
1806 BM_ASSERT(en_i.valid());
1807 size_type en_idx = *en_i;
1808 arr[en_idx-idx_from] = arr_buf_tmp[i];
1809 en_i.advance();
1810 } // for i
1811
1812 return size;
1813}
1814
1815
1816//---------------------------------------------------------------------
1817
1818template<class Val, class SV>
1821 const size_type* idx,
1822 size_type* idx_tmp_buf,
1823 size_type size,
1824 bm::sort_order sorted_idx) const
1825{
1826 BM_ASSERT(arr);
1827 BM_ASSERT(idx);
1828 BM_ASSERT(idx_tmp_buf);
1829 BM_ASSERT(size);
1830
1831 if (size == 1) // corner case: get 1 value
1832 {
1833 arr[0] = this->get(idx[0]);
1834 return size;
1835 }
1836
1837 if (is_dense_) // rank-select idx recalc is not needed (with bounds check)
1838 {
1839 BM_ASSERT(in_sync_);
1840 for (size_type i = 0; i < size; ++i)
1841 {
1842 if (idx[i] < size_)
1843 idx_tmp_buf[i] = idx[i];
1844 else
1845 {
1846 idx_tmp_buf[i] = bm::id_max;
1847 if (sorted_idx == bm::BM_SORTED)
1848 sorted_idx = bm::BM_UNKNOWN; // UNK will evaluate the sort-order
1849 }
1850 } // for i
1851 }
1852 else
1853 {
1854 // validate index, resolve rank addresses
1855 //
1856 for (size_type i = 0; i < size; ++i)
1857 {
1858 size_type sv_idx;
1859 if (resolve(idx[i], &sv_idx))
1860 {
1861 idx_tmp_buf[i] = sv_idx-1;
1862 }
1863 else
1864 {
1865 if (sorted_idx == bm::BM_SORTED)
1866 sorted_idx = bm::BM_UNKNOWN; // UNK will evaluate the sort-order
1867 idx_tmp_buf[i] = bm::id_max;
1868 }
1869 } // for i
1870 }
1871
1872 // gather the data using resolved indexes
1873 //
1874 size = sv_.gather(arr, idx_tmp_buf, size, sorted_idx);
1875 return size;
1876}
1877
1878
1879//---------------------------------------------------------------------
1880
1881template<class Val, class SV>
1883{
1884 if (rs_idx_)
1885 return;
1886 rs_idx_ = (rs_index_type*) bm::aligned_new_malloc(sizeof(rs_index_type));
1887 rs_idx_ = new(rs_idx_) rs_index_type(); // placement new
1888}
1889
1890//---------------------------------------------------------------------
1891
1892template<class Val, class SV>
1893void rsc_sparse_vector<Val, SV>::free_rs_index()
1894{
1895 if (rs_idx_)
1896 {
1897 rs_idx_->~rs_index_type();
1898 bm::aligned_free(rs_idx_);
1899 }
1900}
1901
1902//---------------------------------------------------------------------
1903
1904template<class Val, class SV>
1906 const rsc_sparse_vector<Val, SV>& csv,
1907 size_type left, size_type right)
1908{
1909 if (left > right)
1910 bm::xor_swap(left, right);
1911
1912 if (left >= csv.size())
1913 return;
1914
1915 size_ = csv.size_; max_id_ = csv.max_id_;
1916 in_sync_ = false;
1917
1918 const bvector_type* arg_bv_null = csv.sv_.get_null_bvector();
1919 size_type sv_left, sv_right;
1920 bool range_valid = csv.resolve_range(left, right, &sv_left, &sv_right);
1921 if (!range_valid)
1922 {
1923 sv_.clear_all(true); sv_.resize(size_);
1924 bvector_type* bv_null = sv_.get_null_bvect();
1925 bv_null->copy_range(*arg_bv_null, 0, right);
1926 return;
1927 }
1928 bvector_type* bv_null = sv_.get_null_bvect();
1929 bv_null->copy_range(*arg_bv_null, 0, right); // not NULL vector gets a full copy
1930 sv_.copy_range(csv.sv_, sv_left, sv_right, bm::no_null); // don't copy NULL
1931}
1932
1933
1934//---------------------------------------------------------------------
1935
1936template<class Val, class SV>
1938{
1939 // MUST have the same NULL to work
1940 BM_ASSERT(sv_.get_null_bvector()->equal(*csv.sv_.get_null_bvector()));
1941
1942 sv_.merge(csv.sv_);
1943}
1944
1945//---------------------------------------------------------------------
1946
1947template<class Val, class SV>
1950 size_type left,
1951 size_type right) const BMNOEXCEPT
1952{
1953 if (left > right)
1954 bm::xor_swap(left, right);
1955
1956 const bvector_type* bv_null = sv_.get_null_bvector();
1957 size_type range = right - left;
1958 if ((range < rs3_border0) || !in_sync_)
1959 {
1960 return bv_null->count_range_no_check(left, right);
1961 }
1962 return bv_null->count_range_no_check(left, right, *rs_idx_);
1963}
1964
1965//---------------------------------------------------------------------
1966//
1967//---------------------------------------------------------------------
1968
1969
1970template<class Val, class SV>
1972: csv_(0)
1973{}
1974
1975
1976//---------------------------------------------------------------------
1977
1978template<class Val, class SV>
1981: csv_(csv),
1982 sv_bi_(csv->sv_.get_back_inserter())
1983{
1984 sv_bi_.disable_set_null(); // NULL will be handled outside
1985}
1986
1987//---------------------------------------------------------------------
1988
1989template<class Val, class SV>
1991 const back_insert_iterator& bi)
1992: csv_(bi.csv_),
1993 sv_bi_(bi.sv_bi_)
1994{
1995}
1996
1997
1998//---------------------------------------------------------------------
1999
2000template<class Val, class SV>
2002{
2003 this->flush();
2004}
2005
2006//---------------------------------------------------------------------
2007
2008template<class Val, class SV>
2011{
2012 BM_ASSERT(csv_);
2013 BM_ASSERT(bm::id64_t(csv_->size_) + 1 < bm::id64_t(bm::id_max));
2014
2015 sv_bi_.add_value_no_null(v);
2016 bvector_type* bv_null = sv_bi_.get_null_bvect();
2017 BM_ASSERT(bv_null);
2018 bv_null->set_bit_no_check(csv_->size_);
2019
2020 csv_->max_id_++;
2021 csv_->size_++;
2022}
2023
2024//---------------------------------------------------------------------
2025
2026template<class Val, class SV>
2028{
2029 BM_ASSERT(csv_);
2030 BM_ASSERT(bm::id64_t(csv_->size_) + 1 < bm::id64_t(bm::id_max));
2031
2032 csv_->max_id_++; csv_->size_++;
2033}
2034
2035//---------------------------------------------------------------------
2036
2037template<class Val, class SV>
2040{
2041 BM_ASSERT(csv_);
2042 BM_ASSERT(bm::id64_t(csv_->size_) + count < bm::id64_t(bm::id_max));
2043
2044 csv_->max_id_+=count;
2045 csv_->size_+=count;
2046}
2047
2048//---------------------------------------------------------------------
2049
2050template<class Val, class SV>
2052{
2053 if (sv_bi_.flush())
2054 csv_->in_sync_ = false;
2055}
2056
2057//---------------------------------------------------------------------
2058//
2059//---------------------------------------------------------------------
2060
2061template<class Val, class BV>
2063: csv_(0), pos_(bm::id_max), buf_ptr_(0)
2064{}
2065
2066//---------------------------------------------------------------------
2067
2068template<class Val, class SV>
2071: csv_(it.csv_), pos_(it.pos_), buf_ptr_(0)
2072{}
2073
2074//---------------------------------------------------------------------
2075
2076template<class Val, class SV>
2079 ) BMNOEXCEPT
2080: csv_(csv), buf_ptr_(0)
2081{
2082 BM_ASSERT(csv_);
2083 pos_ = csv_->empty() ? bm::id_max : 0u;
2084}
2085
2086//---------------------------------------------------------------------
2087
2088template<class Val, class SV>
2092: csv_(csv), buf_ptr_(0)
2093{
2094 BM_ASSERT(csv_);
2095 this->go_to(pos);
2096}
2097
2098//---------------------------------------------------------------------
2099
2100template<class Val, class SV>
2102{
2103 pos_ = (!csv_ || pos >= csv_->size()) ? bm::id_max : pos;
2104 buf_ptr_ = 0;
2105}
2106
2107//---------------------------------------------------------------------
2108
2109template<class Val, class SV>
2111{
2112 if (pos_ == bm::id_max) // nothing to do, we are at the end
2113 return false;
2114 ++pos_;
2115 if (pos_ >= csv_->size())
2116 {
2117 this->invalidate();
2118 return false;
2119 }
2120 if (buf_ptr_)
2121 {
2122 ++buf_ptr_;
2123 if (buf_ptr_ - ((value_type*)vbuffer_.data()) >= n_buf_size)
2124 buf_ptr_ = 0;
2125 }
2126 return true;
2127}
2128
2129//---------------------------------------------------------------------
2130
2131template<class Val, class SV>
2134{
2135 BM_ASSERT(this->valid());
2136 value_type v;
2137
2138 if (!buf_ptr_)
2139 {
2140 vbuffer_.reserve(n_buf_size * sizeof(value_type));
2141 tbuffer_.reserve(n_buf_size * sizeof(value_type));
2142 buf_ptr_ = (value_type*)(vbuffer_.data());
2143 value_type* tmp_buf_ptr = (value_type*) (tbuffer_.data());
2144
2145 csv_->decode_buf(buf_ptr_, tmp_buf_ptr, pos_, n_buf_size, true);
2146 }
2147 v = *buf_ptr_;
2148 return v;
2149}
2150
2151//---------------------------------------------------------------------
2152
2153template<class Val, class SV>
2155{
2156 value_type v = value();
2157 if (buf_ptr_)
2158 {
2159 v = *buf_ptr_;
2160 value_type* buf_end = ((value_type*)vbuffer_.data()) + n_buf_size;
2161 while(!v)
2162 {
2163 ++pos_;
2164 if (++buf_ptr_ < buf_end)
2165 v = *buf_ptr_;
2166 else
2167 break;
2168 }
2169 if (pos_ >= csv_->size())
2170 {
2171 pos_ = bm::id_max;
2172 return;
2173 }
2174 if (buf_ptr_ >= buf_end)
2175 buf_ptr_ = 0;
2176 }
2177}
2178
2179//---------------------------------------------------------------------
2180
2181template<class Val, class SV>
2183{
2184 return csv_->is_null(pos_);
2185}
2186
2187
2188//---------------------------------------------------------------------
2189
2190
2191
2192} // namespace bm
2193
2194
2195
2196#endif
Definitions(internal)
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BM_ASSERT
Definition: bmdef.h:139
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:338
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:603
size_type value() const BMNOEXCEPT
Get current position (value)
Definition: bm.h:659
bool advance() BMNOEXCEPT
Definition: bm.h:680
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
Definition: bm.h:283
optmode
Optimization mode Every next level means additional checks (better compression vs time)
Definition: bm.h:133
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:137
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:118
rs_index< allocator_type > rs_index_type
Definition: bm.h:816
Alloc allocator_type
Definition: bm.h:117
Algorithms for rank compression of bit-vector.
Definition: bmalgo.h:453
void decompress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank decompression.
Definition: bmalgo.h:570
void compress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank compression algorithm based on two palallel iterators/enumerators set of source vector gets re-m...
Definition: bmalgo.h:497
Back insert iterator implements buffered insert, faster than generic access assignment.
void flush()
flush the accumulated buffer
rsc_sparse_vector_type * rsc_sparse_vector_type_ptr
rsc_sparse_vector_type::sparse_vector_type sparse_vector_type
add value to the buffer without changing the NULL vector
back_insert_iterator & operator++(int)
noop
void add(value_type v)
add value to the container
rsc_sparse_vector_type::bvector_type bvector_type
rsc_sparse_vector< Val, SV > rsc_sparse_vector_type
rsc_sparse_vector_type::size_type size_type
rsc_sparse_vector_type::value_type value_type
back_insert_iterator & operator=(value_type v)
push value to the vector
void add_null() BMNOEXCEPT
add NULL (no-value) to the container
sparse_vector_type::back_insert_iterator sparse_vector_bi
Const iterator to traverse the rsc sparse vector.
bool advance() BMNOEXCEPT
advance iterator forward by one
rsc_sparse_vector< Val, SV > rsc_sparse_vector_type
size_type pos() const BMNOEXCEPT
Current position (index) in the vector.
rsc_sparse_vector_type::value_type value_type
bool operator!=(const const_iterator &it) const BMNOEXCEPT
bool operator<=(const const_iterator &it) const BMNOEXCEPT
bool operator>(const const_iterator &it) const BMNOEXCEPT
bm::byte_buffer< allocator_type > buffer_type
void go_to(size_type pos) BMNOEXCEPT
re-position to a specified position
bool operator<(const const_iterator &it) const BMNOEXCEPT
bool operator==(const const_iterator &it) const BMNOEXCEPT
const_iterator & operator++(int)
Advance to the next available value.
bool is_null() const BMNOEXCEPT
Get NULL status.
rsc_sparse_vector_type::bvector_type bvector_type
std::input_iterator_tag iterator_category
rsc_sparse_vector_type * rsc_sparse_vector_type_ptr
bool valid() const BMNOEXCEPT
Returns true if iterator is at a valid position.
void invalidate() BMNOEXCEPT
Invalidate current iterator.
bool operator>=(const const_iterator &it) const BMNOEXCEPT
const_iterator & operator++() BMNOEXCEPT
Advance to the next available value.
rsc_sparse_vector_type::size_type size_type
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
value_type operator*() const
Get current position (value)
value_type value() const
Get current position (value)
bvector_type::allocator_type allocator_type
Reference class to access elements via common [] operator.
bool operator==(const reference &ref) const BMNOEXCEPT
bool is_null() const BMNOEXCEPT
reference(rsc_sparse_vector< Val, SV > &csv, size_type idx) BMNOEXCEPT
Rank-Select compressed sparse vector.
static constexpr bool is_str() BMNOEXCEPT
void clear_all(bool free_mem) BMNOEXCEPT
resize to zero, free memory
static constexpr bool is_compressed() BMNOEXCEPT
various type traits
static unsigned stored_slices()
Number of stored bit-slices (value slices + extra.
bvector_type * bvector_type_ptr
const bmatrix_type & get_bmatrix() const BMNOEXCEPT
bool try_get(size_type idx, value_type &v) const BMNOEXCEPT
get specified element with NOT NULL check
bool resolve(size_type idx, size_type *idx_to) const BMNOEXCEPT
Resolve logical address to access via rank compressed address.
void load_from(const sparse_vector_type &sv_src)
Load compressed vector from a sparse vector (with NULLs)
remap_matrix_type * get_remap_matrix()
bool is_nullable() const
if container supports NULL(unassigned) values (true)
bool resolve_sync(size_type idx, size_type *idx_to) const BMNOEXCEPT
void merge_not_null(rsc_sparse_vector< Val, SV > &csv)
merge two vectors (argument gets destroyed) It is important that both vectors have the same NULL vect...
size_type decode(value_type *arr, size_type idx_from, size_type size, bool zero_mem=true) const
C-style decode.
void freeze()
Turn sparse vector into immutable mode Read-only (immutable) vector uses less memory and allows faste...
void push_back(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
void clear(const bvector_type &bv_idx)
Set vector elements spcified by argument bit-vector to zero Note that set to 0 elements are NOT going...
back_insert_iterator get_back_inserter()
void load_to(sparse_vector_type &sv) const
Exort compressed vector to a sparse vector (with NULLs)
SV::bmatrix_type bmatrix_type
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
bm::heap_matrix< unsigned char, 1, 1, typename bvector_type::allocator_type > remap_matrix_type
unused remap matrix type for compatibility with the sparse serializer
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
value_type at(size_type idx) const
access specified element with bounds checking
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, statistics *stat=0)
run memory optimization for all vector slices
void sync(bool force)
Re-calculate rank-select index for faster access to vector.
size_type count_range_notnull(size_type left, size_type right) const BMNOEXCEPT
size_type effective_vector_max() const BMNOEXCEPT
Always 1 (non-matrix type)
const rs_index_type * get_RS() const BMNOEXCEPT
SV::unsigned_value_type unsigned_value_type
rsc_sparse_vector(bm::null_support null_able=bm::use_null, allocation_policy_type ap=allocation_policy_type(), size_type bv_max_size=bm::id_max, const allocator_type &alloc=allocator_type())
void push_back_null()
push back NULL value
void unsync() BMNOEXCEPT
Unsync the prefix sum table.
bool resolve_range(size_type from, size_type to, size_type *idx_from, size_type *idx_to) const BMNOEXCEPT
size_type effective_size() const BMNOEXCEPT
size of internal dense vector
static unsigned_value_type s2u(value_type v) BMNOEXCEPT
Convert signed value type to unsigned representation.
void set_remap() BMNOEXCEPT
void set_null(size_type idx)
set specified element to NULL RSC vector actually erases element when it is set to NULL (expensive).
void inc_not_null(size_type idx, value_type v)
increment specified element by one, element MUST be NOT NULL Faster than just inc() if element is NUL...
bvector_type_const_ptr get_slice(unsigned i) const BMNOEXCEPT
get access to bit-plane, function checks and creates a plane
void resize(size_type new_size)
change vector size
void resize_internal(size_type sz)
size_type decode_buf(value_type *arr, value_type *arr_buf_tmp, size_type idx_from, size_type size, bool zero_mem=true) const BMNOEXCEPT
C-style decode (variant with external memory) Analog of decode, but requires two arrays.
size_type size_internal() const BMNOEXCEPT
SV::const_iterator sparse_vector_const_iterator
bvector_type_ptr get_create_slice(unsigned i)
constexpr bool is_remap() const BMNOEXCEPT
bool equal(const rsc_sparse_vector< Val, SV > &csv) const BMNOEXCEPT
check if another vector has the same content
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
bool empty() const BMNOEXCEPT
return true if vector is empty
const value_type & const_reference
const_iterator get_const_iterator(size_type idx) const BMNOEXCEPT
Get const_itertor re-positioned to specific element.
const bvector_type * bvector_type_const_ptr
size_t remap_size() const BMNOEXCEPT
void sync()
Re-calculate prefix sum table used for rank search (if necessary)
const remap_matrix_type * get_remap_matrix() const
const unsigned char * get_remap_buffer() const BMNOEXCEPT
void set_null(const bvector_type &bv_idx)
Set NULL all elements set as 1 in the argument vector.
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
static value_type u2s(unsigned_value_type v) BMNOEXCEPT
bmatrix_type & get_bmatrix() BMNOEXCEPT
unsigned char * init_remap_buffer() BMNOEXCEPT
void mark_null_idx(unsigned null_idx) BMNOEXCEPT
void calc_stat(struct rsc_sparse_vector< Val, SV >::statistics *st) const BMNOEXCEPT
Calculates memory statistics.
bvector_type::allocator_type allocator_type
rsc_sparse_vector(rsc_sparse_vector< Val, SV > &&csv) BMNOEXCEPT
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values (or NULL)
const sparse_vector_type & get_sv() const BMNOEXCEPT
access dense vector
rsc_sparse_vector< Val, SV > & operator=(rsc_sparse_vector< Val, SV > &&csv) BMNOEXCEPT
void clear() BMNOEXCEPT
resize to zero, free memory
void push_back(value_type v)
add element with automatic resize
unsigned effective_slices() const BMNOEXCEPT
bool try_get_sync(size_type idx, value_type &v) const BMNOEXCEPT
get specified element with NOT NULL check Caller guarantees that the vector is in sync mode (RS-index...
bvector_type::rs_index_type rs_index_type
bool is_sync() const BMNOEXCEPT
returns true if prefix sum table is in sync with the vector
bool find_rank(size_type rank, size_type &idx) const BMNOEXCEPT
find position of compressed element by its rank
size_type size() const BMNOEXCEPT
return size of the vector
bvector_type::enumerator bvector_enumerator_type
static unsigned slices() BMNOEXCEPT
get total number of bit-slices in the vector
void push_back_null(size_type count)
push back specified amount of NULL values
bool in_sync() const BMNOEXCEPT
returns true if prefix sum table is in sync with the vector
void copy_range(const rsc_sparse_vector< Val, SV > &csv, size_type left, size_type right)
copy range of values from another sparse vector
void inc(size_type idx)
increment specified element by one
size_type gather(value_type *arr, const size_type *idx, size_type *idx_tmp_buf, size_type size, bm::sort_order sorted_idx) const
Gather elements to a C-style array.
void push_back_no_check(size_type idx, value_type v)
bool is_ro() const BMNOEXCEPT
Returns true if vector is read-only.
bvector_type::allocation_policy allocation_policy_type
SV::bvector_type bvector_type
void inc(size_type idx, value_type v)
increment specified element by one
void sync_size() BMNOEXCEPT
recalculate size to exclude tail NULL elements After this call size() will return the true size of th...
const_iterator begin() const
Provide const iterator access to container content
sparse vector de-serializer
algorithms for sparse_vector scan/search
sort_order
Sort order declaration.
Definition: bmconst.h:203
null_support
NULL-able value support.
Definition: bmconst.h:228
@ BM_SORTED
input set is sorted (ascending order)
Definition: bmconst.h:205
@ BM_UNKNOWN
sort order unknown
Definition: bmconst.h:207
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:229
@ no_null
do not support NULL values
Definition: bmconst.h:230
Definition: bm.h:78
const unsigned id_max
Definition: bmconst.h:109
unsigned int word_t
Definition: bmconst.h:39
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
Definition: bmalloc.h:465
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
Definition: bmalloc.h:437
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
Definition: bmutil.h:534
unsigned long long int id64_t
Definition: bmconst.h:35
const unsigned rs3_border0
Definition: bmconst.h:119
Structure with statistical information about memory allocation footprint, serialization projection,...
Definition: bmfunc.h:56
size_t memory_used
memory usage for all blocks and service tables
Definition: bmfunc.h:62
memory allocation policy
Definition: bm.h:805
bm::bvector bvector_type
Definition: xsample07a.cpp:94