BitMagic-C++
bmsparsevec_util.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_UTIL_H__INCLUDED__
2#define BMSPARSEVEC_UTIL_H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20
21#include <memory.h>
22#include <stdexcept>
23
24#ifndef BM__H__INCLUDED__
25// BitMagic utility headers do not include main "bm.h" declaration
26// #include "bm.h" or "bm64.h" explicitly
27# error missing include (bm.h or bm64.h)
28#endif
29
30#include "bmserial.h"
31#include "bmdef.h"
32
33namespace bm
34{
35
36
37
38/*!
39 \brief Bit-bector prefix sum address resolver using bit-vector prefix sum
40 as a space compactor.
41
42 @internal
43*/
44template<class BV>
46{
47public:
48 typedef BV bvector_type;
49 typedef typename BV::size_type size_type;
51public:
55
57 {
58 if (this != &addr_res)
59 {
60 addr_bv_ = addr_res.addr_bv_;
61 in_sync_ = addr_res.in_sync_;
62 if (in_sync_)
63 {
64 rs_index_->copy_from(*addr_res.rs_index_);
65 }
66 }
67 return *this;
68 }
69
70 /*!
71 \brief Move content from the argument address resolver
72 */
74
75 /*!
76 \brief Resolve id to integer id (address)
77
78 Resolve id to address with full sync and range checking
79
80 \param id_from - input id to resolve
81 \param id_to - output id
82
83 \return true if id is known and resolved successfully
84 */
85 bool resolve(size_type id_from, size_type* id_to) const BMNOEXCEPT;
86
87 /*!
88 \brief Resolve id to integer id (address) without sync check
89
90 If prefix sum table is not sync - unexpected behavior
91
92 \param id_from - input id to resolve
93 \param id_to - output id
94
95 \return true if id is known and resolved successfully
96 */
97 bool get(size_type id_from, size_type* id_to) const BMNOEXCEPT;
98
99 /*!
100 \brief Set id (bit) to address resolver
101 */
102 void set(size_type id_from);
103
104 /*!
105 \brief Re-calculate prefix sum table
106 \param force - force recalculation even if it is already recalculated
107 */
108 void calc_prefix_sum(bool force = false);
109
110 /*!
111 \brief Re-calculate prefix sum table (same as calc_prefix_sum)
112 \param force - force recalculation even if it is already recalculated
113 @sa calc_prefix_sum
114 */
115 void sync(bool force = false) { calc_prefix_sum(force); }
116
117 /*!
118 \brief returns true if prefix sum table is in sync with the vector
119 */
120 bool in_sync() const { return in_sync_; }
121
122 /*!
123 \brief Unsync the prefix sum table
124 */
125 void unsync() { in_sync_ = false; }
126
127 /*!
128 \brief Get const reference to the underlying bit-vector
129 */
130 const bvector_type& get_bvector() const { return addr_bv_; }
131
132 /*!
133 \brief Get writable reference to the underlying bit-vector
134
135 Access to underlying vector assumes modification and
136 loss of prefix sum acceleration structures.
137 Need to call sync() at the end of transaction.
138 */
139 bvector_type& get_bvector() { unsync(); return addr_bv_; }
140
141 /*!
142 \brief optimize underlying bit-vector
143 */
144 void optimize(bm::word_t* temp_block = 0);
145
146 /*!
147 \brief equality comparison
148 */
149 bool equal(const bvps_addr_resolver& addr_res) const BMNOEXCEPT;
150
151protected:
154
155private:
156 bvector_type addr_bv_; ///< bit-vector for id translation
157 rs_index_type* rs_index_; ///< rank translation index
158 bool in_sync_; ///< flag if prefix sum is in-sync with vector
159};
160
161
162/*!
163 \brief sparse vector based address resolver
164 (no space compactor, just bit-plane compressors provided by sparse_vector)
165
166 @internal
167*/
168template<class SV>
170{
171public:
175public:
177 sv_addr_resolver(const sv_addr_resolver& addr_res);
178
179 /*!
180 \brief Resolve id to integer id (address)
181
182 \param id_from - input id to resolve
183 \param id_to - output id
184
185 \return true if id is known and resolved successfully
186 */
187 bool resolve(size_type id_from, size_type* id_to) const;
188
189 /*!
190 \brief Resolve id to integer id (address)
191
192 \param id_from - input id to resolve
193 \param id_to - output id
194
195 \return true if id is known and resolved successfully
196 */
197 bool get(size_type id_from, size_type* id_to) const;
198
199 /*!
200 \brief Set id (bit) to address resolver
201 */
202 void set(size_type id_from);
203
204 /*!
205 \brief optimize underlying sparse vectors
206 */
207 void optimize(bm::word_t* temp_block = 0);
208
209 /*!
210 \brief Get const reference to the underlying bit-vector of set values
211 */
212 const bvector_type& get_bvector() const { return set_flags_bv_; }
213
214private:
215 bvector_type set_flags_bv_; ///< bit-vector of set flags
216 sparse_vector_type addr_sv_; ///< sparse vector for address map
217 size_type max_addr_; ///< maximum allocated address/index
218};
219
220
221/**
222 \brief Compressed (sparse collection of objects)
223 @internal
224*/
225template<class Value, class BV>
227{
228public:
229 typedef BV bvector_type;
230 typedef typename BV::size_type size_type;
233 typedef Value value_type;
234 typedef Value mapped_type;
235 typedef std::vector<value_type> container_type;
237
238public:
240
241 /**
242 Add new value to compressed collection.
243 Must be added in sorted key order (growing).
244
245 Unsorted will not be added!
246
247 \return true if value was added (does not violate sorted key order)
248 */
249 bool push_back(key_type key, const value_type& val);
250
251 /**
252 find and return const associated value (with bounds/presense checking)
253 */
254 const value_type& at(key_type key) const;
255
256 /**
257 find and return associated value (with bounds/presense checking)
258 */
260
261 /** Checkpoint method to prepare collection for reading
262 */
263 void sync();
264
265 /** perform memory optimizations/compression
266 */
267 void optimize(bm::word_t* temp_block=0);
268
269 /** Resolve key address (index) in the dense vector
270 */
271 bool resolve(key_type key, address_type* addr) const;
272
273 /** Get access to associated value by resolved address
274 */
275 const value_type& get(address_type addr) const;
276
277 /** Get address resolver
278 */
279 const address_resolver_type& resolver() const { return addr_res_; }
280
281 /** Get address resolver
282 */
284
285 /** size of collection
286 */
287 size_t size() const { return dense_vect_.size(); }
288
289 /** perform equality comparison with another collection
290 */
291 bool equal(const compressed_collection<Value, BV>& ccoll) const;
292
293 /** return dense container for direct access
294 (this should be treated as an internal function designed for deserialization)
295 */
297
298protected:
299 void throw_range_error(const char* err_msg) const;
300
301protected:
302 address_resolver_type addr_res_; ///< address resolver
303 container_type dense_vect_; ///< compressed space container
304 key_type last_add_; ///< last added element
305};
306
307/**
308 \brief Compressed (sparse collection of objects)
309 @internal
310*/
311template<class BV>
313 public compressed_collection<typename serializer<BV>::buffer, BV>
314{
315public:
316 typedef BV bvector_type;
319
320 /// collection statistics
322 {
323 size_t memory_used; ///< total capacity
324 size_t max_serialize_mem; ///< memory needed for serialization
325 };
326public:
327
328 /// move external buffer into collection
329 ///
331 {
332 bool added = this->push_back(key, buffer_type());
333 if (!added)
334 return added;
335 buffer_type& buf = this->at(key);
336 buf.swap(buffer);
337 return added;
338 }
339
340 /// compute statistics on memory consumption
341 ///
342 void calc_stat(statistics* st) const
343 {
344 BM_ASSERT(st);
345
346 // evaluate address resolver consumption
347 //
348 typename BV::statistics bv_st;
349 const BV& addr_bv = this->addr_res_.get_bvector();
350 addr_bv.calc_stat(&bv_st);
351 st->memory_used = bv_st.memory_used;
352 st->max_serialize_mem = bv_st.max_serialize_mem;
353
354 // sum-up all buffers
355 for (size_t i = 0; i < this->dense_vect_.size(); ++i)
356 {
357 const buffer_type& buf = this->dense_vect_.at(i);
358 st->memory_used += buf.capacity();
359 st->max_serialize_mem += buf.size();
360 } // for i
361
362 // header needs
363 size_t h_size = 2 + 1 + ((this->dense_vect_.size()+1) * 8);
364 st->max_serialize_mem += h_size;
365
366 // 10% extra on top (safety) for serialization
367 size_t extra_mem = (st->max_serialize_mem / 10);
368 if (!extra_mem)
369 extra_mem = 4096;
370 st->max_serialize_mem += extra_mem;
371 }
372};
373
374
375
376//---------------------------------------------------------------------
377
378template<class BV>
380: rs_index_(0), in_sync_(false)
381{
382 addr_bv_.init();
384}
385
386//---------------------------------------------------------------------
387
388template<class BV>
390{
391 free_rs_index();
392}
393
394//---------------------------------------------------------------------
395
396template<class BV>
398{
399 if (rs_index_)
400 return;
401 rs_index_ =
403 rs_index_ = new(rs_index_) rs_index_type(); // placement new
404}
405
406//---------------------------------------------------------------------
407
408template<class BV>
410{
411 if (rs_index_)
412 {
413 rs_index_->~rs_index();
414 bm::aligned_free(rs_index_);
415 rs_index_ = 0;
416 }
417}
418
419//---------------------------------------------------------------------
420
421template<class BV>
423: addr_bv_(addr_res.addr_bv_),
424 in_sync_(addr_res.in_sync_)
425{
426 rs_index_ = 0;
428
429 if (in_sync_)
430 {
431 rs_index_->copy_from(*addr_res.rs_index_);
432 }
433 addr_bv_.init();
434}
435
436//---------------------------------------------------------------------
437
438
439template<class BV>
441{
442 if (this != &addr_res)
443 {
444 addr_bv_.move_from(addr_res.addr_bv_);
445 in_sync_ = addr_res.in_sync_;
446 if (in_sync_)
447 {
448 free_rs_index();
449 rs_index_ = addr_res.rs_index_;
450 addr_res.rs_index_ = 0;
451 }
452 }
453 else
454 {
455 rs_index_ = 0; in_sync_ = false;
456 }
457}
458
459//---------------------------------------------------------------------
460
461template<class BV>
463 size_type* id_to) const BMNOEXCEPT
464{
465 BM_ASSERT(id_to);
466 if (in_sync_)
467 {
468 *id_to = addr_bv_.count_to_test(id_from, *rs_index_);
469 }
470 else // slow access
471 {
472 bool found = addr_bv_.test(id_from);
473 if (!found)
474 {
475 *id_to = 0;
476 }
477 else
478 {
479 *id_to = addr_bv_.count_range(0, id_from);
480 }
481 }
482 return (bool) *id_to;
483}
484
485//---------------------------------------------------------------------
486
487template<class BV>
489 size_type* id_to) const BMNOEXCEPT
490{
491 BM_ASSERT(id_to);
492 BM_ASSERT(in_sync_);
493
494 *id_to = addr_bv_.count_to_test(id_from, *rs_index_);
495 return (bool)*id_to;
496}
497
498//---------------------------------------------------------------------
499
500template<class BV>
502{
503 BM_ASSERT(!addr_bv_.test(id_from));
504
505 in_sync_ = false;
506 addr_bv_.set(id_from);
507}
508
509
510//---------------------------------------------------------------------
511
512
513template<class BV>
515{
516 if (in_sync_ && force == false)
517 return; // nothing to do
518
519 addr_bv_.build_rs_index(rs_index_); // compute popcount prefix list
520 in_sync_ = true;
521}
522
523//---------------------------------------------------------------------
524
525template<class BV>
527{
528 addr_bv_.optimize(temp_block);
529}
530
531//---------------------------------------------------------------------
532
533template<class BV>
535 const bvps_addr_resolver& addr_res) const BMNOEXCEPT
536{
537 return addr_bv_.equal(addr_res.addr_bv_);
538}
539
540//---------------------------------------------------------------------
541//
542//---------------------------------------------------------------------
543
544
545
546template<class SV>
548: max_addr_(0)
549{
550 set_flags_bv_.init();
551}
552
553//---------------------------------------------------------------------
554
555template<class SV>
557: set_flags_bv_(addr_res.set_flags_bv_),
558 addr_sv_(addr_res.addr_sv_),
559 max_addr_(addr_res.max_addr_)
560{
561}
562
563//---------------------------------------------------------------------
564
565template<class SV>
567{
568 BM_ASSERT(id_to);
569
570 bool found = set_flags_bv_.test(id_from);
571 *id_to = found ? addr_sv_.at(id_from) : 0;
572 return found;
573}
574
575//---------------------------------------------------------------------
576
577template<class SV>
579{
580 bool found = set_flags_bv_.test(id_from);
581 if (!found)
582 {
583 set_flags_bv_.set(id_from);
584 ++max_addr_;
585 addr_sv_.set(id_from, max_addr_);
586 }
587}
588
589//---------------------------------------------------------------------
590
591template<class SV>
593{
594 set_flags_bv_.optimize(temp_block);
595 addr_sv_.optimize(temp_block);
596}
597
598//---------------------------------------------------------------------
599
600
601template<class Value, class BV>
603: last_add_(0)
604{
605}
606
607//---------------------------------------------------------------------
608
609template<class Value, class BV>
611{
612 if (dense_vect_.empty()) // adding first one
613 {
614 }
615 else
616 if (key <= last_add_)
617 {
618 BM_ASSERT(0);
619 return false;
620 }
621
622 addr_res_.set(key);
623 dense_vect_.push_back(val);
624 last_add_ = key;
625 return true;
626}
627
628//---------------------------------------------------------------------
629
630template<class Value, class BV>
632{
633 addr_res_.sync();
634}
635
636//---------------------------------------------------------------------
637
638template<class Value, class BV>
640{
641 addr_res_.optimize(temp_block);
642}
643
644//---------------------------------------------------------------------
645
646template<class Value, class BV>
648 address_type* addr) const
649{
650 bool found = addr_res_.resolve(key, addr);
651 *addr -= found;
652 return found;
653}
654
655//---------------------------------------------------------------------
656
657
658template<class Value, class BV>
661{
662 return dense_vect_.at(addr);
663}
664
665//---------------------------------------------------------------------
666
667template<class Value, class BV>
670{
671 address_type idx;
672 bool found = addr_res_.resolve(key, &idx);
673 if (!found)
674 {
675 throw_range_error("compressed collection item not found");
676 }
677 return get(idx-1);
678}
679
680//---------------------------------------------------------------------
681
682template<class Value, class BV>
685{
686 address_type idx;
687 bool found = addr_res_.resolve(key, &idx);
688 if (!found)
689 {
690 throw_range_error("compressed collection item not found");
691 }
692 return dense_vect_.at(idx-1);
693}
694
695
696//---------------------------------------------------------------------
697
698template<class Value, class BV>
700{
701#ifndef BM_NO_STL
702 throw std::range_error(err_msg);
703#else
704 BM_ASSERT_THROW(false, BM_ERR_RANGE);
705#endif
706}
707
708//---------------------------------------------------------------------
709
710template<class Value, class BV>
712 const compressed_collection<Value, BV>& ccoll) const
713{
714 const bvector_type& bv = addr_res_.get_bvector();
715 const bvector_type& bva = ccoll.addr_res_.get_bvector();
716
717 int cmp = bv.compare(bva);
718 if (cmp != 0)
719 return false;
720 BM_ASSERT(dense_vect_.size() == ccoll.dense_vect_.size());
721 for (size_t i = 0; i < dense_vect_.size(); ++i)
722 {
723 const value_type& v = dense_vect_[i];
724 const value_type& va = ccoll.dense_vect_[i];
725 if (!(v == va))
726 return false;
727 }
728 return true;
729}
730
731//---------------------------------------------------------------------
732
733
734} // namespace bm
735
736
737
738#endif
Definitions(internal)
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BM_ASSERT
Definition: bmdef.h:139
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:338
Serialization / compression of bvector<>. Set theoretical operations on compressed BLOBs.
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
bvector_size_type size_type
Definition: bm.h:121
rs_index< allocator_type > rs_index_type
Definition: bm.h:816
Bit-bector prefix sum address resolver using bit-vector prefix sum as a space compactor.
void unsync()
Unsync the prefix sum table.
void sync(bool force=false)
Re-calculate prefix sum table (same as calc_prefix_sum)
void calc_prefix_sum(bool force=false)
Re-calculate prefix sum table.
bool in_sync() const
returns true if prefix sum table is in sync with the vector
bvps_addr_resolver(const bvps_addr_resolver &addr_res)
bvector_type::rs_index_type rs_index_type
bvps_addr_resolver & operator=(const bvps_addr_resolver &addr_res)
bool resolve(size_type id_from, size_type *id_to) const BMNOEXCEPT
Resolve id to integer id (address)
void optimize(bm::word_t *temp_block=0)
optimize underlying bit-vector
bool get(size_type id_from, size_type *id_to) const BMNOEXCEPT
Resolve id to integer id (address) without sync check.
void set(size_type id_from)
Set id (bit) to address resolver.
void move_from(bvps_addr_resolver &addr_res) BMNOEXCEPT
Move content from the argument address resolver.
bool equal(const bvps_addr_resolver &addr_res) const BMNOEXCEPT
equality comparison
bvector_type & get_bvector()
Get writable reference to the underlying bit-vector.
const bvector_type & get_bvector() const
Get const reference to the underlying bit-vector.
Compressed (sparse collection of objects)
void calc_stat(statistics *st) const
compute statistics on memory consumption
serializer< BV >::buffer buffer_type
bool move_buffer(typename parent_type::key_type key, buffer_type &buffer)
move external buffer into collection
compressed_collection< typename serializer< BV >::buffer, BV > parent_type
Compressed (sparse collection of objects)
address_resolver_type & resolver()
Get address resolver.
bool push_back(key_type key, const value_type &val)
Add new value to compressed collection.
void optimize(bm::word_t *temp_block=0)
perform memory optimizations/compression
container_type & container()
return dense container for direct access (this should be treated as an internal function designed for...
bm::bvps_addr_resolver< bvector_type > address_resolver_type
void sync()
Checkpoint method to prepare collection for reading.
value_type & at(key_type key)
find and return associated value (with bounds/presense checking)
size_t size() const
size of collection
key_type last_add_
last added element
container_type dense_vect_
compressed space container
bool equal(const compressed_collection< Value, BV > &ccoll) const
perform equality comparison with another collection
const value_type & at(key_type key) const
find and return const associated value (with bounds/presense checking)
const value_type & get(address_type addr) const
Get access to associated value by resolved address.
void throw_range_error(const char *err_msg) const
address_resolver_type addr_res_
address resolver
bool resolve(key_type key, address_type *addr) const
Resolve key address (index) in the dense vector.
const address_resolver_type & resolver() const
Get address resolver.
std::vector< value_type > container_type
byte_buffer< allocator_type > buffer
Definition: bmserial.h:85
sparse vector based address resolver (no space compactor, just bit-plane compressors provided by spar...
bvector_type::size_type size_type
const bvector_type & get_bvector() const
Get const reference to the underlying bit-vector of set values.
SV::bvector_type bvector_type
bool get(size_type id_from, size_type *id_to) const
Resolve id to integer id (address)
bool resolve(size_type id_from, size_type *id_to) const
Resolve id to integer id (address)
void set(size_type id_from)
Set id (bit) to address resolver.
void optimize(bm::word_t *temp_block=0)
optimize underlying sparse vectors
Definition: bm.h:78
unsigned int word_t
Definition: bmconst.h:39
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
Definition: bmalloc.h:465
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
Definition: bmalloc.h:437
size_t max_serialize_mem
memory needed for serialization
bm::bvector bvector_type
Definition: xsample07a.cpp:94