BitMagic-C++
bmalgo.h
Go to the documentation of this file.
1 #ifndef BMALGO__H__INCLUDED__
2 #define BMALGO__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmalgo.h
22  \brief Algorithms for bvector<> (main include)
23 */
24 
25 #include "bm.h"
26 #include "bmfunc.h"
27 #include "bmdef.h"
28 
29 #include "bmalgo_impl.h"
30 
31 
32 
33 namespace bm
34 {
35 
36 #define BM_SCANNER_OP(x) \
37  if (0 != (block = blk_blk[j+x])) \
38  { \
39  if (BM_IS_GAP(block)) \
40  { \
41  bm::for_each_gap_blk(BMGAP_PTR(block), (r+j+x)*bm::bits_in_block,\
42  bit_functor); \
43  } \
44  else \
45  { \
46  block = BLOCK_ADDR_SAN(block);\
47  bm::for_each_bit_blk(block, (r+j+x)*bm::bits_in_block,bit_functor); \
48  } \
49  }
50 
51 
52 /**
53  @brief bit-vector visitor scanner to traverse each 1 bit using C++ visitor
54 
55  @param bv - bit vector to scan
56  @param bit_functor (should support add_bits() and add_range() methods
57 
58  \ingroup setalgo
59 */
60 template<class BV, class Func>
61 void for_each_bit(const BV& bv,
62  Func& bit_functor)
63 {
64  const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
65  bm::word_t*** blk_root = bman.top_blocks_root();
66 
67  if (!blk_root)
68  return;
69 
70  unsigned tsize = bman.top_block_size();
71  for (unsigned i = 0; i < tsize; ++i)
72  {
73  bm::word_t** blk_blk = blk_root[i];
74  if (!blk_blk)
75  {
76  continue;
77  }
78  const bm::word_t* block;
79  unsigned r = i * bm::set_array_size;
80  unsigned j = 0;
81  do
82  {
83  #ifdef BM64_AVX2
84  if (!avx2_test_all_zero_wave(blk_blk + j))
85  {
86  BM_SCANNER_OP(0)
87  BM_SCANNER_OP(1)
88  BM_SCANNER_OP(2)
89  BM_SCANNER_OP(3)
90  }
91  j += 4;
92  #elif defined(BM64_SSE4)
93  if (!sse42_test_all_zero_wave(blk_blk + j))
94  {
95  BM_SCANNER_OP(0)
96  BM_SCANNER_OP(1)
97  }
98  j += 2;
99  #else
100  BM_SCANNER_OP(0)
101  ++j;
102  #endif
103 
104  } while (j < bm::set_array_size);
105 
106  } // for i
107 }
108 
109 #undef BM_SCANNER_OP
110 
111 /**
112  @brief bit-vector visitor scanner to traverse each 1 bit using C callback
113 
114  @param bv - bit vector to scan
115  @param handle_ptr - handle to private memory used by callback
116  @param callback_ptr - callback function
117 
118  \ingroup setalgo
119 
120  @sa bit_visitor_callback_type
121 */
122 template<class BV>
123 void visit_each_bit(const BV& bv,
124  void* handle_ptr,
125  bit_visitor_callback_type callback_ptr)
126 {
127  // private adaptor for C-style callbacks
128  struct callback_adaptor
129  {
130  callback_adaptor(void* h, bit_visitor_callback_type cb_func)
131  : handle_(h), func_(cb_func)
132  {}
133 
134  void add_bits(bm::id_t offset, const unsigned char* bits, unsigned size)
135  {
136  for (unsigned i = 0; i < size; ++i)
137  func_(handle_, offset + bits[i]);
138  }
139  void add_range(bm::id_t offset, unsigned size)
140  {
141  for (unsigned i = 0; i < size; ++i)
142  func_(handle_, offset + i);
143  }
144 
145  void* handle_;
147  };
148 
149  callback_adaptor func(handle_ptr, callback_ptr);
150  bm::for_each_bit(bv, func);
151 }
152 
153 
154 /**
155  Algorithms for rank compression of bit-vector
156 
157  1. Source vector (bv_src) is a subset of index vector (bv_idx)
158  2. As a subset it can be collapsed using bit-rank method, where each position
159  in the source vector is defined by population count (range) [0..index_position] (count_range())
160  As a result all integer set of source vector gets re-mapped in
161  accord with the index vector.
162 
163  \ingroup setalgo
164 */
165 template<typename BV>
167 {
168 public:
169  typedef BV bvector_type;
170  typedef typename BV::rs_index_type rs_index_type;
172  {
173  n_buffer_cap = 1024
174  };
175 public:
176 
177  /**
178  Rank decompression
179  */
180  void decompress(BV& bv_target, const BV& bv_idx, const BV& bv_src);
181 
182  /**
183  Rank compression algorithm based on two palallel iterators/enumerators set of source
184  vector gets re-mapped in accord with the index/rank vector.
185 
186  \param bv_target - target bit-vector
187  \param bv_idx - index (rank) vector used for address recalculation
188  \param bv_src - source vector for re-mapping
189  */
190  void compress(BV& bv_target, const BV& bv_idx, const BV& bv_src);
191 
192  /**
193  \brief Source vector priority + index based rank
194 
195  @sa compress
196  */
197  void compress_by_source(BV& bv_target,
198  const BV& bv_idx,
199  const rs_index_type& bc_idx,
200  const BV& bv_src);
201 };
202 
203 
204 // ------------------------------------------------------------------------
205 //
206 // ------------------------------------------------------------------------
207 
208 
209 template<class BV>
210 void rank_compressor<BV>::compress(BV& bv_target,
211  const BV& bv_idx,
212  const BV& bv_src)
213 {
214  bv_target.clear();
215  bv_target.init();
216 
217  if (&bv_idx == &bv_src)
218  {
219  bv_target = bv_src;
220  return;
221  }
222  bm::id_t ibuffer[n_buffer_cap];
223  bm::id_t b_size;
224 
225  typedef typename BV::enumerator enumerator_t;
226  enumerator_t en_s = bv_src.first();
227  enumerator_t en_i = bv_idx.first();
228 
229  bm::id_t r_idx = b_size = 0;
230  bm::id_t i, s;
231 
232  for (; en_i.valid(); )
233  {
234  if (!en_s.valid())
235  break;
236  i = *en_i; s = *en_s;
237 
238  BM_ASSERT(s >= i);
239  BM_ASSERT(bv_idx.test(i));
240 
241  if (i == s)
242  {
243  ibuffer[b_size++] = r_idx++;
244  if (b_size == n_buffer_cap)
245  {
246  bm::combine_or(bv_target, ibuffer+0, ibuffer+b_size);
247  b_size ^= b_size; // = 0
248  }
249  ++en_i; ++en_s;
250  continue;
251  }
252  BM_ASSERT(s > i);
253 
254  bm::id_t dist = s - i;
255  if (dist >= 64) // sufficiently far away, jump
256  {
257  bm::id_t r_dist = bv_idx.count_range(i + 1, s);
258  r_idx += r_dist;
259  en_i.go_to(s);
260  BM_ASSERT(en_i.valid());
261  }
262  else // small distance, iterate to close the gap
263  {
264  for (; s > i; ++r_idx)
265  {
266  ++en_i;
267  i = *en_i;
268  } // for
269  BM_ASSERT(en_i.valid());
270  }
271  } // for
272 
273  if (b_size)
274  {
275  bm::combine_or(bv_target, ibuffer+0, ibuffer+b_size);
276  }
277 
278 }
279 
280 // ------------------------------------------------------------------------
281 
282 
283 template<class BV>
285  const BV& bv_idx,
286  const BV& bv_src)
287 {
288  bv_target.clear();
289  bv_target.init();
290 
291  if (&bv_idx == &bv_src)
292  {
293  bv_target = bv_src;
294  return;
295  }
296 
297  bm::id_t r_idx, i, s, b_size;
298  bm::id_t ibuffer[n_buffer_cap];
299 
300  b_size = r_idx = 0;
301 
302  typedef typename BV::enumerator enumerator_t;
303  enumerator_t en_s = bv_src.first();
304  enumerator_t en_i = bv_idx.first();
305  for (; en_i.valid(); )
306  {
307  if (!en_s.valid())
308  break;
309  s = *en_s;
310  i = *en_i;
311  if (s == r_idx)
312  {
313  ibuffer[b_size++] = i;
314  if (b_size == n_buffer_cap)
315  {
316  bm::combine_or(bv_target, ibuffer+0, ibuffer+b_size);
317  b_size ^= b_size; // = 0
318  }
319  ++en_i; ++en_s; ++r_idx;
320  continue;
321  }
322  // source is "faster" than index, need to re-align
323  BM_ASSERT(s > r_idx);
324  unsigned rank = s - r_idx + 1u;
325  unsigned new_pos = 0;
326 
327  if (rank < 256)
328  {
329  en_i.skip(s - r_idx);
330  BM_ASSERT(en_i.valid());
331  new_pos = *en_i;
332  }
333  else
334  {
335  bv_idx.find_rank(rank, i, new_pos);
336  BM_ASSERT(new_pos);
337  en_i.go_to(new_pos);
338  BM_ASSERT(en_i.valid());
339  }
340 
341  r_idx = s;
342  ibuffer[b_size++] = new_pos;
343  if (b_size == n_buffer_cap)
344  {
345  bm::combine_or(bv_target, ibuffer+0, ibuffer+b_size);
346  b_size ^= b_size; // = 0
347  }
348  ++en_i; ++en_s; ++r_idx;
349 
350  } // for en
351 
352  if (b_size)
353  {
354  bm::combine_or(bv_target, ibuffer+0, ibuffer+b_size);
355  }
356 }
357 
358 // ------------------------------------------------------------------------
359 
360 template<class BV>
362  const BV& bv_idx,
363  const rs_index_type& bc_idx,
364  const BV& bv_src)
365 {
366  /// Rank compressor visitor (functor)
367  /// @internal
368  struct visitor_func
369  {
370  visitor_func(bvector_type& bv_out,
371  const bvector_type& bv_index,
372  const rs_index_type& bc_index)
373  : bv_target_(bv_out),
374  bv_index_(bv_index),
375  bc_index_(bc_index)
376  {}
377 
378  void add_bits(bm::id_t arr_offset, const unsigned char* bits, unsigned bits_size)
379  {
380  for (unsigned i = 0; i < bits_size; ++i)
381  {
382  bm::id_t idx = arr_offset + bits[i];
383  BM_ASSERT(bv_index_.test(idx));
384 
385  bm::id_t r_idx = bv_index_.count_to(idx, bc_index_) - 1;
386  bv_target_.set_bit_no_check(r_idx);
387  }
388  }
389  void add_range(bm::id_t arr_offset, unsigned sz)
390  {
391  for (unsigned i = 0; i < sz; ++i)
392  {
393  bm::id_t idx = i + arr_offset;
394  BM_ASSERT(bv_index_.test(idx));
395 
396  bm::id_t r_idx = bv_index_.count_to(idx, bc_index_) - 1;
397  bv_target_.set_bit_no_check(r_idx);
398  }
399  }
400 
401  bvector_type& bv_target_;
402  const bvector_type& bv_index_;
403  const rs_index_type& bc_index_;
404  };
405  // ------------------------------------
406 
407  bv_target.clear();
408  bv_target.init();
409 
410  if (&bv_idx == &bv_src)
411  {
412  bv_target = bv_src;
413  return;
414  }
415  visitor_func func(bv_target, bv_idx, bc_idx);
416  bm::for_each_bit(bv_src, func);
417 }
418 
419 
420 
421 } // bm
422 
423 #include "bmundef.h"
424 
425 #endif
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
BV::rs_index_type rs_index_type
Definition: bmalgo.h:170
void compress_by_source(BV &bv_target, const BV &bv_idx, const rs_index_type &bc_idx, const BV &bv_src)
Source vector priority + index based rank.
Definition: bmalgo.h:361
Definition: bm.h:69
Algorithms for rank compression of bit-vector.
Definition: bmalgo.h:166
pre-processor un-defines to avoid global space pollution (internal)
int(* bit_visitor_callback_type)(void *handle_ptr, bm::id_t bit_idx)
Callback type to visit (callback style) bits in bit-vector(s)
Definition: bm.h:65
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1134
BMFORCEINLINE bool sse42_test_all_zero_wave(const void *ptr)
check if wave of pointers is all NULL
Definition: bmsse4.h:584
#define BM_SCANNER_OP(x)
Definition: bmalgo.h:36
unsigned int word_t
Definition: bmconst.h:36
void for_each_bit(const BV &bv, Func &bit_functor)
bit-vector visitor scanner to traverse each 1 bit using C++ visitor
Definition: bmalgo.h:61
Algorithms for bvector<>
void decompress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank decompression.
Definition: bmalgo.h:284
Bit manipulation primitives (internal)
Definitions(internal)
const unsigned set_array_size
Definition: bmconst.h:82
unsigned int id_t
Definition: bmconst.h:35
void compress(BV &bv_target, const BV &bv_idx, const BV &bv_src)
Rank compression algorithm based on two palallel iterators/enumerators set of source vector gets re-m...
Definition: bmalgo.h:210
#define BM_ASSERT
Definition: bmdef.h:116
void visit_each_bit(const BV &bv, void *handle_ptr, bit_visitor_callback_type callback_ptr)
bit-vector visitor scanner to traverse each 1 bit using C callback
Definition: bmalgo.h:123