BitMagic-C++
bmaggregator.h
Go to the documentation of this file.
1 #ifndef BMAGGREGATOR__H__INCLUDED__
2 #define BMAGGREGATOR__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmaggregator.h
22  \brief Algorithms for fast aggregation of N bvectors
23 */
24 
25 #include "bm.h"
26 #include "bmfunc.h"
27 #include "bmdef.h"
28 
29 #include "bmalgo_impl.h"
30 
31 
32 namespace bm
33 {
34 
35 
36 /**
37  Algorithms for fast aggregation of a group of bit-vectors
38 
39  Current implementation can aggregate up to max_aggregator_cap vectors
40  (TODO: remove this limitation)
41 
42  Algorithms of this class use cache locality optimizations and efficient
43  on cases, wehen we need to apply the same logical operation (aggregate)
44  more than 2x vectors.
45 
46  TARGET = BV1 or BV2 or BV3 or BV4 ...
47 
48  @ingroup setalgo
49 */
50 template<typename BV>
52 {
53 public:
54  typedef BV bvector_type;
55  typedef const bvector_type* bvector_type_const_ptr;
57 
59 
60  /// Maximum aggregation capacity in one pass
61  enum max_size
62  {
64  };
65 
66  /// Codes for aggregation operations which can be pipelined for efficient execution
67  ///
68  enum operation
69  {
72  };
73 
75  {
80  };
81 
82 public:
83 
84  /*! @name Construction and setup */
85  //@{
86  aggregator();
87  ~aggregator();
88 
89  /**
90  \brief set on-the-fly bit-block compression
91  By default aggregator does not try to optimize result, but in some cases
92  it can be quite a lot faster than calling bvector<>::optimize() later
93  (because block data sits in CPU cache).
94 
95  \param opt - optimization mode (full compression by default)
96  */
99  { opt_mode_ = opt; }
100  //@}
101 
102 
103  // -----------------------------------------------------------------------
104 
105  /*! @name Methods to setup argument groups and run operations on groups */
106  //@{
107 
108  /**
109  Attach source bit-vector to a argument group (0 or 1).
110  Arg group 1 used for fused operations like (AND-SUB)
111  \param bv - input bit-vector pointer to attach
112  \param agr_group - input argument group (0 - default, 1 - fused op)
113 
114  @return current arg group size (0 if vector was not added (empty))
115  @sa reset
116  */
117  unsigned add(const bvector_type* bv, unsigned agr_group = 0);
118 
119  /**
120  Reset aggregate groups, forget all attached vectors
121  */
122  void reset();
123 
124  /**
125  Aggregate added group of vectors using logical OR
126  Operation does NOT perform an explicit reset of arg group(s)
127 
128  \param bv_target - target vector (input is arg group 0)
129 
130  @sa add, reset
131  */
132  void combine_or(bvector_type& bv_target);
133 
134  /**
135  Aggregate added group of vectors using logical AND
136  Operation does NOT perform an explicit reset of arg group(s)
137 
138  \param bv_target - target vector (input is arg group 0)
139 
140  @sa add, reset
141  */
142  void combine_and(bvector_type& bv_target);
143 
144  /**
145  Aggregate added group of vectors using fused logical AND-SUB
146  Operation does NOT perform an explicit reset of arg group(s)
147 
148  \param bv_target - target vector (input is arg group 0(AND), 1(SUB) )
149 
150  \return true if anything was found
151 
152  @sa add, reset
153  */
154  bool combine_and_sub(bvector_type& bv_target);
155 
156 
157  /**
158  Aggregate added group of vectors using fused logical AND-SUB
159  Operation does NOT perform an explicit reset of arg group(s)
160  Operation can terminate early if anything was found.
161 
162  \param bv_target - target vector (input is arg group 0(AND), 1(SUB) )
163  \param any - if true, search result will terminate of first found result
164 
165  \return true if anything was found
166 
167  @sa add, reset
168  */
169  bool combine_and_sub(bvector_type& bv_target, bool any);
170 
171  bool find_first_and_sub(bm::id_t& idx);
172 
173 
174  /**
175  Aggregate added group of vectors using SHIFT-RIGHT and logical AND
176  Operation does NOT perform an explicit reset of arg group(s)
177 
178  \param bv_target - target vector (input is arg group 0)
179 
180  @return bool if anything was found
181 
182  @sa add, reset
183  */
184  void combine_shift_right_and(bvector_type& bv_target);
185 
186  /**
187  Set search hint for the range, where results needs to be searched
188  (experimental for internal use).
189  */
190  void set_range_hint(bm::id_t from, bm::id_t to);
191 
192  //@}
193 
194  // -----------------------------------------------------------------------
195 
196  /*! @name Logical operations (C-style interface) */
197  //@{
198 
199  /**
200  Aggregate group of vectors using logical OR
201  \param bv_target - target vector
202  \param bv_src - array of pointers on bit-vector aggregate arguments
203  \param src_size - size of bv_src (how many vectors to aggregate)
204  */
205  void combine_or(bvector_type& bv_target,
206  const bvector_type_const_ptr* bv_src, unsigned src_size);
207 
208  /**
209  Aggregate group of vectors using logical AND
210  \param bv_target - target vector
211  \param bv_src - array of pointers on bit-vector aggregate arguments
212  \param src_size - size of bv_src (how many vectors to aggregate)
213  */
214  void combine_and(bvector_type& bv_target,
215  const bvector_type_const_ptr* bv_src, unsigned src_size);
216 
217  /**
218  Fusion aggregate group of vectors using logical AND MINUS another set
219 
220  \param bv_target - target vector
221  \param bv_src_and - array of pointers on bit-vectors for AND
222  \param src_and_size - size of AND group
223  \param bv_src_sub - array of pointers on bit-vectors for SUBstract
224  \param src_sub_size - size of SUB group
225  \param any - flag if caller needs any results asap (incomplete results)
226 
227  \return true when found
228  */
229  bool combine_and_sub(bvector_type& bv_target,
230  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
231  const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size,
232  bool any);
233 
234  bool find_first_and_sub(bm::id_t& idx,
235  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
236  const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size);
237 
238  /**
239  Fusion aggregate group of vectors using SHIFT right with AND
240 
241  \param bv_target - target vector
242  \param bv_src_and - array of pointers on bit-vectors for AND masking
243  \param src_and_size - size of AND group
244  \param any - flag if caller needs any results asap (incomplete results)
245 
246  \return true when found
247  */
248  bool combine_shift_right_and(bvector_type& bv_target,
249  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
250  bool any);
251 
252 
253  //@}
254 
255  // -----------------------------------------------------------------------
256 
257  /*! @name Horizontal Logical operations used for tests (C-style interface) */
258  //@{
259 
260  /**
261  Horizontal OR aggregation (potentially slower) method.
262  \param bv_target - target vector
263  \param bv_src - array of pointers on bit-vector aggregate arguments
264  \param src_size - size of bv_src (how many vectors to aggregate)
265  */
266  void combine_or_horizontal(bvector_type& bv_target,
267  const bvector_type_const_ptr* bv_src, unsigned src_size);
268  /**
269  Horizontal AND aggregation (potentially slower) method.
270  \param bv_target - target vector
271  \param bv_src - array of pointers on bit-vector aggregate arguments
272  \param src_size - size of bv_src (how many vectors to aggregate)
273  */
274  void combine_and_horizontal(bvector_type& bv_target,
275  const bvector_type_const_ptr* bv_src, unsigned src_size);
276 
277  /**
278  Horizontal AND-SUB aggregation (potentially slower) method.
279  \param bv_target - target vector
280  \param bv_src_and - array of pointers on bit-vector to AND aggregate
281  \param src_and_size - size of bv_src_and
282  \param bv_src_sub - array of pointers on bit-vector to SUB aggregate
283  \param src_sub_size - size of bv_src_sub
284  */
285  void combine_and_sub_horizontal(bvector_type& bv_target,
286  const bvector_type_const_ptr* bv_src_and,
287  unsigned src_and_size,
288  const bvector_type_const_ptr* bv_src_sub,
289  unsigned src_sub_size);
290 
291  //@}
292 
293 
294  // -----------------------------------------------------------------------
295 
296  /*! @name Pipeline operations */
297  //@{
298 
299  /** Get current operation code */
300  int get_operation() const { return operation_; }
301 
302  /** Set operation code for the aggregator */
303  void set_operation(int op_code) { operation_ = op_code; }
304 
305  /**
306  Prepare operation, create internal resources, analyse dependencies.
307  Prerequisites are: that operation is set and all argument vectors are added
308  */
309  void stage(bm::word_t* temp_block);
310 
311  /**
312  Run a step of current arrgegation operation
313  */
314  operation_status run_step(unsigned i, unsigned j);
315 
316  operation_status get_operation_status() const { return operation_status_; }
317 
318  const bvector_type* get_target() const { return bv_target_; }
319 
320  bm::word_t* get_temp_block() { return ar_->tb1; }
321 
322  //@}
323 
324 protected:
326 
327 
328  void combine_or(unsigned i, unsigned j,
329  bvector_type& bv_target,
330  const bvector_type_const_ptr* bv_src, unsigned src_size);
331 
332  void combine_and(unsigned i, unsigned j,
333  bvector_type& bv_target,
334  const bvector_type_const_ptr* bv_src, unsigned src_size);
335 
336  digest_type combine_and_sub(unsigned i, unsigned j,
337  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
338  const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size);
339 
340  void prepare_shift_right_and(bvector_type& bv_target,
341  const bvector_type_const_ptr* bv_src, unsigned src_size);
342 
343  bool combine_shift_right_and(unsigned i, unsigned j,
344  bvector_type& bv_target,
345  const bvector_type_const_ptr* bv_src, unsigned src_size);
346 
347  static
348  unsigned resize_target(bvector_type& bv_target,
349  const bvector_type_const_ptr* bv_src,
350  unsigned src_size,
351  bool init_clear = true);
352 
353  static
354  unsigned max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size);
355 
356  bm::word_t* sort_input_blocks_or(const bvector_type_const_ptr* bv_src,
357  unsigned src_size,
358  unsigned i, unsigned j,
359  unsigned* arg_blk_count,
360  unsigned* arg_blk_gap_count);
361 
362  bm::word_t* sort_input_blocks_and(const bvector_type_const_ptr* bv_src,
363  unsigned src_size,
364  unsigned i, unsigned j,
365  unsigned* arg_blk_count,
366  unsigned* arg_blk_gap_count);
367 
368 
369  bool process_bit_blocks_or(blocks_manager_type& bman_target,
370  unsigned i, unsigned j, unsigned block_count);
371 
372  void process_gap_blocks_or(unsigned block_count);
373 
374  digest_type process_bit_blocks_and(unsigned block_count, digest_type digest);
375 
376  digest_type process_gap_blocks_and(unsigned block_count, digest_type digest);
377 
378  bool test_gap_blocks_and(unsigned block_count, unsigned bit_idx);
379  bool test_gap_blocks_sub(unsigned block_count, unsigned bit_idx);
380 
381  digest_type process_bit_blocks_sub(unsigned block_count, digest_type digest);
382 
383  digest_type process_gap_blocks_sub(unsigned block_count, digest_type digest);
384 
385  static
386  unsigned find_effective_sub_block_size(unsigned i,
387  const bvector_type_const_ptr* bv_src,
388  unsigned src_size);
389 
390  bool any_carry_overs(unsigned co_size) const;
391 
392  /**
393  @return carry over
394  */
395  bool process_shift_right_and(const bm::word_t* arg_blk,
396  digest_type& digest,
397  unsigned carry_over);
398 
399  const bm::word_t* get_arg_block(const bvector_type_const_ptr* bv_src,
400  unsigned k, unsigned i, unsigned j);
401 
402  bvector_type* check_create_target();
403 
404 private:
405  /// Memory arena for logical operations
406  ///
407  /// @internal
408  struct arena
409  {
411  BM_DECLARE_TEMP_BLOCK(tb_opt); ///< temp block for results optimization
412  const bm::word_t* v_arg_or_blk[max_aggregator_cap]; ///< source blocks list (OR)
413  const bm::gap_word_t* v_arg_or_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (OR)
414  const bm::word_t* v_arg_and_blk[max_aggregator_cap]; ///< source blocks list (AND)
415  const bm::gap_word_t* v_arg_and_blk_gap[max_aggregator_cap]; ///< source GAP blocks list (AND)
416 
417  bvector_type_const_ptr arg_bv0[max_aggregator_cap]; ///< arg group 0
418  bvector_type_const_ptr arg_bv1[max_aggregator_cap]; ///< arg group 1
419  unsigned char carry_overs_[max_aggregator_cap]; /// carry over flags
420  };
421 
422  aggregator(const aggregator&) = delete;
423  aggregator& operator=(const aggregator&) = delete;
424 
425 private:
426  arena* ar_; ///< data arena ptr (heap allocated)
427  unsigned arg_group0_size = 0;
428  unsigned arg_group1_size = 0;
429  allocator_pool_type pool_; ///< pool for operations with cyclic mem.use
430 
431  bm::word_t* temp_blk_= 0; ///< external temp block ptr
432  int operation_ = 0; ///< operation code (default: not defined)
433  operation_status operation_status_ = op_undefined;
434  bvector_type* bv_target_ = 0; ///< target bit-vector
435  unsigned top_block_size_ = 0; ///< operation top block (i) size
436 
437  // search range setting (hint) [from, to]
438  bool range_set_ = false; ///< range flag
439  bm::id_t range_from_ = bm::id_max; ///< search from
440  bm::id_t range_to_ = bm::id_max; ///< search to
441 
442  typename bvector_type::optmode opt_mode_;
443 
444 };
445 
446 
447 
448 
449 // ------------------------------------------------------------------------
450 //
451 // ------------------------------------------------------------------------
452 
453 /**
454  Experimental method ro run multiple aggregators in sync
455 
456  Pipeleine algorithm walts the sequence of iterators (pointers on aggregators)
457  and run them all via aggregator<>::run_step() method
458 
459  @param first - iterator (pointer on aggregator)
460  @param last - iterator (pointer on aggregator)
461  @ingroup setalgo
462 */
463 template<typename Agg, typename It>
464 void aggregator_pipeline_execute(It first, It last)
465 {
466  bm::word_t* tb = (*first)->get_temp_block();
467 
468  int pipeline_size = 0;
469  for (It it = first; it != last; ++it, ++pipeline_size)
470  {
471  Agg& agg = *(*it);
472  agg.stage(tb);
473  }
474  for (unsigned i = 0; i < bm::set_array_size; ++i)
475  {
476  unsigned j = 0;
477  do
478  {
479  // run all aggregators for the [i,j] coordinate
480  unsigned w = 0;
481  for (It it = first; it != last; ++it, ++w)
482  {
483  Agg& agg = *(*it);
484  auto op_st = agg.get_operation_status();
485  if (op_st != Agg::op_done)
486  {
487  op_st = agg.run_step(i, j);
488  pipeline_size -= (op_st == Agg::op_done);
489  }
490  } // for it
491  if (pipeline_size <= 0)
492  return;
493 
494  } while (++j < bm::set_array_size);
495 
496  } // for i
497 }
498 
499 
500 // ------------------------------------------------------------------------
501 //
502 // ------------------------------------------------------------------------
503 
504 
505 template<typename BV>
507 : opt_mode_(bvector_type::opt_none)
508 {
509  ar_ = (arena*) bm::aligned_new_malloc(sizeof(arena));
510 }
511 
512 // ------------------------------------------------------------------------
513 
514 template<typename BV>
516 {
517  BM_ASSERT(ar_);
518  bm::aligned_free(ar_);
519  delete bv_target_;
520 }
521 
522 // ------------------------------------------------------------------------
523 
524 template<typename BV>
526 {
527  arg_group0_size = arg_group1_size = operation_ = top_block_size_ = 0;
528  operation_status_ = op_undefined;
529  range_set_ = false;
530  range_from_ = range_to_ = bm::id_max;
531 }
532 
533 // ------------------------------------------------------------------------
534 
535 template<typename BV>
537 {
538  range_from_ = from; range_to_ = to;
539  range_set_ = true;
540 }
541 
542 // ------------------------------------------------------------------------
543 
544 template<typename BV>
546 {
547  if (!bv_target_)
548  {
549  bv_target_ = new bvector_type();
550  bv_target_->init();
551  }
552  return bv_target_;
553 }
554 
555 // ------------------------------------------------------------------------
556 
557 template<typename BV>
558 unsigned aggregator<BV>::add(const bvector_type* bv, unsigned agr_group)
559 {
560  BM_ASSERT_THROW(agr_group <= 1, BM_ERR_RANGE);
561  BM_ASSERT(agr_group <= 1);
562 
563  if (agr_group) // arg group 1
564  {
565  BM_ASSERT(arg_group1_size < max_aggregator_cap);
566  BM_ASSERT_THROW(arg_group1_size < max_aggregator_cap, BM_ERR_RANGE);
567 
568  if (!bv)
569  return arg_group1_size;
570 
571  ar_->arg_bv1[arg_group1_size++] = bv;
572  return arg_group1_size;
573  }
574  else // arg group 0
575  {
576  BM_ASSERT(arg_group0_size < max_aggregator_cap);
577  BM_ASSERT_THROW(arg_group0_size < max_aggregator_cap, BM_ERR_RANGE);
578 
579  if (!bv)
580  return arg_group0_size;
581 
582  ar_->arg_bv0[arg_group0_size++] = bv;
583  return arg_group0_size;
584  }
585 }
586 
587 // ------------------------------------------------------------------------
588 
589 template<typename BV>
591 {
592  combine_or(bv_target, ar_->arg_bv0, arg_group0_size);
593 }
594 
595 // ------------------------------------------------------------------------
596 
597 template<typename BV>
599 {
600  combine_and(bv_target, ar_->arg_bv0, arg_group0_size);
601 }
602 
603 // ------------------------------------------------------------------------
604 
605 template<typename BV>
607 {
608  return combine_and_sub(bv_target,
609  ar_->arg_bv0, arg_group0_size,
610  ar_->arg_bv1, arg_group1_size, false);
611 }
612 
613 // ------------------------------------------------------------------------
614 
615 template<typename BV>
617 {
618  return combine_and_sub(bv_target,
619  ar_->arg_bv0, arg_group0_size,
620  ar_->arg_bv1, arg_group1_size, any);
621 }
622 
623 // ------------------------------------------------------------------------
624 
625 template<typename BV>
627 {
628  return find_first_and_sub(idx,
629  ar_->arg_bv0, arg_group0_size,
630  ar_->arg_bv1, arg_group1_size);
631 }
632 
633 // ------------------------------------------------------------------------
634 
635 template<typename BV>
637 {
638  combine_shift_right_and(bv_target, ar_->arg_bv0, arg_group0_size, false);
639 }
640 
641 // ------------------------------------------------------------------------
642 
643 template<typename BV>
645  const bvector_type_const_ptr* bv_src, unsigned src_size)
646 {
647  BM_ASSERT_THROW(src_size < max_aggregator_cap, BM_ERR_RANGE);
648  if (!src_size)
649  {
650  bv_target.clear();
651  return;
652  }
653 
654  unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
655  for (unsigned i = 0; i < top_blocks; ++i)
656  {
657  unsigned set_array_max = find_effective_sub_block_size(i, bv_src, src_size);
658  unsigned j = 0;
659  do
660  {
661  combine_or(i, j, bv_target, bv_src, src_size);
662  } while (++j < set_array_max);
663  } // for i
664 }
665 
666 // ------------------------------------------------------------------------
667 
668 template<typename BV>
670  const bvector_type_const_ptr* bv_src, unsigned src_size)
671 {
672  BM_ASSERT_THROW(src_size < max_aggregator_cap, BM_ERR_RANGE);
673 
674  if (!src_size)
675  {
676  bv_target.clear();
677  return;
678  }
679 
680  unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
681 
682  for (unsigned i = 0; i < top_blocks; ++i)
683  {
684  unsigned set_array_max = find_effective_sub_block_size(i, bv_src, src_size);
685  unsigned j = 0;
686  do
687  {
688  combine_and(i, j, bv_target, bv_src, src_size);
689  } while (++j < set_array_max);
690  } // for i
691 }
692 
693 // ------------------------------------------------------------------------
694 
695 template<typename BV>
697  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
698  const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size,
699  bool any)
700 {
701  BM_ASSERT_THROW(src_and_size < max_aggregator_cap, BM_ERR_RANGE);
702  BM_ASSERT_THROW(src_sub_size < max_aggregator_cap, BM_ERR_RANGE);
703 
704  bool global_found = false;
705 
706  if (!bv_src_and || !src_and_size)
707  {
708  bv_target.clear();
709  return false;
710  }
711 
712  unsigned top_blocks = resize_target(bv_target, bv_src_and, src_and_size);
713  unsigned top_blocks2 = resize_target(bv_target, bv_src_sub, src_sub_size, false);
714 
715  if (top_blocks2 > top_blocks)
716  top_blocks = top_blocks2;
717 
718  for (unsigned i = 0; i < top_blocks; ++i)
719  {
720  unsigned set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size);
721  if (src_sub_size)
722  {
723  unsigned set_array_max2 =
724  find_effective_sub_block_size(i, bv_src_sub, src_sub_size);
725  if (set_array_max2 > set_array_max)
726  set_array_max = set_array_max2;
727  }
728  unsigned j = 0;
729  do
730  {
731  digest_type digest = combine_and_sub(i, j,
732  bv_src_and, src_and_size,
733  bv_src_sub, src_sub_size);
734  bool found = digest;
735  if (found)
736  {
737  blocks_manager_type& bman_target = bv_target.get_blocks_manager();
738  bman_target.opt_copy_bit_block(i, j, ar_->tb1,
739  opt_mode_, ar_->tb_opt);
740  if (any)
741  return found;
742  }
743  global_found |= found;
744  } while (++j < set_array_max);
745  } // for i
746  return global_found;
747 }
748 
749 // ------------------------------------------------------------------------
750 
751 template<typename BV>
753  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
754  const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size)
755 {
756  BM_ASSERT_THROW(src_and_size < max_aggregator_cap, BM_ERR_RANGE);
757  BM_ASSERT_THROW(src_sub_size < max_aggregator_cap, BM_ERR_RANGE);
758 
759  if (!bv_src_and || !src_and_size)
760  return false;
761 
762  unsigned top_blocks = max_top_blocks(bv_src_sub, src_sub_size);
763  unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
764 
765  if (top_blocks2 > top_blocks)
766  top_blocks = top_blocks2;
767 
768  // compute range blocks coordinates
769  //
770  unsigned nblock_from = unsigned(range_from_ >> bm::set_block_shift);
771  unsigned nblock_to = unsigned(range_to_ >> bm::set_block_shift);
772  unsigned top_from = nblock_from >> bm::set_array_shift;
773  unsigned top_to = nblock_to >> bm::set_array_shift;
774 
775  if (range_set_)
776  {
777  if (nblock_from == nblock_to) // one block search
778  {
779  unsigned i = top_from;
780  unsigned j = nblock_from & bm::set_array_mask;
781  digest_type digest = combine_and_sub(i, j,
782  bv_src_and, src_and_size,
783  bv_src_sub, src_sub_size);
784  if (digest)
785  {
786  unsigned block_bit_idx = 0;
787  bool found = bm::bit_find_first(ar_->tb1, &block_bit_idx, digest);
788  BM_ASSERT(found);
789  idx = bm::block_to_global_index(i, j, block_bit_idx);
790  return found;
791  }
792  return false;
793  }
794 
795  if (top_to < top_blocks)
796  top_blocks = top_to+1;
797  }
798  else
799  {
800  top_from = 0;
801  }
802 
803  for (unsigned i = top_from; i < top_blocks; ++i)
804  {
805  unsigned set_array_max = bm::set_array_size;
806  unsigned j = 0;
807 
808  if (range_set_)
809  {
810  if (i == top_from)
811  {
812  j = nblock_from & bm::set_array_mask;
813  }
814  if (i == top_to)
815  {
816  set_array_max = 1 + (nblock_to & bm::set_array_mask);
817  }
818  }
819  else
820  {
821  set_array_max = find_effective_sub_block_size(i, bv_src_and, src_and_size);
822  if (src_sub_size)
823  {
824  unsigned set_array_max2 =
825  find_effective_sub_block_size(i, bv_src_sub, src_sub_size);
826  if (set_array_max2 > set_array_max)
827  set_array_max = set_array_max2;
828  }
829  }
830  do
831  {
832  digest_type digest = combine_and_sub(i, j,
833  bv_src_and, src_and_size,
834  bv_src_sub, src_sub_size);
835  if (digest)
836  {
837  unsigned block_bit_idx = 0;
838  bool found = bm::bit_find_first(ar_->tb1, &block_bit_idx, digest);
839  BM_ASSERT(found);
840  idx = bm::block_to_global_index(i, j, block_bit_idx);
841  return found;
842  }
843  } while (++j < set_array_max);
844  } // for i
845  return false;
846 }
847 
848 // ------------------------------------------------------------------------
849 
850 template<typename BV>
851 unsigned
853  const bvector_type_const_ptr* bv_src,
854  unsigned src_size)
855 {
856  // quick hack to avoid scanning large, arrays, where such scan can be quite
857  // expensive by itself (this makes this function approximate)
858  if (src_size > 32)
859  return bm::set_array_size;
860 
861  unsigned max_size = 1;
862  for (unsigned k = 0; k < src_size; ++k)
863  {
864  const bvector_type* bv = bv_src[k];
865  BM_ASSERT(bv);
866  const typename bvector_type::blocks_manager_type& bman_arg =
867  bv->get_blocks_manager();
868  const bm::word_t* const* blk_blk_arg = bman_arg.get_topblock(i);
869  if (!blk_blk_arg)
870  continue;
871 
872  for (unsigned j = bm::set_array_size - 1; j > max_size; --j)
873  {
874  if (blk_blk_arg[j])
875  {
876  max_size = j;
877  break;
878  }
879  }
880  } // for k
881  ++max_size;
882  BM_ASSERT(max_size <= bm::set_array_size);
883 
884  return max_size;
885 }
886 
887 // ------------------------------------------------------------------------
888 
889 template<typename BV>
890 void aggregator<BV>::combine_or(unsigned i, unsigned j,
891  bvector_type& bv_target,
892  const bvector_type_const_ptr* bv_src,
893  unsigned src_size)
894 {
895  typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
896 
897  unsigned arg_blk_count = 0;
898  unsigned arg_blk_gap_count = 0;
899  bm::word_t* blk =
900  sort_input_blocks_or(bv_src, src_size, i, j,
901  &arg_blk_count, &arg_blk_gap_count);
902 
903  BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
904 
905  if (blk == FULL_BLOCK_FAKE_ADDR) // nothing to do - golden block(!)
906  {
907  bman_target.check_alloc_top_subblock(i);
908  bman_target.set_block_ptr(i, j, blk);
909  }
910  else
911  {
912  blk = ar_->tb1;
913  if (arg_blk_count || arg_blk_gap_count)
914  {
915  bool all_one =
916  process_bit_blocks_or(bman_target, i, j, arg_blk_count);
917  if (!all_one)
918  {
919  if (arg_blk_gap_count)
920  {
921  process_gap_blocks_or(arg_blk_gap_count);
922  }
923  // we have some results, allocate block and copy from temp
924  bman_target.opt_copy_bit_block(i, j, ar_->tb1,
925  opt_mode_, ar_->tb_opt);
926  }
927  }
928  }
929 }
930 
931 // ------------------------------------------------------------------------
932 
933 template<typename BV>
934 void aggregator<BV>::combine_and(unsigned i, unsigned j,
935  bvector_type& bv_target,
936  const bvector_type_const_ptr* bv_src,
937  unsigned src_size)
938 {
939  BM_ASSERT(src_size);
940 
941  typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
942 
943  unsigned arg_blk_count = 0;
944  unsigned arg_blk_gap_count = 0;
945  bm::word_t* blk =
946  sort_input_blocks_and(bv_src, src_size,
947  i, j,
948  &arg_blk_count, &arg_blk_gap_count);
949 
950  BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
951 
952  if (!blk) // nothing to do - golden block(!)
953  return;
954  if (arg_blk_count || arg_blk_gap_count)
955  {
956  // AND bit-blocks
957  //
958  bm::id64_t digest = ~0ull;
959  digest = process_bit_blocks_and(arg_blk_count, digest);
960  if (!digest)
961  return;
962 
963  // AND all GAP blocks (if any)
964  //
965  if (arg_blk_gap_count)
966  {
967  digest =
968  process_gap_blocks_and(arg_blk_gap_count, digest);
969  }
970  if (digest) // some results
971  {
972  // we have some results, allocate block and copy from temp
973  bman_target.opt_copy_bit_block(i, j, ar_->tb1,
974  opt_mode_, ar_->tb_opt);
975  }
976  }
977 }
978 
979 // ------------------------------------------------------------------------
980 
981 template<typename BV>
983 aggregator<BV>::combine_and_sub(unsigned i, unsigned j,
984  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
985  const bvector_type_const_ptr* bv_src_sub, unsigned src_sub_size)
986 {
987  BM_ASSERT(src_and_size);
988  unsigned arg_blk_and_count = 0;
989  unsigned arg_blk_and_gap_count = 0;
990  unsigned arg_blk_sub_count = 0;
991  unsigned arg_blk_sub_gap_count = 0;
992 
993  bm::word_t* blk = sort_input_blocks_and(bv_src_and, src_and_size,
994  i, j,
995  &arg_blk_and_count, &arg_blk_and_gap_count);
996  BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
997  if (!blk || !(arg_blk_and_count | arg_blk_and_gap_count))
998  return 0; // nothing to do - golden block(!)
999 
1000  if (src_sub_size)
1001  {
1002  blk = sort_input_blocks_or(bv_src_sub, src_sub_size,
1003  i, j,
1004  &arg_blk_sub_count, &arg_blk_sub_gap_count);
1005  BM_ASSERT(blk == 0 || blk == FULL_BLOCK_FAKE_ADDR);
1006  if (blk == FULL_BLOCK_FAKE_ADDR)
1007  return 0; // nothing to do - golden block(!)
1008  }
1009  digest_type digest = ~0ull;
1010 
1011 
1012  // AND-SUB bit-blocks
1013  //
1014  digest = process_bit_blocks_and(arg_blk_and_count, digest);
1015  if (!digest)
1016  return digest;
1017  digest = process_bit_blocks_sub(arg_blk_sub_count, digest);
1018  if (!digest)
1019  return digest;
1020 
1021  // AND all GAP block
1022  //
1023  digest =
1024  process_gap_blocks_and(arg_blk_and_gap_count, digest);
1025  if (!digest)
1026  return digest;
1027 
1028  if (arg_blk_sub_gap_count)
1029  {
1030  digest =
1031  process_gap_blocks_sub(arg_blk_sub_gap_count, digest);
1032  }
1033 
1034  return digest;
1035 }
1036 
1037 // ------------------------------------------------------------------------
1038 
1039 template<typename BV>
1040 void aggregator<BV>::process_gap_blocks_or(unsigned arg_blk_gap_count)
1041 {
1042  bm::word_t* blk = ar_->tb1;
1043  for (unsigned k = 0; k < arg_blk_gap_count; ++k)
1044  bm::gap_add_to_bitset(blk, ar_->v_arg_or_blk_gap[k]);
1045 }
1046 
1047 // ------------------------------------------------------------------------
1048 
1049 template<typename BV>
1051 aggregator<BV>::process_gap_blocks_and(unsigned arg_blk_gap_count,
1052  digest_type digest)
1053 {
1054  bm::word_t* blk = ar_->tb1;
1055  bool single_bit_found;
1056  unsigned single_bit_idx;
1057  for (unsigned k = 0; k < arg_blk_gap_count; ++k)
1058  {
1059  bm::gap_and_to_bitset(blk, ar_->v_arg_and_blk_gap[k], digest);
1060  digest = bm::update_block_digest0(blk, digest);
1061  if (!digest)
1062  {
1064  break;
1065  }
1066  single_bit_found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1067  if (single_bit_found)
1068  {
1069  for (++k; k < arg_blk_gap_count; ++k)
1070  {
1071  bool b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[k], single_bit_idx);
1072  if (!b)
1073  return 0; // AND 0 causes result to turn 0
1074  }
1075  break;
1076  }
1077  }
1078  return digest;
1079 }
1080 
1081 // ------------------------------------------------------------------------
1082 
1083 template<typename BV>
1085 aggregator<BV>::process_gap_blocks_sub(unsigned arg_blk_gap_count,
1086  digest_type digest)
1087 {
1088  bm::word_t* blk = ar_->tb1;
1089  bool single_bit_found;
1090  unsigned single_bit_idx;
1091  for (unsigned k = 0; k < arg_blk_gap_count; ++k)
1092  {
1093  bm::gap_sub_to_bitset(blk, ar_->v_arg_or_blk_gap[k], digest);
1094  digest = bm::update_block_digest0(blk, digest);
1095  if (!digest)
1096  {
1098  break;
1099  }
1100  // check if logical operation reduced to a corner case of one single bit
1101  single_bit_found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1102  if (single_bit_found)
1103  {
1104  for (++k; k < arg_blk_gap_count; ++k)
1105  {
1106  bool b = bm::gap_test_unr(ar_->v_arg_or_blk_gap[k], single_bit_idx);
1107  if (b)
1108  return 0; // AND-NOT causes search result to turn 0
1109  } // for k
1110  break;
1111  }
1112  } // for k
1113  return digest;
1114 }
1115 
1116 // ------------------------------------------------------------------------
1117 
1118 template<typename BV>
1119 bool aggregator<BV>::test_gap_blocks_and(unsigned arg_blk_gap_count,
1120  unsigned bit_idx)
1121 {
1122  unsigned b = 1;
1123  for (unsigned i = 0; i < arg_blk_gap_count && b; ++i)
1124  {
1125  b = bm::gap_test_unr(ar_->v_arg_and_blk_gap[i], bit_idx);
1126  }
1127  return b;
1128 }
1129 
1130 // ------------------------------------------------------------------------
1131 
1132 template<typename BV>
1133 bool aggregator<BV>::test_gap_blocks_sub(unsigned arg_blk_gap_count,
1134  unsigned bit_idx)
1135 {
1136  unsigned b = 1;
1137  for (unsigned i = 0; i < arg_blk_gap_count; ++i)
1138  {
1139  b = bm::gap_test_unr(ar_->v_arg_or_blk_gap[i], bit_idx);
1140  if (b)
1141  return false;
1142  }
1143  return true;
1144 }
1145 
1146 // ------------------------------------------------------------------------
1147 
1148 
1149 template<typename BV>
1151  unsigned i, unsigned j,
1152  unsigned arg_blk_count)
1153 {
1154  bm::word_t* blk = ar_->tb1;
1155  bool all_one;
1156 
1157  unsigned k = 0;
1158 
1159  if (arg_blk_count) // copy the first block
1160  bm::bit_block_copy(blk, ar_->v_arg_or_blk[k++]);
1161  else
1162  bm::bit_block_set(blk, 0);
1163 
1164  // process all BIT blocks
1165  //
1166  unsigned unroll_factor, len, len_unr;
1167 
1168  unroll_factor = 4;
1169  len = arg_blk_count - k;
1170  len_unr = len - (len % unroll_factor);
1171  for( ;k < len_unr; k+=unroll_factor)
1172  {
1173  all_one = bm::bit_block_or_5way(blk,
1174  ar_->v_arg_or_blk[k], ar_->v_arg_or_blk[k+1],
1175  ar_->v_arg_or_blk[k+2], ar_->v_arg_or_blk[k+3]);
1176  if (all_one)
1177  {
1178  BM_ASSERT(blk == ar_->tb1);
1180  bman_target.set_block(i, j, FULL_BLOCK_FAKE_ADDR, false);
1181  return true;
1182  }
1183  } // for k
1184 
1185  unroll_factor = 2;
1186  len = arg_blk_count - k;
1187  len_unr = len - (len % unroll_factor);
1188  for( ;k < len_unr; k+=unroll_factor)
1189  {
1190  all_one = bm::bit_block_or_3way(blk, ar_->v_arg_or_blk[k], ar_->v_arg_or_blk[k+1]);
1191  if (all_one)
1192  {
1193  BM_ASSERT(blk == ar_->tb1);
1195  bman_target.set_block(i, j, FULL_BLOCK_FAKE_ADDR, false);
1196  return true;
1197  }
1198  } // for k
1199 
1200  for (; k < arg_blk_count; ++k)
1201  {
1202  all_one = bm::bit_block_or(blk, ar_->v_arg_or_blk[k]);
1203  if (all_one)
1204  {
1205  BM_ASSERT(blk == ar_->tb1);
1207  bman_target.set_block(i, j, FULL_BLOCK_FAKE_ADDR, false);
1208  return true;
1209  }
1210  } // for k
1211 
1212  return false;
1213 }
1214 
1215 // ------------------------------------------------------------------------
1216 
1217 template<typename BV>
1220  digest_type digest)
1221 {
1222  bm::word_t* blk = ar_->tb1;
1223  unsigned k = 0;
1224 
1225  unsigned nb_from = unsigned(range_from_ >> bm::set_block_shift);
1226  unsigned nb_to = unsigned(range_to_ >> bm::set_block_shift);
1227  if (range_set_ && (nb_from == nb_to))
1228  {
1229  unsigned nbit_from = unsigned(range_from_ & bm::set_block_mask);
1230  unsigned nbit_to = unsigned(range_to_ & bm::set_block_mask);
1231  digest_type digest0 = bm::digest_mask(nbit_from, nbit_to);
1232  digest &= digest0;
1233  bm::block_init_digest0(blk, digest);
1234  }
1235  else
1236  {
1237  switch (arg_blk_count)
1238  {
1239  case 0:
1240  bm::block_init_digest0(blk, digest); // 0xFF... by default
1241  return digest;
1242  case 1:
1243  bm::bit_block_copy(blk, ar_->v_arg_and_blk[k]);
1244  return bm::calc_block_digest0(blk);
1245  default:
1246  digest = bm::bit_block_and_2way(blk,
1247  ar_->v_arg_and_blk[k],
1248  ar_->v_arg_and_blk[k+1],
1249  ~0ull);
1250  k += 2;
1251  break;
1252  } // switch
1253  }
1254 
1255  unsigned unroll_factor, len, len_unr;
1256  unsigned single_bit_idx;
1257 
1258  unroll_factor = 4;
1259  len = arg_blk_count - k;
1260  len_unr = len - (len % unroll_factor);
1261  for (; k < len_unr; k += unroll_factor)
1262  {
1263  digest =
1265  ar_->v_arg_and_blk[k], ar_->v_arg_and_blk[k + 1],
1266  ar_->v_arg_and_blk[k + 2], ar_->v_arg_and_blk[k + 3],
1267  digest);
1268  if (!digest) // all zero
1269  return digest;
1270  bool found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1271  if (found)
1272  {
1273  unsigned nword = unsigned(single_bit_idx >> bm::set_word_shift);
1274  unsigned mask = 1u << (single_bit_idx & bm::set_word_mask);
1275  for (++k; k < arg_blk_count; ++k)
1276  {
1277  const bm::word_t* arg_blk = ar_->v_arg_and_blk[k];
1278  if (!(mask & arg_blk[nword]))
1279  {
1280  blk[nword] = 0;
1281  return 0;
1282  }
1283  } // for k
1284  break;
1285  }
1286  } // for k
1287 
1288  for (; k < arg_blk_count; ++k)
1289  {
1290  if (ar_->v_arg_and_blk[k] == FULL_BLOCK_REAL_ADDR)
1291  continue;
1292  digest = bm::bit_block_and(blk, ar_->v_arg_and_blk[k], digest);
1293  if (!digest) // all zero
1294  return digest;
1295  } // for k
1296  return digest;
1297 }
1298 
1299 // ------------------------------------------------------------------------
1300 
1301 template<typename BV>
1304  digest_type digest)
1305 {
1306  bm::word_t* blk = ar_->tb1;
1307  unsigned single_bit_idx;
1308  const word_t** args = &ar_->v_arg_or_blk[0];
1309  for (unsigned k = 0; k < arg_blk_count; ++k)
1310  {
1311  if (ar_->v_arg_or_blk[k] == FULL_BLOCK_REAL_ADDR) // golden block
1312  {
1313  digest = 0;
1314  break;
1315  }
1316  digest = bm::bit_block_sub(blk, args[k], digest);
1317  if (!digest) // all zero
1318  break;
1319 
1320  bool found = bm::bit_find_first_if_1(blk, &single_bit_idx, digest);
1321  if (found)
1322  {
1323  const unsigned mask = 1u << (single_bit_idx & bm::set_word_mask);
1324  unsigned nword = unsigned(single_bit_idx >> bm::set_word_shift);
1325  for (++k; k < arg_blk_count; ++k)
1326  {
1327  if (mask & args[k][nword])
1328  {
1329  blk[nword] = 0;
1330  return 0;
1331  }
1332  } // for k
1333  break;
1334  }
1335  } // for k
1336  return digest;
1337 }
1338 
1339 // ------------------------------------------------------------------------
1340 
1341 template<typename BV>
1343  const bvector_type_const_ptr* bv_src, unsigned src_size,
1344  bool init_clear)
1345 {
1346  typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
1347  if (init_clear)
1348  {
1349  if (bman_target.is_init())
1350  bman_target.deinit_tree();
1351  }
1352 
1353  unsigned top_blocks = bman_target.top_block_size();
1354  bm::id_t size = bv_target.size();
1355  bool need_realloc = false;
1356 
1357  // pre-scan to do target size harmonization
1358  for (unsigned i = 0; i < src_size; ++i)
1359  {
1360  const bvector_type* bv = bv_src[i];
1361  BM_ASSERT(bv);
1362  const typename bvector_type::blocks_manager_type& bman_arg =
1363  bv->get_blocks_manager();
1364  unsigned arg_top_blocks = bman_arg.top_block_size();
1365  if (arg_top_blocks > top_blocks)
1366  {
1367  need_realloc = true;
1368  top_blocks = arg_top_blocks;
1369  }
1370  bm::id_t arg_size = bv->size();
1371  if (arg_size > size)
1372  size = arg_size;
1373  } // for i
1374 
1375  if (need_realloc)
1376  {
1377  bman_target.reserve_top_blocks(top_blocks);
1378  }
1379  if (!bman_target.is_init())
1380  bman_target.init_tree();
1381  if (size > bv_target.size())
1382  bv_target.resize(size);
1383 
1384  return top_blocks;
1385 }
1386 
1387 // ------------------------------------------------------------------------
1388 
1389 template<typename BV>
1390 unsigned aggregator<BV>::max_top_blocks(const bvector_type_const_ptr* bv_src, unsigned src_size)
1391 {
1392  unsigned top_blocks = 1;
1393 
1394  // pre-scan to do target size harmonization
1395  for (unsigned i = 0; i < src_size; ++i)
1396  {
1397  const bvector_type* bv = bv_src[i];
1398  BM_ASSERT(bv);
1399  const typename bvector_type::blocks_manager_type& bman_arg = bv->get_blocks_manager();
1400  unsigned arg_top_blocks = bman_arg.top_block_size();
1401  if (arg_top_blocks > top_blocks)
1402  top_blocks = arg_top_blocks;
1403  } // for i
1404  return top_blocks;
1405 }
1406 
1407 // ------------------------------------------------------------------------
1408 
1409 template<typename BV>
1411  unsigned src_size,
1412  unsigned i, unsigned j,
1413  unsigned* arg_blk_count,
1414  unsigned* arg_blk_gap_count)
1415 {
1416  bm::word_t* blk = 0;
1417  for (unsigned k = 0; k < src_size; ++k)
1418  {
1419  const bvector_type* bv = bv_src[k];
1420  BM_ASSERT(bv);
1421  const blocks_manager_type& bman_arg = bv->get_blocks_manager();
1422  const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1423  if (!arg_blk)
1424  continue;
1425  if (BM_IS_GAP(arg_blk))
1426  {
1427  ar_->v_arg_or_blk_gap[*arg_blk_gap_count] = BMGAP_PTR(arg_blk);
1428  (*arg_blk_gap_count)++;
1429  }
1430  else // FULL or bit block
1431  {
1432  if (IS_FULL_BLOCK(arg_blk))
1433  {
1434  blk = FULL_BLOCK_FAKE_ADDR;
1435  *arg_blk_gap_count = *arg_blk_count = 0; // nothing to do
1436  break;
1437  }
1438  ar_->v_arg_or_blk[*arg_blk_count] = arg_blk;
1439  (*arg_blk_count)++;
1440  }
1441  } // for k
1442  return blk;
1443 }
1444 
1445 // ------------------------------------------------------------------------
1446 
1447 template<typename BV>
1449  unsigned src_size,
1450  unsigned i, unsigned j,
1451  unsigned* arg_blk_count,
1452  unsigned* arg_blk_gap_count)
1453 {
1455  for (unsigned k = 0; k < src_size; ++k)
1456  {
1457  const bvector_type* bv = bv_src[k];
1458  BM_ASSERT(bv);
1459  const typename bvector_type::blocks_manager_type& bman_arg = bv->get_blocks_manager();
1460  const bm::word_t* arg_blk = bman_arg.get_block_ptr(i, j);
1461  if (!arg_blk)
1462  {
1463  blk = 0;
1464  *arg_blk_gap_count = *arg_blk_count = 0; // nothing to do
1465  break;
1466  }
1467  if (BM_IS_GAP(arg_blk))
1468  {
1469  ar_->v_arg_and_blk_gap[*arg_blk_gap_count] = BMGAP_PTR(arg_blk);
1470  (*arg_blk_gap_count)++;
1471  }
1472  else // FULL or bit block
1473  {
1474  ar_->v_arg_and_blk[*arg_blk_count] =
1475  IS_FULL_BLOCK(arg_blk) ? FULL_BLOCK_REAL_ADDR: arg_blk;
1476  (*arg_blk_count)++;
1477  }
1478  } // for k
1479  return blk;
1480 }
1481 
1482 // ------------------------------------------------------------------------
1483 
1484 template<typename BV>
1486  const bvector_type_const_ptr* bv_src, unsigned src_size)
1487 {
1488  BM_ASSERT(src_size);
1489  if (src_size == 0)
1490  {
1491  bv_target.clear();
1492  return;
1493  }
1494 
1495  const bvector_type* bv = bv_src[0];
1496  bv_target = *bv;
1497 
1498  for (unsigned i = 1; i < src_size; ++i)
1499  {
1500  bv = bv_src[i];
1501  BM_ASSERT(bv);
1502  bv_target.bit_or(*bv);
1503  }
1504 }
1505 
1506 // ------------------------------------------------------------------------
1507 
1508 template<typename BV>
1510  const bvector_type_const_ptr* bv_src, unsigned src_size)
1511 {
1512  BM_ASSERT(src_size);
1513 
1514  if (src_size == 0)
1515  {
1516  bv_target.clear();
1517  return;
1518  }
1519  const bvector_type* bv = bv_src[0];
1520  bv_target = *bv;
1521 
1522  for (unsigned i = 1; i < src_size; ++i)
1523  {
1524  bv = bv_src[i];
1525  BM_ASSERT(bv);
1526  bv_target.bit_and(*bv);
1527  }
1528 }
1529 
1530 // ------------------------------------------------------------------------
1531 
1532 template<typename BV>
1534  const bvector_type_const_ptr* bv_src_and,
1535  unsigned src_and_size,
1536  const bvector_type_const_ptr* bv_src_sub,
1537  unsigned src_sub_size)
1538 {
1539  BM_ASSERT(src_and_size);
1540 
1541  combine_and_horizontal(bv_target, bv_src_and, src_and_size);
1542 
1543  for (unsigned i = 0; i < src_sub_size; ++i)
1544  {
1545  const bvector_type* bv = bv_src_sub[i];
1546  BM_ASSERT(bv);
1547  bv_target -= *bv;
1548  }
1549 }
1550 
1551 // ------------------------------------------------------------------------
1552 
1553 template<typename BV>
1555  const bvector_type_const_ptr* bv_src,
1556  unsigned src_size)
1557 {
1558  top_block_size_ = resize_target(bv_target, bv_src, src_size);
1559 
1560  // set initial carry overs all to 0
1561  for (unsigned i = 0; i < src_size; ++i) // reset co flags
1562  ar_->carry_overs_[i] = 0;
1563 }
1564 
1565 // ------------------------------------------------------------------------
1566 
1567 template<typename BV>
1569  bvector_type& bv_target,
1570  const bvector_type_const_ptr* bv_src_and, unsigned src_and_size,
1571  bool any)
1572 {
1573  BM_ASSERT_THROW(src_size < max_aggregator_cap, BM_ERR_RANGE);
1574  if (!src_and_size)
1575  {
1576  bv_target.clear();
1577  return false;
1578  }
1579  prepare_shift_right_and(bv_target, bv_src_and, src_and_size);
1580 
1581  for (unsigned i = 0; i < bm::set_array_size; ++i)
1582  {
1583  if (i > top_block_size_)
1584  {
1585  if (!this->any_carry_overs(src_and_size))
1586  break; // quit early if there is nothing to carry on
1587  }
1588 
1589  unsigned j = 0;
1590  do
1591  {
1592  bool found = combine_shift_right_and(i, j, bv_target, bv_src_and, src_and_size);
1593  if (found && any)
1594  return found;
1595  } while (++j < bm::set_array_size);
1596 
1597  } // for i
1598 
1599  return bv_target.any();
1600 }
1601 
1602 // ------------------------------------------------------------------------
1603 
1604 template<typename BV>
1605 bool aggregator<BV>::combine_shift_right_and(unsigned i, unsigned j,
1606  bvector_type& bv_target,
1607  const bvector_type_const_ptr* bv_src,
1608  unsigned src_size)
1609 {
1610  typename bvector_type::blocks_manager_type& bman_target =
1611  bv_target.get_blocks_manager();
1612  bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
1613  unsigned char* carry_overs = &(ar_->carry_overs_[0]);
1614 
1615  bm::id64_t digest = ~0ull; // start with a full content digest
1616 
1617  // first initial block is just copied over from the first AND
1618  {
1619  const blocks_manager_type& bman_arg = bv_src[0]->get_blocks_manager();
1620  BM_ASSERT(bman_arg.is_init());
1621  const bm::word_t* arg_blk = bman_arg.get_block(i, j);
1622  if (BM_IS_GAP(arg_blk))
1623  {
1624  bm::bit_block_set(blk, 0);
1625  bm::gap_add_to_bitset(blk, BMGAP_PTR(arg_blk));
1626  }
1627  else
1628  {
1629  if (arg_blk)
1630  bm::bit_block_copy(blk, arg_blk);
1631  else
1632  {
1633  bm::bit_block_set(blk, 0);
1634  digest = 0;
1635  }
1636  }
1637  carry_overs[0] = 0;
1638  }
1639 
1640  unsigned k = 1;
1641  for (; k < src_size; ++k)
1642  {
1643  unsigned carry_over = carry_overs[k];
1644  if (!digest && !carry_over) // 0 into "00000" block >> 0
1645  {
1647  continue;
1648  }
1649  const bm::word_t* arg_blk = get_arg_block(bv_src, k, i, j);
1650  carry_overs[k] = process_shift_right_and(arg_blk, digest, carry_over);
1651  } // for k
1652 
1653  // block now gets emitted into the target bit-vector
1654  if (digest)
1655  {
1657  bman_target.opt_copy_bit_block(i, j, blk,
1658  opt_mode_, ar_->tb_opt);
1659  return true;
1660  }
1661  return false;
1662 }
1663 
1664 // ------------------------------------------------------------------------
1665 
1666 template<typename BV>
1668  digest_type& digest,
1669  unsigned carry_over)
1670 {
1671  bm::word_t* blk = temp_blk_ ? temp_blk_ : ar_->tb1;
1672 
1673  if (BM_IS_GAP(arg_blk)) // GAP argument
1674  {
1675  if (digest)
1676  {
1677  carry_over =
1678  bm::bit_block_shift_r1_and_unr(blk, carry_over,
1679  FULL_BLOCK_REAL_ADDR, &digest);
1680  }
1681  else // digest == 0, but carry_over can change that
1682  {
1683  blk[0] = carry_over;
1684  carry_over = 0;
1685  digest = blk[0]; // NOTE: this does NOT account for AND (yet)
1686  }
1687  BM_ASSERT(bm::calc_block_digest0(blk) == digest);
1688 
1689  bm::gap_and_to_bitset(blk, BMGAP_PTR(arg_blk), digest);
1690  digest = bm::update_block_digest0(blk, digest);
1691  }
1692  else // 2 bit-blocks
1693  {
1694  if (arg_blk) // use fast fused SHIFT-AND operation
1695  {
1696  if (digest)
1697  {
1698  carry_over =
1699  bm::bit_block_shift_r1_and_unr(blk, carry_over, arg_blk,
1700  &digest);
1701  }
1702  else // digest == 0
1703  {
1704  blk[0] = carry_over & arg_blk[0];
1705  carry_over = 0;
1706  digest = blk[0];
1707  }
1708  BM_ASSERT(bm::calc_block_digest0(blk) == digest);
1709  }
1710  else // arg is zero - target block => zero
1711  {
1712  unsigned co = blk[bm::set_block_size-1] >> 31; // carry out
1713  if (digest)
1714  {
1715  bm::bit_block_set(blk, 0); // TODO: digest based set
1716  digest ^= digest;
1717  }
1718  carry_over = co;
1719  }
1720  }
1721  return carry_over;
1722 }
1723 
1724 // ------------------------------------------------------------------------
1725 
1726 template<typename BV>
1728  const bvector_type_const_ptr* bv_src,
1729  unsigned k, unsigned i, unsigned j)
1730 {
1731  const blocks_manager_type& bman_arg = bv_src[k]->get_blocks_manager();
1732  return bman_arg.get_block(i, j);
1733 }
1734 
1735 // ------------------------------------------------------------------------
1736 
1737 template<typename BV>
1738 bool aggregator<BV>::any_carry_overs(unsigned co_size) const
1739 {
1740  for (unsigned i = 0; i < co_size; ++i)
1741  if (ar_->carry_overs_[i])
1742  return true;
1743  return false;
1744 }
1745 
1746 // ------------------------------------------------------------------------
1747 
1748 template<typename BV>
1750 {
1751  bvector_type* bv_target = check_create_target(); // create target vector
1752  BM_ASSERT(bv_target);
1753 
1754  temp_blk_ = temp_block;
1755 
1756  switch (operation_)
1757  {
1758  case BM_NOT_DEFINED:
1759  break;
1760  case BM_SHIFT_R_AND:
1761  prepare_shift_right_and(*bv_target, ar_->arg_bv0, arg_group0_size);
1762  operation_status_ = op_prepared;
1763  break;
1764  default:
1765  BM_ASSERT(0);
1766  } // switch
1767 }
1768 
1769 // ------------------------------------------------------------------------
1770 
1771 template<typename BV>
1773 aggregator<BV>::run_step(unsigned i, unsigned j)
1774 {
1775  BM_ASSERT(operation_status_ == op_prepared || operation_status_ == op_in_progress);
1777 
1778  switch (operation_)
1779  {
1780  case BM_NOT_DEFINED:
1781  break;
1782 
1783  case BM_SHIFT_R_AND:
1784  {
1785  if (i > top_block_size_)
1786  {
1787  if (!this->any_carry_overs(arg_group0_size))
1788  {
1789  operation_status_ = op_done;
1790  return operation_status_;
1791  }
1792  }
1793  //bool found =
1794  this->combine_shift_right_and(i, j, *bv_target_,
1795  ar_->arg_bv0, arg_group0_size);
1796  operation_status_ = op_in_progress;
1797  }
1798  break;
1799  default:
1800  BM_ASSERT(0);
1801  break;
1802  } // switch
1803 
1804  return operation_status_;
1805 }
1806 
1807 
1808 // ------------------------------------------------------------------------
1809 
1810 
1811 } // bm
1812 
1813 #include "bmundef.h"
1814 
1815 #endif
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
void bit_block_copy(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Bitblock copy operation.
Definition: bmfunc.h:4693
void aggregator_pipeline_execute(It first, It last)
Experimental method ro run multiple aggregators in sync.
Definition: bmaggregator.h:464
digest_type process_bit_blocks_sub(unsigned block_count, digest_type digest)
max_size
Maximum aggregation capacity in one pass.
Definition: bmaggregator.h:61
const unsigned set_block_size
Definition: bmconst.h:48
bool bit_block_or_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, const bm::word_t *BMRESTRICT src4)
5 way (target, source1, source2) bitblock OR operation. Function does not analyse availability of sou...
Definition: bmfunc.h:5657
const unsigned set_word_shift
Definition: bmconst.h:64
void prepare_shift_right_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
unsigned add(const bvector_type *bv, unsigned agr_group=0)
Attach source bit-vector to a argument group (0 or 1).
Definition: bmaggregator.h:558
void combine_or(bvector_type &bv_target)
Aggregate added group of vectors using logical OR Operation does NOT perform an explicit reset of arg...
Definition: bmaggregator.h:590
const unsigned set_array_shift
Definition: bmconst.h:83
unsigned long long int id64_t
Definition: bmconst.h:32
void combine_and(bvector_type &bv_target)
Aggregate added group of vectors using logical AND Operation does NOT perform an explicit reset of ar...
Definition: bmaggregator.h:598
bvector_type * check_create_target()
Definition: bmaggregator.h:545
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
Definition: bmalloc.h:401
bool combine_and_sub(bvector_type &bv_target)
Aggregate added group of vectors using fused logical AND-SUB Operation does NOT perform an explicit r...
Definition: bmaggregator.h:606
Definition: bm.h:69
bool bit_block_or_3way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2)
3 way (target | source1 | source2) bitblock OR operation. Function does not analyse availability of s...
Definition: bmfunc.h:5616
bvector_type::blocks_manager_type blocks_manager_type
Definition: bmaggregator.h:325
void set_range_hint(bm::id_t from, bm::id_t to)
Set search hint for the range, where results needs to be searched (experimental for internal use)...
Definition: bmaggregator.h:536
pre-processor un-defines to avoid global space pollution (internal)
void combine_shift_right_and(bvector_type &bv_target)
Aggregate added group of vectors using SHIFT-RIGHT and logical AND Operation does NOT perform an expl...
Definition: bmaggregator.h:636
bool bit_block_or(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:5499
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1134
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:157
const unsigned id_max
Definition: bmconst.h:44
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
bool process_bit_blocks_or(blocks_manager_type &bman_target, unsigned i, unsigned j, unsigned block_count)
void reset()
Reset aggregate groups, forget all attached vectors.
Definition: bmaggregator.h:525
void aligned_free(void *ptr)
Aligned free.
Definition: bmalloc.h:429
void gap_and_to_bitset(unsigned *dest, const T *pcurr)
ANDs GAP block to bitblock.
Definition: bmfunc.h:2863
static unsigned resize_target(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size, bool init_clear=true)
bool any_carry_overs(unsigned co_size) const
unsigned int word_t
Definition: bmconst.h:36
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:139
bm::id64_t digest_type
Definition: bmaggregator.h:56
Algorithms for bvector<>
const bm::word_t * get_arg_block(const bvector_type_const_ptr *bv_src, unsigned k, unsigned i, unsigned j)
void set_optimization(typename bvector_type::optmode opt=bvector_type::opt_compress)
set on-the-fly bit-block compression By default aggregator does not try to optimize result...
Definition: bmaggregator.h:97
bool bit_is_all_zero(const bm::word_t *BMRESTRICT start)
Returns "true" if all bits in the block are 0.
Definition: bmfunc.h:923
const bvector_type * get_target() const
Definition: bmaggregator.h:318
Bit manipulation primitives (internal)
unsigned gap_test_unr(const T *buf, const unsigned pos)
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
Definition: bmfunc.h:1167
void bit_block_set(bm::word_t *BMRESTRICT dst, bm::word_t value)
Bitblock memset operation.
Definition: bmfunc.h:3209
bm::id64_t update_block_digest0(const bm::word_t *const block, bm::id64_t digest)
Compute digest for 64 non-zero areas based on existing digest (function revalidates zero areas) ...
Definition: bmfunc.h:727
operation
Codes for aggregation operations which can be pipelined for efficient execution.
Definition: bmaggregator.h:68
const unsigned set_array_mask
Definition: bmconst.h:84
void gap_add_to_bitset(unsigned *dest, const T *pcurr)
Adds(OR) GAP block to bitblock.
Definition: bmfunc.h:2831
unsigned short gap_word_t
Definition: bmconst.h:70
digest_type process_bit_blocks_and(unsigned block_count, digest_type digest)
blocks_manager< Alloc > blocks_manager_type
Definition: bm.h:140
void process_gap_blocks_or(unsigned block_count)
void gap_sub_to_bitset(unsigned *dest, const T *pcurr)
SUB (AND NOT) GAP block to bitblock.
Definition: bmfunc.h:2719
void block_init_digest0(bm::word_t *const block, bm::id64_t digest)
Init block with 000111000 pattren based on digest.
Definition: bmfunc.h:656
operation_status run_step(unsigned i, unsigned j)
Run a step of current arrgegation operation.
void set_operation(int op_code)
Set operation code for the aggregator.
Definition: bmaggregator.h:303
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:139
bool test_gap_blocks_and(unsigned block_count, unsigned bit_idx)
digest_type process_gap_blocks_and(unsigned block_count, digest_type digest)
#define FULL_BLOCK_REAL_ADDR
Definition: bmdef.h:135
Definitions(internal)
#define BM_ASSERT_THROW(x, xerrcode)
Definition: bmdef.h:335
bm::word_t * sort_input_blocks_and(const bvector_type_const_ptr *bv_src, unsigned src_size, unsigned i, unsigned j, unsigned *arg_blk_count, unsigned *arg_blk_gap_count)
Algorithms for fast aggregation of a group of bit-vectors.
Definition: bmaggregator.h:51
void stage(bm::word_t *temp_block)
Prepare operation, create internal resources, analyse dependencies.
void combine_and_sub_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, unsigned src_and_size, const bvector_type_const_ptr *bv_src_sub, unsigned src_sub_size)
Horizontal AND-SUB aggregation (potentially slower) method.
bm::id64_t bit_block_sub(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
Definition: bmfunc.h:5766
void combine_and_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
Horizontal AND aggregation (potentially slower) method.
bool test_gap_blocks_sub(unsigned block_count, unsigned bit_idx)
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
Definition: bmaggregator.h:58
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
Definition: bmalgo_impl.h:1420
#define BM_IS_GAP(ptr)
Definition: bmdef.h:167
bool find_first_and_sub(bm::id_t &idx)
Definition: bmaggregator.h:626
BMFORCEINLINE bm::id64_t digest_mask(unsigned from, unsigned to)
Compute digest mask for [from..to] positions.
Definition: bmfunc.h:618
void combine_or_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, unsigned src_size)
Horizontal OR aggregation (potentially slower) method.
const unsigned set_block_mask
Definition: bmconst.h:50
unsigned bit_find_first(const bm::word_t *block, unsigned *first)
BIT block find the first set bit.
Definition: bmfunc.h:6280
optmode
Optimization mode Every next level means additional checks (better compression vs time) ...
Definition: bm.h:152
bool bit_block_shift_r1_and_unr(bm::word_t *BMRESTRICT block, bm::word_t co_flag, const bm::word_t *BMRESTRICT mask_block, bm::id64_t *BMRESTRICT digest)
Right bit-shift bitblock by 1 bit (reference) + AND.
Definition: bmfunc.h:4274
#define BMGAP_PTR(ptr)
Definition: bmdef.h:165
const unsigned set_array_size
Definition: bmconst.h:82
const unsigned set_word_mask
Definition: bmconst.h:65
bm::word_t * get_temp_block()
Definition: bmaggregator.h:320
unsigned int id_t
Definition: bmconst.h:35
operation_status get_operation_status() const
Definition: bmaggregator.h:316
const unsigned max_size
Definition: xsample01.cpp:56
bm::id64_t bit_block_and(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src)
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
Definition: bmfunc.h:4733
int get_operation() const
Get current operation code.
Definition: bmaggregator.h:300
#define BM_ASSERT
Definition: bmdef.h:116
bm::word_t * sort_input_blocks_or(const bvector_type_const_ptr *bv_src, unsigned src_size, unsigned i, unsigned j, unsigned *arg_blk_count, unsigned *arg_blk_gap_count)
id64_t wordop_t
Definition: bmconst.h:101
bm::bvector bvector_type
bm::id64_t calc_block_digest0(const bm::word_t *const block)
Compute digest for 64 non-zero areas.
Definition: bmfunc.h:686
bool bit_find_first_if_1(const bm::word_t *block, unsigned *first, bm::id64_t digest)
BIT block find the first set bit if only 1 bit is set.
Definition: bmfunc.h:6348
static unsigned max_top_blocks(const bvector_type_const_ptr *bv_src, unsigned src_size)
bool is_bits_one(const bm::wordop_t *start)
Returns "true" if all bits in the block are 1.
Definition: bmfunc.h:4386
const unsigned set_block_shift
Definition: bmconst.h:49
bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx)
calculate bvector<> global bit-index from block-local coords
Definition: bmfunc.h:7291
bm::id64_t bit_block_and_2way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, bm::id64_t digest)
digest based bit-block AND
Definition: bmfunc.h:4893
const bvector_type * bvector_type_const_ptr
Definition: bmaggregator.h:55
digest_type process_gap_blocks_sub(unsigned block_count, digest_type digest)
bm::id64_t bit_block_and_5way(bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src0, const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src2, const bm::word_t *BMRESTRICT src3, bm::id64_t digest)
digest based bit-block AND 5-way
Definition: bmfunc.h:4826
bool process_shift_right_and(const bm::word_t *arg_blk, digest_type &digest, unsigned carry_over)
#define FULL_BLOCK_FAKE_ADDR
Definition: bmdef.h:136
static unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr *bv_src, unsigned src_size)
Definition: bmaggregator.h:852