BitMagic-C++
bmsparsevec_algo.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2 #define BMSPARSEVEC_ALGO__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 /*! \file bmsparsevec_algo.h
21  \brief Algorithms for sparse_vector<>
22 */
23 
24 #include "bmdef.h"
25 #include "bmsparsevec.h"
26 #include "bmaggregator.h"
27 #include "bmbuffer.h"
28 #include "bmdef.h"
29 
30 #ifdef _MSC_VER
31 # pragma warning( disable: 4146 )
32 #endif
33 
34 
35 /** \defgroup svalgo Sparse vector algorithms
36  Sparse vector algorithms
37  \ingroup svector
38  */
39 
40 
41 namespace bm
42 {
43 
44 
45 /*!
46  \brief Clip dynamic range for signal higher than specified
47 
48  \param svect - sparse vector to do clipping
49  \param high_bit - max bit (inclusive) allowed for this signal vector
50 
51 
52  \ingroup svalgo
53  \sa dynamic_range_clip_low
54 */
55 template<typename SV>
56 void dynamic_range_clip_high(SV& svect, unsigned high_bit)
57 {
58  unsigned sv_plains = svect.plains();
59 
60  BM_ASSERT(sv_plains > high_bit && high_bit > 0);
61 
62  typename SV::bvector_type bv_acc;
63  unsigned i;
64 
65  // combine all the high bits into accumulator vector
66  for (i = high_bit+1; i < sv_plains; ++i)
67  {
68  typename SV::bvector_type* bv_plain = svect.plain(i);
69  if (bv_plain)
70  {
71  bv_acc.bit_or(*bv_plain);
72  svect.free_plain(i);
73  }
74  } // for i
75 
76  // set all bits ON for all low vectors, which happen to be clipped
77  for (i = high_bit; true; --i)
78  {
79  typename SV::bvector_type* bv_plain = svect.get_plain(i);
80  bv_plain->bit_or(bv_acc);
81  if (i == 0)
82  break;
83  } // for i
84 }
85 
86 
87 /*!
88  \brief Clip dynamic range for signal lower than specified (boost)
89 
90  \param svect - sparse vector to do clipping
91  \param low_bit - low bit (inclusive) allowed for this signal vector
92 
93  \ingroup svalgo
94  \sa dynamic_range_clip_high
95 */
96 template<typename SV>
97 void dynamic_range_clip_low(SV& svect, unsigned low_bit)
98 {
99  if (low_bit == 0) return; // nothing to do
100  BM_ASSERT(svect.plains() > low_bit);
101 
102  unsigned sv_plains = svect.plains();
103  typename SV::bvector_type bv_acc1;
104  unsigned i;
105 
106  // combine all the high bits into accumulator vector
107  for (i = low_bit+1; i < sv_plains; ++i)
108  {
109  typename SV::bvector_type* bv_plain = svect.plain(i);
110  if (bv_plain)
111  bv_acc1.bit_or(*bv_plain);
112  } // for i
113 
114  // accumulate all vectors below the clipping point
115  typename SV::bvector_type bv_acc2;
116  typename SV::bvector_type* bv_low_plain = svect.get_plain(low_bit);
117 
118  for (i = low_bit-1; true; --i)
119  {
120  typename SV::bvector_type* bv_plain = svect.plain(i);
121  if (bv_plain)
122  {
123  bv_acc2.bit_or(*bv_plain);
124  svect.free_plain(i);
125  if (i == 0)
126  break;
127  }
128  } // for i
129 
130  // now we want to set 1 in the clipping low plain based on
131  // exclusive or (XOR) between upper and lower parts)
132  // as a result high signal (any bits in the upper plains) gets
133  // slightly lower value (due to clipping) low signal gets amplified
134  // (lower contrast algorithm)
135 
136  bv_acc1.bit_xor(bv_acc2);
137  bv_low_plain->bit_or(bv_acc1);
138 }
139 
140 
141 /**
142  \brief algorithms for sparse_vector scan/seach
143 
144  Scanner uses properties of bit-vector plains to answer questions
145  like "find all sparse vector elements equivalent to XYZ".
146 
147  Class uses fast algorithms based on properties of bit-plains.
148  This is NOT a brute force, direct scan.
149 
150  @ingroup svalgo
151  @ingroup setalgo
152 */
153 template<typename SV>
155 {
156 public:
157  typedef typename SV::bvector_type bvector_type;
158  typedef const bvector_type* bvector_type_const_ptr;
159  typedef bvector_type* bvector_type_ptr;
160  typedef typename SV::value_type value_type;
161  typedef typename SV::size_type size_type;
162 
164  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
165 
166 public:
168 
169  /**
170  \brief bind sparse vector for all searches
171 
172  \param sv - input sparse vector to bind for searches
173  \param sorted - source index is sorted, build index for binary search
174  */
175  void bind(const SV& sv, bool sorted);
176 
177  /**
178  \brief reset sparse vector binding
179  */
180  void reset_binding();
181 
182  /**
183  \brief find all sparse vector elements EQ to search value
184 
185  Find all sparse vector elements equivalent to specified value
186 
187  \param sv - input sparse vector
188  \param value - value to search for
189  \param bv_out - output bit-vector (search result masks 1 elements)
190  */
191  void find_eq(const SV& sv,
192  typename SV::value_type value,
193  typename SV::bvector_type& bv_out);
194 
195  /**
196  \brief find first sparse vector element
197 
198  Find all sparse vector elements equivalent to specified value.
199  Works well if sperse vector represents unordered set
200 
201  \param sv - input sparse vector
202  \param value - value to search for
203  \param pos - output found sparse vector element index
204 
205  \return true if found
206  */
207  bool find_eq(const SV& sv,
208  typename SV::value_type value,
209  typename SV::size_type& pos);
210 
211  /**
212  \brief find first sparse vector element (string)
213  */
214  bool find_eq_str(const SV& sv,
215  const typename SV::value_type* str,
216  typename SV::size_type& pos);
217 
218  /**
219  \brief binary find first sparse vector element (string)
220  Sparse vector must be attached (bind())
221  @sa bind
222  */
223  bool find_eq_str(const typename SV::value_type* str,
224  typename SV::size_type& pos);
225 
226  /**
227  \brief binary find first sparse vector element (string)
228  Sparse vector must be sorted.
229  */
230  bool bfind_eq_str(const SV& sv,
231  const typename SV::value_type* str,
232  typename SV::size_type& pos);
233 
234  /**
235  \brief lower bound search for an array position
236 
237  Method assumes the sparse array is sorted
238 
239  \param sv - input sparse vector
240  \param value - value to search for
241  \param pos - output sparse vector element index
242 
243  \return true if value found
244  */
245  bool lower_bound_str(const SV& sv,
246  const typename SV::value_type* str,
247  typename SV::size_type& pos);
248 
249  /**
250  \brief binary find first sparse vector element (string)
251  Sparse vector must be sorted and attached
252  @sa bind
253  */
254  bool bfind_eq_str(const typename SV::value_type* str,
255  typename SV::size_type& pos);
256 
257  /**
258  \brief find all sparse vector elements EQ to 0
259  \param sv - input sparse vector
260  \param bv_out - output bit-vector (search result masks 1 elements)
261  */
262  void find_zero(const SV& sv,
263  typename SV::bvector_type& bv_out);
264 
265  /*!
266  \brief Find non-zero elements
267  Output vector is computed as a logical OR (join) of all plains
268 
269  \param sv - input sparse vector
270  \param bv_out - output bit-bector of non-zero elements
271  */
272  void find_nonzero(const SV& sv, typename SV::bvector_type& bv_out);
273 
274  /**
275  \brief invert search result ("EQ" to "not EQ")
276 
277  \param sv - input sparse vector
278  \param bv_out - output bit-bector of non-zero elements
279  */
280  void invert(const SV& sv, typename SV::bvector_type& bv_out);
281 
282  /**
283  \brief find all values A IN (C, D, E, F)
284  \param sv - input sparse vector
285  \param start - start iterator (set to search)
286  \param end - end iterator (set to search)
287  \param bv_out - output bit-bector of non-zero elements
288  */
289  template<typename It>
290  void find_eq(const SV& sv,
291  It start,
292  It end,
293  typename SV::bvector_type& bv_out)
294  {
295  typename bvector_type::mem_pool_guard mp_guard;
296  mp_guard.assign_if_not_set(pool_, bv_out); // set algorithm-local memory pool to avoid heap contention
297 
298  bvector_type bv1;
299  typename bvector_type::mem_pool_guard mp_guard1(pool_, bv1);
300  bool any_zero = false;
301  for (; start < end; ++start)
302  {
303  value_type v = *start;
304  any_zero |= (v == 0);
305  bool found = find_eq_with_nulls(sv, v, bv1);
306  if (found)
307  bv_out.bit_or(bv1);
308  else
309  {
310  BM_ASSERT(!bv1.any());
311  }
312  } // for
313  if (any_zero)
314  correct_nulls(sv, bv_out);
315  }
316 
317  /// For testing purposes only
318  ///
319  /// @internal
320  void find_eq_with_nulls_horizontal(const SV& sv,
321  typename SV::value_type value,
322  typename SV::bvector_type& bv_out);
323 
324  /** Exclude possible NULL values from the result vector
325  \param sv - input sparse vector
326  \param bv_out - output bit-bector of non-zero elements
327  */
328  void correct_nulls(const SV& sv, typename SV::bvector_type& bv_out);
329 
330 protected:
331 
332  /// set search boundaries (hint for the aggregator)
333  void set_search_range(size_type from, size_type to);
334 
335  /// reset (disable) search range
336  void reset_search_range();
337 
338  /// find value (may include NULL indexes)
339  bool find_eq_with_nulls(const SV& sv,
340  typename SV::value_type value,
341  typename SV::bvector_type& bv_out,
342  typename SV::size_type search_limit = 0);
343 
344  /// find first value (may include NULL indexes)
345  bool find_first_eq(const SV& sv,
346  typename SV::value_type value,
347  bm::id_t& idx);
348 
349  /// find first string value (may include NULL indexes)
350  bool find_first_eq(const SV& sv,
351  const typename SV::value_type* str,
352  bm::id_t& idx,
353  bool remaped);
354 
355 
356  /// Prepare aggregator for AND-SUB (EQ) search
357  bool prepare_and_sub_aggregator(const SV& sv,
358  typename SV::value_type value);
359 
360  /// Prepare aggregator for AND-SUB (EQ) search (string)
361  bool prepare_and_sub_aggregator(const SV& sv,
362  const typename SV::value_type* str,
363  unsigned octet_start);
364 
365  /// Rank-Select decompression for RSC vectors
366  void decompress(const SV& sv, typename SV::bvector_type& bv_out);
367 
368  int compare_str(const SV& sv, size_type idx, const value_type* str);
369 
370 protected:
372  void operator=(const sparse_vector_scanner&) = delete;
373 
374 protected:
375 
377  {
378  max_columns = SV::max_vector_size
379  };
380 
382  {
385  };
386 
387  typedef bm::heap_matrix<value_type,
389  max_columns,
390  allocator_type> heap_matrix_type0;
391  typedef bm::heap_matrix<value_type,
392  bm::set_total_blocks*3,
393  max_columns,
394  allocator_type> heap_matrix_type3;
395 
396  typedef bm::heap_matrix<typename SV::value_type,
398  SV::sv_octet_plains,
399  allocator_type> matrix_search_buf_type;
400 
401 
402 private:
403  allocator_pool_type pool_;
404  bvector_type bv_tmp_;
407 
408  size_type mask_from_;
409  size_type mask_to_;
410  bool mask_set_;
411 
412  const SV* bound_sv_;
413  heap_matrix_type0 block0_elements_cache_; ///< cache for elements[0] of each block
414  heap_matrix_type3 block3_elements_cache_; ///< cache for elements[16384x] of each block
415  size_type effective_str_max_;
416 
417  value_type remap_value_vect_[SV::max_vector_size];
418  /// masks of allocated bit-plains (1 - means there is a bit-plain)
419  bm::id64_t vector_plain_masks_[SV::max_vector_size];
420  matrix_search_buf_type hmatr_; ///< heap matrix for string search linear stage
421 };
422 
423 
424 /*!
425  \brief Integer set to set transformation (functional image in groups theory)
426  https://en.wikipedia.org/wiki/Image_(mathematics)
427 
428  Input sets gets translated through the function, which is defined as
429  "one to one (or NULL)" binary relation object (sparse_vector).
430  Also works for M:1 relationships.
431 
432  \ingroup svalgo
433  \ingroup setalgo
434 */
435 template<typename SV>
437 {
438 public:
439  typedef typename SV::bvector_type bvector_type;
440  typedef typename SV::value_type value_type;
441  typedef typename SV::size_type size_type;
443 public:
444 
447 
448 
449  /** Get read access to zero-elements vector
450  Zero vector gets populated after attach_sv() is called
451  or as a side-effect of remap() with immediate sv param
452  */
453  const bvector_type& get_bv_zero() const { return bv_zero_; }
454 
455  /** Perform remapping (Image function) (based on attached translation table)
456 
457  \param bv_in - input set, defined as a bit-vector
458  \param bv_out - output set as a bit-vector
459 
460  @sa attach_sv
461  */
462  void remap(const bvector_type& bv_in,
463  bvector_type& bv_out);
464 
465  /** Perform remapping (Image function)
466 
467  \param bv_in - input set, defined as a bit-vector
468  \param sv_brel - binary relation (translation table) sparse vector
469  \param bv_out - output set as a bit-vector
470  */
471  void remap(const bvector_type& bv_in,
472  const SV& sv_brel,
473  bvector_type& bv_out);
474 
475  /** Remap single set element
476 
477  \param id_from - input value
478  \param sv_brel - translation table sparse vector
479  \param id_to - out value
480 
481  \return - true if value was found and remapped
482  */
483  bool remap(size_type id_from, const SV& sv_brel, size_type& id_to);
484 
485 
486  /** Run remap transformation
487 
488  \param bv_in - input set, defined as a bit-vector
489  \param sv_brel - translation table sparse vector
490  \param bv_out - output set as a bit-vector
491 
492  @sa remap
493  */
494  void run(const bvector_type& bv_in,
495  const SV& sv_brel,
496  bvector_type& bv_out)
497  {
498  remap(bv_in, sv_brel, bv_out);
499  }
500 
501  /** Attach a translation table vector for remapping (Image function)
502 
503  \param sv_brel - binary relation sparse vector pointer
504  (pass NULL to detach)
505  \param compute_stats - flag to perform computation of some statistics
506  later used in remapping. This only make sense
507  for series of remappings on the same translation
508  vector.
509  @sa remap
510  */
511  void attach_sv(const SV* sv_brel, bool compute_stats = false);
512 
513 
514 protected:
515  void one_pass_run(const bvector_type& bv_in,
516  const SV& sv_brel,
517  bvector_type& bv_out);
518 
519  /// @internal
520  template<unsigned BSIZE>
522  {
523  size_type BM_VECT_ALIGN gather_idx_[BSIZE] BM_VECT_ALIGN_ATTR;
524  value_type BM_VECT_ALIGN buffer_[BSIZE] BM_VECT_ALIGN_ATTR;
525  };
526 
528  {
529  sv_g_size = 1024 * 8
530  };
532 
533 
534 protected:
536  void operator=(const set2set_11_transform&) = delete;
537 
538 protected:
539  const SV* sv_ptr_; ///< current translation table vector
540  gather_buffer_type* gb_; ///< intermediate buffers
541  bvector_type bv_product_;///< temp vector
542 
543  bool have_stats_; ///< flag of statistics presense
544  bvector_type bv_zero_; ///< bit-vector for zero elements
545 
546  allocator_pool_type pool_;
547 };
548 
549 
550 
551 //----------------------------------------------------------------------------
552 //
553 //----------------------------------------------------------------------------
554 
555 template<typename SV>
557 : sv_ptr_(0), gb_(0), have_stats_(false)
558 {
559  gb_ = (gather_buffer_type*)::malloc(sizeof(gather_buffer_type));
560  if (!gb_)
561  {
562  SV::throw_bad_alloc();
563  }
564 }
565 
566 //----------------------------------------------------------------------------
567 
568 template<typename SV>
570 {
571  if (gb_)
572  ::free(gb_);
573 }
574 
575 
576 //----------------------------------------------------------------------------
577 
578 template<typename SV>
579 void set2set_11_transform<SV>::attach_sv(const SV* sv_brel, bool compute_stats)
580 {
581  sv_ptr_ = sv_brel;
582  if (!sv_ptr_)
583  {
584  have_stats_ = false;
585  }
586  else
587  {
588  if (sv_brel->empty() || !compute_stats)
589  return; // nothing to do
590  const bvector_type* bv_non_null = sv_brel->get_null_bvector();
591  if (bv_non_null)
592  return; // already have 0 elements vector
593 
594 
595  typename bvector_type::mem_pool_guard mp_g_z;
597 
599  scanner.find_zero(*sv_brel, bv_zero_);
600  have_stats_ = true;
601  }
602 }
603 
604 //----------------------------------------------------------------------------
605 
606 template<typename SV>
608  const SV& sv_brel,
609  size_type& id_to)
610 {
611  if (sv_brel.empty())
612  return false; // nothing to do
613 
614  const bvector_type* bv_non_null = sv_brel.get_null_bvector();
615  if (bv_non_null)
616  {
617  if (!bv_non_null->test(id_from))
618  return false;
619  }
620  size_type idx = sv_brel.translate_address(id_from);
621  id_to = sv_brel.get(idx);
622  return true;
623 }
624 
625 //----------------------------------------------------------------------------
626 
627 template<typename SV>
629  const SV& sv_brel,
630  bvector_type& bv_out)
631 {
632  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
633  mp_g_out.assign_if_not_set(pool_, bv_out);
636 
637  attach_sv(&sv_brel);
638 
639  remap(bv_in, bv_out);
640 
641  attach_sv(0); // detach translation table
642 }
643 
644 template<typename SV>
646  bvector_type& bv_out)
647 {
649 
650  bv_out.clear();
651 
652  if (sv_ptr_ == 0 || sv_ptr_->empty())
653  return; // nothing to do
654 
655  bv_out.init(); // just in case to "fast set" later
656 
657  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
658  mp_g_out.assign_if_not_set(pool_, bv_out);
661 
662 
663  const bvector_type* enum_bv;
664 
665  const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
666  if (bv_non_null)
667  {
668  // TODO: optimize with 2-way ops
669  bv_product_ = bv_in;
670  bv_product_.bit_and(*bv_non_null);
671  enum_bv = &bv_product_;
672  }
673  else // non-NULL vector is not available
674  {
675  enum_bv = &bv_in;
676  // if we have any elements mapping into "0" on the other end
677  // we map it once (chances are there are many duplicates)
678  //
679 
680  if (have_stats_) // pre-attached translation statistics
681  {
682  bv_product_ = bv_in;
683  unsigned cnt1 = bv_product_.count();
684  bv_product_.bit_sub(bv_zero_);
685  unsigned cnt2 = bv_product_.count();
686 
687  BM_ASSERT(cnt2 <= cnt1);
688 
689  if (cnt1 != cnt2) // mapping included 0 elements
690  bv_out.set_bit_no_check(0);
691 
692  enum_bv = &bv_product_;
693  }
694  }
695 
696 
697 
698  unsigned buf_cnt, nb_old, nb;
699  buf_cnt = nb_old = 0;
700 
701  typename bvector_type::enumerator en(enum_bv->first());
702  for (; en.valid(); ++en)
703  {
704  typename SV::size_type idx = *en;
705  idx = sv_ptr_->translate_address(idx);
706 
707  nb = unsigned(idx >> bm::set_block_shift);
708  if (nb != nb_old) // new blocks
709  {
710  if (buf_cnt)
711  {
712  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
713  bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
714  buf_cnt ^= buf_cnt;
715  }
716  nb_old = nb;
717  gb_->gather_idx_[buf_cnt++] = idx;
718  }
719  else
720  {
721  gb_->gather_idx_[buf_cnt++] = idx;
722  }
723 
724  if (buf_cnt == sv_g_size)
725  {
726  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
727  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
728  buf_cnt ^= buf_cnt;
729  }
730  } // for en
731  if (buf_cnt)
732  {
733  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
734  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
735  }
736 }
737 
738 
739 //----------------------------------------------------------------------------
740 
741 template<typename SV>
743  const SV& sv_brel,
744  bvector_type& bv_out)
745 {
746  if (sv_brel.empty())
747  return; // nothing to do
748 
749  bv_out.init();
750 
751  typename SV::const_iterator it = sv_brel.begin();
752  for (; it.valid(); ++it)
753  {
754  typename SV::value_type t_id = *it;
755  bm::id_t idx = it.pos();
756  if (bv_in.test(idx))
757  {
758  bv_out.set_bit_no_check(t_id);
759  }
760  } // for
761 }
762 
763 
764 //----------------------------------------------------------------------------
765 //
766 //----------------------------------------------------------------------------
767 
768 template<typename SV>
770 {
771  mask_set_ = false;
772  mask_from_ = mask_to_ = bm::id_max;
773 
774  bound_sv_ = 0;
775  effective_str_max_ = 0;
776 }
777 
778 //----------------------------------------------------------------------------
779 
780 template<typename SV>
781 void sparse_vector_scanner<SV>::bind(const SV& sv, bool sorted)
782 {
783  bound_sv_ = &sv;
784  if (sorted)
785  {
786  block0_elements_cache_.init();
787  block0_elements_cache_.set_zero();
788  block3_elements_cache_.init();
789  block3_elements_cache_.set_zero();
790 
791  effective_str_max_ = sv.effective_vector_max();
792  // fill in elements cache
793  for (size_type i = 0; i < sv.size(); i+= bm::gap_max_bits)
794  {
795  unsigned nb = unsigned(i >> bm::set_block_shift);
796  value_type* s0 = block0_elements_cache_.row(nb);
797  sv.get(i, s0, block0_elements_cache_.cols());
798 
799  for (size_type k = 0; k < 3; ++k)
800  {
801  value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
802  size_type idx = i + (k+1) * bm::sub_block3_size;
803  sv.get(idx, s1, block3_elements_cache_.cols());
804  } // for k
805  } // for i
806  }
807  // pre-calculate vector plain masks
808  //
809  for (unsigned i = 0; i < SV::max_vector_size; ++i)
810  {
811  vector_plain_masks_[i] = sv.get_plains_mask(i);
812  } // for i
813 
814 }
815 
816 //----------------------------------------------------------------------------
817 
818 template<typename SV>
820 {
821  bound_sv_ = 0;
822  effective_str_max_ = 0;
823 }
824 
825 //----------------------------------------------------------------------------
826 
827 template<typename SV>
829  typename SV::bvector_type& bv_out)
830 {
831  if (sv.size() == 0)
832  {
833  bv_out.clear();
834  return;
835  }
836 
837  find_nonzero(sv, bv_out);
838  if (sv.is_compressed())
839  {
840  bv_out.invert();
841  bv_out.set_range(sv.effective_size(), bm::id_max - 1, false);
842  decompress(sv, bv_out);
843  }
844  else
845  {
846  invert(sv, bv_out);
847  }
848  correct_nulls(sv, bv_out);
849 }
850 
851 //----------------------------------------------------------------------------
852 
853 template<typename SV>
854 void sparse_vector_scanner<SV>::invert(const SV& sv, typename SV::bvector_type& bv_out)
855 {
856  if (sv.size() == 0)
857  {
858  bv_out.clear();
859  return;
860  }
861  bv_out.invert();
862  const bvector_type* bv_null = sv.get_null_bvector();
863  if (bv_null) // correct result to only use not NULL elements
864  bv_out &= *bv_null;
865  else
866  bv_out.set_range(sv.size(), bm::id_max - 1, false);
867 }
868 
869 //----------------------------------------------------------------------------
870 
871 template<typename SV>
873  typename SV::bvector_type& bv_out)
874 {
875  const bvector_type* bv_null = sv.get_null_bvector();
876  if (bv_null) // correct result to only use not NULL elements
877  bv_out.bit_and(*bv_null);
878 }
879 
880 //----------------------------------------------------------------------------
881 
882 template<typename SV>
884  typename SV::value_type value,
885  typename SV::bvector_type& bv_out,
886  typename SV::size_type search_limit)
887 {
888  if (sv.empty())
889  return false; // nothing to do
890 
891  if (!value)
892  {
893  find_zero(sv, bv_out);
894  return bv_out.any();
895  }
896  agg_.reset();
897 
898  bool found = prepare_and_sub_aggregator(sv, value);
899  if (!found)
900  {
901  bv_out.clear();
902  return found;
903  }
904 
905  bool any = (search_limit == 1);
906  found = agg_.combine_and_sub(bv_out, any);
907  agg_.reset();
908  return found;
909 }
910 
911 //----------------------------------------------------------------------------
912 
913 template<typename SV>
915  typename SV::value_type value,
916  bm::id_t& idx)
917 {
918  if (sv.empty())
919  return false; // nothing to do
920 
921  BM_ASSERT(value); // cannot handle zero values
922  if (!value)
923  return false;
924 
925  agg_.reset();
926  bool found = prepare_and_sub_aggregator(sv, value);
927  if (!found)
928  return found;
929  found = agg_.find_first_and_sub(idx);
930  agg_.reset();
931  return found;
932 }
933 
934 
935 //----------------------------------------------------------------------------
936 
937 template<typename SV>
939  const typename SV::value_type* str,
940  bm::id_t& idx,
941  bool remaped)
942 {
943  if (sv.empty())
944  return false; // nothing to do
945  BM_ASSERT(*str);
946 
947  if (!*str)
948  return false;
949 
950  agg_.reset();
951  unsigned common_prefix_len = 0;
952 
953  if (mask_set_)
954  {
955  agg_.set_range_hint(mask_from_, mask_to_);
956  common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
957  }
958 
959  if (remaped)
960  {
961  str = remap_value_vect_;
962  }
963  else
964  {
965  if (sv.is_remap() && str != remap_value_vect_)
966  {
967  bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
968  if (!r)
969  return r;
970  str = remap_value_vect_;
971  }
972  }
973 
974  bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
975  if (!found)
976  return found;
977 
978  found = agg_.find_first_and_sub(idx);
979  agg_.reset();
980  return found;
981 }
982 
983 //----------------------------------------------------------------------------
984 
985 template<typename SV>
987  const typename SV::value_type* str,
988  unsigned octet_start)
989 {
990  unsigned char bits[64];
991 
992  int len = 0;
993  for (; str[len] != 0; ++len)
994  {}
995  BM_ASSERT(len);
996 
997  // use reverse order (faster for sorted arrays)
998  for (int octet_idx = len-1; octet_idx >= 0; --octet_idx)
999  {
1000  if (unsigned(octet_idx) < octet_start) // common prefix
1001  break;
1002 
1003  unsigned value = unsigned(str[octet_idx]) & 0xFF;
1004  BM_ASSERT(value != 0);
1005 
1006  bm::id64_t plains_mask;
1007  if (&sv == bound_sv_)
1008  plains_mask = vector_plain_masks_[octet_idx];
1009  else
1010  plains_mask = sv.get_plains_mask(unsigned(octet_idx));
1011 
1012  if ((value & plains_mask) != value) // pre-screen for impossible values
1013  return false; // found non-existing plain
1014 
1015  // prep the lists for combined AND-SUB aggregator
1016  //
1017  unsigned short bit_count_v = bm::bitscan(value, bits);
1018  for (unsigned i = 0; i < bit_count_v; ++i)
1019  {
1020  unsigned bit_idx = bits[i];
1021  BM_ASSERT(value & (value_type(1) << bit_idx));
1022  unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1023  const bvector_type* bv = sv.get_plain(plain_idx);
1024  agg_.add(bv);
1025  } // for i
1026 
1027  unsigned vinv = unsigned(value);
1028  if (bm::conditional<sizeof(value_type) == 1>::test())
1029  {
1030  vinv ^= 0xFF;
1031  }
1032  else // 2-byte char
1033  {
1034  BM_ASSERT(sizeof(value_type) == 2);
1035  vinv ^= 0xFFFF;
1036  }
1037  // exclude the octet bits which are all not set (no vectors)
1038  vinv &= unsigned(plains_mask);
1039  for (unsigned octet_plain = (unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1040  {
1041  bm::id_t t = vinv & -vinv;
1042  unsigned bit_idx = bm::word_bitcount(t - 1);
1043  unsigned plain_idx = octet_plain + bit_idx;
1044  const bvector_type* bv = sv.get_plain(plain_idx);
1045  BM_ASSERT(bv);
1046  agg_.add(bv, 1); // agg to SUB group
1047  } // for
1048  } // for octet_idx
1049 
1050  // add all vectors above string len to the SUB operation group
1051  //
1052  typename SV::size_type plain_idx = unsigned(len * 8) + 1;
1053  typename SV::size_type plains;
1054  if (&sv == bound_sv_)
1055  plains = effective_str_max_ * unsigned(sizeof(value_type)) * 8;
1056  else
1057  plains = sv.plains();
1058  for (; plain_idx < plains; ++plain_idx)
1059  {
1060  bvector_type_const_ptr bv = sv.get_plain(plain_idx);
1061  if (bv)
1062  agg_.add(bv, 1); // agg to SUB group
1063  } // for
1064  return true;
1065 }
1066 
1067 //----------------------------------------------------------------------------
1068 
1069 template<typename SV>
1071  typename SV::value_type value)
1072 {
1073  unsigned char bits[sizeof(value) * 8];
1074  unsigned short bit_count_v = bm::bitscan(value, bits);
1075  BM_ASSERT(bit_count_v);
1076 
1077  // prep the lists for combined AND-SUB aggregator
1078  // (backward order has better chance for bit reduction on AND)
1079  //
1080  for (unsigned i = bit_count_v; i > 0; --i)
1081  {
1082  unsigned bit_idx = bits[i-1];
1083  BM_ASSERT(value & (value_type(1) << bit_idx));
1084  const bvector_type* bv = sv.get_plain(bit_idx);
1085  if (bv)
1086  agg_.add(bv);
1087  else
1088  return false;
1089  }
1090 
1091  unsigned sv_plains = sv.effective_plains();
1092  for (unsigned i = 0; (i < sv_plains) && value; ++i)
1093  {
1094  bvector_type_const_ptr bv = sv.get_plain(i);
1095  if (bv && !(value & (value_type(1) << i)))
1096  agg_.add(bv, 1); // agg to SUB group
1097  } // for i
1098  return true;
1099 }
1100 
1101 
1102 //----------------------------------------------------------------------------
1103 
1104 template<typename SV>
1106  typename SV::value_type value,
1107  typename SV::bvector_type& bv_out)
1108 {
1109  if (sv.empty())
1110  return; // nothing to do
1111 
1112  if (!value)
1113  {
1114  find_zero(sv, bv_out);
1115  return;
1116  }
1117 
1118  unsigned char bits[sizeof(value) * 8];
1119  unsigned short bit_count_v = bm::bitscan(value, bits);
1120  BM_ASSERT(bit_count_v);
1121 
1122  // aggregate AND all matching vectors
1123  {
1124  const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1125  if (bv_plain)
1126  bv_out = *bv_plain;
1127  else // plain not found
1128  {
1129  bv_out.clear();
1130  return;
1131  }
1132  }
1133  for (unsigned i = 0; i < bit_count_v; ++i)
1134  {
1135  const bvector_type* bv_plain = sv.get_plain(bits[i]);
1136  if (bv_plain)
1137  bv_out &= *bv_plain;
1138  else // mandatory plain not found - empty result!
1139  {
1140  bv_out.clear();
1141  return;
1142  }
1143  } // for i
1144 
1145  // SUB all other plains
1146  unsigned sv_plains = sv.effective_plains();
1147  for (unsigned i = 0; (i < sv_plains) && value; ++i)
1148  {
1149  const bvector_type* bv_plain = sv.get_plain(i);
1150  if (bv_plain && !(value & (value_type(1) << i)))
1151  bv_out -= *bv_plain;
1152  }
1153 }
1154 
1155 //----------------------------------------------------------------------------
1156 
1157 template<typename SV>
1158 bool sparse_vector_scanner<SV>::find_eq_str(const typename SV::value_type* str,
1159  typename SV::size_type& pos)
1160 {
1161  BM_ASSERT(bound_sv_);
1162  return find_eq_str(*bound_sv_, str, pos);
1163 }
1164 
1165 //----------------------------------------------------------------------------
1166 
1167 template<typename SV>
1169  const typename SV::value_type* str,
1170  typename SV::size_type& pos)
1171 {
1172  bool found = false;
1173  if (sv.empty())
1174  return found;
1175  if (*str)
1176  {
1177  bool remaped = false;
1178  if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
1179  {
1180  if (sv.is_remap() && str != remap_value_vect_)
1181  {
1182  remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1183  if (!remaped)
1184  return remaped;
1185  str = remap_value_vect_;
1186  }
1187  }
1188 
1189  bm::id_t found_pos;
1190  found = find_first_eq(sv, str, found_pos, remaped);
1191  if (found)
1192  {
1193  pos = found_pos;
1194  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1195  {
1196  if (sv.is_compressed()) // if compressed vector - need rank translation
1197  found = sv.find_rank(found_pos + 1, pos);
1198  }
1199  }
1200  }
1201  else // search for zero value
1202  {
1203  // TODO: implement optimized version which would work witout temp vector
1204  typename SV::bvector_type bv_zero;
1205  find_zero(sv, bv_zero);
1206  found = bv_zero.find(pos);
1207  }
1208  return found;
1209 }
1210 
1211 //----------------------------------------------------------------------------
1212 
1213 template<typename SV>
1215  const typename SV::value_type* str,
1216  typename SV::size_type& pos)
1217 {
1218  bool found = false;
1219  if (sv.empty())
1220  return found;
1221 
1222  if (*str)
1223  {
1224  bool remaped = false;
1225  // test search pre-condition based on remap tables
1227  {
1228  if (sv.is_remap() && str != remap_value_vect_)
1229  {
1230  remaped = sv.remap_tosv(
1231  remap_value_vect_, SV::max_vector_size, str);
1232  if (!remaped)
1233  return remaped;
1234  }
1235  }
1236 
1237  reset_search_range();
1238 
1239  // narrow down the search
1240  const unsigned min_distance_cutoff = bm::gap_max_bits + bm::gap_max_bits / 2;
1241  size_type l, r, dist;
1242  l = 0; r = sv.size()-1;
1243  bm::id_t found_pos;
1244 
1245  // binary search to narrow down the search window
1246  while (l <= r)
1247  {
1248  dist = r - l;
1249  if (dist < min_distance_cutoff)
1250  {
1251  // we are in an narrow <2 blocks window, but still may be in two
1252  // different neighboring blocks, lets try to narrow
1253  // to exactly one block
1254 
1255  unsigned nb_l = unsigned(l >> bm::set_block_shift);
1256  unsigned nb_r = unsigned(r >> bm::set_block_shift);
1257  if (nb_l != nb_r)
1258  {
1259  size_type mid = nb_r * bm::gap_max_bits;
1260  if (mid < r)
1261  {
1262  int cmp = this->compare_str(sv, mid, str);
1263  if (cmp < 0) // mid < str
1264  l = mid;
1265  else
1266  r = mid-(cmp!=0); // branchless if (cmp==0) r=mid;
1267  BM_ASSERT(l < r);
1268  }
1269  nb_l = unsigned(l >> bm::set_block_shift);
1270  nb_r = unsigned(r >> bm::set_block_shift);
1271  }
1272 
1273  if (nb_l == nb_r)
1274  {
1275  unsigned max_nb = sv.size() >> bm::set_block_shift;
1276  if (nb_l != max_nb)
1277  {
1278  // linear in-place fixed depth scan to identify the sub-range
1280  int cmp = this->compare_str(sv, mid, str);
1281  if (cmp < 0)
1282  {
1283  l = mid;
1284  mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 2;
1285  cmp = this->compare_str(sv, mid, str);
1286  if (cmp < 0)
1287  {
1288  l = mid;
1289  mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 3;
1290  cmp = this->compare_str(sv, mid, str);
1291  if (cmp < 0)
1292  l = mid;
1293  else
1294  r = mid;
1295  }
1296  else
1297  {
1298  r = mid;
1299  }
1300  }
1301  else
1302  {
1303  r = mid;
1304  }
1305  }
1306  }
1307 
1308  set_search_range(l, r);
1309  break;
1310  }
1311 
1312  typename SV::size_type mid = dist/2+l;
1313  unsigned nb = unsigned(mid >> bm::set_block_shift);
1314  mid = nb * bm::gap_max_bits;
1315  if (mid <= l)
1316  {
1317  if (nb == 0 && r > bm::gap_max_bits)
1318  mid = bm::gap_max_bits;
1319  else
1320  mid = dist / 2 + l;
1321  }
1322  BM_ASSERT(mid > l);
1323 
1324  int cmp = this->compare_str(sv, mid, str);
1325  if (cmp == 0)
1326  {
1327  found_pos = mid;
1328  found = true;
1329  set_search_range(l, mid);
1330  break;
1331  }
1332  if (cmp < 0)
1333  l = mid+1;
1334  else
1335  r = mid-1;
1336  } // while
1337 
1338  // use linear search (range is set)
1339  found = find_first_eq(sv, str, found_pos, remaped);
1340  if (found)
1341  {
1342  pos = found_pos;
1343  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1344  {
1345  if (sv.is_compressed()) // if compressed vector - need rank translation
1346  found = sv.find_rank(found_pos + 1, pos);
1347  }
1348  }
1349  reset_search_range();
1350  }
1351  else // search for zero value
1352  {
1353  // TODO: implement optimized version which would work without temp vector
1354  typename SV::bvector_type bv_zero;
1355  find_zero(sv, bv_zero);
1356  found = bv_zero.find(pos);
1357  }
1358  return found;
1359 }
1360 
1361 //----------------------------------------------------------------------------
1362 
1363 template<typename SV>
1364 bool sparse_vector_scanner<SV>::bfind_eq_str(const typename SV::value_type* str,
1365  typename SV::size_type& pos)
1366 {
1367  BM_ASSERT(bound_sv_);
1368  return bfind_eq_str(*bound_sv_, str, pos);
1369 }
1370 
1371 //----------------------------------------------------------------------------
1372 
1373 template<typename SV>
1375  const SV& sv,
1376  const typename SV::value_type* str,
1377  typename SV::size_type& pos)
1378 {
1379  int cmp;
1380  size_type l = 0, r = sv.size();
1381 
1382  if (l == r) // empty vector
1383  {
1384  pos = 0;
1385  return false;
1386  }
1387  --r;
1388 
1389  // check initial boundary conditions if insert point is at tail/head
1390  cmp = this->compare_str(sv, l, str); // left (0) boundary check
1391  if (cmp > 0) // vect[x] > str
1392  {
1393  pos = 0;
1394  return false;
1395  }
1396  if (cmp == 0)
1397  {
1398  pos = 0;
1399  return true;
1400  }
1401  cmp = this->compare_str(sv, r, str); // right(size-1) boundary check
1402  if (cmp == 0)
1403  {
1404  pos = r;
1405  // back-scan to rewind all duplicates
1406  for (; r >= 0; --r)
1407  {
1408  cmp = this->compare_str(sv, r, str);
1409  if (cmp != 0)
1410  return true;
1411  pos = r;
1412  } // for i
1413  return true;
1414  }
1415  if (cmp < 0) // vect[x] < str
1416  {
1417  pos = r+1;
1418  return false;
1419  }
1420 
1421  size_type dist = r - l;
1422  if (dist < linear_cutoff1)
1423  {
1424  for (; l <= r; ++l)
1425  {
1426  cmp = this->compare_str(sv, l, str);
1427  if (cmp == 0)
1428  {
1429  pos = l;
1430  return true;
1431  }
1432  if (cmp > 0)
1433  {
1434  pos = l;
1435  return false;
1436  }
1437  } // for
1438  }
1439  while (l <= r)
1440  {
1441  unsigned mid = (r-l)/2+l;
1442  cmp = this->compare_str(sv, mid, str);
1443  if (cmp == 0)
1444  {
1445  pos = mid;
1446  // back-scan to rewind all duplicates
1447  for (size_type i = mid-1; i >= 0; --i)
1448  {
1449  cmp = this->compare_str(sv, i, str);
1450  if (cmp != 0)
1451  return true;
1452  pos = i;
1453  } // for i
1454  pos = 0;
1455  return true;
1456  }
1457  if (cmp < 0)
1458  l = mid+1;
1459  else
1460  r = mid-1;
1461 
1462  dist = r - l;
1463  if (dist < linear_cutoff2) // do linear scan here
1464  {
1465  hmatr_.init();
1466 
1467  dist = sv.decode(hmatr_, l, dist+1);
1468  for (unsigned i = 0; i < dist; ++i, ++l)
1469  {
1470  const typename SV::value_type* hm_str = hmatr_.row(i);
1471  cmp = ::strcmp(hm_str, str);
1472  pos = l;
1473  if (cmp == 0)
1474  return true;
1475  if (cmp > 0) // vect[x] > str
1476  return false;
1477  }
1478  cmp = this->compare_str(sv, l, str);
1479  if (cmp > 0) // vect[x] > str
1480  {
1481  pos = l;
1482  return false;
1483  }
1484 
1485  BM_ASSERT(0);
1486  return false;
1487  }
1488  } // while
1489 
1490  BM_ASSERT(0);
1491  return false;
1492 }
1493 
1494 
1495 //----------------------------------------------------------------------------
1496 
1497 template<typename SV>
1499  size_type idx,
1500  const value_type* str)
1501 {
1502  if (bound_sv_ == &sv)
1503  {
1504  unsigned nb = unsigned(idx >> bm::set_block_shift);
1505  unsigned nbit = unsigned(idx & bm::set_block_mask);
1506  if (nbit == 0) // access to sentinel, first block element
1507  {
1508  value_type* s0 = block0_elements_cache_.row(nb);
1509  if (*s0 == 0) // uninitialized element
1510  {
1511  sv.get(idx, s0, block0_elements_cache_.cols());
1512  }
1513  int res = 0;
1514  for (unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1515  {
1516  char octet = str[i]; char value = s0[i];
1517  res = (value > octet) - (value < octet);
1518  if (res || !octet)
1519  break;
1520  } // for i
1521  BM_ASSERT(res == sv.compare(idx, str));
1522  return res;
1523  }
1524  else
1525  {
1526  if (nbit % bm::sub_block3_size == 0) // TODO: use AND mask here
1527  {
1528  size_type k = nbit / bm::sub_block3_size - 1;
1529  value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1530  int res = 0;
1531  for (unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
1532  {
1533  char octet = str[i]; char value = s1[i];
1534  res = (value > octet) - (value < octet);
1535  if (res || !octet)
1536  break;
1537  } // for i
1538  BM_ASSERT(res == sv.compare(idx, str));
1539  return res;
1540  }
1541  }
1542  }
1543  return sv.compare(idx, str);
1544 }
1545 
1546 //----------------------------------------------------------------------------
1547 
1548 template<typename SV>
1550  typename SV::value_type value,
1551  typename SV::bvector_type& bv_out)
1552 {
1553  if (sv.empty())
1554  {
1555  bv_out.clear();
1556  return; // nothing to do
1557  }
1558  if (!value)
1559  {
1560  find_zero(sv, bv_out);
1561  return;
1562  }
1563 
1564  find_eq_with_nulls(sv, value, bv_out, 0);
1565 
1566  decompress(sv, bv_out);
1567  correct_nulls(sv, bv_out);
1568 }
1569 
1570 //----------------------------------------------------------------------------
1571 
1572 template<typename SV>
1574  typename SV::value_type value,
1575  typename SV::size_type& pos)
1576 {
1577  if (!value) // zero value - special case
1578  {
1579  bvector_type bv_zero;
1580  find_eq(sv, value, bv_zero);
1581  bool found = bv_zero.find(pos);
1582  return found;
1583  }
1584 
1585  bm::id_t found_pos;
1586  bool found = find_first_eq(sv, value, found_pos);
1587  if (found)
1588  {
1589  pos = found_pos;
1590  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1591  {
1592  if (sv.is_compressed()) // if compressed vector - need rank translation
1593  found = sv.find_rank(found_pos + 1, pos);
1594  }
1595  }
1596  return found;
1597 }
1598 
1599 //----------------------------------------------------------------------------
1600 
1601 template<typename SV>
1603  typename SV::bvector_type& bv_out)
1604 {
1605  agg_.reset(); // in case if previous scan was interrupted
1606  for (unsigned i = 0; i < sv.plains(); ++i)
1607  agg_.add(sv.get_plain(i));
1608  agg_.combine_or(bv_out);
1609  agg_.reset();
1610 }
1611 
1612 //----------------------------------------------------------------------------
1613 
1614 template<typename SV>
1616  typename SV::bvector_type& bv_out)
1617 {
1618  if (!sv.is_compressed())
1619  return; // nothing to do
1620  const bvector_type* bv_non_null = sv.get_null_bvector();
1621  BM_ASSERT(bv_non_null);
1622 
1623  // TODO: implement faster decompressor for small result-sets
1624  rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
1625  bv_out.swap(bv_tmp_);
1626 }
1627 
1628 //----------------------------------------------------------------------------
1629 
1630 template<typename SV>
1632 {
1633  BM_ASSERT(from < to);
1634  mask_from_ = from;
1635  mask_to_ = to;
1636  mask_set_ = true;
1637 }
1638 
1639 //----------------------------------------------------------------------------
1640 
1641 template<typename SV>
1643 {
1644  mask_set_ = false;
1645  mask_from_ = mask_to_ = bm::id_max;
1646 }
1647 
1648 
1649 //----------------------------------------------------------------------------
1650 //
1651 //----------------------------------------------------------------------------
1652 
1653 
1654 } // namespace bm
1655 
1656 #include "bmundef.h"
1657 
1658 #endif
bm::heap_matrix< typename SV::value_type, linear_cutoff2, SV::sv_octet_plains, allocator_type > matrix_search_buf_type
SV::bvector_type bvector_type
const SV * sv_ptr_
current translation table vector
allocator_pool_type pool_
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
#define BM_VECT_ALIGN
Definition: bmdef.h:293
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w)
Definition: bmfunc.h:179
ad-hoc conditional expressions
Definition: bmfunc.h:123
gather_buffer_type * gb_
intermediate buffers
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
const bvector_type * bvector_type_const_ptr
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
bm::heap_matrix< value_type, bm::set_total_blocks, max_columns, allocator_type > heap_matrix_type0
unsigned long long int id64_t
Definition: bmconst.h:32
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv)
Definition: bm.h:1213
bvector_type bv_zero_
bit-vector for zero elements
void attach_sv(const SV *sv_brel, bool compute_stats=false)
Attach a translation table vector for remapping (Image function)
void find_eq(const SV &sv, It start, It end, typename SV::bvector_type &bv_out)
find all values A IN (C, D, E, F)
Definition: bm.h:69
bool find_eq_with_nulls(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out, typename SV::size_type search_limit=0)
find value (may include NULL indexes)
pre-processor un-defines to avoid global space pollution (internal)
algorithms for sparse_vector scan/seach
size_type BM_VECT_ALIGN gather_idx_ [BSIZE] BM_VECT_ALIGN_ATTR
void reset_binding()
reset sparse vector binding
gather_buffer< sv_g_size > gather_buffer_type
int compare_str(const SV &sv, size_type idx, const value_type *str)
const unsigned id_max
Definition: bmconst.h:44
void remap(const bvector_type &bv_in, bvector_type &bv_out)
Perform remapping (Image function) (based on attached translation table)
void reset_search_range()
reset (disable) search range
bvector_type bv_product_
temp vector
Algorithms for fast aggregation of N bvectors.
void operator=(const sparse_vector_scanner &)=delete
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
Alloc allocator_type
Definition: bm.h:138
input set is sorted (ascending order)
Definition: bmconst.h:168
const unsigned sub_block3_size
Definition: bmconst.h:96
void one_pass_run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
bool prepare_and_sub_aggregator(const SV &sv, typename SV::value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bvector_type::allocator_type allocator_type
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string)
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:139
Definitions(internal)
void find_zero(const SV &sv, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to 0
allocator_type::allocator_pool_type allocator_pool_type
void find_nonzero(const SV &sv, typename SV::bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all plains.
bool find_first_eq(const SV &sv, typename SV::value_type value, bm::id_t &idx)
find first value (may include NULL indexes)
const unsigned gap_max_bits
Definition: bmconst.h:73
value_type BM_VECT_ALIGN buffer_ [BSIZE] BM_VECT_ALIGN_ATTR
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
bm::heap_matrix< value_type, bm::set_total_blocks *3, max_columns, allocator_type > heap_matrix_type3
bool lower_bound_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
lower bound search for an array position
const unsigned set_block_mask
Definition: bmconst.h:50
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:601
void find_eq_with_nulls_horizontal(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
For testing purposes only.
void run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Run remap transformation.
unsigned int id_t
Definition: bmconst.h:35
unsigned short bitscan(V w, B *bits)
Definition: bmfunc.h:503
SV::bvector_type bvector_type
void decompress(const SV &sv, typename SV::bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
#define BM_ASSERT
Definition: bmdef.h:116
const unsigned set_total_blocks
Definition: bmconst.h:85
const bvector_type & get_bv_zero() const
Get read access to zero-elements vector Zero vector gets populated after attach_sv() is called or as ...
bm::bvector bvector_type
const unsigned set_block_shift
Definition: bmconst.h:49
void correct_nulls(const SV &sv, typename SV::bvector_type &bv_out)
Exclude possible NULL values from the result vector.
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand OR : this := bv1 OR bv2
Definition: bm.h:4386
sorted and in one block (internal!)
Definition: bmconst.h:169
bool have_stats_
flag of statistics presense
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
Integer set to set transformation (functional image in groups theory) https://en.wikipedia.org/wiki/Image_(mathematics)