BitMagic-C++
bmsparsevec_algo.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2 #define BMSPARSEVEC_ALGO__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 /*! \file bmsparsevec_algo.h
21  \brief Algorithms for sparse_vector<>
22 */
23 
24 #include "bmdef.h"
25 #include "bmsparsevec.h"
26 #include "bmaggregator.h"
27 #include "bmdef.h"
28 
29 
30 /** \defgroup svalgo Sparse vector algorithms
31  Sparse vector algorithms
32  \ingroup svector
33  */
34 
35 
36 namespace bm
37 {
38 
39 
40 /*!
41  \brief Clip dynamic range for signal higher than specified
42 
43  \param svect - sparse vector to do clipping
44  \param high_bit - max bit (inclusive) allowed for this signal vector
45 
46 
47  \ingroup svalgo
48  \sa dynamic_range_clip_low
49 */
50 template<typename SV>
51 void dynamic_range_clip_high(SV& svect, unsigned high_bit)
52 {
53  unsigned sv_plains = svect.plains();
54 
55  BM_ASSERT(sv_plains > high_bit && high_bit > 0);
56 
57  typename SV::bvector_type bv_acc;
58  unsigned i;
59 
60  // combine all the high bits into accumulator vector
61  for (i = high_bit+1; i < sv_plains; ++i)
62  {
63  typename SV::bvector_type* bv_plain = svect.plain(i);
64  if (bv_plain)
65  {
66  bv_acc.bit_or(*bv_plain);
67  svect.free_plain(i);
68  }
69  } // for i
70 
71  // set all bits ON for all low vectors, which happen to be clipped
72  for (i = high_bit; true; --i)
73  {
74  typename SV::bvector_type* bv_plain = svect.get_plain(i);
75  bv_plain->bit_or(bv_acc);
76  if (i == 0)
77  break;
78  } // for i
79 }
80 
81 
82 /*!
83  \brief Clip dynamic range for signal lower than specified (boost)
84 
85  \param svect - sparse vector to do clipping
86  \param low_bit - low bit (inclusive) allowed for this signal vector
87 
88  \ingroup svalgo
89  \sa dynamic_range_clip_high
90 */
91 template<typename SV>
92 void dynamic_range_clip_low(SV& svect, unsigned low_bit)
93 {
94  if (low_bit == 0) return; // nothing to do
95  BM_ASSERT(svect.plains() > low_bit);
96 
97  unsigned sv_plains = svect.plains();
98  typename SV::bvector_type bv_acc1;
99  unsigned i;
100 
101  // combine all the high bits into accumulator vector
102  for (i = low_bit+1; i < sv_plains; ++i)
103  {
104  typename SV::bvector_type* bv_plain = svect.plain(i);
105  if (bv_plain)
106  bv_acc1.bit_or(*bv_plain);
107  } // for i
108 
109  // accumulate all vectors below the clipping point
110  typename SV::bvector_type bv_acc2;
111  typename SV::bvector_type* bv_low_plain = svect.get_plain(low_bit);
112 
113  for (i = low_bit-1; true; --i)
114  {
115  typename SV::bvector_type* bv_plain = svect.plain(i);
116  if (bv_plain)
117  {
118  bv_acc2.bit_or(*bv_plain);
119  svect.free_plain(i);
120  if (i == 0)
121  break;
122  }
123  } // for i
124 
125  // now we want to set 1 in the clipping low plain based on
126  // exclusive or (XOR) between upper and lower parts)
127  // as a result high signal (any bits in the upper plains) gets
128  // slightly lower value (due to clipping) low signal gets amplified
129  // (lower contrast algorithm)
130 
131  bv_acc1.bit_xor(bv_acc2);
132  bv_low_plain->bit_or(bv_acc1);
133 }
134 
135 
136 /**
137  \brief algorithms for sparse_vector scan/seach
138 
139  Scanner uses properties of bit-vector plains to answer questions
140  like "find all sparse vector elements equivalent to XYZ".
141 
142  Class uses fast algorithms based on properties of bit-plains.
143  This is NOT a brute force, direct scan.
144 
145  @ingroup svalgo
146  @ingroup setalgo
147 */
148 template<typename SV>
150 {
151 public:
152  typedef typename SV::bvector_type bvector_type;
153  typedef const bvector_type* bvector_type_const_ptr;
154  typedef bvector_type* bvector_type_ptr;
155  typedef typename SV::value_type value_type;
156  typedef typename SV::size_type size_type;
157  typedef typename bvector_type::allocator_type::allocator_pool_type allocator_pool_type;
158 
159 public:
161 
162  /**
163  \brief find all sparse vector elements EQ to search value
164 
165  Find all sparse vector elements equivalent to specified value
166 
167  \param sv - input sparse vector
168  \param value - value to search for
169  \param bv_out - output bit-vector (search result masks 1 elements)
170  */
171  void find_eq(const SV& sv,
172  typename SV::value_type value,
173  typename SV::bvector_type& bv_out);
174 
175  /**
176  \brief find first sparse vector element
177 
178  Find all sparse vector elements equivalent to specified value.
179  Works well if sperse vector represents unordered set
180 
181  \param sv - input sparse vector
182  \param value - value to search for
183  \param pos - output found sparse vector element index
184 
185  \return true if found
186  */
187  bool find_eq(const SV& sv,
188  typename SV::value_type value,
189  typename SV::size_type& pos);
190 
191  /**
192  \brief find first sparse vector element (string)
193 
194  */
195  bool find_eq_str(const SV& sv,
196  const typename SV::value_type* str,
197  typename SV::size_type& pos);
198 
199  /**
200  \brief binary find first sparse vector element (string)
201 
202  Sparse vector must be sorted.
203 
204  */
205  bool bfind_eq_str(const SV& sv,
206  const typename SV::value_type* str,
207  typename SV::size_type& pos);
208 
209 
210  /**
211  \brief find all sparse vector elements EQ to 0
212  \param sv - input sparse vector
213  \param bv_out - output bit-vector (search result masks 1 elements)
214  */
215  void find_zero(const SV& sv,
216  typename SV::bvector_type& bv_out);
217 
218  /*!
219  \brief Find non-zero elements
220  Output vector is computed as a logical OR (join) of all plains
221 
222  \param sv - input sparse vector
223  \param bv_out - output bit-bector of non-zero elements
224  */
225  void find_nonzero(const SV& sv, typename SV::bvector_type& bv_out);
226 
227  /**
228  \brief invert search result ("EQ" to "not EQ")
229 
230  \param sv - input sparse vector
231  \param bv_out - output bit-bector of non-zero elements
232  */
233  void invert(const SV& sv, typename SV::bvector_type& bv_out);
234 
235  /**
236  \brief find all values A IN (C, D, E, F)
237  \param sv - input sparse vector
238  \param start - start iterator (set to search)
239  \param end - end iterator (set to search)
240  \param bv_out - output bit-bector of non-zero elements
241  */
242  template<typename It>
243  void find_eq(const SV& sv,
244  It start,
245  It end,
246  typename SV::bvector_type& bv_out)
247  {
248  typename bvector_type::mem_pool_guard mp_guard;
249  mp_guard.assign_if_not_set(pool_, bv_out); // set algorithm-local memory pool to avoid heap contention
250 
251  bvector_type bv1;
252  typename bvector_type::mem_pool_guard mp_guard1(pool_, bv1);
253  bool any_zero = false;
254  for (; start < end; ++start)
255  {
256  value_type v = *start;
257  any_zero |= (v == 0);
258  bool found = find_eq_with_nulls(sv, v, bv1);
259  if (found)
260  bv_out.bit_or(bv1);
261  else
262  {
263  BM_ASSERT(!bv1.any());
264  }
265  } // for
266  if (any_zero)
267  correct_nulls(sv, bv_out);
268  }
269 
270  /// For testing purposes only
271  ///
272  /// @internal
273  void find_eq_with_nulls_horizontal(const SV& sv,
274  typename SV::value_type value,
275  typename SV::bvector_type& bv_out);
276 
277  /** Exclude possible NULL values from the result vector
278  \param sv - input sparse vector
279  \param bv_out - output bit-bector of non-zero elements
280  */
281  void correct_nulls(const SV& sv, typename SV::bvector_type& bv_out);
282 
283 protected:
284 
285  /// set search boundaries
286  void set_search_range(size_type from, size_type to);
287 
288  /// reset (disable) search range
289  void reset_search_range();
290 
291  /// find value (may include NULL indexes)
292  bool find_eq_with_nulls(const SV& sv,
293  typename SV::value_type value,
294  typename SV::bvector_type& bv_out,
295  typename SV::size_type search_limit = 0);
296 
297  /// find first value (may include NULL indexes)
298  bool find_first_eq(const SV& sv,
299  typename SV::value_type value,
300  bm::id_t& idx);
301 
302  /// find first string value (may include NULL indexes)
303  bool find_first_eq(const SV& sv,
304  const typename SV::value_type* str,
305  bm::id_t& idx);
306 
307 
308  /// Prepare aggregator for AND-SUB (EQ) search
309  bool prepare_and_sub_aggregator(const SV& sv,
310  typename SV::value_type value);
311 
312  /// Prepare aggregator for AND-SUB (EQ) search (string)
313  bool prepare_and_sub_aggregator(const SV& sv,
314  const typename SV::value_type* str,
315  unsigned octet_start);
316 
317  /// Rank-Select decompression for RSC vectors
318  void decompress(const SV& sv, typename SV::bvector_type& bv_out);
319 protected:
321  void operator=(const sparse_vector_scanner&) = delete;
322 
323 private:
324  allocator_pool_type pool_;
325  bvector_type bv_tmp_;
328 
329  bvector_type bv_mask_; ///< AND mask vector
330  size_type mask_from_;
331  size_type mask_to_;
332  bool mask_set_;
333 
334 };
335 
336 
337 /*!
338  \brief Integer set to set transformation (functional image in groups theory)
339  https://en.wikipedia.org/wiki/Image_(mathematics)
340 
341  Input sets gets translated through the function, which is defined as
342  "one to one (or NULL)" binary relation object (sparse_vector).
343  Also works for M:1 relationships.
344 
345  \ingroup svalgo
346  \ingroup setalgo
347 */
348 template<typename SV>
350 {
351 public:
352  typedef typename SV::bvector_type bvector_type;
353  typedef typename SV::value_type value_type;
354  typedef typename SV::size_type size_type;
355  typedef typename bvector_type::allocator_type::allocator_pool_type allocator_pool_type;
356 public:
357 
360 
361 
362  /** Get read access to zero-elements vector
363  Zero vector gets populated after attach_sv() is called
364  or as a side-effect of remap() with immediate sv param
365  */
366  const bvector_type& get_bv_zero() const { return bv_zero_; }
367 
368  /** Perform remapping (Image function) (based on attached translation table)
369 
370  \param bv_in - input set, defined as a bit-vector
371  \param bv_out - output set as a bit-vector
372 
373  @sa attach_sv
374  */
375  void remap(const bvector_type& bv_in,
376  bvector_type& bv_out);
377 
378  /** Perform remapping (Image function)
379 
380  \param bv_in - input set, defined as a bit-vector
381  \param sv_brel - binary relation (translation table) sparse vector
382  \param bv_out - output set as a bit-vector
383  */
384  void remap(const bvector_type& bv_in,
385  const SV& sv_brel,
386  bvector_type& bv_out);
387 
388  /** Remap single set element
389 
390  \param id_from - input value
391  \param sv_brel - translation table sparse vector
392  \param id_to - out value
393 
394  \return - true if value was found and remapped
395  */
396  bool remap(size_type id_from, const SV& sv_brel, size_type& id_to);
397 
398 
399  /** Run remap transformation
400 
401  \param bv_in - input set, defined as a bit-vector
402  \param sv_brel - translation table sparse vector
403  \param bv_out - output set as a bit-vector
404 
405  @sa remap
406  */
407  void run(const bvector_type& bv_in,
408  const SV& sv_brel,
409  bvector_type& bv_out)
410  {
411  remap(bv_in, sv_brel, bv_out);
412  }
413 
414  /** Attach a translation table vector for remapping (Image function)
415 
416  \param sv_brel - binary relation sparse vector pointer
417  (pass NULL to detach)
418  \param compute_stats - flag to perform computation of some statistics
419  later used in remapping. This only make sense
420  for series of remappings on the same translation
421  vector.
422  @sa remap
423  */
424  void attach_sv(const SV* sv_brel, bool compute_stats = false);
425 
426 
427 protected:
428  void one_pass_run(const bvector_type& bv_in,
429  const SV& sv_brel,
430  bvector_type& bv_out);
431 
432  /// @internal
433  template<unsigned BSIZE>
435  {
436  size_type BM_VECT_ALIGN gather_idx_[BSIZE] BM_VECT_ALIGN_ATTR;
437  value_type BM_VECT_ALIGN buffer_[BSIZE] BM_VECT_ALIGN_ATTR;
438  };
439 
441  {
442  sv_g_size = 1024 * 8
443  };
445 
446 
447 protected:
449  void operator=(const set2set_11_transform&) = delete;
450 
451 protected:
452  const SV* sv_ptr_; ///< current translation table vector
453  gather_buffer_type* gb_; ///< intermediate buffers
454  bvector_type bv_product_;///< temp vector
455 
456  bool have_stats_; ///< flag of statistics presense
457  bvector_type bv_zero_; ///< bit-vector for zero elements
458 
459  allocator_pool_type pool_;
460 };
461 
462 
463 
464 //----------------------------------------------------------------------------
465 //
466 //----------------------------------------------------------------------------
467 
468 template<typename SV>
470 : sv_ptr_(0), gb_(0), have_stats_(false)
471 {
472  gb_ = (gather_buffer_type*)::malloc(sizeof(gather_buffer_type));
473  if (!gb_)
474  {
475  SV::throw_bad_alloc();
476  }
477 }
478 
479 //----------------------------------------------------------------------------
480 
481 template<typename SV>
483 {
484  if (gb_)
485  ::free(gb_);
486 }
487 
488 
489 //----------------------------------------------------------------------------
490 
491 template<typename SV>
492 void set2set_11_transform<SV>::attach_sv(const SV* sv_brel, bool compute_stats)
493 {
494  sv_ptr_ = sv_brel;
495  if (!sv_ptr_)
496  {
497  have_stats_ = false;
498  }
499  else
500  {
501  if (sv_brel->empty() || !compute_stats)
502  return; // nothing to do
503  const bvector_type* bv_non_null = sv_brel->get_null_bvector();
504  if (bv_non_null)
505  return; // already have 0 elements vector
506 
507 
508  typename bvector_type::mem_pool_guard mp_g_z;
509  mp_g_z.assign_if_not_set(pool_, bv_zero_);
510 
512  scanner.find_zero(*sv_brel, bv_zero_);
513  have_stats_ = true;
514  }
515 }
516 
517 //----------------------------------------------------------------------------
518 
519 template<typename SV>
521  const SV& sv_brel,
522  size_type& id_to)
523 {
524  if (sv_brel.empty())
525  return false; // nothing to do
526 
527  const bvector_type* bv_non_null = sv_brel.get_null_bvector();
528  if (bv_non_null)
529  {
530  if (!bv_non_null->test(id_from))
531  return false;
532  }
533  size_type idx = sv_brel.translate_address(id_from);
534  id_to = sv_brel.get(idx);
535  return true;
536 }
537 
538 //----------------------------------------------------------------------------
539 
540 template<typename SV>
542  const SV& sv_brel,
543  bvector_type& bv_out)
544 {
545  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
546  mp_g_out.assign_if_not_set(pool_, bv_out);
547  mp_g_p.assign_if_not_set(pool_, bv_product_);
548  mp_g_z.assign_if_not_set(pool_, bv_zero_);
549 
550  attach_sv(&sv_brel);
551 
552  remap(bv_in, bv_out);
553 
554  attach_sv(0); // detach translation table
555 }
556 
557 template<typename SV>
559  bvector_type& bv_out)
560 {
562 
563  bv_out.clear();
564 
565  if (sv_ptr_ == 0 || sv_ptr_->empty())
566  return; // nothing to do
567 
568  bv_out.init(); // just in case to "fast set" later
569 
570  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
571  mp_g_out.assign_if_not_set(pool_, bv_out);
572  mp_g_p.assign_if_not_set(pool_, bv_product_);
573  mp_g_z.assign_if_not_set(pool_, bv_zero_);
574 
575 
576  const bvector_type* enum_bv;
577 
578  const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
579  if (bv_non_null)
580  {
581  // TODO: optimize with 2-way ops
582  bv_product_ = bv_in;
583  bv_product_.bit_and(*bv_non_null);
584  enum_bv = &bv_product_;
585  }
586  else // non-NULL vector is not available
587  {
588  enum_bv = &bv_in;
589  // if we have any elements mapping into "0" on the other end
590  // we map it once (chances are there are many duplicates)
591  //
592 
593  if (have_stats_) // pre-attached translation statistics
594  {
595  bv_product_ = bv_in;
596  unsigned cnt1 = bv_product_.count();
597  bv_product_.bit_sub(bv_zero_);
598  unsigned cnt2 = bv_product_.count();
599 
600  BM_ASSERT(cnt2 <= cnt1);
601 
602  if (cnt1 != cnt2) // mapping included 0 elements
603  bv_out.set_bit_no_check(0);
604 
605  enum_bv = &bv_product_;
606  }
607  }
608 
609 
610 
611  unsigned buf_cnt, nb_old, nb;
612  buf_cnt = nb_old = 0;
613 
614  typename bvector_type::enumerator en(enum_bv->first());
615  for (; en.valid(); ++en)
616  {
617  typename SV::size_type idx = *en;
618  idx = sv_ptr_->translate_address(idx);
619 
620  nb = unsigned(idx >> bm::set_block_shift);
621  if (nb != nb_old) // new blocks
622  {
623  if (buf_cnt)
624  {
625  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
626  bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
627  buf_cnt ^= buf_cnt;
628  }
629  nb_old = nb;
630  gb_->gather_idx_[buf_cnt++] = idx;
631  }
632  else
633  {
634  gb_->gather_idx_[buf_cnt++] = idx;
635  }
636 
637  if (buf_cnt == sv_g_size)
638  {
639  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
640  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
641  buf_cnt ^= buf_cnt;
642  }
643  } // for en
644  if (buf_cnt)
645  {
646  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
647  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
648  }
649 }
650 
651 
652 //----------------------------------------------------------------------------
653 
654 template<typename SV>
656  const SV& sv_brel,
657  bvector_type& bv_out)
658 {
659  if (sv_brel.empty())
660  return; // nothing to do
661 
662  bv_out.init();
663 
664  typename SV::const_iterator it = sv_brel.begin();
665  for (; it.valid(); ++it)
666  {
667  typename SV::value_type t_id = *it;
668  bm::id_t idx = it.pos();
669  if (bv_in.test(idx))
670  {
671  bv_out.set_bit_no_check(t_id);
672  }
673  } // for
674 }
675 
676 
677 //----------------------------------------------------------------------------
678 //
679 //----------------------------------------------------------------------------
680 
681 template<typename SV>
683 {
684  bv_mask_.set_allocator_pool(&pool_);
685  mask_set_ = false;
686  mask_from_ = mask_to_ = bm::id_max;
687 }
688 
689 //----------------------------------------------------------------------------
690 
691 template<typename SV>
693  typename SV::bvector_type& bv_out)
694 {
695  if (sv.size() == 0)
696  {
697  bv_out.clear();
698  return;
699  }
700 
701  find_nonzero(sv, bv_out);
702  if (sv.is_compressed())
703  {
704  bv_out.invert();
705  bv_out.set_range(sv.effective_size(), bm::id_max - 1, false);
706  decompress(sv, bv_out);
707  }
708  else
709  {
710  invert(sv, bv_out);
711  }
712  correct_nulls(sv, bv_out);
713 }
714 
715 //----------------------------------------------------------------------------
716 
717 template<typename SV>
718 void sparse_vector_scanner<SV>::invert(const SV& sv, typename SV::bvector_type& bv_out)
719 {
720  if (sv.size() == 0)
721  {
722  bv_out.clear();
723  return;
724  }
725  bv_out.invert();
726  const bvector_type* bv_null = sv.get_null_bvector();
727  if (bv_null) // correct result to only use not NULL elements
728  bv_out &= *bv_null;
729  else
730  bv_out.set_range(sv.size(), bm::id_max - 1, false);
731 }
732 
733 //----------------------------------------------------------------------------
734 
735 template<typename SV>
737  typename SV::bvector_type& bv_out)
738 {
739  const bvector_type* bv_null = sv.get_null_bvector();
740  if (bv_null) // correct result to only use not NULL elements
741  bv_out.bit_and(*bv_null);
742 }
743 
744 //----------------------------------------------------------------------------
745 
746 template<typename SV>
748  typename SV::value_type value,
749  typename SV::bvector_type& bv_out,
750  typename SV::size_type search_limit)
751 {
752  if (sv.empty())
753  return false; // nothing to do
754 
755  if (!value)
756  {
757  find_zero(sv, bv_out);
758  return bv_out.any();
759  }
760  agg_.reset();
761 
762  bool found = prepare_and_sub_aggregator(sv, value);
763  if (!found)
764  {
765  bv_out.clear();
766  return found;
767  }
768 
769  bool any = (search_limit == 1);
770  found = agg_.combine_and_sub(bv_out, any);
771  agg_.reset();
772  return found;
773 }
774 
775 //----------------------------------------------------------------------------
776 
777 template<typename SV>
779  typename SV::value_type value,
780  bm::id_t& idx)
781 {
782  if (sv.empty())
783  return false; // nothing to do
784 
785  BM_ASSERT(value); // cannot handle zero values
786  if (!value)
787  return false;
788 
789  agg_.reset();
790  bool found = prepare_and_sub_aggregator(sv, value);
791  if (!found)
792  return found;
793  found = agg_.find_first_and_sub(idx);
794  agg_.reset();
795  return found;
796 }
797 
798 
799 //----------------------------------------------------------------------------
800 
801 template<typename SV>
803  const typename SV::value_type* str,
804  bm::id_t& idx)
805 {
806  if (sv.empty())
807  return false; // nothing to do
808  BM_ASSERT(*str);
809 
810  if (!*str)
811  return false;
812 
813  agg_.reset();
814  unsigned common_prefix_len = 0;
815 
816  // if search mask is set - add it as first(!) AND operation to reduce
817  // the search space
818  // TODO: add flags and checks that mask represents a sorted case
819  if (mask_set_)
820  {
821  agg_.set_range_hint(mask_from_, mask_to_);
822  agg_.add(&bv_mask_);
823 
824  common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
825  }
826 
827  bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
828  if (!found)
829  return found;
830 
831  found = agg_.find_first_and_sub(idx);
832  agg_.reset();
833  return found;
834 }
835 
836 //----------------------------------------------------------------------------
837 
838 template<typename SV>
840  const typename SV::value_type* str,
841  unsigned octet_start)
842 {
843  unsigned char bits[64];
844 
845  int len = 0;
846  for (; str[len] != 0; ++len)
847  {}
848  BM_ASSERT(len);
849 
850  // use reverse order (faster for sorted arrays)
851  for (int octet_idx = len-1; octet_idx >= 0; --octet_idx)
852  {
853  if (unsigned(octet_idx) < octet_start)
854  {
855  break;
856  }
857  unsigned value = unsigned(str[octet_idx]) & 0xFF;
858  BM_ASSERT(value != 0);
859  unsigned short bit_count_v = bm::bitscan(value, bits);
860  // prep the lists for combined AND-SUB aggregator
861  //
862  for (unsigned i = 0; i < bit_count_v; ++i)
863  {
864  unsigned bit_idx = bits[i];
865  BM_ASSERT(value & (value_type(1) << bit_idx));
866  unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
867  const bvector_type* bv = sv.get_plain(plain_idx);
868  if (bv)
869  agg_.add(bv);
870  else
871  return false;
872  } // for i
873 
874  unsigned vinv = unsigned(value);
875  if (bm::conditional<sizeof(value_type) == 1>::test())
876  {
877  vinv ^= 0xFF;
878  }
879  else // 2-byte char
880  {
881  BM_ASSERT(sizeof(value_type) == 2);
882  vinv ^= 0xFFFF;
883  }
884 
885  bit_count_v = bm::bitscan(vinv, bits);
886  BM_ASSERT(bit_count_v <= sizeof(value_type) * 8);
887  for (unsigned i = 0; i < bit_count_v; ++i)
888  {
889  unsigned bit_idx = bits[i];
890  unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
891  BM_ASSERT(!(value & (value_type(1) << bit_idx)));
892  const bvector_type* bv = sv.get_plain(plain_idx);
893  if (bv)
894  agg_.add(bv, 1); // agg to SUB group
895  }
896  } // for octet_idx
897  // add all vectors above string len to the SUB operation group
898  //
899  typename SV::size_type plain_idx = unsigned(len * 8) + 1;
900  typename SV::size_type plains = sv.plains();
901  for (; plain_idx < plains; ++plain_idx)
902  {
903  bvector_type_const_ptr bv = sv.get_plain(plain_idx);
904  if (bv)
905  {
906  agg_.add(bv, 1); // agg to SUB group
907  }
908  } // for
909  return true;
910 }
911 
912 //----------------------------------------------------------------------------
913 
914 template<typename SV>
916  typename SV::value_type value)
917 {
918  unsigned char bits[sizeof(value) * 8];
919  unsigned short bit_count_v = bm::bitscan(value, bits);
920  BM_ASSERT(bit_count_v);
921 
922  // prep the lists for combined AND-SUB aggregator
923  // (backward order has better chance for bit reduction on AND)
924  //
925  for (unsigned i = bit_count_v; i > 0; --i)
926  {
927  unsigned bit_idx = bits[i-1];
928  BM_ASSERT(value & (value_type(1) << bit_idx));
929  const bvector_type* bv = sv.get_plain(bit_idx);
930  if (bv)
931  agg_.add(bv);
932  else
933  return false;
934  }
935 
936  unsigned sv_plains = sv.effective_plains();
937  for (unsigned i = 0; (i < sv_plains) && value; ++i)
938  {
939  bvector_type_const_ptr bv = sv.get_plain(i);
940  if (bv && !(value & (value_type(1) << i)))
941  agg_.add(bv, 1); // agg to SUB group
942  } // for i
943  return true;
944 }
945 
946 
947 //----------------------------------------------------------------------------
948 
949 template<typename SV>
951  typename SV::value_type value,
952  typename SV::bvector_type& bv_out)
953 {
954  if (sv.empty())
955  return; // nothing to do
956 
957  if (!value)
958  {
959  find_zero(sv, bv_out);
960  return;
961  }
962 
963  unsigned char bits[sizeof(value) * 8];
964  unsigned short bit_count_v = bm::bitscan(value, bits);
965  BM_ASSERT(bit_count_v);
966 
967  // aggregate AND all matching vectors
968  {
969  const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
970  if (bv_plain)
971  bv_out = *bv_plain;
972  else // plain not found
973  {
974  bv_out.clear();
975  return;
976  }
977  }
978  for (unsigned i = 0; i < bit_count_v; ++i)
979  {
980  const bvector_type* bv_plain = sv.get_plain(bits[i]);
981  if (bv_plain)
982  bv_out &= *bv_plain;
983  else // mandatory plain not found - empty result!
984  {
985  bv_out.clear();
986  return;
987  }
988  } // for i
989 
990  // SUB all other plains
991  unsigned sv_plains = sv.effective_plains();
992  for (unsigned i = 0; (i < sv_plains) && value; ++i)
993  {
994  const bvector_type* bv_plain = sv.get_plain(i);
995  if (bv_plain && !(value & (value_type(1) << i)))
996  bv_out -= *bv_plain;
997  }
998 }
999 
1000 //----------------------------------------------------------------------------
1001 
1002 template<typename SV>
1004  const typename SV::value_type* str,
1005  typename SV::size_type& pos)
1006 {
1007  bool found = false;
1008  if (sv.empty())
1009  return found;
1010  if (*str)
1011  {
1012  bm::id_t found_pos;
1013  found = find_first_eq(sv, str, found_pos);
1014  if (found)
1015  {
1016  if (sv.is_compressed()) // if compressed vector - need rank translation
1017  {
1018  found = sv.find_rank(found_pos + 1, pos);
1019  }
1020  else
1021  pos = found_pos;
1022  }
1023  }
1024  else // search for zero value
1025  {
1026  // TODO: implement optimized version which would work witout temp vector
1027  typename SV::bvector_type bv_zero;
1028  find_zero(sv, bv_zero);
1029  found = bv_zero.find(pos);
1030  }
1031  return found;
1032 }
1033 
1034 //----------------------------------------------------------------------------
1035 
1036 template<typename SV>
1038  const typename SV::value_type* str,
1039  typename SV::size_type& pos)
1040 {
1041  bool found = false;
1042  if (sv.empty())
1043  return found;
1044 
1045  if (*str)
1046  {
1047  reset_search_range();
1048 
1049  // narrow down the search
1050  const unsigned min_distance_cutoff = 65536/2;
1051  size_type l, r, dist;
1052  l = 0; r = sv.size()-1;
1053  dist = r - l;
1054  if (dist < min_distance_cutoff)
1055  {
1056  return find_eq_str(sv, str, pos);
1057  }
1058  bm::id_t found_pos;
1059  // binary search to narrow down the search window
1060  while (l <= r)
1061  {
1062  typename SV::size_type mid = (r-l)/2+l;
1063 
1064  int cmp = sv.compare(mid, str);
1065  if (cmp == 0)
1066  {
1067  found_pos = mid;
1068  found = true;
1069  break;
1070  }
1071  if (cmp < 0)
1072  l = mid+1;
1073  else
1074  r = mid-1;
1075  dist = r - l;
1076  if (dist < min_distance_cutoff)
1077  {
1078  set_search_range(l, r);
1079  break;
1080  }
1081  } // while
1082 
1083  // use linear search (range is set)
1084  found = find_first_eq(sv, str, found_pos);
1085  if (found)
1086  {
1087  if (sv.is_compressed()) // if compressed vector - need rank translation
1088  {
1089  found = sv.find_rank(found_pos + 1, pos);
1090  }
1091  else
1092  pos = found_pos;
1093  }
1094  reset_search_range();
1095  }
1096  else // search for zero value
1097  {
1098  // TODO: implement optimized version which would work without temp vector
1099  typename SV::bvector_type bv_zero;
1100  find_zero(sv, bv_zero);
1101  found = bv_zero.find(pos);
1102  }
1103  return found;
1104 }
1105 
1106 
1107 //----------------------------------------------------------------------------
1108 
1109 template<typename SV>
1111  typename SV::value_type value,
1112  typename SV::bvector_type& bv_out)
1113 {
1114  if (sv.empty())
1115  {
1116  bv_out.clear();
1117  return; // nothing to do
1118  }
1119 
1120  if (!value)
1121  {
1122  find_zero(sv, bv_out);
1123  return;
1124  }
1125 
1126  find_eq_with_nulls(sv, value, bv_out, 0);
1127 
1128  decompress(sv, bv_out);
1129  correct_nulls(sv, bv_out);
1130 }
1131 
1132 //----------------------------------------------------------------------------
1133 
1134 template<typename SV>
1136  typename SV::value_type value,
1137  typename SV::size_type& pos)
1138 {
1139  if (!value) // zero value - special case
1140  {
1141  bvector_type bv_zero;
1142  find_eq(sv, value, bv_zero);
1143  bool found = bv_zero.find(pos);
1144  return found;
1145  }
1146 
1147  bm::id_t found_pos;
1148  bool found = find_first_eq(sv, value, found_pos);
1149  if (found)
1150  {
1151  if (sv.is_compressed()) // if compressed vector - need rank translation
1152  found = sv.find_rank(found_pos + 1, pos);
1153  else
1154  pos = found_pos;
1155  }
1156  return found;
1157 }
1158 
1159 //----------------------------------------------------------------------------
1160 
1161 template<typename SV>
1163  typename SV::bvector_type& bv_out)
1164 {
1165  agg_.reset(); // in case if previous scan was interrupted
1166  for (unsigned i = 0; i < sv.plains(); ++i)
1167  agg_.add(sv.get_plain(i));
1168  agg_.combine_or(bv_out);
1169  agg_.reset();
1170 }
1171 
1172 //----------------------------------------------------------------------------
1173 
1174 template<typename SV>
1176  typename SV::bvector_type& bv_out)
1177 {
1178  if (!sv.is_compressed())
1179  return; // nothing to do
1180  const bvector_type* bv_non_null = sv.get_null_bvector();
1181  BM_ASSERT(bv_non_null);
1182 
1183  // TODO: implement faster decompressor for small result-sets
1184  rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
1185  bv_out.swap(bv_tmp_);
1186 }
1187 
1188 //----------------------------------------------------------------------------
1189 
1190 template<typename SV>
1192 {
1193  BM_ASSERT(from < to);
1194 
1195  bv_mask_.clear(true);
1196  bv_mask_.set(from);
1197  bv_mask_.set(to);
1198  bv_mask_.set_range(from, to, true);
1199 
1200  mask_from_ = from;
1201  mask_to_ = to;
1202  mask_set_ = true;
1203 }
1204 
1205 //----------------------------------------------------------------------------
1206 
1207 template<typename SV>
1209 {
1210  mask_set_ = false;
1211  mask_from_ = mask_to_ = bm::id_max;
1212 }
1213 
1214 
1215 //----------------------------------------------------------------------------
1216 //
1217 //----------------------------------------------------------------------------
1218 
1219 
1220 } // namespace bm
1221 
1222 #include "bmundef.h"
1223 
1224 #endif
SV::bvector_type bvector_type
const SV * sv_ptr_
current translation table vector
allocator_pool_type pool_
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
#define BM_VECT_ALIGN
Definition: bmdef.h:293
ad-hoc conditional expressions
Definition: bmfunc.h:124
gather_buffer_type * gb_
intermediate buffers
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
const bvector_type * bvector_type_const_ptr
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
bvector_type bv_zero_
bit-vector for zero elements
void attach_sv(const SV *sv_brel, bool compute_stats=false)
Attach a translation table vector for remapping (Image function)
void find_eq(const SV &sv, It start, It end, typename SV::bvector_type &bv_out)
find all values A IN (C, D, E, F)
Definition: bm.h:69
bool find_eq_with_nulls(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out, typename SV::size_type search_limit=0)
find value (may include NULL indexes)
pre-processor un-defines to avoid global space pollution (internal)
algorithms for sparse_vector scan/seach
size_type BM_VECT_ALIGN gather_idx_ [BSIZE] BM_VECT_ALIGN_ATTR
gather_buffer< sv_g_size > gather_buffer_type
const unsigned id_max
Definition: bmconst.h:43
void remap(const bvector_type &bv_in, bvector_type &bv_out)
Perform remapping (Image function) (based on attached translation table)
void reset_search_range()
reset (disable) search range
bvector_type bv_product_
temp vector
Algorithms for fast aggregation of N bvectors.
void operator=(const sparse_vector_scanner &)=delete
void set_search_range(size_type from, size_type to)
set search boundaries
input set is sorted (ascending order)
Definition: bmconst.h:147
void one_pass_run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
bool prepare_and_sub_aggregator(const SV &sv, typename SV::value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string)
Definitions(internal)
void find_zero(const SV &sv, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to 0
void find_nonzero(const SV &sv, typename SV::bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all plains.
bool find_first_eq(const SV &sv, typename SV::value_type value, bm::id_t &idx)
find first value (may include NULL indexes)
value_type BM_VECT_ALIGN buffer_ [BSIZE] BM_VECT_ALIGN_ATTR
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
void find_eq_with_nulls_horizontal(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
For testing purposes only.
void run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Run remap transformation.
unsigned int id_t
Definition: bmconst.h:34
unsigned short bitscan(V w, B *bits)
Definition: bmfunc.h:504
SV::bvector_type bvector_type
void decompress(const SV &sv, typename SV::bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
#define BM_ASSERT
Definition: bmdef.h:116
const bvector_type & get_bv_zero() const
Get read access to zero-elements vector Zero vector gets populated after attach_sv() is called or as ...
const unsigned set_block_shift
Definition: bmconst.h:48
void correct_nulls(const SV &sv, typename SV::bvector_type &bv_out)
Exclude possible NULL values from the result vector.
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
input set is sorted and belongs to one address block
Definition: bmconst.h:148
bool have_stats_
flag of statistics presense
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string)
Integer set to set transformation (functional image in groups theory) https://en.wikipedia.org/wiki/Image_(mathematics)