BitMagic-C++
bmsparsevec_algo.h
Go to the documentation of this file.
1 #ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2 #define BMSPARSEVEC_ALGO__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 /*! \file bmsparsevec_algo.h
21  \brief Algorithms for bm::sparse_vector
22 */
23 
24 #ifndef BM__H__INCLUDED__
25 // BitMagic utility headers do not include main "bm.h" declaration
26 // #include "bm.h" or "bm64.h" explicitly
27 # error missing include (bm.h or bm64.h)
28 #endif
29 
30 #include "bmdef.h"
31 #include "bmsparsevec.h"
32 #include "bmaggregator.h"
33 #include "bmbuffer.h"
34 #include "bmalgo.h"
35 #include "bmdef.h"
36 
37 #ifdef _MSC_VER
38 # pragma warning( disable: 4146 )
39 #endif
40 
41 
42 
43 /** \defgroup svalgo Sparse vector algorithms
44  Sparse vector algorithms
45  \ingroup svector
46  */
47 
48 
49 namespace bm
50 {
51 
52 
53 /*!
54  \brief Clip dynamic range for signal higher than specified
55 
56  \param svect - sparse vector to do clipping
57  \param high_bit - max bit (inclusive) allowed for this signal vector
58 
59 
60  \ingroup svalgo
61  \sa dynamic_range_clip_low
62 */
63 template<typename SV>
64 void dynamic_range_clip_high(SV& svect, unsigned high_bit)
65 {
66  unsigned sv_plains = svect.plains();
67 
68  BM_ASSERT(sv_plains > high_bit && high_bit > 0);
69 
70  typename SV::bvector_type bv_acc;
71  unsigned i;
72 
73  // combine all the high bits into accumulator vector
74  for (i = high_bit+1; i < sv_plains; ++i)
75  {
76  typename SV::bvector_type* bv_plain = svect.plain(i);
77  if (bv_plain)
78  {
79  bv_acc.bit_or(*bv_plain);
80  svect.free_plain(i);
81  }
82  } // for i
83 
84  // set all bits ON for all low vectors, which happen to be clipped
85  for (i = high_bit; true; --i)
86  {
87  typename SV::bvector_type* bv_plain = svect.get_plain(i);
88  bv_plain->bit_or(bv_acc);
89  if (i == 0)
90  break;
91  } // for i
92 }
93 
94 
95 /*!
96  \brief Clip dynamic range for signal lower than specified (boost)
97 
98  \param svect - sparse vector to do clipping
99  \param low_bit - low bit (inclusive) allowed for this signal vector
100 
101  \ingroup svalgo
102  \sa dynamic_range_clip_high
103 */
104 template<typename SV>
105 void dynamic_range_clip_low(SV& svect, unsigned low_bit)
106 {
107  if (low_bit == 0) return; // nothing to do
108  BM_ASSERT(svect.plains() > low_bit);
109 
110  unsigned sv_plains = svect.plains();
111  typename SV::bvector_type bv_acc1;
112  unsigned i;
113 
114  // combine all the high bits into accumulator vector
115  for (i = low_bit+1; i < sv_plains; ++i)
116  {
117  typename SV::bvector_type* bv_plain = svect.plain(i);
118  if (bv_plain)
119  bv_acc1.bit_or(*bv_plain);
120  } // for i
121 
122  // accumulate all vectors below the clipping point
123  typename SV::bvector_type bv_acc2;
124  typename SV::bvector_type* bv_low_plain = svect.get_plain(low_bit);
125 
126  for (i = low_bit-1; true; --i)
127  {
128  typename SV::bvector_type* bv_plain = svect.plain(i);
129  if (bv_plain)
130  {
131  bv_acc2.bit_or(*bv_plain);
132  svect.free_plain(i);
133  if (i == 0)
134  break;
135  }
136  } // for i
137 
138  // now we want to set 1 in the clipping low plain based on
139  // exclusive or (XOR) between upper and lower parts)
140  // as a result high signal (any bits in the upper plains) gets
141  // slightly lower value (due to clipping) low signal gets amplified
142  // (lower contrast algorithm)
143 
144  bv_acc1.bit_xor(bv_acc2);
145  bv_low_plain->bit_or(bv_acc1);
146 }
147 
148 /**
149  Find first mismatch (element which is different) between two sparse vectors
150  (uses linear scan in bit-vector plains)
151 
152  Function works with both NULL and NOT NULL vectors
153  NULL means unassigned (uncertainty), so first mismatch NULL is a mismatch
154  even if values in vectors can be formally the same (i.e. 0)
155 
156  @param sv1 - vector 1
157  @param sv2 - vector 2
158  @param midx - mismatch index
159  @param null_proc - defines if we want to include (not) NULL
160  vector into comparison (bm::use_null) or not.
161  By default search takes NULL vector into account
162 
163  @return true if mismatch found
164 
165  @sa sparse_vector_find_mismatch
166 
167  \ingroup svalgo
168 */
169 template<typename SV>
171  const SV& sv2,
172  typename SV::size_type& midx,
173  bm::null_support null_proc = bm::use_null)
174 {
175  typename SV::size_type mismatch = bm::id_max;
176  bool found = false;
177 
178  unsigned sv_idx = 0;
179 
180  unsigned plains1 = sv1.plains();
181  BM_ASSERT(plains1);
182 
183  // for RSC vector do NOT compare NULL plains
184 
186  {
187  //--plains1;
188  }
189  else // regular sparse vector - may have NULL plains
190  {
191  if (null_proc == bm::use_null)
192  {
193  typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
194  typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
195  if (bv_null1 && bv_null2) // both (not) NULL vectors present
196  {
197  bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
198  if (f && (midx < mismatch)) // better mismatch found
199  {
200  found = f; mismatch = midx;
201  }
202 
203  }
204  else // one or both NULL vectors are not present
205  {
206  if (bv_null1)
207  {
208  typename SV::bvector_type bv_tmp; // TODO: get rid of temp bv
209  bv_tmp.resize(sv2.size());
210  bv_tmp.invert(); // turn into true NULL vector
211 
212  // find first NULL value (mismatch)
213  bool f = bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
214  if (f && (midx < mismatch)) // better mismatch found
215  {
216  found = f; mismatch = midx;
217  }
218  }
219  if (bv_null2)
220  {
221  typename SV::bvector_type bv_tmp; // TODO: get rid of temp bv
222  bv_tmp.resize(sv1.size());
223  bv_tmp.invert();
224 
225  bool f = bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
226  if (f && (midx < mismatch)) // better mismatch found
227  {
228  found = f; mismatch = midx;
229  }
230  }
231  }
232  } // null_proc
233  }
234 
235  for (unsigned i = 0; mismatch && (i < plains1); ++i)
236  {
237  typename SV::bvector_type_const_ptr bv1 = sv1.get_plain(i);
238  typename SV::bvector_type_const_ptr bv2 = sv2.get_plain(i);
239  if (!bv1)
240  {
241  if (!bv2)
242  continue;
243  bool f = bv2->find(midx);
244  if (f && (midx < mismatch))
245  {
246  found = f; sv_idx = 2; mismatch = midx;
247  }
248  continue;
249  }
250  if (!bv2)
251  {
252  BM_ASSERT(bv1);
253  bool f = bv1->find(midx);
254  if (f && (midx < mismatch))
255  {
256  found = f; sv_idx = 1; mismatch = midx;
257  }
258  continue;
259  }
260  // both plains are not NULL
261  //
262  bool f = bv1->find_first_mismatch(*bv2, midx, mismatch);
263  if (f && (midx < mismatch)) // better mismatch found
264  {
265  found = f; mismatch = midx;
266  // which vector has '1' at mismatch position?
268  sv_idx = (bv1->test(mismatch)) ? 1 : 2;
269  }
270 
271  } // for i
272 
273  // RSC address translation here
274  //
276  {
277  if (found) // RSC address translation
278  {
279  BM_ASSERT(sv1.is_compressed());
280  BM_ASSERT(sv2.is_compressed());
281 
282  switch (sv_idx)
283  {
284  case 1:
285  found = sv1.find_rank(midx + 1, mismatch);
286  break;
287  case 2:
288  found = sv2.find_rank(midx + 1, mismatch);
289  break;
290  default: // unknown, try both
291  BM_ASSERT(0);
292  }
293  BM_ASSERT(found);
294  }
295  else // search for mismatch in the NOT NULL vectors
296  {
297  if (null_proc == bm::use_null)
298  {
299  // no need for address translation in this case
300  typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
301  typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
302  found = bv_null1->find_first_mismatch(*bv_null2, mismatch);
303  }
304  }
305  }
306 
307  midx = mismatch; // minimal mismatch
308  return found;
309 }
310 
311 /**
312  Find mismatch vector, indicating positions of mismatch between two sparse vectors
313  (uses linear scan in bit-vector plains)
314 
315  Function works with both NULL and NOT NULL vectors
316 
317  @param bv - [out] - bit-ector with mismatch positions indicated as 1s
318  @param sv1 - vector 1
319  @param sv2 - vector 2
320  @param null_proc - rules of processing for (not) NULL plain
321  bm::no_null - NULLs from both vectors are treated as uncertainty
322  and NOT included into final result
323  bm::use_null - difference in NULLs accounted into the result
324 
325  @sa sparse_vector_find_first_mismatch
326 
327  \ingroup svalgo
328 */
329 template<typename SV1, typename SV2>
331  const SV1& sv1,
332  const SV2& sv2,
333  bm::null_support null_proc)
334 {
335  typedef typename SV1::bvector_type bvector_type;
336  typedef typename bvector_type::allocator_type allocator_type;
337  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
338 
339  allocator_pool_type pool; // local pool for blocks
340  typename bvector_type::mem_pool_guard mp_guard_bv;
341  mp_guard_bv.assign_if_not_set(pool, bv);
342 
343 
345  {
346  BM_ASSERT(0); // TODO: fixme
347  }
349  {
350  BM_ASSERT(0); // TODO: fixme
351  }
352 
353  bv.clear();
354 
355  unsigned plains = sv1.plains();
356  if (plains < sv2.plains())
357  plains = sv2.plains();
358 
359  for (unsigned i = 0; i < plains; ++i)
360  {
361  typename SV1::bvector_type_const_ptr bv1 = sv1.get_plain(i);
362  typename SV2::bvector_type_const_ptr bv2 = sv2.get_plain(i);
363 
364  if (!bv1)
365  {
366  if (!bv2)
367  continue;
368  bv |= *bv2;
369  continue;
370  }
371  if (!bv2)
372  {
373  BM_ASSERT(bv1);
374  bv |= *bv1;
375  continue;
376  }
377 
378  // both plains are not NULL, compute XOR diff
379  //
380  bvector_type bv_xor;
381  typename bvector_type::mem_pool_guard mp_guard;
382  mp_guard.assign_if_not_set(pool, bv_xor);
383 
384  bv_xor.bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
385  bv |= bv_xor;
386 
387  } // for i
388 
389  // size mismatch check
390  {
391  typename SV1::size_type sz1 = sv1.size();
392  typename SV2::size_type sz2 = sv2.size();
393  if (sz1 != sz2)
394  {
395  if (sz1 < sz2)
396  {
397  }
398  else
399  {
400  bm::xor_swap(sz1, sz2);
401  }
402  bv.set_range(sz1, sz2-1);
403  }
404  }
405 
406  // NULL processings
407  //
408  typename SV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
409  typename SV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
410 
411  switch (null_proc)
412  {
413  case bm::no_null:
414  // NULL correction to exclude all NULL (unknown) values from the result set
415  // (AND with NOT NULL vector)
416  if (bv_null1 && bv_null2)
417  {
418  bvector_type bv_or;
419  typename bvector_type::mem_pool_guard mp_guard;
420  mp_guard.assign_if_not_set(pool, bv_or);
421 
422  bv_or.bit_or(*bv_null1, *bv_null2, bvector_type::opt_none);
423  bv &= bv_or;
424  }
425  else
426  {
427  if (bv_null1)
428  bv &= *bv_null1;
429  if (bv_null2)
430  bv &= *bv_null2;
431  }
432  break;
433  case bm::use_null:
434  if (bv_null1 && bv_null2)
435  {
436  bvector_type bv_xor;
437  typename bvector_type::mem_pool_guard mp_guard;
438  mp_guard.assign_if_not_set(pool, bv_xor);
439 
440  bv_xor.bit_xor(*bv_null1, *bv_null2, bvector_type::opt_none);
441  bv |= bv_xor;
442  }
443  else
444  {
445  bvector_type bv_null;
446  typename bvector_type::mem_pool_guard mp_guard;
447  mp_guard.assign_if_not_set(pool, bv_null);
448  if (bv_null1)
449  {
450  bv_null = *bv_null1;
451  bv_null.resize(sv1.size());
452  }
453  if (bv_null2)
454  {
455  bv_null = *bv_null2;
456  bv_null.resize(sv2.size());
457  }
458  bv_null.invert();
459  bv |= bv_null;
460  }
461  break;
462  default:
463  BM_ASSERT(0);
464  }
465 }
466 
467 
468 /**
469  \brief algorithms for sparse_vector scan/search
470 
471  Scanner uses properties of bit-vector plains to answer questions
472  like "find all sparse vector elements equivalent to XYZ".
473 
474  Class uses fast algorithms based on properties of bit-plains.
475  This is NOT a brute force, direct scan.
476 
477  @ingroup svalgo
478  @ingroup setalgo
479 */
480 template<typename SV>
482 {
483 public:
484  typedef typename SV::bvector_type bvector_type;
487  typedef typename SV::value_type value_type;
488  typedef typename SV::size_type size_type;
489 
491  typedef typename allocator_type::allocator_pool_type allocator_pool_type;
492 
493 public:
495 
496  /**
497  \brief bind sparse vector for all searches
498 
499  \param sv - input sparse vector to bind for searches
500  \param sorted - source index is sorted, build index for binary search
501  */
502  void bind(const SV& sv, bool sorted);
503 
504  /**
505  \brief reset sparse vector binding
506  */
507  void reset_binding() BMNOEXCEPT;
508 
509  /**
510  \brief find all sparse vector elements EQ to search value
511 
512  Find all sparse vector elements equivalent to specified value
513 
514  \param sv - input sparse vector
515  \param value - value to search for
516  \param bv_out - output bit-vector (search result masks 1 elements)
517  */
518  void find_eq(const SV& sv,
519  typename SV::value_type value,
520  typename SV::bvector_type& bv_out);
521 
522  /**
523  \brief find first sparse vector element
524 
525  Find all sparse vector elements equivalent to specified value.
526  Works well if sperse vector represents unordered set
527 
528  \param sv - input sparse vector
529  \param value - value to search for
530  \param pos - output found sparse vector element index
531 
532  \return true if found
533  */
534  bool find_eq(const SV& sv,
535  typename SV::value_type value,
536  typename SV::size_type& pos);
537 
538  /**
539  \brief find first sparse vector element (string)
540  */
541  bool find_eq_str(const SV& sv,
542  const typename SV::value_type* str,
543  typename SV::size_type& pos);
544 
545  /**
546  \brief binary find first sparse vector element (string)
547  Sparse vector must be attached (bind())
548  @sa bind
549  */
550  bool find_eq_str(const typename SV::value_type* str,
551  typename SV::size_type& pos);
552 
553  /**
554  \brief binary find first sparse vector element (string)
555  Sparse vector must be sorted.
556  */
557  bool bfind_eq_str(const SV& sv,
558  const typename SV::value_type* str,
559  typename SV::size_type& pos);
560 
561  /**
562  \brief lower bound search for an array position
563 
564  Method assumes the sparse array is sorted
565 
566  \param sv - input sparse vector
567  \param val - value to search for
568  \param pos - output sparse vector element index
569 
570  \return true if value found
571  */
572  bool lower_bound(const SV& sv,
573  const typename SV::value_type val,
574  typename SV::size_type& pos);
575  /**
576  \brief lower bound search for an array position
577 
578  Method assumes the sparse array is sorted
579 
580  \param sv - input sparse vector
581  \param str - value to search for
582  \param pos - output sparse vector element index
583 
584  \return true if value found
585  */
586  bool lower_bound_str(const SV& sv,
587  const typename SV::value_type* str,
588  typename SV::size_type& pos);
589 
590  /**
591  \brief binary find first sparse vector element (string)
592  Sparse vector must be sorted and attached
593  @sa bind
594  */
595  bool bfind_eq_str(const typename SV::value_type* str,
596  typename SV::size_type& pos);
597 
598  /**
599  \brief find all sparse vector elements EQ to 0
600  \param sv - input sparse vector
601  \param bv_out - output bit-vector (search result masks 1 elements)
602  */
603  void find_zero(const SV& sv,
604  typename SV::bvector_type& bv_out);
605 
606  /*!
607  \brief Find non-zero elements
608  Output vector is computed as a logical OR (join) of all plains
609 
610  \param sv - input sparse vector
611  \param bv_out - output bit-bector of non-zero elements
612  */
613  void find_nonzero(const SV& sv, typename SV::bvector_type& bv_out);
614 
615  /**
616  \brief invert search result ("EQ" to "not EQ")
617 
618  \param sv - input sparse vector
619  \param bv_out - output bit-bector of non-zero elements
620  */
621  void invert(const SV& sv, typename SV::bvector_type& bv_out);
622 
623  /**
624  \brief find all values A IN (C, D, E, F)
625  \param sv - input sparse vector
626  \param start - start iterator (set to search)
627  \param end - end iterator (set to search)
628  \param bv_out - output bit-bector of non-zero elements
629  */
630  template<typename It>
631  void find_eq(const SV& sv,
632  It start,
633  It end,
634  typename SV::bvector_type& bv_out)
635  {
636  typename bvector_type::mem_pool_guard mp_guard;
637  mp_guard.assign_if_not_set(pool_, bv_out); // set algorithm-local memory pool to avoid heap contention
638 
639  bvector_type bv1;
640  typename bvector_type::mem_pool_guard mp_guard1(pool_, bv1);
641  bool any_zero = false;
642  for (; start < end; ++start)
643  {
644  value_type v = *start;
645  any_zero |= (v == 0);
646  bool found = find_eq_with_nulls(sv, v, bv1);
647  if (found)
648  bv_out.bit_or(bv1);
649  else
650  {
651  BM_ASSERT(!bv1.any());
652  }
653  } // for
654  if (any_zero)
655  correct_nulls(sv, bv_out);
656  }
657 
658  /// For testing purposes only
659  ///
660  /// @internal
661  void find_eq_with_nulls_horizontal(const SV& sv,
662  typename SV::value_type value,
663  typename SV::bvector_type& bv_out);
664 
665  /** Exclude possible NULL values from the result vector
666  \param sv - input sparse vector
667  \param bv_out - output bit-bector of non-zero elements
668  */
669  void correct_nulls(const SV& sv, typename SV::bvector_type& bv_out);
670 
671 protected:
672 
673  /// set search boundaries (hint for the aggregator)
674  void set_search_range(size_type from, size_type to);
675 
676  /// reset (disable) search range
677  void reset_search_range();
678 
679  /// find value (may include NULL indexes)
680  bool find_eq_with_nulls(const SV& sv,
681  typename SV::value_type value,
682  typename SV::bvector_type& bv_out,
683  typename SV::size_type search_limit = 0);
684 
685  /// find first value (may include NULL indexes)
686  bool find_first_eq(const SV& sv,
687  typename SV::value_type value,
688  size_type& idx);
689 
690  /// find first string value (may include NULL indexes)
691  bool find_first_eq(const SV& sv,
692  const typename SV::value_type* str,
693  size_type& idx,
694  bool remaped);
695 
696 
697  /// Prepare aggregator for AND-SUB (EQ) search
698  bool prepare_and_sub_aggregator(const SV& sv,
699  typename SV::value_type value);
700 
701  /// Prepare aggregator for AND-SUB (EQ) search (string)
702  bool prepare_and_sub_aggregator(const SV& sv,
703  const typename SV::value_type* str,
704  unsigned octet_start);
705 
706  /// Rank-Select decompression for RSC vectors
707  void decompress(const SV& sv, typename SV::bvector_type& bv_out);
708 
709  /// compare sv[idx] with input str
710  int compare_str(const SV& sv, size_type idx, const value_type* str);
711 
712  /// compare sv[idx] with input value
713  int compare(const SV& sv, size_type idx, const value_type val) BMNOEXCEPT;
714 
715 protected:
717  void operator=(const sparse_vector_scanner&) = delete;
718 
719 protected:
720 
722  {
723  max_columns = SV::max_vector_size
724  };
725 
727  {
730  };
731 
732  typedef bm::dynamic_heap_matrix<value_type, allocator_type> heap_matrix_type;
733  typedef bm::heap_matrix<typename SV::value_type,
735  SV::sv_octet_plains,
737 
738 
739 private:
740  allocator_pool_type pool_;
741  bvector_type bv_tmp_;
744 
745  size_type mask_from_;
746  size_type mask_to_;
747  bool mask_set_;
748 
749  const SV* bound_sv_;
750  heap_matrix_type block0_elements_cache_; ///< cache for elements[0] of each block
751  heap_matrix_type block3_elements_cache_; ///< cache for elements[16384x] of each block
752  size_type effective_str_max_;
753 
754  value_type remap_value_vect_[SV::max_vector_size];
755  /// masks of allocated bit-plains (1 - means there is a bit-plain)
756  bm::id64_t vector_plain_masks_[SV::max_vector_size];
757  matrix_search_buf_type hmatr_; ///< heap matrix for string search linear stage
758 };
759 
760 
761 /*!
762  \brief Integer set to set transformation (functional image in groups theory)
763  https://en.wikipedia.org/wiki/Image_(mathematics)
764 
765  Input sets gets translated through the function, which is defined as
766  "one to one (or NULL)" binary relation object (sparse_vector).
767  Also works for M:1 relationships.
768 
769  \ingroup svalgo
770  \ingroup setalgo
771 */
772 template<typename SV>
774 {
775 public:
776  typedef typename SV::bvector_type bvector_type;
777  typedef typename SV::value_type value_type;
778  typedef typename SV::size_type size_type;
780 public:
781 
784 
785 
786  /** Get read access to zero-elements vector
787  Zero vector gets populated after attach_sv() is called
788  or as a side-effect of remap() with immediate sv param
789  */
790  const bvector_type& get_bv_zero() const { return bv_zero_; }
791 
792  /** Perform remapping (Image function) (based on attached translation table)
793 
794  \param bv_in - input set, defined as a bit-vector
795  \param bv_out - output set as a bit-vector
796 
797  @sa attach_sv
798  */
799  void remap(const bvector_type& bv_in,
800  bvector_type& bv_out);
801 
802  /** Perform remapping (Image function)
803 
804  \param bv_in - input set, defined as a bit-vector
805  \param sv_brel - binary relation (translation table) sparse vector
806  \param bv_out - output set as a bit-vector
807  */
808  void remap(const bvector_type& bv_in,
809  const SV& sv_brel,
810  bvector_type& bv_out);
811 
812  /** Remap single set element
813 
814  \param id_from - input value
815  \param sv_brel - translation table sparse vector
816  \param id_to - out value
817 
818  \return - true if value was found and remapped
819  */
820  bool remap(size_type id_from, const SV& sv_brel, size_type& id_to);
821 
822 
823  /** Run remap transformation
824 
825  \param bv_in - input set, defined as a bit-vector
826  \param sv_brel - translation table sparse vector
827  \param bv_out - output set as a bit-vector
828 
829  @sa remap
830  */
831  void run(const bvector_type& bv_in,
832  const SV& sv_brel,
833  bvector_type& bv_out)
834  {
835  remap(bv_in, sv_brel, bv_out);
836  }
837 
838  /** Attach a translation table vector for remapping (Image function)
839 
840  \param sv_brel - binary relation sparse vector pointer
841  (pass NULL to detach)
842  \param compute_stats - flag to perform computation of some statistics
843  later used in remapping. This only make sense
844  for series of remappings on the same translation
845  vector.
846  @sa remap
847  */
848  void attach_sv(const SV* sv_brel, bool compute_stats = false);
849 
850 
851 protected:
852  void one_pass_run(const bvector_type& bv_in,
853  const SV& sv_brel,
854  bvector_type& bv_out);
855 
856  /// @internal
857  template<unsigned BSIZE>
859  {
862  };
863 
865  {
866  sv_g_size = 1024 * 8
867  };
869 
870 
871 protected:
873  void operator=(const set2set_11_transform&) = delete;
874 
875 protected:
876  const SV* sv_ptr_; ///< current translation table vector
877  gather_buffer_type* gb_; ///< intermediate buffers
878  bvector_type bv_product_;///< temp vector
879 
880  bool have_stats_; ///< flag of statistics presense
881  bvector_type bv_zero_; ///< bit-vector for zero elements
882 
884 };
885 
886 
887 
888 //----------------------------------------------------------------------------
889 //
890 //----------------------------------------------------------------------------
891 
892 template<typename SV>
894 : sv_ptr_(0), gb_(0), have_stats_(false)
895 {
896  gb_ = (gather_buffer_type*)::malloc(sizeof(gather_buffer_type));
897  if (!gb_)
898  {
899  SV::throw_bad_alloc();
900  }
901 }
902 
903 //----------------------------------------------------------------------------
904 
905 template<typename SV>
907 {
908  if (gb_)
909  ::free(gb_);
910 }
911 
912 
913 //----------------------------------------------------------------------------
914 
915 template<typename SV>
916 void set2set_11_transform<SV>::attach_sv(const SV* sv_brel, bool compute_stats)
917 {
918  sv_ptr_ = sv_brel;
919  if (!sv_ptr_)
920  {
921  have_stats_ = false;
922  }
923  else
924  {
925  if (sv_brel->empty() || !compute_stats)
926  return; // nothing to do
927  const bvector_type* bv_non_null = sv_brel->get_null_bvector();
928  if (bv_non_null)
929  return; // already have 0 elements vector
930 
931 
932  typename bvector_type::mem_pool_guard mp_g_z;
933  mp_g_z.assign_if_not_set(pool_, bv_zero_);
934 
936  scanner.find_zero(*sv_brel, bv_zero_);
937  have_stats_ = true;
938  }
939 }
940 
941 //----------------------------------------------------------------------------
942 
943 template<typename SV>
945  const SV& sv_brel,
946  size_type& id_to)
947 {
948  if (sv_brel.empty())
949  return false; // nothing to do
950 
951  const bvector_type* bv_non_null = sv_brel.get_null_bvector();
952  if (bv_non_null)
953  {
954  if (!bv_non_null->test(id_from))
955  return false;
956  }
957  size_type idx = sv_brel.translate_address(id_from);
958  id_to = sv_brel.get(idx);
959  return true;
960 }
961 
962 //----------------------------------------------------------------------------
963 
964 template<typename SV>
966  const SV& sv_brel,
967  bvector_type& bv_out)
968 {
969  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
970  mp_g_out.assign_if_not_set(pool_, bv_out);
971  mp_g_p.assign_if_not_set(pool_, bv_product_);
972  mp_g_z.assign_if_not_set(pool_, bv_zero_);
973 
974  attach_sv(&sv_brel);
975 
976  remap(bv_in, bv_out);
977 
978  attach_sv(0); // detach translation table
979 }
980 
981 template<typename SV>
983  bvector_type& bv_out)
984 {
985  BM_ASSERT(sv_ptr_);
986 
987  bv_out.clear();
988 
989  if (sv_ptr_ == 0 || sv_ptr_->empty())
990  return; // nothing to do
991 
992  bv_out.init(); // just in case to "fast set" later
993 
994  typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
995  mp_g_out.assign_if_not_set(pool_, bv_out);
996  mp_g_p.assign_if_not_set(pool_, bv_product_);
997  mp_g_z.assign_if_not_set(pool_, bv_zero_);
998 
999 
1000  const bvector_type* enum_bv;
1001 
1002  const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
1003  if (bv_non_null)
1004  {
1005  // TODO: optimize with 2-way ops
1006  bv_product_ = bv_in;
1007  bv_product_.bit_and(*bv_non_null);
1008  enum_bv = &bv_product_;
1009  }
1010  else // non-NULL vector is not available
1011  {
1012  enum_bv = &bv_in;
1013  // if we have any elements mapping into "0" on the other end
1014  // we map it once (chances are there are many duplicates)
1015  //
1016 
1017  if (have_stats_) // pre-attached translation statistics
1018  {
1019  bv_product_ = bv_in;
1020  size_type cnt1 = bv_product_.count();
1021  bv_product_.bit_sub(bv_zero_);
1022  size_type cnt2 = bv_product_.count();
1023 
1024  BM_ASSERT(cnt2 <= cnt1);
1025 
1026  if (cnt1 != cnt2) // mapping included 0 elements
1027  bv_out.set_bit_no_check(0);
1028 
1029  enum_bv = &bv_product_;
1030  }
1031  }
1032 
1033 
1034 
1035  size_type buf_cnt, nb_old, nb;
1036  buf_cnt = nb_old = 0;
1037 
1038  typename bvector_type::enumerator en(enum_bv->first());
1039  for (; en.valid(); ++en)
1040  {
1041  typename SV::size_type idx = *en;
1042  idx = sv_ptr_->translate_address(idx);
1043 
1044  nb = unsigned(idx >> bm::set_block_shift);
1045  if (nb != nb_old) // new blocks
1046  {
1047  if (buf_cnt)
1048  {
1049  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
1050  bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
1051  buf_cnt = 0;
1052  }
1053  nb_old = nb;
1054  gb_->gather_idx_[buf_cnt++] = idx;
1055  }
1056  else
1057  {
1058  gb_->gather_idx_[buf_cnt++] = idx;
1059  }
1060 
1061  if (buf_cnt == sv_g_size)
1062  {
1063  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
1064  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
1065  buf_cnt = 0;
1066  }
1067  } // for en
1068  if (buf_cnt)
1069  {
1070  sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
1071  bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
1072  }
1073 }
1074 
1075 
1076 //----------------------------------------------------------------------------
1077 
1078 template<typename SV>
1080  const SV& sv_brel,
1081  bvector_type& bv_out)
1082 {
1083  if (sv_brel.empty())
1084  return; // nothing to do
1085 
1086  bv_out.init();
1087 
1088  typename SV::const_iterator it = sv_brel.begin();
1089  for (; it.valid(); ++it)
1090  {
1091  typename SV::value_type t_id = *it;
1092  size_type idx = it.pos();
1093  if (bv_in.test(idx))
1094  {
1095  bv_out.set_bit_no_check(t_id);
1096  }
1097  } // for
1098 }
1099 
1100 
1101 //----------------------------------------------------------------------------
1102 //
1103 //----------------------------------------------------------------------------
1104 
1105 template<typename SV>
1107 {
1108  mask_set_ = false;
1109  mask_from_ = mask_to_ = bm::id_max;
1110 
1111  bound_sv_ = 0;
1112  effective_str_max_ = 0;
1113 }
1114 
1115 //----------------------------------------------------------------------------
1116 
1117 template<typename SV>
1118 void sparse_vector_scanner<SV>::bind(const SV& sv, bool sorted)
1119 {
1120  bound_sv_ = &sv;
1121  if (sorted)
1122  {
1123  size_type sv_sz = sv.size();
1124  BM_ASSERT(sv_sz);
1125  size_type total_nb = sv_sz / bm::gap_max_bits + 1;
1126  effective_str_max_ = sv.effective_vector_max();
1127 
1128  block0_elements_cache_.resize(total_nb, effective_str_max_+1);
1129  block0_elements_cache_.set_zero();
1130 
1131  block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
1132  block3_elements_cache_.set_zero();
1133 
1134  // fill in elements cache
1135  for (size_type i = 0; i < sv_sz; i+= bm::gap_max_bits)
1136  {
1137  size_type nb = (i >> bm::set_block_shift);
1138  value_type* s0 = block0_elements_cache_.row(nb);
1139  sv.get(i, s0, size_type(block0_elements_cache_.cols()));
1140 
1141  for (size_type k = 0; k < 3; ++k)
1142  {
1143  value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1144  size_type idx = i + (k+1) * bm::sub_block3_size;
1145  sv.get(idx, s1, size_type(block3_elements_cache_.cols()));
1146  } // for k
1147  } // for i
1148  }
1149  // pre-calculate vector plain masks
1150  //
1151  for (unsigned i = 0; i < SV::max_vector_size; ++i)
1152  {
1153  vector_plain_masks_[i] = sv.get_plains_mask(i);
1154  } // for i
1155 
1156 }
1157 
1158 //----------------------------------------------------------------------------
1159 
1160 template<typename SV>
1162 {
1163  bound_sv_ = 0;
1164  effective_str_max_ = 0;
1165 }
1166 
1167 //----------------------------------------------------------------------------
1168 
1169 template<typename SV>
1171  typename SV::bvector_type& bv_out)
1172 {
1173  if (sv.size() == 0)
1174  {
1175  bv_out.clear();
1176  return;
1177  }
1178  find_nonzero(sv, bv_out);
1179  if (sv.is_compressed())
1180  {
1181  bv_out.invert();
1182  bv_out.set_range(sv.effective_size(), bm::id_max - 1, false);
1183  decompress(sv, bv_out);
1184  }
1185  else
1186  {
1187  invert(sv, bv_out);
1188  }
1189  correct_nulls(sv, bv_out);
1190 }
1191 
1192 //----------------------------------------------------------------------------
1193 
1194 template<typename SV>
1195 void sparse_vector_scanner<SV>::invert(const SV& sv, typename SV::bvector_type& bv_out)
1196 {
1197  if (sv.size() == 0)
1198  {
1199  bv_out.clear();
1200  return;
1201  }
1202  // TODO: find a better algorithm (NAND?)
1203  bv_out.invert();
1204  const bvector_type* bv_null = sv.get_null_bvector();
1205  if (bv_null) // correct result to only use not NULL elements
1206  bv_out &= *bv_null;
1207  else
1208  {
1209  // TODO: use the shorter range to clear the tail
1210  bv_out.set_range(sv.size(), bm::id_max - 1, false);
1211  }
1212 }
1213 
1214 //----------------------------------------------------------------------------
1215 
1216 template<typename SV>
1218  typename SV::bvector_type& bv_out)
1219 {
1220  const bvector_type* bv_null = sv.get_null_bvector();
1221  if (bv_null) // correct result to only use not NULL elements
1222  bv_out.bit_and(*bv_null);
1223 }
1224 
1225 //----------------------------------------------------------------------------
1226 
1227 template<typename SV>
1229  typename SV::value_type value,
1230  typename SV::bvector_type& bv_out,
1231  typename SV::size_type search_limit)
1232 {
1233  if (sv.empty())
1234  return false; // nothing to do
1235 
1236  if (!value)
1237  {
1238  find_zero(sv, bv_out);
1239  return bv_out.any();
1240  }
1241  agg_.reset();
1242 
1243  bool found = prepare_and_sub_aggregator(sv, value);
1244  if (!found)
1245  {
1246  bv_out.clear();
1247  return found;
1248  }
1249 
1250  bool any = (search_limit == 1);
1251  found = agg_.combine_and_sub(bv_out, any);
1252  agg_.reset();
1253  return found;
1254 }
1255 
1256 //----------------------------------------------------------------------------
1257 
1258 template<typename SV>
1260  typename SV::value_type value,
1261  size_type& idx)
1262 {
1263  if (sv.empty())
1264  return false; // nothing to do
1265 
1266  BM_ASSERT(value); // cannot handle zero values
1267  if (!value)
1268  return false;
1269 
1270  agg_.reset();
1271  bool found = prepare_and_sub_aggregator(sv, value);
1272  if (!found)
1273  return found;
1274  found = agg_.find_first_and_sub(idx);
1275  agg_.reset();
1276  return found;
1277 }
1278 
1279 
1280 //----------------------------------------------------------------------------
1281 
1282 template<typename SV>
1284  const typename SV::value_type* str,
1285  size_type& idx,
1286  bool remaped)
1287 {
1288  if (sv.empty())
1289  return false; // nothing to do
1290  BM_ASSERT(*str);
1291 
1292  if (!*str)
1293  return false;
1294 
1295  agg_.reset();
1296  unsigned common_prefix_len = 0;
1297 
1298  if (mask_set_)
1299  {
1300  agg_.set_range_hint(mask_from_, mask_to_);
1301  common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
1302  }
1303 
1304  if (remaped)
1305  {
1306  str = remap_value_vect_;
1307  }
1308  else
1309  {
1310  if (sv.is_remap() && str != remap_value_vect_)
1311  {
1312  bool r = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1313  if (!r)
1314  return r;
1315  str = remap_value_vect_;
1316  }
1317  }
1318 
1319  bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len);
1320  if (!found)
1321  return found;
1322 
1323  found = agg_.find_first_and_sub(idx);
1324  agg_.reset();
1325  return found;
1326 }
1327 
1328 //----------------------------------------------------------------------------
1329 
1330 template<typename SV>
1332  const typename SV::value_type* str,
1333  unsigned octet_start)
1334 {
1335  unsigned char bits[64];
1336 
1337  int len = 0;
1338  for (; str[len] != 0; ++len)
1339  {}
1340  BM_ASSERT(len);
1341 
1342  // use reverse order (faster for sorted arrays)
1343  for (int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1344  {
1345  if (unsigned(octet_idx) < octet_start) // common prefix
1346  break;
1347 
1348  unsigned value = unsigned(str[octet_idx]) & 0xFF;
1349  BM_ASSERT(value != 0);
1350 
1351  bm::id64_t plains_mask;
1352  if (&sv == bound_sv_)
1353  plains_mask = vector_plain_masks_[octet_idx];
1354  else
1355  plains_mask = sv.get_plains_mask(unsigned(octet_idx));
1356 
1357  if ((value & plains_mask) != value) // pre-screen for impossible values
1358  return false; // found non-existing plain
1359 
1360  // prep the lists for combined AND-SUB aggregator
1361  //
1362  unsigned short bit_count_v = bm::bitscan(value, bits);
1363  for (unsigned i = 0; i < bit_count_v; ++i)
1364  {
1365  unsigned bit_idx = bits[i];
1366  BM_ASSERT(value & (value_type(1) << bit_idx));
1367  unsigned plain_idx = (unsigned(octet_idx) * 8) + bit_idx;
1368  const bvector_type* bv = sv.get_plain(plain_idx);
1369  agg_.add(bv);
1370  } // for i
1371 
1372  unsigned vinv = unsigned(value);
1373  if (bm::conditional<sizeof(value_type) == 1>::test())
1374  {
1375  vinv ^= 0xFF;
1376  }
1377  else // 2-byte char
1378  {
1379  BM_ASSERT(sizeof(value_type) == 2);
1380  vinv ^= 0xFFFF;
1381  }
1382  // exclude the octet bits which are all not set (no vectors)
1383  vinv &= unsigned(plains_mask);
1384  for (unsigned octet_plain = (unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1385  {
1386  bm::id_t t = vinv & -vinv;
1387  unsigned bit_idx = bm::word_bitcount(t - 1);
1388  unsigned plain_idx = octet_plain + bit_idx;
1389  const bvector_type* bv = sv.get_plain(plain_idx);
1390  BM_ASSERT(bv);
1391  agg_.add(bv, 1); // agg to SUB group
1392  } // for
1393  } // for octet_idx
1394 
1395  // add all vectors above string len to the SUB operation group
1396  //
1397  unsigned plain_idx = unsigned(len * 8) + 1;
1398  typename SV::size_type plains;
1399  if (&sv == bound_sv_)
1400  plains = effective_str_max_ * unsigned(sizeof(value_type)) * 8;
1401  else
1402  plains = sv.plains();
1403  for (; plain_idx < plains; ++plain_idx)
1404  {
1405  bvector_type_const_ptr bv = sv.get_plain(plain_idx);
1406  if (bv)
1407  agg_.add(bv, 1); // agg to SUB group
1408  } // for
1409  return true;
1410 }
1411 
1412 //----------------------------------------------------------------------------
1413 
1414 template<typename SV>
1416  typename SV::value_type value)
1417 {
1418  unsigned char bits[sizeof(value) * 8];
1419  unsigned short bit_count_v = bm::bitscan(value, bits);
1420  BM_ASSERT(bit_count_v);
1421  const value_type mask1 = 1;
1422 
1423  // prep the lists for combined AND-SUB aggregator
1424  // (backward order has better chance for bit reduction on AND)
1425  //
1426  for (unsigned i = bit_count_v; i > 0; --i)
1427  {
1428  unsigned bit_idx = bits[i-1];
1429  BM_ASSERT(value & (mask1 << bit_idx));
1430  const bvector_type* bv = sv.get_plain(bit_idx);
1431  if (bv)
1432  agg_.add(bv);
1433  else
1434  return false;
1435  }
1436 
1437  unsigned sv_plains = sv.effective_plains();
1438  for (unsigned i = 0; i < sv_plains; ++i)
1439  {
1440  bvector_type_const_ptr bv = sv.get_plain(i);
1441  value_type mask = mask1 << i;
1442  if (bv && !(value & mask))
1443  agg_.add(bv, 1); // agg to SUB group
1444  } // for i
1445  return true;
1446 }
1447 
1448 
1449 //----------------------------------------------------------------------------
1450 
1451 template<typename SV>
1453  typename SV::value_type value,
1454  typename SV::bvector_type& bv_out)
1455 {
1456  if (sv.empty())
1457  return; // nothing to do
1458 
1459  if (!value)
1460  {
1461  find_zero(sv, bv_out);
1462  return;
1463  }
1464 
1465  unsigned char bits[sizeof(value) * 8];
1466  unsigned short bit_count_v = bm::bitscan(value, bits);
1467  BM_ASSERT(bit_count_v);
1468 
1469  // aggregate AND all matching vectors
1470  {
1471  const bvector_type* bv_plain = sv.get_plain(bits[--bit_count_v]);
1472  if (bv_plain)
1473  bv_out = *bv_plain;
1474  else // plain not found
1475  {
1476  bv_out.clear();
1477  return;
1478  }
1479  }
1480  for (unsigned i = 0; i < bit_count_v; ++i)
1481  {
1482  const bvector_type* bv_plain = sv.get_plain(bits[i]);
1483  if (bv_plain)
1484  bv_out &= *bv_plain;
1485  else // mandatory plain not found - empty result!
1486  {
1487  bv_out.clear();
1488  return;
1489  }
1490  } // for i
1491 
1492  // SUB all other plains
1493  unsigned sv_plains = sv.effective_plains();
1494  for (unsigned i = 0; (i < sv_plains) && value; ++i)
1495  {
1496  const bvector_type* bv_plain = sv.get_plain(i);
1497  if (bv_plain && !(value & (value_type(1) << i)))
1498  bv_out -= *bv_plain;
1499  }
1500 }
1501 
1502 //----------------------------------------------------------------------------
1503 
1504 template<typename SV>
1505 bool sparse_vector_scanner<SV>::find_eq_str(const typename SV::value_type* str,
1506  typename SV::size_type& pos)
1507 {
1508  BM_ASSERT(bound_sv_);
1509  return this->find_eq_str(*bound_sv_, str, pos);
1510 }
1511 
1512 //----------------------------------------------------------------------------
1513 
1514 template<typename SV>
1516  const typename SV::value_type* str,
1517  typename SV::size_type& pos)
1518 {
1519  bool found = false;
1520  if (sv.empty())
1521  return found;
1522  if (*str)
1523  {
1524  bool remaped = false;
1525  if (bm::conditional<SV::is_remap_support::value>::test()) // test remapping trait
1526  {
1527  if (sv.is_remap() && str != remap_value_vect_)
1528  {
1529  remaped = sv.remap_tosv(remap_value_vect_, SV::max_vector_size, str);
1530  if (!remaped)
1531  return remaped;
1532  str = remap_value_vect_;
1533  }
1534  }
1535 
1536  size_type found_pos;
1537  found = find_first_eq(sv, str, found_pos, remaped);
1538  if (found)
1539  {
1540  pos = found_pos;
1541  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1542  {
1543  if (sv.is_compressed()) // if compressed vector - need rank translation
1544  found = sv.find_rank(found_pos + 1, pos);
1545  }
1546  }
1547  }
1548  else // search for zero value
1549  {
1550  // TODO: implement optimized version which would work witout temp vector
1551  typename SV::bvector_type bv_zero;
1552  find_zero(sv, bv_zero);
1553  found = bv_zero.find(pos);
1554  }
1555  return found;
1556 }
1557 
1558 //----------------------------------------------------------------------------
1559 
1560 template<typename SV>
1562  const typename SV::value_type* str,
1563  typename SV::size_type& pos)
1564 {
1565  bool found = false;
1566  if (sv.empty())
1567  return found;
1568 
1569  if (*str)
1570  {
1571  bool remaped = false;
1572  // test search pre-condition based on remap tables
1574  {
1575  if (sv.is_remap() && str != remap_value_vect_)
1576  {
1577  remaped = sv.remap_tosv(
1578  remap_value_vect_, SV::max_vector_size, str);
1579  if (!remaped)
1580  return remaped;
1581  }
1582  }
1583 
1584  reset_search_range();
1585 
1586  // narrow down the search
1587  const unsigned min_distance_cutoff = bm::gap_max_bits + bm::gap_max_bits / 2;
1588  size_type l, r, dist;
1589  l = 0; r = sv.size()-1;
1590  size_type found_pos;
1591 
1592  // binary search to narrow down the search window
1593  while (l <= r)
1594  {
1595  dist = r - l;
1596  if (dist < min_distance_cutoff)
1597  {
1598  // we are in an narrow <2 blocks window, but still may be in two
1599  // different neighboring blocks, lets try to narrow
1600  // to exactly one block
1601 
1602  size_type nb_l = (l >> bm::set_block_shift);
1603  size_type nb_r = (r >> bm::set_block_shift);
1604  if (nb_l != nb_r)
1605  {
1606  size_type mid = nb_r * bm::gap_max_bits;
1607  if (mid < r)
1608  {
1609  int cmp = this->compare_str(sv, mid, str);
1610  if (cmp < 0) // mid < str
1611  l = mid;
1612  else
1613  r = mid-(cmp!=0); // branchless if (cmp==0) r=mid;
1614  BM_ASSERT(l < r);
1615  }
1616  nb_l = unsigned(l >> bm::set_block_shift);
1617  nb_r = unsigned(r >> bm::set_block_shift);
1618  }
1619 
1620  if (nb_l == nb_r)
1621  {
1622  size_type max_nb = sv.size() >> bm::set_block_shift;
1623  if (nb_l != max_nb)
1624  {
1625  // linear in-place fixed depth scan to identify the sub-range
1627  int cmp = this->compare_str(sv, mid, str);
1628  if (cmp < 0)
1629  {
1630  l = mid;
1631  mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 2;
1632  cmp = this->compare_str(sv, mid, str);
1633  if (cmp < 0)
1634  {
1635  l = mid;
1636  mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 3;
1637  cmp = this->compare_str(sv, mid, str);
1638  if (cmp < 0)
1639  l = mid;
1640  else
1641  r = mid;
1642  }
1643  else
1644  {
1645  r = mid;
1646  }
1647  }
1648  else
1649  {
1650  r = mid;
1651  }
1652  }
1653  }
1654 
1655  set_search_range(l, r);
1656  break;
1657  }
1658 
1659  typename SV::size_type mid = dist/2+l;
1660  size_type nb = (mid >> bm::set_block_shift);
1661  mid = nb * bm::gap_max_bits;
1662  if (mid <= l)
1663  {
1664  if (nb == 0 && r > bm::gap_max_bits)
1665  mid = bm::gap_max_bits;
1666  else
1667  mid = dist / 2 + l;
1668  }
1669  BM_ASSERT(mid > l);
1670 
1671  int cmp = this->compare_str(sv, mid, str);
1672  if (cmp == 0)
1673  {
1674  found_pos = mid;
1675  found = true;
1676  set_search_range(l, mid);
1677  break;
1678  }
1679  if (cmp < 0)
1680  l = mid+1;
1681  else
1682  r = mid-1;
1683  } // while
1684 
1685  // use linear search (range is set)
1686  found = find_first_eq(sv, str, found_pos, remaped);
1687  if (found)
1688  {
1689  pos = found_pos;
1690  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
1691  {
1692  if (sv.is_compressed()) // if compressed vector - need rank translation
1693  found = sv.find_rank(found_pos + 1, pos);
1694  }
1695  }
1696  reset_search_range();
1697  }
1698  else // search for zero value
1699  {
1700  // TODO: implement optimized version which would work without temp vector
1701  typename SV::bvector_type bv_zero;
1702  find_zero(sv, bv_zero);
1703  found = bv_zero.find(pos);
1704  }
1705  return found;
1706 }
1707 
1708 //----------------------------------------------------------------------------
1709 
1710 template<typename SV>
1711 bool sparse_vector_scanner<SV>::bfind_eq_str(const typename SV::value_type* str,
1712  typename SV::size_type& pos)
1713 {
1714  BM_ASSERT(bound_sv_);
1715  return bfind_eq_str(*bound_sv_, str, pos);
1716 }
1717 
1718 //----------------------------------------------------------------------------
1719 
1720 template<typename SV>
1722  const typename SV::value_type val,
1723  typename SV::size_type& pos)
1724 {
1725  int cmp;
1726  size_type l = 0, r = sv.size();
1727  if (l == r) // empty vector
1728  {
1729  pos = 0;
1730  return false;
1731  }
1732  --r;
1733 
1734  // check initial boundary conditions if insert point is at tail/head
1735  cmp = this->compare(sv, l, val); // left (0) boundary check
1736  if (cmp > 0) // vect[x] > str
1737  {
1738  pos = 0;
1739  return false;
1740  }
1741  if (cmp == 0)
1742  {
1743  pos = 0;
1744  return true;
1745  }
1746  cmp = this->compare(sv, r, val); // right(size-1) boundary check
1747  if (cmp == 0)
1748  {
1749  pos = r;
1750  // back-scan to rewind all duplicates
1751  // TODO: adapt one-sided binary search to traverse large platos
1752  for (; r >= 0; --r)
1753  {
1754  cmp = this->compare(sv, r, val);
1755  if (cmp != 0)
1756  return true;
1757  pos = r;
1758  } // for i
1759  return true;
1760  }
1761  if (cmp < 0) // vect[x] < str
1762  {
1763  pos = r+1;
1764  return false;
1765  }
1766 
1767  size_type dist = r - l;
1768  if (dist < linear_cutoff1)
1769  {
1770  for (; l <= r; ++l)
1771  {
1772  cmp = this->compare(sv, l, val);
1773  if (cmp == 0)
1774  {
1775  pos = l;
1776  return true;
1777  }
1778  if (cmp > 0)
1779  {
1780  pos = l;
1781  return false;
1782  }
1783  } // for
1784  }
1785 
1786  while (l <= r)
1787  {
1788  size_type mid = (r-l)/2+l;
1789  cmp = this->compare(sv, mid, val);
1790  if (cmp == 0)
1791  {
1792  pos = mid;
1793  // back-scan to rewind all duplicates
1794  for (size_type i = mid-1; i >= 0; --i)
1795  {
1796  cmp = this->compare(sv, i, val);
1797  if (cmp != 0)
1798  return true;
1799  pos = i;
1800  } // for i
1801  pos = 0;
1802  return true;
1803  }
1804  if (cmp < 0)
1805  l = mid+1;
1806  else
1807  r = mid-1;
1808 
1809  dist = r - l;
1810  if (dist < linear_cutoff2) // do linear scan here
1811  {
1812  typename SV::const_iterator it(&sv, l);
1813  for (; it.valid(); ++it, ++l)
1814  {
1815  typename SV::value_type sv_value = it.value();
1816  if (sv_value == val)
1817  {
1818  pos = l;
1819  return true;
1820  }
1821  if (sv_value > val) // vect[x] > val
1822  {
1823  pos = l;
1824  return false;
1825  }
1826  } // for it
1827  BM_ASSERT(0);
1828  pos = l;
1829  return false;
1830  }
1831  } // while
1832 
1833  BM_ASSERT(0);
1834  return false;
1835 }
1836 
1837 
1838 //----------------------------------------------------------------------------
1839 
1840 template<typename SV>
1842  const SV& sv,
1843  const typename SV::value_type* str,
1844  typename SV::size_type& pos)
1845 {
1846  int cmp;
1847  size_type l = 0, r = sv.size();
1848 
1849  if (l == r) // empty vector
1850  {
1851  pos = 0;
1852  return false;
1853  }
1854  --r;
1855 
1856  // check initial boundary conditions if insert point is at tail/head
1857  cmp = this->compare_str(sv, l, str); // left (0) boundary check
1858  if (cmp > 0) // vect[x] > str
1859  {
1860  pos = 0;
1861  return false;
1862  }
1863  if (cmp == 0)
1864  {
1865  pos = 0;
1866  return true;
1867  }
1868  cmp = this->compare_str(sv, r, str); // right(size-1) boundary check
1869  if (cmp == 0)
1870  {
1871  pos = r;
1872  // back-scan to rewind all duplicates
1873  // TODO: adapt one-sided binary search to traverse large platos
1874  for (; r >= 0; --r)
1875  {
1876  cmp = this->compare_str(sv, r, str);
1877  if (cmp != 0)
1878  return true;
1879  pos = r;
1880  } // for i
1881  return true;
1882  }
1883  if (cmp < 0) // vect[x] < str
1884  {
1885  pos = r+1;
1886  return false;
1887  }
1888 
1889  size_type dist = r - l;
1890  if (dist < linear_cutoff1)
1891  {
1892  for (; l <= r; ++l)
1893  {
1894  cmp = this->compare_str(sv, l, str);
1895  if (cmp == 0)
1896  {
1897  pos = l;
1898  return true;
1899  }
1900  if (cmp > 0)
1901  {
1902  pos = l;
1903  return false;
1904  }
1905  } // for
1906  }
1907  while (l <= r)
1908  {
1909  size_type mid = (r-l)/2+l;
1910  cmp = this->compare_str(sv, mid, str);
1911  if (cmp == 0)
1912  {
1913  pos = mid;
1914  // back-scan to rewind all duplicates
1915  for (size_type i = mid-1; i >= 0; --i)
1916  {
1917  cmp = this->compare_str(sv, i, str);
1918  if (cmp != 0)
1919  return true;
1920  pos = i;
1921  } // for i
1922  pos = 0;
1923  return true;
1924  }
1925  if (cmp < 0)
1926  l = mid+1;
1927  else
1928  r = mid-1;
1929 
1930  dist = r - l;
1931  if (dist < linear_cutoff2) // do linear scan here
1932  {
1933  hmatr_.init();
1934 
1935  dist = sv.decode(hmatr_, l, dist+1);
1936  for (unsigned i = 0; i < dist; ++i, ++l)
1937  {
1938  const typename SV::value_type* hm_str = hmatr_.row(i);
1939  cmp = ::strcmp(hm_str, str);
1940  if (cmp == 0)
1941  {
1942  pos = l;
1943  return true;
1944  }
1945  if (cmp > 0) // vect[x] > str
1946  {
1947  pos = l;
1948  return false;
1949  }
1950  }
1951  cmp = this->compare_str(sv, l, str);
1952  if (cmp > 0) // vect[x] > str
1953  {
1954  pos = l;
1955  return false;
1956  }
1957  BM_ASSERT(0);
1958  pos = l;
1959  return false;
1960  }
1961  } // while
1962 
1963  BM_ASSERT(0);
1964  return false;
1965 }
1966 
1967 
1968 //----------------------------------------------------------------------------
1969 
1970 template<typename SV>
1972  size_type idx,
1973  const value_type* str)
1974 {
1975  if (bound_sv_ == &sv)
1976  {
1977  size_type nb = (idx >> bm::set_block_shift);
1978  size_type nbit = (idx & bm::set_block_mask);
1979  if (nbit == 0) // access to sentinel, first block element
1980  {
1981  value_type* s0 = block0_elements_cache_.row(nb);
1982  if (*s0 == 0) // uninitialized element
1983  {
1984  sv.get(idx, s0, size_type(block0_elements_cache_.cols()));
1985  }
1986  int res = 0;
1987  for (unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
1988  {
1989  char octet = str[i]; char value = s0[i];
1990  res = (value > octet) - (value < octet);
1991  if (res || !octet)
1992  break;
1993  } // for i
1994  BM_ASSERT(res == sv.compare(idx, str));
1995  return res;
1996  }
1997  else
1998  {
1999  if (nbit % bm::sub_block3_size == 0) // TODO: use AND mask here
2000  {
2001  size_type k = nbit / bm::sub_block3_size - 1;
2002  value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
2003  int res = 0;
2004  for (unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
2005  {
2006  char octet = str[i]; char value = s1[i];
2007  res = (value > octet) - (value < octet);
2008  if (res || !octet)
2009  break;
2010  } // for i
2011  BM_ASSERT(res == sv.compare(idx, str));
2012  return res;
2013  }
2014  }
2015  }
2016  return sv.compare(idx, str);
2017 }
2018 
2019 //----------------------------------------------------------------------------
2020 
2021 template<typename SV>
2023  size_type idx,
2024  const value_type val) BMNOEXCEPT
2025 {
2026  // TODO: implement sentinel elements cache (similar to compare_str())
2027  return sv.compare(idx, val);
2028 }
2029 
2030 //----------------------------------------------------------------------------
2031 
2032 template<typename SV>
2034  typename SV::value_type value,
2035  typename SV::bvector_type& bv_out)
2036 {
2037  if (sv.empty())
2038  {
2039  bv_out.clear();
2040  return; // nothing to do
2041  }
2042  if (!value)
2043  {
2044  find_zero(sv, bv_out);
2045  return;
2046  }
2047 
2048  find_eq_with_nulls(sv, value, bv_out, 0);
2049 
2050  decompress(sv, bv_out);
2051  correct_nulls(sv, bv_out);
2052 }
2053 
2054 //----------------------------------------------------------------------------
2055 
2056 template<typename SV>
2058  typename SV::value_type value,
2059  typename SV::size_type& pos)
2060 {
2061  if (!value) // zero value - special case
2062  {
2063  bvector_type bv_zero;
2064  find_eq(sv, value, bv_zero);
2065  bool found = bv_zero.find(pos);
2066  return found;
2067  }
2068 
2069  size_type found_pos;
2070  bool found = find_first_eq(sv, value, found_pos);
2071  if (found)
2072  {
2073  pos = found_pos;
2074  if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
2075  {
2076  if (sv.is_compressed()) // if compressed vector - need rank translation
2077  found = sv.find_rank(found_pos + 1, pos);
2078  }
2079  }
2080  return found;
2081 }
2082 
2083 //----------------------------------------------------------------------------
2084 
2085 template<typename SV>
2087  typename SV::bvector_type& bv_out)
2088 {
2089  agg_.reset(); // in case if previous scan was interrupted
2090  for (unsigned i = 0; i < sv.plains(); ++i)
2091  agg_.add(sv.get_plain(i));
2092  agg_.combine_or(bv_out);
2093  agg_.reset();
2094 }
2095 
2096 //----------------------------------------------------------------------------
2097 
2098 template<typename SV>
2100  typename SV::bvector_type& bv_out)
2101 {
2102  if (!sv.is_compressed())
2103  return; // nothing to do
2104  const bvector_type* bv_non_null = sv.get_null_bvector();
2105  BM_ASSERT(bv_non_null);
2106 
2107  // TODO: implement faster decompressor for small result-sets
2108  rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
2109  bv_out.swap(bv_tmp_);
2110 }
2111 
2112 //----------------------------------------------------------------------------
2113 
2114 template<typename SV>
2116 {
2117  BM_ASSERT(from < to);
2118  mask_from_ = from;
2119  mask_to_ = to;
2120  mask_set_ = true;
2121 }
2122 
2123 //----------------------------------------------------------------------------
2124 
2125 template<typename SV>
2127 {
2128  mask_set_ = false;
2129  mask_from_ = mask_to_ = bm::id_max;
2130 }
2131 
2132 
2133 //----------------------------------------------------------------------------
2134 //
2135 //----------------------------------------------------------------------------
2136 
2137 
2138 } // namespace bm
2139 
2140 #include "bmundef.h"
2141 
2142 #endif
bm::set2set_11_transform::gather_buffer::BM_VECT_ALIGN_ATTR
value_type BM_VECT_ALIGN buffer_[BSIZE] BM_VECT_ALIGN_ATTR
Definition: bmsparsevec_algo.h:861
bmaggregator.h
Algorithms for fast aggregation of N bvectors.
BM_VECT_ALIGN
#define BM_VECT_ALIGN
Definition: bmdef.h:359
bm::set2set_11_transform::remap
void remap(const bvector_type &bv_in, bvector_type &bv_out)
Perform remapping (Image function) (based on attached translation table)
Definition: bmsparsevec_algo.h:982
bm::bvector::mem_pool_guard
Definition: bm.h:785
bm::sparse_vector_scanner::invert
void invert(const SV &sv, typename SV::bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
Definition: bmsparsevec_algo.h:1195
bm::set2set_11_transform::gather_buffer::BM_VECT_ALIGN_ATTR
size_type BM_VECT_ALIGN gather_idx_[BSIZE] BM_VECT_ALIGN_ATTR
Definition: bmsparsevec_algo.h:860
bm::dynamic_range_clip_high
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
Definition: bmsparsevec_algo.h:64
bm::sparse_vector_scanner::reset_search_range
void reset_search_range()
reset (disable) search range
Definition: bmsparsevec_algo.h:2126
bm::set2set_11_transform::sv_ptr_
const SV * sv_ptr_
current translation table vector
Definition: bmsparsevec_algo.h:876
bm::set2set_11_transform::bv_zero_
bvector_type bv_zero_
bit-vector for zero elements
Definition: bmsparsevec_algo.h:881
bm::set2set_11_transform::gather_buffer
Definition: bmsparsevec_algo.h:858
bm::sparse_vector_scanner::size_type
SV::size_type size_type
Definition: bmsparsevec_algo.h:488
bm::set2set_11_transform::gb_
gather_buffer_type * gb_
intermediate buffers
Definition: bmsparsevec_algo.h:877
bm::bvector::size
size_type size() const BMNOEXCEPT
Returns bvector's capacity (number of bits it can store)
Definition: bm.h:1272
bm::sparse_vector_scanner::operator=
void operator=(const sparse_vector_scanner &)=delete
bm::set2set_11_transform
Integer set to set transformation (functional image in groups theory) https://en.wikipedia....
Definition: bmsparsevec_algo.h:773
bm::bvector::bit_or
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand OR : this := bv1 OR bv2
Definition: bm.h:4823
bm::set2set_11_transform::bvector_type
SV::bvector_type bvector_type
Definition: bmsparsevec_algo.h:776
bm::sparse_vector_scanner::find_zero
void find_zero(const SV &sv, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to 0
Definition: bmsparsevec_algo.h:1170
bm::sparse_vector_scanner::vector_capacity
vector_capacity
Definition: bmsparsevec_algo.h:721
bm::dynamic_range_clip_low
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
Definition: bmsparsevec_algo.h:105
bm::no_null
@ no_null
do not support NULL values
Definition: bmconst.h:216
bm::sparse_vector_scanner::allocator_type
bvector_type::allocator_type allocator_type
Definition: bmsparsevec_algo.h:490
bm::set2set_11_transform::allocator_pool_type
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
Definition: bmsparsevec_algo.h:779
bm::BM_SORTED_UNIFORM
@ BM_SORTED_UNIFORM
sorted and in one block (internal!)
Definition: bmconst.h:192
bm::set2set_11_transform::operator=
void operator=(const set2set_11_transform &)=delete
bm::sparse_vector_scanner::compare_str
int compare_str(const SV &sv, size_type idx, const value_type *str)
compare sv[idx] with input str
Definition: bmsparsevec_algo.h:1971
bm::BM_SORTED
@ BM_SORTED
input set is sorted (ascending order)
Definition: bmconst.h:191
bm::sparse_vector_scanner::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bmsparsevec_algo.h:491
bm::id64_t
unsigned long long int id64_t
Definition: bmconst.h:34
bmsparsevec.h
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
bm::aggregator< bvector_type >
bm::sparse_vector_scanner::compare
int compare(const SV &sv, size_type idx, const value_type val) BMNOEXCEPT
compare sv[idx] with input value
Definition: bmsparsevec_algo.h:2022
bm::bvector::enumerator
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:599
bm::sparse_vector_scanner
algorithms for sparse_vector scan/search
Definition: bmsparsevec_algo.h:481
bm::sparse_vector_scanner::bfind_eq_str
bool bfind_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
Definition: bmsparsevec_algo.h:1561
bm::sparse_vector_scanner::linear_cutoff1
@ linear_cutoff1
Definition: bmsparsevec_algo.h:728
bm::bvector::allocator_type
Alloc allocator_type
Definition: bm.h:110
bm::set2set_11_transform::gather_buffer_type
gather_buffer< sv_g_size > gather_buffer_type
Definition: bmsparsevec_algo.h:868
bmalgo.h
Algorithms for bvector<> (main include)
bm::bitscan
unsigned short bitscan(V w, B *bits) BMNOEXCEPT
Definition: bmfunc.h:556
bm::sparse_vector_scanner::decompress
void decompress(const SV &sv, typename SV::bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
Definition: bmsparsevec_algo.h:2099
bm::sparse_vector_scanner::sparse_vector_scanner
sparse_vector_scanner()
Definition: bmsparsevec_algo.h:1106
bm::sparse_vector_scanner::bvector_type_const_ptr
const typedef bvector_type * bvector_type_const_ptr
Definition: bmsparsevec_algo.h:485
bm::sparse_vector_find_mismatch
void sparse_vector_find_mismatch(typename SV1::bvector_type &bv, const SV1 &sv1, const SV2 &sv2, bm::null_support null_proc)
Find mismatch vector, indicating positions of mismatch between two sparse vectors (uses linear scan i...
Definition: bmsparsevec_algo.h:330
bm::set2set_11_transform::size_type
SV::size_type size_type
Definition: bmsparsevec_algo.h:778
bm::bvector
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:107
bmundef.h
pre-processor un-defines to avoid global space pollution (internal)
bm::sub_block3_size
const unsigned sub_block3_size
Definition: bmconst.h:123
bm::use_null
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:215
bm::id_max
const unsigned id_max
Definition: bmconst.h:108
bm::sparse_vector_scanner::lower_bound_str
bool lower_bound_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
lower bound search for an array position
Definition: bmsparsevec_algo.h:1841
bm::set_block_mask
const unsigned set_block_mask
Definition: bmconst.h:56
bm::sparse_vector_find_first_mismatch
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
Definition: bmsparsevec_algo.h:170
bm::xor_swap
void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two scalar variables.
Definition: bmfunc.h:911
bm::bvector::allocator_pool_type
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:111
bm::bvector::iterator_base::valid
bool valid() const BMNOEXCEPT
Checks if iterator is still valid. Analog of != 0 comparison for pointers.
Definition: bm.h:280
bm::rank_compressor< bvector_type >
bm::sparse_vector_scanner::find_nonzero
void find_nonzero(const SV &sv, typename SV::bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all plains.
Definition: bmsparsevec_algo.h:2086
bm::set2set_11_transform::~set2set_11_transform
~set2set_11_transform()
Definition: bmsparsevec_algo.h:906
bm::set2set_11_transform::value_type
SV::value_type value_type
Definition: bmsparsevec_algo.h:777
BMNOEXCEPT
#define BMNOEXCEPT
Definition: bmdef.h:79
bm::set2set_11_transform::run
void run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Run remap transformation.
Definition: bmsparsevec_algo.h:831
bm::bvector::opt_none
@ opt_none
no optimization
Definition: bm.h:131
bm::set2set_11_transform::sv_g_size
@ sv_g_size
Definition: bmsparsevec_algo.h:866
bm::sparse_vector_scanner::set_search_range
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
Definition: bmsparsevec_algo.h:2115
bm::sparse_vector_scanner::reset_binding
void reset_binding() BMNOEXCEPT
reset sparse vector binding
Definition: bmsparsevec_algo.h:1161
bm::sparse_vector_scanner::linear_cutoff2
@ linear_cutoff2
Definition: bmsparsevec_algo.h:729
bm::conditional
ad-hoc conditional expressions
Definition: bmutil.h:110
bvector_type
bm::bvector bvector_type
Definition: xsample07a.cpp:93
bm::set2set_11_transform::set2set_11_transform
set2set_11_transform()
Definition: bmsparsevec_algo.h:893
BM_ASSERT
#define BM_ASSERT
Definition: bmdef.h:130
bm::id_t
unsigned int id_t
Definition: bmconst.h:37
bm::sparse_vector_scanner::find_eq_with_nulls_horizontal
void find_eq_with_nulls_horizontal(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
For testing purposes only.
Definition: bmsparsevec_algo.h:1452
bmdef.h
Definitions(internal)
bm::sparse_vector_scanner::find_eq_str
bool find_eq_str(const SV &sv, const typename SV::value_type *str, typename SV::size_type &pos)
find first sparse vector element (string)
Definition: bmsparsevec_algo.h:1515
bm::bvector::resize
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2270
bm::sparse_vector_scanner::matrix_search_buf_type
bm::heap_matrix< typename SV::value_type, linear_cutoff2, SV::sv_octet_plains, allocator_type > matrix_search_buf_type
Definition: bmsparsevec_algo.h:736
bm::gap_max_bits
const unsigned gap_max_bits
Definition: bmconst.h:80
bm::bvector::invert
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
Definition: bm.h:3001
bm::word_bitcount
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition: bmfunc.h:199
bm::sparse_vector_scanner::correct_nulls
void correct_nulls(const SV &sv, typename SV::bvector_type &bv_out)
Exclude possible NULL values from the result vector.
Definition: bmsparsevec_algo.h:1217
bm::set2set_11_transform::get_bv_zero
const bvector_type & get_bv_zero() const
Get read access to zero-elements vector Zero vector gets populated after attach_sv() is called or as ...
Definition: bmsparsevec_algo.h:790
bm::set2set_11_transform::gather_window_size
gather_window_size
Definition: bmsparsevec_algo.h:864
bm::set2set_11_transform::attach_sv
void attach_sv(const SV *sv_brel, bool compute_stats=false)
Attach a translation table vector for remapping (Image function)
Definition: bmsparsevec_algo.h:916
bm::set2set_11_transform::bv_product_
bvector_type bv_product_
temp vector
Definition: bmsparsevec_algo.h:878
bm::sparse_vector_scanner::bvector_type
SV::bvector_type bvector_type
Definition: bmsparsevec_algo.h:484
bm::sparse_vector_scanner::bvector_type_ptr
bvector_type * bvector_type_ptr
Definition: bmsparsevec_algo.h:486
bm::bvector::bit_xor
bm::bvector< Alloc > & bit_xor(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode)
3-operand XOR : this := bv1 XOR bv2
Definition: bm.h:4920
bm::set_block_shift
const unsigned set_block_shift
Definition: bmconst.h:55
bm::sparse_vector_scanner::prepare_and_sub_aggregator
bool prepare_and_sub_aggregator(const SV &sv, typename SV::value_type value)
Prepare aggregator for AND-SUB (EQ) search.
Definition: bmsparsevec_algo.h:1415
bm
Definition: bm.h:76
bm::sparse_vector_scanner::max_columns
@ max_columns
Definition: bmsparsevec_algo.h:723
bm::sparse_vector_scanner::bind
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
Definition: bmsparsevec_algo.h:1118
bm::null_support
null_support
NULL-able value support.
Definition: bmconst.h:213
bm::set2set_11_transform::one_pass_run
void one_pass_run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Definition: bmsparsevec_algo.h:1079
bm::sparse_vector_scanner::lower_bound
bool lower_bound(const SV &sv, const typename SV::value_type val, typename SV::size_type &pos)
lower bound search for an array position
Definition: bmsparsevec_algo.h:1721
bm::sparse_vector_scanner::value_type
SV::value_type value_type
Definition: bmsparsevec_algo.h:487
bm::sparse_vector_scanner::search_algo_params
search_algo_params
Definition: bmsparsevec_algo.h:726
bm::sparse_vector_scanner::heap_matrix_type
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
Definition: bmsparsevec_algo.h:732
bm::set2set_11_transform::pool_
allocator_pool_type pool_
Definition: bmsparsevec_algo.h:883
bm::sparse_vector_scanner::find_first_eq
bool find_first_eq(const SV &sv, typename SV::value_type value, size_type &idx)
find first value (may include NULL indexes)
Definition: bmsparsevec_algo.h:1259
bm::sparse_vector_scanner::find_eq_with_nulls
bool find_eq_with_nulls(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out, typename SV::size_type search_limit=0)
find value (may include NULL indexes)
Definition: bmsparsevec_algo.h:1228
bm::sparse_vector_scanner::find_eq
void find_eq(const SV &sv, typename SV::value_type value, typename SV::bvector_type &bv_out)
find all sparse vector elements EQ to search value
Definition: bmsparsevec_algo.h:2033
bm::set2set_11_transform::have_stats_
bool have_stats_
flag of statistics presense
Definition: bmsparsevec_algo.h:880
bm::bvector::mem_pool_guard::assign_if_not_set
void assign_if_not_set(allocator_pool_type &pool, bvector< Alloc > &bv) BMNOEXCEPT
check if vector has no assigned allocator and set one
Definition: bm.h:803