BitMagic-C++
bmsparsevec_algo.h
Go to the documentation of this file.
1#ifndef BMSPARSEVEC_ALGO__H__INCLUDED__
2#define BMSPARSEVEC_ALGO__H__INCLUDED__
3/*
4Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5
6Licensed under the Apache License, Version 2.0 (the "License");
7you may not use this file except in compliance with the License.
8You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12Unless required by applicable law or agreed to in writing, software
13distributed under the License is distributed on an "AS IS" BASIS,
14WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15See the License for the specific language governing permissions and
16limitations under the License.
17
18For more information please visit: http://bitmagic.io
19*/
20/*! \file bmsparsevec_algo.h
21 \brief Algorithms for bm::sparse_vector
22*/
23
24#ifndef BM__H__INCLUDED__
25// BitMagic utility headers do not include main "bm.h" declaration
26// #include "bm.h" or "bm64.h" explicitly
27# error missing include (bm.h or bm64.h)
28#endif
29
30#include <limits>
31
32#include "bmdef.h"
33#include "bmsparsevec.h"
34#include "bmaggregator.h"
35#include "bmbuffer.h"
36#include "bmalgo.h"
37#include "bmdef.h"
38
39#ifdef _MSC_VER
40# pragma warning( disable: 4146 )
41#endif
42
43
44
45/** \defgroup svalgo Sparse vector algorithms
46 Sparse vector algorithms
47 \ingroup svector
48 */
49
50
51namespace bm
52{
53
54
55/*!
56 \brief Clip dynamic range for signal higher than specified
57
58 \param svect - sparse vector to do clipping
59 \param high_bit - max bit (inclusive) allowed for this signal vector
60
61
62 \ingroup svalgo
63 \sa dynamic_range_clip_low
64*/
65template<typename SV>
66void dynamic_range_clip_high(SV& svect, unsigned high_bit)
67{
68 unsigned sv_planes = svect.slices();
69
70 BM_ASSERT(sv_planes > high_bit && high_bit > 0);
71
72 typename SV::bvector_type bv_acc;
73 unsigned i;
74
75 // combine all the high bits into accumulator vector
76 for (i = high_bit+1; i < sv_planes; ++i)
77 {
78 if (typename SV::bvector_type* bv = svect.slice(i))
79 {
80 bv_acc.bit_or(*bv);
81 svect.free_slice(i);
82 }
83 } // for i
84
85 // set all bits ON for all low vectors, which happen to be clipped
86 for (i = high_bit; true; --i)
87 {
88 typename SV::bvector_type* bv = svect.get_create_slice(i);
89 bv->bit_or(bv_acc);
90 if (!i)
91 break;
92 } // for i
93}
94
95
96/*!
97 \brief Clip dynamic range for signal lower than specified (boost)
98
99 \param svect - sparse vector to do clipping
100 \param low_bit - low bit (inclusive) allowed for this signal vector
101
102 \ingroup svalgo
103 \sa dynamic_range_clip_high
104*/
105template<typename SV>
106void dynamic_range_clip_low(SV& svect, unsigned low_bit)
107{
108 if (low_bit == 0) return; // nothing to do
109 BM_ASSERT(svect.slices() > low_bit);
110
111 unsigned sv_planes = svect.slices();
112 typename SV::bvector_type bv_acc1;
113 unsigned i;
114
115 // combine all the high bits into accumulator vector
116 for (i = low_bit+1; i < sv_planes; ++i)
117 {
118 if (typename SV::bvector_type* bv = svect.slice(i))
119 bv_acc1.bit_or(*bv);
120 } // for i
121
122 // accumulate all vectors below the clipping point
123 typename SV::bvector_type bv_acc2;
124 typename SV::bvector_type* bv_low_plane = svect.get_create_slice(low_bit);
125
126 for (i = low_bit-1; true; --i)
127 {
128 if (typename SV::bvector_type* bv = svect.slice(i))
129 {
130 bv_acc2.bit_or(*bv);
131 svect.free_slice(i);
132 if (i == 0)
133 break;
134 }
135 } // for i
136
137 // now we want to set 1 in the clipping low plane based on
138 // exclusive or (XOR) between upper and lower parts)
139 // as a result high signal (any bits in the upper planes) gets
140 // slightly lower value (due to clipping) low signal gets amplified
141 // (lower contrast algorithm)
142
143 bv_acc1.bit_xor(bv_acc2);
144 bv_low_plane->bit_or(bv_acc1);
145}
146
147/**
148 Find first mismatch (element which is different) between two sparse vectors
149 (uses linear scan in bit-vector planes)
150
151 Function works with both NULL and NOT NULL vectors
152 NULL means unassigned (uncertainty), so first mismatch NULL is a mismatch
153 even if values in vectors can be formally the same (i.e. 0)
154
155 @param sv1 - vector 1
156 @param sv2 - vector 2
157 @param midx - mismatch index
158 @param null_proc - defines if we want to include (not) NULL
159 vector into comparison (bm::use_null) or not.
160 By default search takes NULL vector into account
161
162 @return true if mismatch found
163
164 @sa sparse_vector_find_mismatch
165
166 \ingroup svalgo
167*/
168template<typename SV>
170 const SV& sv2,
171 typename SV::size_type& midx,
172 bm::null_support null_proc = bm::use_null)
173{
174 typename SV::size_type mismatch = bm::id_max;
175 bool found = false;
176
177 unsigned sv_idx = 0;
178
179 unsigned planes1 = (unsigned)sv1.get_bmatrix().rows();
180 BM_ASSERT(planes1);
181
182 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
183 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
184
185 // for RSC vector do NOT compare NULL planes
186
188 {
189 //--planes1;
190 }
191 else // regular sparse vector - may have NULL planes
192 {
193 if (null_proc == bm::use_null)
194 {
195 if (bv_null1 && bv_null2) // both (not) NULL vectors present
196 {
197 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
198 if (f && (midx < mismatch)) // better mismatch found
199 {
200 found = f; mismatch = midx;
201 }
202 }
203 else // one or both NULL vectors are not present
204 {
205 if (bv_null1)
206 {
207 typename SV::bvector_type bv_tmp; // TODO: get rid of temp bv
208 bv_tmp.resize(sv2.size());
209 bv_tmp.invert(); // turn into true NULL vector
210
211 // find first NULL value (mismatch)
212 bool f = bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
213 if (f && (midx < mismatch)) // better mismatch found
214 {
215 found = f; mismatch = midx;
216 }
217 }
218 if (bv_null2)
219 {
220 typename SV::bvector_type bv_tmp; // TODO: get rid of temp bv
221 bv_tmp.resize(sv1.size());
222 bv_tmp.invert();
223
224 bool f = bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
225 if (f && (midx < mismatch)) // better mismatch found
226 {
227 found = f; mismatch = midx;
228 }
229 }
230 }
231 } // null_proc
232 }
233
234 for (unsigned i = 0; mismatch && (i < planes1); ++i)
235 {
236 typename SV::bvector_type_const_ptr bv1 = sv1.get_slice(i);
237 if (bv1 == bv_null1)
238 bv1 = 0;
239 typename SV::bvector_type_const_ptr bv2 = sv2.get_slice(i);
240 if (bv2 == bv_null2)
241 bv2 = 0;
242
243 if (!bv1)
244 {
245 if (!bv2)
246 continue;
247 bool f = bv2->find(midx);
248 if (f && (midx < mismatch))
249 {
250 found = f; sv_idx = 2; mismatch = midx;
251 }
252 continue;
253 }
254 if (!bv2)
255 {
256 BM_ASSERT(bv1);
257 bool f = bv1->find(midx);
258 if (f && (midx < mismatch))
259 {
260 found = f; sv_idx = 1; mismatch = midx;
261 }
262 continue;
263 }
264 // both planes are not NULL
265 //
266 bool f = bv1->find_first_mismatch(*bv2, midx, mismatch);
267 if (f && (midx < mismatch)) // better mismatch found
268 {
269 found = f; mismatch = midx;
270 // which vector has '1' at mismatch position?
272 sv_idx = (bv1->test(mismatch)) ? 1 : 2;
273 }
274
275 } // for i
276
277 // RSC address translation here
278 //
280 {
281 if (found) // RSC address translation
282 {
283 BM_ASSERT(sv1.is_compressed());
284 BM_ASSERT(sv2.is_compressed());
285
286 switch (sv_idx)
287 {
288 case 1:
289 found = sv1.find_rank(midx + 1, mismatch);
290 break;
291 case 2:
292 found = sv2.find_rank(midx + 1, mismatch);
293 break;
294 default: // unknown, try both
295 BM_ASSERT(0);
296 }
297 BM_ASSERT(found);
298 midx = mismatch;
299 }
300 if (null_proc == bm::use_null)
301 {
302 // no need for address translation in this case
303 bool f = bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
304 if (f && (midx < mismatch)) // better mismatch found
305 {
306 found = f; mismatch = midx;
307 }
308 }
309
310/*
311 else // search for mismatch in the NOT NULL vectors
312 {
313 if (null_proc == bm::use_null)
314 {
315 // no need for address translation in this case
316 typename SV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
317 typename SV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
318 found = bv_null1->find_first_mismatch(*bv_null2, mismatch);
319 }
320 }
321*/
322 }
323
324 midx = mismatch; // minimal mismatch
325 return found;
326}
327
328/**
329 Find mismatch vector, indicating positions of mismatch between two sparse vectors
330 (uses linear scan in bit-vector planes)
331
332 Function works with both NULL and NOT NULL vectors
333
334 @param bv - [out] - bit-ector with mismatch positions indicated as 1s
335 @param sv1 - vector 1
336 @param sv2 - vector 2
337 @param null_proc - rules of processing for (not) NULL plane
338 bm::no_null - NULLs from both vectors are treated as uncertainty
339 and NOT included into final result
340 bm::use_null - difference in NULLs accounted into the result
341
342 @sa sparse_vector_find_first_mismatch
343
344 \ingroup svalgo
345*/
346template<typename SV1, typename SV2>
348 const SV1& sv1,
349 const SV2& sv2,
350 bm::null_support null_proc)
351{
352 typedef typename SV1::bvector_type bvector_type;
353 typedef typename bvector_type::allocator_type allocator_type;
354 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
355
356 allocator_pool_type pool; // local pool for blocks
357 typename bvector_type::mem_pool_guard mp_guard_bv;
358 mp_guard_bv.assign_if_not_set(pool, bv);
359
360
362 {
363 BM_ASSERT(0); // TODO: fixme
364 }
366 {
367 BM_ASSERT(0); // TODO: fixme
368 }
369
370 bv.clear();
371
372 unsigned planes = sv1.effective_slices();; // slices();
373 if (planes < sv2.slices())
374 planes = sv2.slices();
375
376 for (unsigned i = 0; i < planes; ++i)
377 {
378 typename SV1::bvector_type_const_ptr bv1 = sv1.get_slice(i);
379 typename SV2::bvector_type_const_ptr bv2 = sv2.get_slice(i);
380
381 if (!bv1)
382 {
383 if (!bv2)
384 continue;
385 bv |= *bv2;
386 continue;
387 }
388 if (!bv2)
389 {
390 BM_ASSERT(bv1);
391 bv |= *bv1;
392 continue;
393 }
394
395 // both planes are not NULL, compute XOR diff
396 //
397 bvector_type bv_xor;
398 typename bvector_type::mem_pool_guard mp_guard;
399 mp_guard.assign_if_not_set(pool, bv_xor);
400
401 bv_xor.bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
402 bv |= bv_xor;
403
404 } // for i
405
406 // size mismatch check
407 {
408 typename SV1::size_type sz1 = sv1.size();
409 typename SV2::size_type sz2 = sv2.size();
410 if (sz1 != sz2)
411 {
412 if (sz1 < sz2)
413 {
414 }
415 else
416 {
417 bm::xor_swap(sz1, sz2);
418 }
419 bv.set_range(sz1, sz2-1);
420 }
421 }
422
423 // NULL processings
424 //
425 typename SV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
426 typename SV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
427
428 switch (null_proc)
429 {
430 case bm::no_null:
431 // NULL correction to exclude all NULL (unknown) values from the result set
432 // (AND with NOT NULL vector)
433 if (bv_null1 && bv_null2)
434 {
435 bvector_type bv_or;
436 typename bvector_type::mem_pool_guard mp_guard;
437 mp_guard.assign_if_not_set(pool, bv_or);
438
439 bv_or.bit_or(*bv_null1, *bv_null2, bvector_type::opt_none);
440 bv &= bv_or;
441 }
442 else
443 {
444 if (bv_null1)
445 bv &= *bv_null1;
446 if (bv_null2)
447 bv &= *bv_null2;
448 }
449 break;
450 case bm::use_null:
451 if (bv_null1 && bv_null2)
452 {
453 bvector_type bv_xor;
454 typename bvector_type::mem_pool_guard mp_guard;
455 mp_guard.assign_if_not_set(pool, bv_xor);
456
457 bv_xor.bit_xor(*bv_null1, *bv_null2, bvector_type::opt_none);
458 bv |= bv_xor;
459 }
460 else
461 {
462 bvector_type bv_null;
463 typename bvector_type::mem_pool_guard mp_guard;
464 mp_guard.assign_if_not_set(pool, bv_null);
465 if (bv_null1)
466 {
467 bv_null = *bv_null1;
468 bv_null.resize(sv1.size());
469 }
470 if (bv_null2)
471 {
472 bv_null = *bv_null2;
473 bv_null.resize(sv2.size());
474 }
475 bv_null.invert();
476 bv |= bv_null;
477 }
478 break;
479 default:
480 BM_ASSERT(0);
481 }
482}
483
484
485/**
486 \brief algorithms for sparse_vector scan/search
487
488 Scanner uses properties of bit-vector planes to answer questions
489 like "find all sparse vector elements equivalent to XYZ".
490
491 Class uses fast algorithms based on properties of bit-planes.
492 This is NOT a brute force, direct scan, scanner uses search space pruning and cache optimizations
493 to run the search.
494
495 @ingroup svalgo
496 @ingroup setalgo
497*/
498template<typename SV>
500{
501public:
505 typedef typename SV::value_type value_type;
506 typedef typename SV::size_type size_type;
507
509 typedef typename allocator_type::allocator_pool_type allocator_pool_type;
510
512 typedef
513 bm::heap_vector<value_type, typename bvector_type::allocator_type, true>
515 typedef
516 bm::heap_vector<bvector_type*, allocator_type, true> bvect_vector_type;
517 typedef
519
520 /**
521 Scanner options to control execution
522 */
523 typedef
524 typename aggregator_type::run_options run_options;
525
526public:
527
528
529 /**
530 Pipeline to run multiple searches against a particular SV faster using cache optimizations.
531 USAGE:
532 - setup the pipeline (add searches)
533 - run all searches at once
534 - get the search results (in the order it was added)
535 */
536 template<class Opt = bm::agg_run_options<> >
538 {
539 public:
540 typedef
541 typename aggregator_type::template pipeline<Opt> aggregator_pipeline_type;
542 public:
543 pipeline(const SV& sv)
544 : sv_(sv), bv_and_mask_(0),
545 eff_slices_(sv_.get_bmatrix().calc_effective_rows_not_null())
546 {}
547
548 /// Set pipeline run options
550 { return agg_pipe_.options(); }
551
552 /// Get pipeline run options
554 { return agg_pipe_.options(); }
555
556 /** Set pipeline mask bit-vector to restrict the search space
557 @param bv_mask - pointer to bit-vector restricting search to vector indexes marked
558 as 1s. Pointer ownership is not transferred, NULL value resets the mask.
559 bv_mask defines the mask for all added searches.
560
561 */
562 void set_search_mask(const bvector_type* bv_mask) BMNOEXCEPT;
563
564 /**
565 Set search limit for results. Requires that bit-counting to be enabled in the template parameters.
566 Warning: search limit is approximate (for performance reasons) so it can occasinally find more
567 than requested. It cannot find less. Search limit works for individual results (not ORed aggregate)
568 @param limit - search limit (target population count to search for)
569 */
571 { agg_pipe_.set_search_count_limit(limit); }
572
573 /**
574 Attach OR (aggregator vector).
575 Pipeline results all will be OR-ed (UNION) into the OR target vector
576 @param bv_or - OR target vector
577 */
579 { agg_pipe_.set_or_target(bv_or); }
580
581 /*! @name pipeline fill-in methods */
582 //@{
583
584 /**
585 Add new search string
586 @param str - zero terminated string pointer to configure a search for
587 */
588 void add(const typename SV::value_type* str);
589
590 /** Prepare pipeline for the execution (resize and init internal structures)
591 Once complete, you cannot add() to it.
592 */
593 void complete() { agg_pipe_.complete(); }
594
596 { return agg_pipe_.is_complete(); }
597
598 size_type size() const BMNOEXCEPT { return agg_pipe_.size(); }
599
600 //@}
601
602 /** Return results vector used for pipeline execution */
604 { return agg_pipe_.get_bv_res_vector(); }
605 /** Return results vector count used for pipeline execution */
606
608 { return agg_pipe_.get_bv_count_vector(); }
609
610
611 /**
612 get aggregator pipeline (access to compute internals)
613 @internal
614 */
616 { return agg_pipe_; }
617
618 protected:
620
621 pipeline(const pipeline&) = delete;
622 pipeline& operator=(const pipeline&) = delete;
623 protected:
624 const SV& sv_; ///< target sparse vector ref
625 aggregator_pipeline_type agg_pipe_; ///< traget aggregator pipeline
628 size_t eff_slices_; ///< number of effectice NOT NULL value slices
629 };
630
631public:
633
634 /**
635 \brief bind sparse vector for all searches
636
637 \param sv - input sparse vector to bind for searches
638 \param sorted - source index is sorted, build index for binary search
639 */
640 void bind(const SV& sv, bool sorted);
641
642 /**
643 \brief reset sparse vector binding
644 */
646
647
648 /*! @name Find in scalar vector */
649 //@{
650
651 /**
652 \brief find all sparse vector elements EQ to search value
653
654 Find all sparse vector elements equivalent to specified value
655
656 \param sv - input sparse vector
657 \param value - value to search for
658 \param bv_out - search result bit-vector (search result is a vector of 1s when sv[i] == value)
659 */
660 void find_eq(const SV& sv, value_type value, bvector_type& bv_out);
661
662
663 /**
664 \brief find all sparse vector elements EQ to search value
665
666 Find all sparse vector elements equivalent to specified value
667
668 \param sv - input sparse vector
669 \param value - value to search for
670 \param bi - back insert iterator for the search results
671 */
672 template<typename BII>
673 void find_eq(const SV& sv, value_type value, BII bi);
674
675
676 /**
677 \brief find first sparse vector element
678
679 Find all sparse vector elements equivalent to specified value.
680 Works well if sperse vector represents unordered set
681
682 \param sv - input sparse vector
683 \param value - value to search for
684 \param pos - output found sparse vector element index
685
686 \return true if found
687 */
688 bool find_eq(const SV& sv, value_type value, size_type& pos);
689
690 /**
691 \brief binary search for position in the sorted sparse vector
692
693 Method assumes the sparse array is sorted, if value is found pos returns its index,
694 if not found, pos would contain index where it could be inserted to maintain the order
695
696 \param sv - input sparse vector
697 \param val - value to search for
698 \param pos - output sparse vector element index (actual index if found or insertion
699 point if not found)
700
701 \return true if value found
702 */
703 bool bfind(const SV& sv, const value_type val, size_type& pos);
704
705 /**
706 \brief find all elements sparse vector element greater (>) than value
707
708 \param sv - input sparse vector
709 \param value - value to search for
710 \param bv_out - search result bit-vector (search result is a vector of 1s when sv[i] > value)
711 */
712 void find_gt(const SV& sv, value_type val, bvector_type& bv_out);
713
714 /**
715 \brief find all elements sparse vector element greater or equal (>=) than value
716
717 \param sv - input sparse vector
718 \param value - value to search for
719 \param bv_out - search result bit-vector (search result is a vector of 1s when sv[i] >= value)
720 */
721 void find_ge(const SV& sv, value_type val, bvector_type& bv_out);
722
723
724 /**
725 \brief find all elements sparse vector element less (<) than value
726
727 \param sv - input sparse vector
728 \param value - value to search for
729 \param bv_out - search result bit-vector (search result is a vector of 1s when sv[i] < value)
730 */
731 void find_lt(const SV& sv, value_type val, bvector_type& bv_out);
732
733 /**
734 \brief find all elements sparse vector element less or equal (<=) than value
735
736 \param sv - input sparse vector
737 \param value - value to search for
738 \param bv_out - search result bit-vector (search result is a vector of 1s when sv[i] <= value)
739 */
740 void find_le(const SV& sv, value_type val, bvector_type& bv_out);
741
742
743 /**
744 \brief find all elements sparse vector element in closed range [left..right] interval
745
746 \param sv - input sparse vector
747 \param from - range from
748 \param to - range to
749 \param bv_out - search result bit-vector (search result is a vector of 1s when sv[i] <= value)
750 */
751 void find_range(const SV& sv,
752 value_type from, value_type to,
753 bvector_type& bv_out);
754
755
756 //@}
757
758
759 // --------------------------------------------------
760 //
761 /*! @name Find in bit-transposed string vector */
762 //@{
763
764
765 /**
766 \brief find sparse vector elements (string)
767 @param sv - sparse vector of strings to search
768 @param str - string to search for
769 @param bv_out - search result bit-vector (search result masks 1 elements)
770 */
771 bool find_eq_str(const SV& sv, const value_type* str, bvector_type& bv_out);
772
773 /**
774 \brief find sparse vector elementa (string) in the attached SV
775 @param str - string to search for
776 @param bv_out - search result bit-vector (search result masks 1 elements)
777 */
778 bool find_eq_str(const value_type* str, bvector_type& bv_out);
779
780 /**
781 \brief find first sparse vector element (string)
782 @param sv - sparse vector of strings to search
783 @param str - string to search for
784 @param pos - [out] index of the first found
785 */
786 bool find_eq_str(const SV& sv, const value_type* str, size_type& pos);
787
788 /**
789 \brief binary find first sparse vector element (string)
790 Sparse vector must be attached (bind())
791 @param str - string to search for
792 @param pos - [out] index of the first found
793 @sa bind
794 */
795 bool find_eq_str(const value_type* str, size_type& pos);
796
797 /**
798 \brief find sparse vector elements with a given prefix (string)
799 @param sv - sparse vector of strings to search
800 @param str - string prefix to search for
801 "123" is a prefix for "1234567"
802 @param bv_out - search result bit-vector (search result masks 1 elements)
803 */
804 bool find_eq_str_prefix(const SV& sv, const value_type* str,
805 bvector_type& bv_out);
806
807 /**
808 \brief find sparse vector elements using search pipeline
809 @param pipe - pipeline to run
810 */
811 template<class TPipe>
812 void find_eq_str(TPipe& pipe);
813
814 /**
815 \brief binary find first sparse vector element (string)
816 Sparse vector must be sorted.
817 */
818 bool bfind_eq_str(const SV& sv,
819 const value_type* str, size_type& pos);
820
821 /**
822 \brief lower bound search for an array position
823
824 Method assumes the sparse array is sorted
825
826 \param sv - input sparse vector
827 \param str - value to search for
828 \param pos - output sparse vector element index
829
830 \return true if value found
831 */
832 bool lower_bound_str(const SV& sv,
833 const value_type* str,
834 size_type& pos);
835
836 /**
837 \brief binary find first sparse vector element (string)
838 Sparse vector must be sorted and attached
839 @sa bind
840 */
841 bool bfind_eq_str(const value_type* str, size_type& pos);
842
843 //@}
844
845 /**
846 \brief find all sparse vector elements EQ to 0
847 \param sv - input sparse vector
848 \param bv_out - output bit-vector (search result masks 1 elements)
849 \param null_correct - flag to perform NULL correction
850 */
851 void find_zero(const SV& sv, bvector_type& bv_out, bool null_correct = true);
852
853 /*!
854 \brief Find non-zero elements
855 Output vector is computed as a logical OR (join) of all planes
856
857 \param sv - input sparse vector
858 \param bv_out - output bit-bector of non-zero elements
859 */
860 void find_nonzero(const SV& sv, bvector_type& bv_out);
861
862 /*!
863 \brief Find positive (greter than zero elements)
864 Output vector is computed as a logical OR (join) of all planes
865
866 \param sv - input sparse vector
867 \param bv_out - output bit-bector of non-zero elements
868 */
869 void find_positive(const SV& sv, bvector_type& bv_out);
870
871
872 /**
873 \brief invert search result ("EQ" to "not EQ")
874
875 \param sv - input sparse vector
876 \param bv_out - output bit-bector of non-zero elements
877 */
878 void invert(const SV& sv, bvector_type& bv_out);
879
880 /**
881 \brief find all values A IN (C, D, E, F)
882 \param sv - input sparse vector
883 \param start - start iterator (set to search)
884 \param end - end iterator (set to search)
885 \param bv_out - output bit-bector of non-zero elements
886 */
887 template<typename It>
888 void find_eq(const SV& sv,
889 It start,
890 It end,
891 bvector_type& bv_out)
892 {
893 typename bvector_type::mem_pool_guard mp_guard;
894 mp_guard.assign_if_not_set(pool_, bv_out); // set algorithm-local memory pool to avoid heap contention
895
896 bvector_type bv1;
897 typename bvector_type::mem_pool_guard mp_guard1(pool_, bv1);
898 bool any_zero = false;
899 for (; start < end; ++start)
900 {
901 value_type v = *start;
902 any_zero |= (v == 0);
903 bool found = find_eq_with_nulls(sv, v, bv1);
904 if (found)
905 bv_out.bit_or(bv1);
906 else
907 {
908 BM_ASSERT(!bv1.any());
909 }
910 } // for
911 if (any_zero)
912 correct_nulls(sv, bv_out);
913 }
914
915
916 /// For testing purposes only
917 ///
918 /// @internal
919 void find_eq_with_nulls_horizontal(const SV& sv, value_type value,
920 bvector_type& bv_out);
921
922 /// For testing purposes only
923 ///
924 /// @internal
925 void find_gt_horizontal(const SV& sv,
926 value_type value,
927 bvector_type& bv_out,
928 bool null_correct = true);
929
930
931 /** Exclude possible NULL values from the result vector
932 \param sv - input sparse vector
933 \param bv_out - output bit-bector of non-zero elements
934 */
935 void correct_nulls(const SV& sv, bvector_type& bv_out);
936
937 /// Return allocator pool for blocks
938 /// (Can be used to improve performance of repeated searches with the same scanner)
939 ///
941
942protected:
943
944 /// Remap input value into SV char encodings
945 static
946 bool remap_tosv(remap_vector_type& remap_vect_target,
947 const value_type* str,
948 const SV& sv);
949
950 /// set search boundaries (hint for the aggregator)
951 void set_search_range(size_type from, size_type to);
952
953 /// reset (disable) search range
954 void reset_search_range();
955
956 /// find value (may include NULL indexes)
957 bool find_eq_with_nulls(const SV& sv,
958 value_type value,
959 bvector_type& bv_out,
960 size_type search_limit = 0);
961
962 /// find first value (may include NULL indexes)
963 bool find_first_eq(const SV& sv,
964 value_type value,
965 size_type& idx);
966
967 /// find first string value (may include NULL indexes)
968 bool find_first_eq(const SV& sv,
969 const value_type* str,
970 size_type& idx,
971 bool remaped);
972
973 /// find EQ str / prefix impl
974 bool find_eq_str_impl(const SV& sv,
975 const value_type* str,
976 bvector_type& bv_out,
977 bool prefix_sub);
978
979 /// Prepare aggregator for AND-SUB (EQ) search
980 bool prepare_and_sub_aggregator(const SV& sv, value_type value);
981
982 /// Prepare aggregator for AND-SUB (EQ) search (string)
983 bool prepare_and_sub_aggregator(const SV& sv,
984 const value_type* str,
985 unsigned octet_start,
986 bool prefix_sub);
987
988 /// Rank-Select decompression for RSC vectors
989 void decompress(const SV& sv, bvector_type& bv_out);
990
991 /// compare sv[idx] with input str
992 int compare_str(const SV& sv, size_type idx, const value_type* str);
993
994 /// compare sv[idx] with input value
995 int compare(const SV& sv, size_type idx, const value_type val) BMNOEXCEPT;
996
997 /// @internal
998 void aggregate_OR_slices(bvector_type& bv_target, const SV& sv,
999 unsigned from, unsigned total_planes);
1000
1001 /// @internal
1002 static
1003 void aggregate_AND_OR_slices(bvector_type& bv_target,
1004 const bvector_type& bv_mask,
1005 const SV& sv,
1006 unsigned from, unsigned total_planes);
1007
1008 /// Return the slice limit for signed/unsigned vector value types
1009 /// @internal
1010 static constexpr int gt_slice_limit() noexcept
1011 {
1012 if constexpr (std::is_signed<value_type>::value)
1013 return 1; // last plane
1014 else
1015 return 0;
1016 }
1017
1018
1019protected:
1021 void operator=(const sparse_vector_scanner&) = delete;
1022
1023protected:
1025 {
1027 linear_cutoff2 = 128
1029
1030 typedef bm::dynamic_heap_matrix<value_type, allocator_type> heap_matrix_type;
1031 typedef bm::dynamic_heap_matrix<value_type, allocator_type> matrix_search_buf_type;
1032
1033 typedef
1034 bm::heap_vector<bm::id64_t, typename bvector_type::allocator_type, true>
1036
1037protected:
1038
1039 /// Addd char to aggregator (AND-SUB)
1040 template<typename AGG>
1041 static
1042 void add_agg_char(AGG& agg,
1043 const SV& sv,
1044 int octet_idx, bm::id64_t planes_mask,
1045 unsigned value)
1046 {
1047 unsigned char bits[64];
1048 unsigned short bit_count_v = bm::bitscan(value, bits);
1049 for (unsigned i = 0; i < bit_count_v; ++i)
1050 {
1051 unsigned bit_idx = bits[i];
1052 BM_ASSERT(value & (value_type(1) << bit_idx));
1053 unsigned plane_idx = (unsigned(octet_idx) * 8) + bit_idx;
1054 const bvector_type* bv = sv.get_slice(plane_idx);
1055 BM_ASSERT(bv != sv.get_null_bvector());
1056 agg.add(bv, 0); // add to the AND group
1057 } // for i
1058 unsigned vinv = unsigned(value);
1059 if constexpr (sizeof(value_type) == 1)
1060 vinv ^= 0xFF;
1061 else // 2-byte char
1062 {
1063 BM_ASSERT(sizeof(value_type) == 2);
1064 vinv ^= 0xFFFF;
1065 }
1066 // exclude the octet bits which are all not set (no vectors)
1067 vinv &= unsigned(planes_mask);
1068 for (unsigned octet_plane = (unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1069 {
1070 bm::id_t t = vinv & -vinv;
1071 unsigned bit_idx = bm::word_bitcount(t - 1);
1072 unsigned plane_idx = octet_plane + bit_idx;
1073 const bvector_type* bv = sv.get_slice(plane_idx);
1074 BM_ASSERT(bv);
1075 BM_ASSERT(bv != sv.get_null_bvector());
1076 agg.add(bv, 1); // add to SUB group
1077 } // for
1078 }
1079
1080private:
1081 allocator_pool_type pool_;
1082 bvector_type bv_tmp_;
1083 aggregator_type agg_;
1085
1086 size_type mask_from_;
1087 size_type mask_to_;
1088 bool mask_set_;
1089
1090 const SV* bound_sv_;
1091 heap_matrix_type block0_elements_cache_; ///< cache for elements[0] of each block
1092 heap_matrix_type block3_elements_cache_; ///< cache for elements[16384x] of each block
1093 size_type effective_str_max_;
1094
1095 remap_vector_type remap_value_vect_; ///< remap buffer
1096 /// masks of allocated bit-planes (1 - means there is a bit-plane)
1097 mask_vector_type vector_plane_masks_;
1098 matrix_search_buf_type hmatr_; ///< heap matrix for string search linear stage
1099};
1100
1101
1102/*!
1103 \brief Integer set to set transformation (functional image in groups theory)
1104 https://en.wikipedia.org/wiki/Image_(mathematics)
1105
1106 Input sets gets translated through the function, which is defined as
1107 "one to one (or NULL)" binary relation object (sparse_vector).
1108 Also works for M:1 relationships.
1109
1110 \ingroup svalgo
1111 \ingroup setalgo
1112*/
1113template<typename SV>
1115{
1116public:
1118 typedef typename SV::value_type value_type;
1119 typedef typename SV::size_type size_type;
1121public:
1122
1125
1126
1127 /** Get read access to zero-elements vector
1128 Zero vector gets populated after attach_sv() is called
1129 or as a side-effect of remap() with immediate sv param
1130 */
1131 const bvector_type& get_bv_zero() const { return bv_zero_; }
1132
1133 /** Perform remapping (Image function) (based on attached translation table)
1134
1135 \param bv_in - input set, defined as a bit-vector
1136 \param bv_out - output set as a bit-vector
1137
1138 @sa attach_sv
1139 */
1140 void remap(const bvector_type& bv_in,
1141 bvector_type& bv_out);
1142
1143 /** Perform remapping (Image function)
1144
1145 \param bv_in - input set, defined as a bit-vector
1146 \param sv_brel - binary relation (translation table) sparse vector
1147 \param bv_out - output set as a bit-vector
1148 */
1149 void remap(const bvector_type& bv_in,
1150 const SV& sv_brel,
1151 bvector_type& bv_out);
1152
1153 /** Remap single set element
1154
1155 \param id_from - input value
1156 \param sv_brel - translation table sparse vector
1157 \param id_to - out value
1158
1159 \return - true if value was found and remapped
1160 */
1161 bool remap(size_type id_from, const SV& sv_brel, size_type& id_to);
1162
1163
1164 /** Run remap transformation
1165
1166 \param bv_in - input set, defined as a bit-vector
1167 \param sv_brel - translation table sparse vector
1168 \param bv_out - output set as a bit-vector
1169
1170 @sa remap
1171 */
1172 void run(const bvector_type& bv_in,
1173 const SV& sv_brel,
1174 bvector_type& bv_out)
1175 {
1176 remap(bv_in, sv_brel, bv_out);
1177 }
1178
1179 /** Attach a translation table vector for remapping (Image function)
1180
1181 \param sv_brel - binary relation sparse vector pointer
1182 (pass NULL to detach)
1183 \param compute_stats - flag to perform computation of some statistics
1184 later used in remapping. This only make sense
1185 for series of remappings on the same translation
1186 vector.
1187 @sa remap
1188 */
1189 void attach_sv(const SV* sv_brel, bool compute_stats = false);
1190
1191
1192protected:
1193 void one_pass_run(const bvector_type& bv_in,
1194 const SV& sv_brel,
1195 bvector_type& bv_out);
1196
1197 /// @internal
1198 template<unsigned BSIZE>
1200 {
1203 };
1204
1206 {
1207 sv_g_size = 1024 * 8
1210
1211
1212protected:
1214 void operator=(const set2set_11_transform&) = delete;
1215
1216protected:
1217 const SV* sv_ptr_; ///< current translation table vector
1218 gather_buffer_type* gb_; ///< intermediate buffers
1219 bvector_type bv_product_;///< temp vector
1220
1221 bool have_stats_; ///< flag of statistics presense
1222 bvector_type bv_zero_; ///< bit-vector for zero elements
1223
1225};
1226
1227
1228
1229//----------------------------------------------------------------------------
1230//
1231//----------------------------------------------------------------------------
1232
1233template<typename SV>
1235: sv_ptr_(0), gb_(0), have_stats_(false)
1236{
1237 // set2set_11_transform for signed int is not implemented yet
1238 static_assert(std::is_unsigned<value_type>::value,
1239 "BM: unsigned sparse vector is required for transform");
1240 gb_ = (gather_buffer_type*)::malloc(sizeof(gather_buffer_type));
1241 if (!gb_)
1242 {
1243 SV::throw_bad_alloc();
1244 }
1245}
1246
1247//----------------------------------------------------------------------------
1248
1249template<typename SV>
1251{
1252 if (gb_)
1253 ::free(gb_);
1254}
1255
1256
1257//----------------------------------------------------------------------------
1258
1259template<typename SV>
1260void set2set_11_transform<SV>::attach_sv(const SV* sv_brel, bool compute_stats)
1261{
1262 sv_ptr_ = sv_brel;
1263 if (!sv_ptr_)
1264 {
1265 have_stats_ = false;
1266 }
1267 else
1268 {
1269 if (sv_brel->empty() || !compute_stats)
1270 return; // nothing to do
1271 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
1272 if (bv_non_null)
1273 return; // already have 0 elements vector
1274
1275
1276 typename bvector_type::mem_pool_guard mp_g_z;
1277 mp_g_z.assign_if_not_set(pool_, bv_zero_);
1278
1280 scanner.find_zero(*sv_brel, bv_zero_);
1281 have_stats_ = true;
1282 }
1283}
1284
1285//----------------------------------------------------------------------------
1286
1287template<typename SV>
1289 const SV& sv_brel,
1290 size_type& id_to)
1291{
1292 if (sv_brel.empty())
1293 return false; // nothing to do
1294
1295 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
1296 if (bv_non_null)
1297 {
1298 if (!bv_non_null->test(id_from))
1299 return false;
1300 }
1301 size_type idx = sv_brel.translate_address(id_from);
1302 id_to = sv_brel.get(idx);
1303 return true;
1304}
1305
1306//----------------------------------------------------------------------------
1307
1308template<typename SV>
1310 const SV& sv_brel,
1311 bvector_type& bv_out)
1312{
1313 typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
1314 mp_g_out.assign_if_not_set(pool_, bv_out);
1315 mp_g_p.assign_if_not_set(pool_, bv_product_);
1316 mp_g_z.assign_if_not_set(pool_, bv_zero_);
1317
1318 attach_sv(&sv_brel);
1319
1320 remap(bv_in, bv_out);
1321
1322 attach_sv(0); // detach translation table
1323}
1324
1325template<typename SV>
1327 bvector_type& bv_out)
1328{
1329 BM_ASSERT(sv_ptr_);
1330
1331 bv_out.clear();
1332
1333 if (sv_ptr_ == 0 || sv_ptr_->empty())
1334 return; // nothing to do
1335
1336 bv_out.init(); // just in case to "fast set" later
1337
1338 typename bvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
1339 mp_g_out.assign_if_not_set(pool_, bv_out);
1340 mp_g_p.assign_if_not_set(pool_, bv_product_);
1341 mp_g_z.assign_if_not_set(pool_, bv_zero_);
1342
1343
1344 const bvector_type* enum_bv;
1345
1346 const bvector_type * bv_non_null = sv_ptr_->get_null_bvector();
1347 if (bv_non_null)
1348 {
1349 // TODO: optimize with 2-way ops
1350 bv_product_ = bv_in;
1351 bv_product_.bit_and(*bv_non_null);
1352 enum_bv = &bv_product_;
1353 }
1354 else // non-NULL vector is not available
1355 {
1356 enum_bv = &bv_in;
1357 // if we have any elements mapping into "0" on the other end
1358 // we map it once (chances are there are many duplicates)
1359 //
1360
1361 if (have_stats_) // pre-attached translation statistics
1362 {
1363 bv_product_ = bv_in;
1364 size_type cnt1 = bv_product_.count();
1365 bv_product_.bit_sub(bv_zero_);
1366 size_type cnt2 = bv_product_.count();
1367
1368 BM_ASSERT(cnt2 <= cnt1);
1369
1370 if (cnt1 != cnt2) // mapping included 0 elements
1371 bv_out.set_bit_no_check(0);
1372
1373 enum_bv = &bv_product_;
1374 }
1375 }
1376
1377
1378
1379 size_type buf_cnt, nb_old, nb;
1380 buf_cnt = nb_old = 0;
1381
1382 typename bvector_type::enumerator en(enum_bv->first());
1383 for (; en.valid(); ++en)
1384 {
1385 typename SV::size_type idx = *en;
1386 idx = sv_ptr_->translate_address(idx);
1387
1388 nb = unsigned(idx >> bm::set_block_shift);
1389 if (nb != nb_old) // new blocks
1390 {
1391 if (buf_cnt)
1392 {
1393 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
1394 bv_out.set(&gb_->buffer_[0], buf_cnt, BM_SORTED);
1395 buf_cnt = 0;
1396 }
1397 nb_old = nb;
1398 gb_->gather_idx_[buf_cnt++] = idx;
1399 }
1400 else
1401 {
1402 gb_->gather_idx_[buf_cnt++] = idx;
1403 }
1404
1405 if (buf_cnt == sv_g_size)
1406 {
1407 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
1408 bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
1409 buf_cnt = 0;
1410 }
1411 } // for en
1412 if (buf_cnt)
1413 {
1414 sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt, BM_SORTED_UNIFORM);
1415 bv_out.set(&gb_->buffer_[0], buf_cnt, bm::BM_SORTED);
1416 }
1417}
1418
1419
1420//----------------------------------------------------------------------------
1421
1422template<typename SV>
1424 const SV& sv_brel,
1425 bvector_type& bv_out)
1426{
1427 if (sv_brel.empty())
1428 return; // nothing to do
1429
1430 bv_out.init();
1431
1432 typename SV::const_iterator it = sv_brel.begin();
1433 for (; it.valid(); ++it)
1434 {
1435 typename SV::value_type t_id = *it;
1436 size_type idx = it.pos();
1437 if (bv_in.test(idx))
1438 {
1439 bv_out.set_bit_no_check(t_id);
1440 }
1441 } // for
1442}
1443
1444
1445//----------------------------------------------------------------------------
1446//
1447//----------------------------------------------------------------------------
1448
1449template<typename SV>
1451{
1452 mask_set_ = false;
1453 mask_from_ = mask_to_ = bm::id_max;
1454
1455 bound_sv_ = 0;
1456 effective_str_max_ = 0;
1457}
1458
1459//----------------------------------------------------------------------------
1460
1461template<typename SV>
1462void sparse_vector_scanner<SV>::bind(const SV& sv, bool sorted)
1463{
1464 (void)sorted; // MSVC warning over if constexpr variable "not-referenced"
1465 bound_sv_ = &sv;
1466
1467 if constexpr (SV::is_str()) // bindings for the string sparse vector
1468 {
1469 effective_str_max_ = sv.effective_vector_max();
1470 if (sorted)
1471 {
1472 size_type sv_sz = sv.size();
1473 BM_ASSERT(sv_sz);
1474 size_type total_nb = sv_sz / bm::gap_max_bits + 1;
1475
1476 block0_elements_cache_.resize(total_nb, effective_str_max_+1);
1477 block0_elements_cache_.set_zero();
1478
1479 block3_elements_cache_.resize(total_nb * 3, effective_str_max_+1);
1480 block3_elements_cache_.set_zero();
1481
1482 // fill in elements cache
1483 for (size_type i = 0; i < sv_sz; i+= bm::gap_max_bits)
1484 {
1485 size_type nb = (i >> bm::set_block_shift);
1486 value_type* s0 = block0_elements_cache_.row(nb);
1487 sv.get(i, s0, size_type(block0_elements_cache_.cols()));
1488
1489 for (size_type k = 0; k < 3; ++k)
1490 {
1491 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
1492 size_type idx = i + (k+1) * bm::sub_block3_size;
1493 sv.get(idx, s1, size_type(block3_elements_cache_.cols()));
1494 } // for k
1495 } // for i
1496 }
1497 // pre-calculate vector plane masks
1498 //
1499 vector_plane_masks_.resize(effective_str_max_);
1500 for (unsigned i = 0; i < effective_str_max_; ++i)
1501 {
1502 vector_plane_masks_[i] = sv.get_slice_mask(i);
1503 } // for i
1504 }
1505}
1506
1507//----------------------------------------------------------------------------
1508
1509template<typename SV>
1511{
1512 bound_sv_ = 0;
1513 effective_str_max_ = 0;
1514}
1515
1516//----------------------------------------------------------------------------
1517
1518template<typename SV>
1520 bvector_type& bv_out,
1521 bool null_correct)
1522{
1523 if (sv.size() == 0)
1524 {
1525 bv_out.clear();
1526 return;
1527 }
1528 find_nonzero(sv, bv_out);
1529 if constexpr (SV::is_compressed())
1530 {
1531 bv_out.invert();
1532 bv_out.set_range(sv.effective_size(), bm::id_max - 1, false);
1533 decompress(sv, bv_out);
1534 }
1535 else
1536 {
1537 invert(sv, bv_out);
1538 }
1539 if (null_correct)
1540 correct_nulls(sv, bv_out);
1541}
1542
1543//----------------------------------------------------------------------------
1544
1545template<typename SV>
1547{
1548 if (sv.size() == 0)
1549 {
1550 bv_out.clear();
1551 return;
1552 }
1553 // TODO: find a better algorithm (NAND?)
1554 auto old_sz = bv_out.size();
1555 bv_out.resize(sv.size());
1556 bv_out.invert();
1557 bv_out.resize(old_sz);
1558 correct_nulls(sv, bv_out);
1559}
1560
1561//----------------------------------------------------------------------------
1562
1563template<typename SV>
1565 bvector_type& bv_out)
1566{
1567 const bvector_type* bv_null = sv.get_null_bvector();
1568 if (bv_null) // correct result to only use not NULL elements
1569 bv_out.bit_and(*bv_null);
1570}
1571
1572//----------------------------------------------------------------------------
1573
1574template<typename SV>
1576 value_type value,
1577 bvector_type& bv_out,
1578 size_type search_limit)
1579{
1580 if (sv.empty())
1581 return false; // nothing to do
1582
1583 if (!value)
1584 {
1585 find_zero(sv, bv_out);
1586 return bv_out.any();
1587 }
1588 agg_.reset();
1589
1590 bool found = prepare_and_sub_aggregator(sv, value);
1591 if (!found)
1592 {
1593 bv_out.clear();
1594 return found;
1595 }
1596
1597 bool any = (search_limit == 1);
1598 found = agg_.combine_and_sub(bv_out, any);
1599 agg_.reset();
1600 return found;
1601}
1602
1603//----------------------------------------------------------------------------
1604
1605template<typename SV>
1607 value_type value,
1608 size_type& idx)
1609{
1610 if (sv.empty())
1611 return false; // nothing to do
1612
1613 BM_ASSERT(value); // cannot handle zero values
1614 if (!value)
1615 return false;
1616
1617 agg_.reset();
1618 bool found = prepare_and_sub_aggregator(sv, value);
1619 if (!found)
1620 return found;
1621 found = agg_.find_first_and_sub(idx);
1622 agg_.reset();
1623 return found;
1624}
1625
1626
1627//----------------------------------------------------------------------------
1628
1629template<typename SV>
1631 const SV& sv,
1632 const value_type* str,
1633 size_type& idx,
1634 bool remaped)
1635{
1636 if (sv.empty())
1637 return false; // nothing to do
1638 BM_ASSERT(*str);
1639
1640 if (!*str)
1641 return false;
1642
1643 agg_.reset();
1644 unsigned common_prefix_len = 0;
1645 if (mask_set_)
1646 {
1647 agg_.set_range_hint(mask_from_, mask_to_);
1648 common_prefix_len = sv.common_prefix_length(mask_from_, mask_to_);
1649 }
1650
1651 if (remaped)
1652 {
1653 str = remap_value_vect_.data();
1654 }
1655 else
1656 {
1657 if (sv.is_remap() && (str != remap_value_vect_.data()))
1658 {
1659 bool r = sv.remap_tosv(
1660 remap_value_vect_.data(), sv.effective_max_str(), str);
1661 if (!r)
1662 return r;
1663 str = remap_value_vect_.data();
1664 }
1665 }
1666
1667 bool found = prepare_and_sub_aggregator(sv, str, common_prefix_len, true);
1668 if (!found)
1669 return found;
1670
1671 found = agg_.find_first_and_sub(idx);
1672 agg_.reset();
1673 return found;
1674}
1675
1676//----------------------------------------------------------------------------
1677
1678template<typename SV>
1680 const value_type* str,
1681 unsigned octet_start,
1682 bool prefix_sub)
1683{
1684 int len = 0;
1685 for (; str[len] != 0; ++len)
1686 {}
1687 BM_ASSERT(len);
1688
1689 // use reverse order (faster for sorted arrays)
1690 for (int octet_idx = len-1; octet_idx >= 0; --octet_idx)
1691 {
1692 if (unsigned(octet_idx) < octet_start) // common prefix
1693 break;
1694
1695 unsigned value = unsigned(str[octet_idx]) & 0xFF;
1696 BM_ASSERT(value != 0);
1697
1698 bm::id64_t planes_mask;
1699 if (&sv == bound_sv_)
1700 planes_mask = vector_plane_masks_[unsigned(octet_idx)];
1701 else
1702 planes_mask = sv.get_slice_mask(unsigned(octet_idx));
1703
1704 if ((value & planes_mask) != value) // pre-screen for impossible values
1705 return false; // found non-existing plane
1706
1707 add_agg_char(agg_, sv, octet_idx, planes_mask, value); // AND-SUB aggregator
1708 } // for octet_idx
1709
1710 // add all vectors above string len to the SUB aggregation group
1711 //
1712 if (prefix_sub)
1713 {
1714 unsigned plane_idx = unsigned(len * 8);
1715 typename SV::size_type planes = sv.get_bmatrix().rows_not_null();
1716 for (; plane_idx < planes; ++plane_idx)
1717 {
1718 if (bvector_type_const_ptr bv = sv.get_slice(plane_idx))
1719 agg_.add(bv, 1); // agg to SUB group
1720 } // for
1721 }
1722 return true;
1723}
1724
1725//----------------------------------------------------------------------------
1726
1727template<typename SV>
1729 value_type value)
1730{
1731 using unsigned_value_type = typename SV::unsigned_value_type;
1732
1733 agg_.reset();
1734
1735 unsigned char bits[sizeof(value) * 8];
1736 unsigned_value_type uv = sv.s2u(value);
1737
1738 unsigned short bit_count_v = bm::bitscan(uv, bits);
1739 BM_ASSERT(bit_count_v);
1740 const unsigned_value_type mask1 = 1;
1741
1742 // prep the lists for combined AND-SUB aggregator
1743 // (backward order has better chance for bit reduction on AND)
1744 //
1745 for (unsigned i = bit_count_v; i > 0; --i)
1746 {
1747 unsigned bit_idx = bits[i-1];
1748 BM_ASSERT(uv & (mask1 << bit_idx));
1749 if (const bvector_type* bv = sv.get_slice(bit_idx))
1750 agg_.add(bv);
1751 else
1752 return false;
1753 }
1754 unsigned sv_planes = sv.effective_slices();
1755 for (unsigned i = 0; i < sv_planes; ++i)
1756 {
1757 unsigned_value_type mask = mask1 << i;
1758 if (bvector_type_const_ptr bv = sv.get_slice(i); bv && !(uv & mask))
1759 agg_.add(bv, 1); // agg to SUB group
1760 } // for i
1761 return true;
1762}
1763
1764
1765//----------------------------------------------------------------------------
1766
1767template<typename SV>
1769 const SV& sv,
1770 value_type value,
1771 bvector_type& bv_out)
1772{
1773 bv_out.clear();
1774 if (sv.empty())
1775 return; // nothing to do
1776
1777 if (!value)
1778 {
1779 find_zero(sv, bv_out);
1780 return;
1781 }
1782
1783 unsigned char bits[sizeof(value) * 8];
1784 typename SV::unsigned_value_type uv = sv.s2u(value);
1785 unsigned short bit_count_v = bm::bitscan(uv, bits);
1786 BM_ASSERT(bit_count_v);
1787
1788 // aggregate AND all matching vectors
1789 if (const bvector_type* bv_plane = sv.get_slice(bits[--bit_count_v]))
1790 bv_out = *bv_plane;
1791 else // plane not found
1792 {
1793 bv_out.clear();
1794 return;
1795 }
1796 for (unsigned i = 0; i < bit_count_v; ++i)
1797 {
1798 const bvector_type* bv_plane = sv.get_slice(bits[i]);
1799 if (bv_plane)
1800 bv_out &= *bv_plane;
1801 else // mandatory plane not found - empty result!
1802 {
1803 bv_out.clear();
1804 return;
1805 }
1806 } // for i
1807
1808 // SUB all other planes
1809 unsigned sv_planes = sv.effective_slices();
1810 for (unsigned i = 0; (i < sv_planes) && uv; ++i)
1811 {
1812 if (const bvector_type* bv = sv.get_slice(i); bv && !(uv & (1u << i)))
1813 bv_out -= *bv;
1814 } // for i
1815}
1816
1817//----------------------------------------------------------------------------
1818
1819template<typename SV>
1821 value_type val,
1822 bvector_type& bv_out)
1823{
1824 // this is not very optimal but should be good enough
1825 find_gt_horizontal(sv, val, bv_out, true /*NULL correction */);
1826}
1827
1828//----------------------------------------------------------------------------
1829
1830template<typename SV>
1832 value_type val,
1833 bvector_type& bv_out)
1834{
1835 if constexpr (std::is_signed<value_type>::value)
1836 {
1837 if (val == std::numeric_limits<int>::min())
1838 {
1839 bvector_type bv_min;
1840 find_eq(sv, val, bv_min);
1841 find_gt_horizontal(sv, val, bv_out, true /*NULL correction */);
1842 bv_out.merge(bv_min);
1843 }
1844 else
1845 {
1846 --val;
1847 find_gt_horizontal(sv, val, bv_out, true /*NULL correction */);
1848 }
1849 }
1850 else // unsigned
1851 {
1852 if (val)
1853 {
1854 --val;
1855 find_gt_horizontal(sv, val, bv_out, true /*NULL correction */);
1856 }
1857 else // val == 0
1858 {
1859 // result set is ALL elements minus possible NULL values
1860 bv_out.set_range(0, sv.size()-1);
1861 correct_nulls(sv, bv_out);
1862 }
1863 }
1864}
1865
1866//----------------------------------------------------------------------------
1867
1868template<typename SV>
1870 value_type val,
1871 bvector_type& bv_out)
1872{
1873 find_ge(sv, val, bv_out);
1874 invert(sv, bv_out);
1875}
1876
1877//----------------------------------------------------------------------------
1878
1879template<typename SV>
1881 value_type val,
1882 bvector_type& bv_out)
1883{
1884 find_gt(sv, val, bv_out);
1885 invert(sv, bv_out);
1886}
1887
1888//----------------------------------------------------------------------------
1889
1890template<typename SV>
1892 value_type from, value_type to,
1893 bvector_type& bv_out)
1894{
1895 if (to < from)
1896 bm::xor_swap(from, to);
1897
1898 find_ge(sv, from, bv_out);
1899
1900 bvector_type bv_gt;
1901 find_gt(sv, to, bv_gt);
1902 bv_out.bit_sub(bv_gt);
1903}
1904
1905//----------------------------------------------------------------------------
1906
1907
1908template<typename SV>
1910 value_type value,
1911 bvector_type& bv_out,
1912 bool null_correct)
1913{
1914 unsigned char blist[64];
1915 unsigned bit_count_v;
1916
1917 if (sv.empty())
1918 {
1919 bv_out.clear();
1920 return; // nothing to do
1921 }
1922
1923 if (!value)
1924 {
1925 bvector_type bv_zero;
1926 find_zero(sv, bv_zero);
1927 auto sz = sv.size();
1928 bv_out.set_range(0, sz-1);
1929 bv_out.bit_sub(bv_zero); // all but zero
1930
1931 if constexpr (std::is_signed<value_type>::value)
1932 {
1933 if (const bvector_type* bv_sign = sv.get_slice(0)) // sign bvector
1934 bv_out.bit_sub(*bv_sign); // all but negatives
1935 }
1936 if (null_correct)
1937 correct_nulls(sv, bv_out);
1938 return;
1939 }
1940 bv_out.clear();
1941
1942 bvector_type gtz_bv; // all >= 0
1943
1944 if constexpr (std::is_signed<value_type>::value)
1945 {
1946 find_positive(sv, gtz_bv); // all positives are greater than all negs
1947 if (value == -1)
1948 {
1949 bv_out.swap(gtz_bv);
1950 if (null_correct)
1951 correct_nulls(sv, bv_out);
1952 return;
1953 }
1954 if (value == -2)
1955 {
1956 find_eq(sv, -1, bv_out);
1957 bv_out.bit_or(gtz_bv);
1958 if (null_correct)
1959 correct_nulls(sv, bv_out);
1960 return;
1961 }
1962
1963 auto uvalue = SV::s2u(value + bool(value < 0)); // turns it int LE expression
1964 uvalue &= ~1u; // drop the negative bit
1965 BM_ASSERT(uvalue);
1966 bit_count_v = bm::bitscan(uvalue, blist);
1967 }
1968 else
1969 bit_count_v = bm::bitscan(value, blist);
1970
1971 BM_ASSERT(bit_count_v);
1972
1973
1974 // aggregate all top bit-slices (surely greater)
1975 // TODO: use aggregator function
1976 bvector_type top_or_bv;
1977
1978 unsigned total_planes = sv.effective_slices();
1979 const bvector_type* bv_sign = sv.get_slice(0); (void)bv_sign;
1980
1981 agg_.reset();
1982 if constexpr (std::is_signed<value_type>::value)
1983 {
1984 if (value < 0)
1985 {
1986 if (!bv_sign) // no negatives at all
1987 {
1988 bv_out.swap(gtz_bv); // return all >= 0
1989 if (null_correct)
1990 correct_nulls(sv, bv_out);
1991 return;
1992 }
1993 aggregate_AND_OR_slices(top_or_bv, *bv_sign, sv, blist[bit_count_v-1]+1, total_planes);
1994 }
1995 else // value > 0
1996 {
1997 aggregate_OR_slices(top_or_bv, sv, blist[bit_count_v-1]+1, total_planes);
1998 }
1999 }
2000 else
2001 {
2002 if (total_planes < unsigned(blist[bit_count_v-1]+1))
2003 return; // number is greater than anything in this vector (empty set)
2004 aggregate_OR_slices(top_or_bv, sv, blist[bit_count_v-1]+1, total_planes);
2005 }
2006 agg_.reset();
2007
2008 bv_out.merge(top_or_bv);
2009
2010 // TODO: optimize FULL blocks
2011
2012 bvector_type and_eq_bv; // AND accum
2013 bool first = true; // flag for initial assignment
2014
2015 // GT search
2016 for (int i = int(bit_count_v)-1; i >= 0; --i)
2017 {
2018 int slice_idx = blist[i];
2019 if (slice_idx == gt_slice_limit()) // last plane
2020 break;
2021 const bvector_type* bv_base_plane = sv.get_slice(unsigned(slice_idx));
2022 if (!bv_base_plane)
2023 break;
2024 if (first)
2025 {
2026 first = false;
2027 if constexpr (std::is_signed<value_type>::value)
2028 {
2029 if (value < 0)
2030 and_eq_bv.bit_and(*bv_base_plane, *bv_sign); // use negatives for AND mask
2031 else // value > 0
2032 and_eq_bv.bit_and(*bv_base_plane, gtz_bv);
2033 }
2034 else // unsigned
2035 and_eq_bv = *bv_base_plane; // initial assignment
2036 }
2037 else
2038 and_eq_bv.bit_and(*bv_base_plane); // AND base to accumulator
2039
2040 int next_slice_idx = 0;
2041 if (i)
2042 {
2043 next_slice_idx = blist[i-1];
2044 if (slice_idx-1 == next_slice_idx)
2045 continue;
2046 ++next_slice_idx;
2047 }
2048
2049 // AND-OR the mid-planes
2050 //
2051 for (int j = slice_idx-1; j >= next_slice_idx; --j)
2052 {
2053 if constexpr (std::is_signed<value_type>::value)
2054 if (j == 0) // sign plane
2055 break; // do not process the sign plane at all
2056 if (const bvector_type* bv_sub_plane = sv.get_slice(unsigned(j)))
2057 bv_out.bit_or_and(and_eq_bv, *bv_sub_plane);
2058 } // for j
2059 } // for i
2060
2061 if constexpr (std::is_signed<value_type>::value)
2062 {
2063 if (value < 0)
2064 {
2065 // now we have negatives greter by abs value
2066 top_or_bv.set_range(0, sv.size()-1);
2067 top_or_bv.bit_sub(bv_out);
2068 bv_out.swap(top_or_bv);
2069 }
2070 else // value > 0
2071 {
2072 gtz_bv.resize(sv.size());
2073 gtz_bv.invert(); // now it is all < 0
2074 bv_out.bit_sub(gtz_bv); // exclude all negatives
2075 }
2076 }
2077
2078 decompress(sv, bv_out);
2079
2080 if (null_correct)
2081 {
2082 if constexpr (!std::is_signed<value_type>::value) // unsigned
2083 return; // NULL correction for positive values is not needed
2084 else // signed
2085 {
2086 if (value < 0)
2087 correct_nulls(sv, bv_out);
2088 }
2089 }
2090}
2091
2092//----------------------------------------------------------------------------
2093
2094template<typename SV>
2096 bvector_type& bv_target,
2097 const SV& sv,
2098 unsigned from, unsigned total_planes)
2099{
2100 for (unsigned i = from; i < total_planes; ++i)
2101 {
2102 if (const bvector_type* bv_slice = sv.get_slice(i))
2103 {
2104 BM_ASSERT(bv_slice != sv.get_null_bvector());
2105 agg_.add(bv_slice);
2106 }
2107 }
2108 agg_.combine_or(bv_target);
2109}
2110
2111//----------------------------------------------------------------------------
2112
2113template<typename SV>
2115 const bvector_type& bv_mask,
2116 const SV& sv,
2117 unsigned from, unsigned total_planes)
2118{
2119 for (unsigned i = from; i < total_planes; ++i)
2120 {
2121 if (const bvector_type* bv_slice = sv.get_slice(i))
2122 {
2123 BM_ASSERT(bv_slice != sv.get_null_bvector());
2124 bv_target.bit_or_and(bv_mask, *bv_slice);
2125 }
2126 }
2127}
2128
2129//----------------------------------------------------------------------------
2130
2131template<typename SV>
2133 const typename SV::value_type* str,
2134 typename SV::bvector_type& bv_out)
2135{
2136 return find_eq_str_impl(sv, str, bv_out, false);
2137}
2138
2139
2140//----------------------------------------------------------------------------
2141
2142template<typename SV>
2143bool sparse_vector_scanner<SV>::find_eq_str(const typename SV::value_type* str,
2144 typename SV::size_type& pos)
2145{
2146 BM_ASSERT(bound_sv_);
2147 return this->find_eq_str(*bound_sv_, str, pos);
2148}
2149
2150//----------------------------------------------------------------------------
2151
2152template<typename SV>
2154 const typename SV::value_type* str,
2155 typename SV::size_type& pos)
2156{
2157 bool found = false;
2158 if (sv.empty())
2159 return found;
2160 if (*str)
2161 {
2162 bool remaped = false;
2163 if constexpr (SV::is_remap_support::value) // test remapping trait
2164 {
2165 if (sv.is_remap() && str != remap_value_vect_.data())
2166 {
2167 auto str_len = sv.effective_vector_max();
2168 remap_value_vect_.resize(str_len);
2169 remaped = sv.remap_tosv(
2170 remap_value_vect_.data(), str_len, str);
2171 if (!remaped)
2172 return remaped;
2173 str = remap_value_vect_.data();
2174 }
2175 }
2176
2177 size_type found_pos;
2178 found = find_first_eq(sv, str, found_pos, remaped);
2179 if (found)
2180 {
2181 pos = found_pos;
2182 if constexpr (SV::is_rsc_support::value) // test rank/select trait
2183 {
2184 if constexpr (sv.is_compressed()) // if compressed vector - need rank translation
2185 found = sv.find_rank(found_pos + 1, pos);
2186 }
2187 }
2188 }
2189 else // search for zero(empty str) value
2190 {
2191 // TODO: implement optimized version which would work witout temp vector
2192 typename SV::bvector_type bv_zero;
2193 find_zero(sv, bv_zero);
2194 found = bv_zero.find(pos);
2195 }
2196 return found;
2197}
2198
2199//----------------------------------------------------------------------------
2200
2201template<typename SV>
2202bool sparse_vector_scanner<SV>::find_eq_str(const typename SV::value_type* str,
2203 typename SV::bvector_type& bv_out)
2204{
2205 BM_ASSERT(bound_sv_);
2206 return find_eq_str(*bound_sv_, str, bv_out);
2207}
2208
2209//----------------------------------------------------------------------------
2210
2211template<typename SV>
2213 const typename SV::value_type* str,
2214 typename SV::bvector_type& bv_out)
2215{
2216 return find_eq_str_impl(sv, str, bv_out, true);
2217}
2218
2219//----------------------------------------------------------------------------
2220
2221template<typename SV>
2223 remap_vector_type& remap_vect_target,
2224 const typename SV::value_type* str,
2225 const SV& sv)
2226{
2227 auto str_len = sv.effective_vector_max();
2228 remap_vect_target.resize(str_len);
2229 bool remaped = sv.remap_tosv(remap_vect_target.data(), str_len, str);
2230 return remaped;
2231}
2232
2233//----------------------------------------------------------------------------
2234
2235template<typename SV>
2237 const typename SV::value_type* str,
2238 typename SV::bvector_type& bv_out,
2239 bool prefix_sub)
2240{
2241 bool found = false;
2242 bv_out.clear(true);
2243 if (sv.empty())
2244 return false; // nothing to do
2245 if constexpr (SV::is_rsc_support::value) // test rank/select trait
2246 {
2247 // search in RS compressed string vectors is not implemented
2248 BM_ASSERT(0);
2249 }
2250
2251 if (*str)
2252 {
2253 if constexpr (SV::is_remap_support::value) // test remapping trait
2254 {
2255 if (sv.is_remap() && (str != remap_value_vect_.data()))
2256 {
2257 bool remaped = remap_tosv(remap_value_vect_, str, sv);
2258 if (!remaped)
2259 return false; // not found because
2260 str = remap_value_vect_.data();
2261 }
2262 }
2263 /// aggregator search
2264 agg_.reset();
2265
2266 const unsigned common_prefix_len = 0;
2267 found = prepare_and_sub_aggregator(sv, str, common_prefix_len,
2268 prefix_sub);
2269 if (!found)
2270 return found;
2271 found = agg_.combine_and_sub(bv_out);
2272
2273 agg_.reset();
2274 }
2275 else // search for zero(empty str) value
2276 {
2277 find_zero(sv, bv_out);
2278 found = bv_out.any();
2279 }
2280 return found;
2281
2282}
2283
2284//----------------------------------------------------------------------------
2285
2286template<typename SV> template<class TPipe>
2288{
2289 if (pipe.bv_and_mask_)
2290 {
2291 size_type first, last;
2292 bool any = pipe.bv_and_mask_->find_range(first, last);
2293 if (!any)
2294 return;
2295 agg_.set_range_hint(first, last);
2296 }
2297 agg_.combine_and_sub(pipe.agg_pipe_);
2298 agg_.reset_range_hint();
2299}
2300
2301//----------------------------------------------------------------------------
2302
2303template<typename SV>
2305 const SV& sv,
2306 const typename SV::value_type* str,
2307 typename SV::size_type& pos)
2308{
2309 bool found = false;
2310 if (sv.empty())
2311 return found;
2312
2313 if (*str)
2314 {
2315 bool remaped = false;
2316 // test search pre-condition based on remap tables
2317 if constexpr (SV::is_remap_support::value)
2318 {
2319 if (sv.is_remap() && (str != remap_value_vect_.data()))
2320 {
2321 auto str_len = sv.effective_vector_max();
2322 remap_value_vect_.resize(str_len);
2323 remaped = sv.remap_tosv(remap_value_vect_.data(), str_len, str);
2324 if (!remaped)
2325 return remaped;
2326 }
2327 }
2328
2329 reset_search_range();
2330
2331 // narrow down the search
2332 const unsigned min_distance_cutoff = bm::gap_max_bits + bm::gap_max_bits / 2;
2333 size_type l, r, dist;
2334 l = 0; r = sv.size()-1;
2335 size_type found_pos;
2336
2337 // binary search to narrow down the search window
2338 while (l <= r)
2339 {
2340 dist = r - l;
2341 if (dist < min_distance_cutoff)
2342 {
2343 // we are in an narrow <2 blocks window, but still may be in two
2344 // different neighboring blocks, lets try to narrow
2345 // to exactly one block
2346
2347 size_type nb_l = (l >> bm::set_block_shift);
2348 size_type nb_r = (r >> bm::set_block_shift);
2349 if (nb_l != nb_r)
2350 {
2351 size_type mid = nb_r * bm::gap_max_bits;
2352 if (mid < r)
2353 {
2354 int cmp = this->compare_str(sv, mid, str);
2355 if (cmp < 0) // mid < str
2356 l = mid;
2357 else
2358 r = mid-(cmp!=0); // branchless if (cmp==0) r=mid;
2359 BM_ASSERT(l < r);
2360 }
2361 nb_l = unsigned(l >> bm::set_block_shift);
2362 nb_r = unsigned(r >> bm::set_block_shift);
2363 }
2364
2365 if (nb_l == nb_r)
2366 {
2367 size_type max_nb = sv.size() >> bm::set_block_shift;
2368 if (nb_l != max_nb)
2369 {
2370 // linear in-place fixed depth scan to identify the sub-range
2371 size_type mid = nb_r * bm::gap_max_bits + bm::sub_block3_size;
2372 int cmp = this->compare_str(sv, mid, str);
2373 if (cmp < 0)
2374 {
2375 l = mid;
2376 mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 2;
2377 cmp = this->compare_str(sv, mid, str);
2378 if (cmp < 0)
2379 {
2380 l = mid;
2381 mid = nb_r * bm::gap_max_bits + bm::sub_block3_size * 3;
2382 cmp = this->compare_str(sv, mid, str);
2383 if (cmp < 0)
2384 l = mid;
2385 else
2386 r = mid;
2387 }
2388 else
2389 {
2390 r = mid;
2391 }
2392 }
2393 else
2394 {
2395 r = mid;
2396 }
2397 }
2398 }
2399
2400 set_search_range(l, r);
2401 break;
2402 }
2403
2404 typename SV::size_type mid = dist/2+l;
2405 size_type nb = (mid >> bm::set_block_shift);
2406 mid = nb * bm::gap_max_bits;
2407 if (mid <= l)
2408 {
2409 if (nb == 0 && r > bm::gap_max_bits)
2410 mid = bm::gap_max_bits;
2411 else
2412 mid = dist / 2 + l;
2413 }
2414 BM_ASSERT(mid > l);
2415
2416 int cmp = this->compare_str(sv, mid, str);
2417 if (cmp == 0)
2418 {
2419 found_pos = mid;
2420 found = true;
2421 set_search_range(l, mid);
2422 break;
2423 }
2424 if (cmp < 0)
2425 l = mid+1;
2426 else
2427 r = mid-1;
2428 } // while
2429
2430 // use linear search (range is set)
2431 found = find_first_eq(sv, str, found_pos, remaped);
2432 if (found)
2433 {
2434 pos = found_pos;
2435 if constexpr (SV::is_compressed()) // if compressed vector - need rank translation
2436 found = sv.find_rank(found_pos + 1, pos);
2437 }
2438 reset_search_range();
2439 }
2440 else // search for zero value
2441 {
2442 // TODO: implement optimized version which would work without temp vector
2443 typename SV::bvector_type bv_zero;
2444 find_zero(sv, bv_zero);
2445 found = bv_zero.find(pos);
2446 }
2447 return found;
2448}
2449
2450//----------------------------------------------------------------------------
2451
2452template<typename SV>
2453bool sparse_vector_scanner<SV>::bfind_eq_str(const typename SV::value_type* str,
2454 typename SV::size_type& pos)
2455{
2456 BM_ASSERT(bound_sv_);
2457 return bfind_eq_str(*bound_sv_, str, pos);
2458}
2459
2460//----------------------------------------------------------------------------
2461
2462template<typename SV>
2464 const typename SV::value_type val,
2465 typename SV::size_type& pos)
2466{
2467 int cmp;
2468 size_type l = 0, r = sv.size();
2469 if (l == r) // empty vector
2470 {
2471 pos = 0;
2472 return false;
2473 }
2474 --r;
2475
2476 // check initial boundary conditions if insert point is at tail/head
2477 cmp = this->compare(sv, l, val); // left (0) boundary check
2478 if (cmp > 0) // vect[x] > str
2479 {
2480 pos = 0;
2481 return false;
2482 }
2483 if (cmp == 0)
2484 {
2485 pos = 0;
2486 return true;
2487 }
2488 cmp = this->compare(sv, r, val); // right(size-1) boundary check
2489 if (cmp == 0)
2490 {
2491 pos = r;
2492 // back-scan to rewind all duplicates
2493 // TODO: adapt one-sided binary search to traverse large platos
2494 for (; r >= 0; --r)
2495 {
2496 cmp = this->compare(sv, r, val);
2497 if (cmp != 0)
2498 return true;
2499 pos = r;
2500 } // for i
2501 return true;
2502 }
2503 if (cmp < 0) // vect[x] < str
2504 {
2505 pos = r+1;
2506 return false;
2507 }
2508
2509 size_type dist = r - l;
2510 if (dist < linear_cutoff1)
2511 {
2512 for (; l <= r; ++l)
2513 {
2514 cmp = this->compare(sv, l, val);
2515 if (cmp == 0)
2516 {
2517 pos = l;
2518 return true;
2519 }
2520 if (cmp > 0)
2521 {
2522 pos = l;
2523 return false;
2524 }
2525 } // for
2526 }
2527
2528 while (l <= r)
2529 {
2530 size_type mid = (r-l)/2+l;
2531 cmp = this->compare(sv, mid, val);
2532 if (cmp == 0)
2533 {
2534 pos = mid;
2535 // back-scan to rewind all duplicates
2536 for (size_type i = mid-1; i >= 0; --i)
2537 {
2538 cmp = this->compare(sv, i, val);
2539 if (cmp != 0)
2540 return true;
2541 pos = i;
2542 } // for i
2543 pos = 0;
2544 return true;
2545 }
2546 if (cmp < 0)
2547 l = mid+1;
2548 else
2549 r = mid-1;
2550
2551 dist = r - l;
2552 if (dist < linear_cutoff2) // do linear scan here
2553 {
2554 typename SV::const_iterator it(&sv, l);
2555 for (; it.valid(); ++it, ++l)
2556 {
2557 typename SV::value_type sv_value = it.value();
2558 if (sv_value == val)
2559 {
2560 pos = l;
2561 return true;
2562 }
2563 if (sv_value > val) // vect[x] > val
2564 {
2565 pos = l;
2566 return false;
2567 }
2568 } // for it
2569 BM_ASSERT(0);
2570 pos = l;
2571 return false;
2572 }
2573 } // while
2574
2575 BM_ASSERT(0);
2576 return false;
2577}
2578
2579
2580//----------------------------------------------------------------------------
2581
2582template<typename SV>
2584 const SV& sv,
2585 const typename SV::value_type* str,
2586 typename SV::size_type& pos)
2587{
2588 int cmp;
2589 size_type l = 0, r = sv.size();
2590
2591 if (l == r) // empty vector
2592 {
2593 pos = 0;
2594 return false;
2595 }
2596 --r;
2597
2598 // check initial boundary conditions if insert point is at tail/head
2599 cmp = this->compare_str(sv, l, str); // left (0) boundary check
2600 if (cmp > 0) // vect[x] > str
2601 {
2602 pos = 0;
2603 return false;
2604 }
2605 if (cmp == 0)
2606 {
2607 pos = 0;
2608 return true;
2609 }
2610 cmp = this->compare_str(sv, r, str); // right(size-1) boundary check
2611 if (cmp == 0)
2612 {
2613 pos = r;
2614 // back-scan to rewind all duplicates
2615 // TODO: adapt one-sided binary search to traverse large platos
2616 for (; r >= 0; --r)
2617 {
2618 cmp = this->compare_str(sv, r, str);
2619 if (cmp != 0)
2620 return true;
2621 pos = r;
2622 } // for i
2623 return true;
2624 }
2625 if (cmp < 0) // vect[x] < str
2626 {
2627 pos = r+1;
2628 return false;
2629 }
2630
2631 size_type dist = r - l;
2632 if (dist < linear_cutoff1)
2633 {
2634 for (; l <= r; ++l)
2635 {
2636 cmp = this->compare_str(sv, l, str);
2637 if (cmp == 0)
2638 {
2639 pos = l;
2640 return true;
2641 }
2642 if (cmp > 0)
2643 {
2644 pos = l;
2645 return false;
2646 }
2647 } // for
2648 }
2649 while (l <= r)
2650 {
2651 size_type mid = (r-l)/2+l;
2652 cmp = this->compare_str(sv, mid, str);
2653 if (cmp == 0)
2654 {
2655 pos = mid;
2656 // back-scan to rewind all duplicates
2657 for (size_type i = mid-1; i >= 0; --i)
2658 {
2659 cmp = this->compare_str(sv, i, str);
2660 if (cmp != 0)
2661 return true;
2662 pos = i;
2663 } // for i
2664 pos = 0;
2665 return true;
2666 }
2667 if (cmp < 0)
2668 l = mid+1;
2669 else
2670 r = mid-1;
2671
2672 dist = r - l;
2673 if (dist < linear_cutoff2) // do linear scan here
2674 {
2675 if (!hmatr_.is_init())
2676 {
2677 unsigned max_str = (unsigned)sv.effective_max_str();
2678 hmatr_.resize(linear_cutoff2, max_str+1, false);
2679 }
2680
2681 dist = sv.decode(hmatr_, l, dist+1);
2682 for (unsigned i = 0; i < dist; ++i, ++l)
2683 {
2684 const typename SV::value_type* hm_str = hmatr_.row(i);
2685 cmp = ::strcmp(hm_str, str);
2686 if (cmp == 0)
2687 {
2688 pos = l;
2689 return true;
2690 }
2691 if (cmp > 0) // vect[x] > str
2692 {
2693 pos = l;
2694 return false;
2695 }
2696 }
2697 cmp = this->compare_str(sv, l, str);
2698 if (cmp > 0) // vect[x] > str
2699 {
2700 pos = l;
2701 return false;
2702 }
2703 BM_ASSERT(0);
2704 pos = l;
2705 return false;
2706 }
2707 } // while
2708
2709 BM_ASSERT(0);
2710 return false;
2711}
2712
2713
2714//----------------------------------------------------------------------------
2715
2716template<typename SV>
2718 size_type idx,
2719 const value_type* str)
2720{
2721 if (bound_sv_ == &sv)
2722 {
2723 size_type nb = (idx >> bm::set_block_shift);
2724 size_type nbit = (idx & bm::set_block_mask);
2725 if (nbit == 0) // access to sentinel, first block element
2726 {
2727 value_type* s0 = block0_elements_cache_.row(nb);
2728 if (*s0 == 0) // uninitialized element
2729 {
2730 sv.get(idx, s0, size_type(block0_elements_cache_.cols()));
2731 }
2732 int res = 0;
2733 for (unsigned i = 0; i < block0_elements_cache_.cols(); ++i)
2734 {
2735 char octet = str[i]; char value = s0[i];
2736 res = (value > octet) - (value < octet);
2737 if (res || !octet)
2738 break;
2739 } // for i
2740 return res;
2741 }
2742 else
2743 {
2744 if (nbit % bm::sub_block3_size == 0) // TODO: use AND mask here
2745 {
2746 size_type k = nbit / bm::sub_block3_size - 1;
2747 value_type* s1 = block3_elements_cache_.row(nb * 3 + k);
2748 int res = 0;
2749 for (unsigned i = 0; i < block3_elements_cache_.cols(); ++i)
2750 {
2751 char octet = str[i]; char value = s1[i];
2752 res = (value > octet) - (value < octet);
2753 if (res || !octet)
2754 break;
2755 } // for i
2756 return res;
2757 }
2758 }
2759 }
2760 return sv.compare(idx, str);
2761}
2762
2763//----------------------------------------------------------------------------
2764
2765template<typename SV>
2767 size_type idx,
2768 const value_type val) BMNOEXCEPT
2769{
2770 // TODO: implement sentinel elements cache (similar to compare_str())
2771 return sv.compare(idx, val);
2772}
2773
2774//----------------------------------------------------------------------------
2775
2776template<typename SV>
2778 typename SV::value_type value,
2779 typename SV::bvector_type& bv_out)
2780{
2781 if (sv.empty())
2782 {
2783 bv_out.clear();
2784 return; // nothing to do
2785 }
2786 if (!value)
2787 {
2788 find_zero(sv, bv_out);
2789 return;
2790 }
2791
2792 find_eq_with_nulls(sv, value, bv_out, 0);
2793
2794 decompress(sv, bv_out);
2795 correct_nulls(sv, bv_out);
2796}
2797
2798//----------------------------------------------------------------------------
2799
2800template<typename SV> template<typename BII>
2801void sparse_vector_scanner<SV>::find_eq(const SV& sv, value_type value, BII bi)
2802{
2803 static_assert(!SV::is_compressed(), "BM:find_eq on RSC vector not implemented");
2804
2805 if (sv.empty())
2806 return; // nothing to do
2807 if (!value)
2808 {
2809 // TODO: better implementation for 0 value seach
2810 typename SV::bvector_type bv_out;
2811 find_zero(sv, bv_out);
2812 typename SV::bvector_type::enumerator en = bv_out.get_enumerator(0);
2813 for (; en.valid(); ++en)
2814 *bi = *en;
2815 return;
2816 }
2817
2818 // search for value with aggregator
2819 //
2820 agg_.reset();
2821
2822 bool found = prepare_and_sub_aggregator(sv, value);
2823 if (!found)
2824 return; // impossible value
2825
2826 found = agg_.combine_and_sub_bi(bi);
2827 agg_.reset();
2828}
2829
2830
2831//----------------------------------------------------------------------------
2832
2833template<typename SV>
2835 typename SV::value_type value,
2836 typename SV::size_type& pos)
2837{
2838 if (!value) // zero value - special case
2839 {
2840 bvector_type bv_zero;
2841 find_eq(sv, value, bv_zero);
2842 bool found = bv_zero.find(pos);
2843 return found;
2844 }
2845
2846 size_type found_pos;
2847 bool found = find_first_eq(sv, value, found_pos);
2848 if (found)
2849 {
2850 pos = found_pos;
2851 if (bm::conditional<SV::is_rsc_support::value>::test()) // test rank/select trait
2852 {
2853 if constexpr (SV::is_compressed()) // if compressed vector - need rank translation
2854 found = sv.find_rank(found_pos + 1, pos);
2855 }
2856 }
2857 return found;
2858}
2859
2860//----------------------------------------------------------------------------
2861
2862template<typename SV>
2864 typename SV::bvector_type& bv_out)
2865{
2866 agg_.reset(); // in case if previous scan was interrupted
2867 auto sz = sv.effective_slices(); // sv.slices();
2868 for (unsigned i = 0; i < sz; ++i)
2869 agg_.add(sv.get_slice(i));
2870 agg_.combine_or(bv_out);
2871 agg_.reset();
2872}
2873
2874//----------------------------------------------------------------------------
2875
2876template<typename SV>
2878 typename SV::bvector_type& bv_out)
2879{
2880 BM_ASSERT(sv.size());
2881 bv_out.set_range(0, sv.size()-1); // select all elements
2882 if constexpr (std::is_signed<value_type>::value)
2883 {
2884 if (const bvector_type* bv_sign = sv.get_slice(0)) // sign bvector
2885 bv_out.bit_sub(*bv_sign); // all MINUS negatives
2886 }
2887}
2888
2889//----------------------------------------------------------------------------
2890
2891template<typename SV>
2893 typename SV::bvector_type& bv_out)
2894{
2895 if constexpr (SV::is_compressed())
2896 {
2897 const bvector_type* bv_non_null = sv.get_null_bvector();
2898 BM_ASSERT(bv_non_null);
2899
2900 // TODO: implement faster decompressor for small result-sets
2901 rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
2902 bv_out.swap(bv_tmp_);
2903 }
2904 else
2905 {
2906 (void)sv; (void)bv_out;
2907 }
2908}
2909
2910//----------------------------------------------------------------------------
2911
2912template<typename SV>
2914{
2915 BM_ASSERT(from < to);
2916 mask_from_ = from;
2917 mask_to_ = to;
2918 mask_set_ = true;
2919}
2920
2921//----------------------------------------------------------------------------
2922
2923template<typename SV>
2925{
2926 mask_set_ = false;
2927 mask_from_ = mask_to_ = bm::id_max;
2928}
2929
2930
2931//----------------------------------------------------------------------------
2932// sparse_vector_scanner<SV>::pipeline<Opt>
2933//----------------------------------------------------------------------------
2934
2935template<typename SV> template<class Opt>
2936void
2938{
2939 static_assert(Opt::is_masks(),
2940 "BM: Search masking needs to be enabled in template parameter options before function call. see bm::agg_run_options<> ");
2941 bv_and_mask_ = bv_mask;
2942}
2943
2944//----------------------------------------------------------------------------
2945
2946template<typename SV> template<class Opt>
2947void
2948sparse_vector_scanner<SV>::pipeline<Opt>::add(const typename SV::value_type* str)
2949{
2950 BM_ASSERT(str);
2951
2952 typename aggregator_type::arg_groups* arg = agg_pipe_.add();
2953 BM_ASSERT(arg);
2954
2955 if constexpr (SV::is_remap_support::value) // test remapping trait
2956 {
2957 if (sv_.is_remap() && (str != remap_value_vect_.data()))
2958 {
2959 bool remaped = remap_tosv(remap_value_vect_, str, sv_);
2960 if (!remaped)
2961 return; // not found (leaves the arg(arg_group) empty
2962 str = remap_value_vect_.data();
2963 }
2964 }
2965 int len = 0;
2966 for (; str[len] != 0; ++len)
2967 {}
2968 BM_ASSERT(len);
2969
2970 if constexpr(Opt::is_masks())
2971 arg->add(bv_and_mask_, 0);
2972 arg->add(sv_.get_null_bvector(), 0);
2973
2974 // use reverse order (faster for sorted arrays)
2975
2976 for (int octet_idx = len-1; octet_idx >= 0; --octet_idx)
2977 {
2978 //if (unsigned(octet_idx) < octet_start) // common prefix
2979 // break;
2980
2981 unsigned value = unsigned(str[octet_idx]) & 0xFF;
2982 BM_ASSERT(value != 0);
2983
2984 bm::id64_t planes_mask;
2985 #if (0)
2986 if (&sv == bound_sv_)
2987 planes_mask = vector_plane_masks_[unsigned(octet_idx)];
2988 else
2989 #endif
2990 planes_mask = sv_.get_slice_mask(unsigned(octet_idx));
2991
2992 if ((value & planes_mask) != value) // pre-screen for impossible values
2993 {
2994 arg->reset(); // reset is necessary to disable single plane AND
2995 return; // found non-existing plane
2996 }
2997
2999 *arg, sv_, octet_idx, planes_mask, value); // AND-SUB aggregator
3000 } // for octet_idx
3001
3002
3003 // add all vectors above string len to the SUB aggregation group
3004 //
3005 //if (prefix_sub)
3006 {
3007 unsigned plane_idx = unsigned(len * 8);
3008 // SUB group should NOt include not NULL bvector
3009 size_type planes = size_type(this->eff_slices_);
3010 for (; plane_idx < planes; ++plane_idx)
3011 {
3012 if (bvector_type_const_ptr bv = sv_.get_slice(plane_idx))
3013 arg->add(bv, 1); // agg to SUB group
3014 } // for
3015 }
3016
3017}
3018
3019//----------------------------------------------------------------------------
3020
3021
3022} // namespace bm
3023
3024
3025#endif
Algorithms for fast aggregation of N bvectors.
Algorithms for bvector<> (main include)
Definitions(internal)
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BM_ASSERT
Definition: bmdef.h:139
#define BM_VECT_ALIGN
Definition: bmdef.h:320
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
bm::heap_vector< size_type, allocator_type, true > bv_count_vector_type
Definition: bmaggregator.h:187
Constant iterator designed to enumerate "ON" bits.
Definition: bm.h:603
bool valid() const BMNOEXCEPT
Checks if iterator is still valid.
Definition: bm.h:283
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
bm::bvector< Alloc > & bit_or(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand OR : this := bv1 OR bv2
Definition: bm.h:5668
@ opt_none
no optimization
Definition: bm.h:134
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
Definition: bm.h:3515
allocator_type::allocator_pool_type allocator_pool_type
Definition: bm.h:118
size_type size() const BMNOEXCEPT
Returns bvector's capacity (number of bits it can store)
Definition: bm.h:1278
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2428
Alloc allocator_type
Definition: bm.h:117
bm::bvector< Alloc > & bit_xor(const bm::bvector< Alloc > &bv1, const bm::bvector< Alloc > &bv2, typename bm::bvector< Alloc >::optmode opt_mode=opt_none)
3-operand XOR : this := bv1 XOR bv2
Definition: bm.h:5767
Algorithms for rank compression of bit-vector.
Definition: bmalgo.h:453
Integer set to set transformation (functional image in groups theory) https://en.wikipedia....
bvector_type bv_zero_
bit-vector for zero elements
const bvector_type & get_bv_zero() const
Get read access to zero-elements vector Zero vector gets populated after attach_sv() is called or as ...
bvector_type bv_product_
temp vector
void attach_sv(const SV *sv_brel, bool compute_stats=false)
Attach a translation table vector for remapping (Image function)
allocator_pool_type pool_
const SV * sv_ptr_
current translation table vector
void remap(const bvector_type &bv_in, bvector_type &bv_out)
Perform remapping (Image function) (based on attached translation table)
bool have_stats_
flag of statistics presense
gather_buffer_type * gb_
intermediate buffers
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
void operator=(const set2set_11_transform &)=delete
void one_pass_run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
gather_buffer< sv_g_size > gather_buffer_type
void run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Run remap transformation.
set2set_11_transform(const set2set_11_transform &)=delete
SV::bvector_type bvector_type
Pipeline to run multiple searches against a particular SV faster using cache optimizations.
const run_options & get_options() const BMNOEXCEPT
Get pipeline run options.
bv_count_vector_type & get_bv_count_vector() BMNOEXCEPT
Return results vector count used for pipeline execution.
remap_vector_type remap_value_vect_
remap buffer
void complete()
Prepare pipeline for the execution (resize and init internal structures) Once complete,...
const SV & sv_
target sparse vector ref
bool is_complete() const BMNOEXCEPT
aggregator_pipeline_type agg_pipe_
traget aggregator pipeline
bvect_vector_type & get_bv_res_vector() BMNOEXCEPT
Return results vector used for pipeline execution.
void set_or_target(bvector_type *bv_or) BMNOEXCEPT
Attach OR (aggregator vector).
aggregator_pipeline_type & get_aggregator_pipe() BMNOEXCEPT
get aggregator pipeline (access to compute internals)
void set_search_mask(const bvector_type *bv_mask) BMNOEXCEPT
Set pipeline mask bit-vector to restrict the search space.
void add(const typename SV::value_type *str)
Add new search string.
aggregator_type::template pipeline< Opt > aggregator_pipeline_type
pipeline & operator=(const pipeline &)=delete
pipeline(const pipeline &)=delete
size_t eff_slices_
number of effectice NOT NULL value slices
size_type size() const BMNOEXCEPT
run_options & options() BMNOEXCEPT
Set pipeline run options.
void set_search_count_limit(size_type limit) BMNOEXCEPT
Set search limit for results.
algorithms for sparse_vector scan/search
bm::aggregator< bvector_type > aggregator_type
SV::bvector_type bvector_type
bool find_eq_str_impl(const SV &sv, const value_type *str, bvector_type &bv_out, bool prefix_sub)
find EQ str / prefix impl
void correct_nulls(const SV &sv, bvector_type &bv_out)
Exclude possible NULL values from the result vector.
bvector_type::allocator_type allocator_type
bool prepare_and_sub_aggregator(const SV &sv, value_type value)
Prepare aggregator for AND-SUB (EQ) search.
bool find_eq_with_nulls(const SV &sv, value_type value, bvector_type &bv_out, size_type search_limit=0)
find value (may include NULL indexes)
aggregator_type::bv_count_vector_type bv_count_vector_type
static bool remap_tosv(remap_vector_type &remap_vect_target, const value_type *str, const SV &sv)
Remap input value into SV char encodings.
bm::dynamic_heap_matrix< value_type, allocator_type > matrix_search_buf_type
bool find_first_eq(const SV &sv, value_type value, size_type &idx)
find first value (may include NULL indexes)
void find_eq_with_nulls_horizontal(const SV &sv, value_type value, bvector_type &bv_out)
For testing purposes only.
void find_zero(const SV &sv, bvector_type &bv_out, bool null_correct=true)
find all sparse vector elements EQ to 0
bool bfind_eq_str(const SV &sv, const value_type *str, size_type &pos)
binary find first sparse vector element (string) Sparse vector must be sorted.
bm::heap_vector< value_type, typename bvector_type::allocator_type, true > remap_vector_type
void invert(const SV &sv, bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
sparse_vector_scanner(const sparse_vector_scanner &)=delete
void find_gt_horizontal(const SV &sv, value_type value, bvector_type &bv_out, bool null_correct=true)
For testing purposes only.
int compare(const SV &sv, size_type idx, const value_type val) BMNOEXCEPT
compare sv[idx] with input value
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
void reset_binding() BMNOEXCEPT
reset sparse vector binding
void find_eq(const SV &sv, value_type value, bvector_type &bv_out)
find all sparse vector elements EQ to search value
void decompress(const SV &sv, bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
void operator=(const sparse_vector_scanner &)=delete
void find_nonzero(const SV &sv, bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all planes.
static constexpr int gt_slice_limit() noexcept
Return the slice limit for signed/unsigned vector value types.
void find_lt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less (<) than value
bm::heap_vector< bvector_type *, allocator_type, true > bvect_vector_type
allocator_type::allocator_pool_type allocator_pool_type
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
static void add_agg_char(AGG &agg, const SV &sv, int octet_idx, bm::id64_t planes_mask, unsigned value)
Addd char to aggregator (AND-SUB)
bool bfind(const SV &sv, const value_type val, size_type &pos)
binary search for position in the sorted sparse vector
void reset_search_range()
reset (disable) search range
void find_positive(const SV &sv, bvector_type &bv_out)
Find positive (greter than zero elements) Output vector is computed as a logical OR (join) of all pla...
bool find_eq_str_prefix(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements with a given prefix (string)
bool lower_bound_str(const SV &sv, const value_type *str, size_type &pos)
lower bound search for an array position
int compare_str(const SV &sv, size_type idx, const value_type *str)
compare sv[idx] with input str
const bvector_type * bvector_type_const_ptr
void set_search_range(size_type from, size_type to)
set search boundaries (hint for the aggregator)
void find_ge(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater or equal (>=) than value
void find_le(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less or equal (<=) than value
void find_gt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater (>) than value
void find_range(const SV &sv, value_type from, value_type to, bvector_type &bv_out)
find all elements sparse vector element in closed range [left..right] interval
void aggregate_OR_slices(bvector_type &bv_target, const SV &sv, unsigned from, unsigned total_planes)
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
bm::heap_vector< bm::id64_t, typename bvector_type::allocator_type, true > mask_vector_type
aggregator_type::run_options run_options
Scanner options to control execution.
allocator_pool_type & get_bvector_alloc_pool() BMNOEXCEPT
Return allocator pool for blocks (Can be used to improve performance of repeated searches with the sa...
static void aggregate_AND_OR_slices(bvector_type &bv_target, const bvector_type &bv_mask, const SV &sv, unsigned from, unsigned total_planes)
BMFORCEINLINE bm::id_t word_bitcount(bm::id_t w) BMNOEXCEPT
Definition: bmutil.h:573
unsigned short bitscan(V w, B *bits) BMNOEXCEPT
Templated Bitscan with dynamic dispatch for best type.
Definition: bmfunc.h:761
bm::alloc_pool_guard< allocator_pool_type, bvector< Alloc > > mem_pool_guard
Definition: bm.h:790
null_support
NULL-able value support.
Definition: bmconst.h:228
@ BM_SORTED
input set is sorted (ascending order)
Definition: bmconst.h:205
@ BM_SORTED_UNIFORM
sorted and in one block (internal!)
Definition: bmconst.h:206
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:229
@ no_null
do not support NULL values
Definition: bmconst.h:230
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
void sparse_vector_find_mismatch(typename SV1::bvector_type &bv, const SV1 &sv1, const SV2 &sv2, bm::null_support null_proc)
Find mismatch vector, indicating positions of mismatch between two sparse vectors (uses linear scan i...
Definition: bm.h:78
const unsigned id_max
Definition: bmconst.h:109
const unsigned set_block_mask
Definition: bmconst.h:57
const unsigned sub_block3_size
Definition: bmconst.h:126
BMFORCEINLINE void xor_swap(W &x, W &y) BMNOEXCEPT
XOR swap two variables.
Definition: bmutil.h:534
unsigned long long int id64_t
Definition: bmconst.h:35
unsigned int id_t
Definition: bmconst.h:38
const unsigned gap_max_bits
Definition: bmconst.h:81
const unsigned set_block_shift
Definition: bmconst.h:56
ad-hoc conditional expressions
Definition: bmutil.h:113
size_type BM_VECT_ALIGN gather_idx_[BSIZE] BM_VECT_ALIGN_ATTR
value_type BM_VECT_ALIGN buffer_[BSIZE] BM_VECT_ALIGN_ATTR
bm::bvector bvector_type
Definition: xsample07a.cpp:94