BitMagic-C++
strsvsample06.cpp
Go to the documentation of this file.
1/*
2Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16For more information please visit: http://bitmagic.io
17*/
18
19/** \example strsvsample06.cpp
20 Succinct container iterator for (substring), search, mismatch
21
22 \sa bm::str_sparse_vector
23 \sa bm::sparse_vector_find_mismatch
24 \sa bm::sparse_vector_find_first_mismatch
25 \sa bm::sparse_vector_scanner
26*/
27
28/*! \file strsvsample06.cpp
29 \brief Example: Succinct container iterator for (substring), search, mismatch
30*/
31
32#include <iostream>
33#include <string>
34#include <vector>
35#include <cassert>
36
37#include "bm.h"
38#include "bmstrsparsevec.h"
39#include "bmsparsevec_algo.h"
40#include "bmundef.h" /* clear the pre-proc defines from BM */
41
42using namespace std;
43
46
47int main(void)
48{
49 try
50 {
51 str_sv_type str_sv(bm::use_null); // construct it as a NULL-able vector
52
53 // back inserted is the fastest way to load the container, use it
54 // where necessary
55 {
56 auto iit = str_sv.get_back_inserter();
57 iit = "240-423-0567";
58 iit = "420-123-5078";
59 iit.add_null();
60 iit = "301-223-1234";
61 iit = "131-423-1235";
62 iit.add_null();
63 iit = "301-113-6535";
64
65 iit.flush();
66 }
68 str_sv.optimize(tb); // optimize the vector
69
70 // print the content using iterator
71 {
72 auto it = str_sv.begin();
73 auto it_end = str_sv.end();
74 for (; it != it_end; ++it)
75 {
76 if (it.is_null())
77 cout << "NULL";
78 else
79 cout << *it;
80 cout << endl;
81 }
82 }
83
84 // print all the NULL values in the vector
85 //
86 {
87 const bvector_type* bv_not_null = str_sv.get_null_bvector();
88 assert(bv_not_null);
89 bvector_type bv_tmp(*bv_not_null);
90 bv_tmp.resize(str_sv.size());
91 bv_tmp.invert();
92 if (bv_tmp.any())
93 {
94 auto en = bv_tmp.get_enumerator(0);
95 cout << "List of NULL values: ";
96 for (; en.valid(); ++en)
97 {
98 cout << *en << ", ";
99 }
100 cout << endl;
101 }
102 }
103
104
105
106 // use substring iterators
107 // substring performs partial transposition and can be faster
108 // than the full iterator with extract ans substr
109 //
110 {
111 cout << endl << "--------------------------\n";
112 auto it1 = str_sv.begin();
113 it1.set_substr(0, 3);
114
115 auto it2 = str_sv.begin();
116 it2.set_substr(4, 3);
117
118 auto it3 = str_sv.begin();
119 it3.set_substr(8); // from 8 to end
120
121 auto it_end = str_sv.end();
122 for (; it1 != it_end; ++it1, ++it2, ++it3)
123 {
124 if (it1.is_null())
125 cout << "NULL";
126 else
127 cout << "(" << *it1 << ")" << *it2 << "." << *it3;
128 cout << endl;
129 } // for
130 }
131
132 str_sv_type str_sv1(str_sv); // construct a copy
133
134
135
136 // see also sparse_vector_find_mismatch
138 bool b = bm::sparse_vector_find_first_mismatch(str_sv, str_sv1, idx);
139 assert(!b); // should not find at this point
140
141 str_sv1.set(2, "788-125-6789");
142 b = bm::sparse_vector_find_first_mismatch(str_sv, str_sv1, idx);
143 if (b)
144 {
145 cout << endl;
146 cout << "Mismatch at: " << idx << endl;
147 cout << "value=" << str_sv1[idx] << endl;
148 }
149 cout << endl;
150
151 // use scanner to perform vector search
152 //
153 // NOTE: re-use scanner for multiple searches for best performance
154 //
156
157 {
158 bvector_type bv_res;
159 str_scan.find_eq_str(str_sv1, "250-113-6535", bv_res);
160 {
161 auto en = bv_res.get_enumerator(0); // from the start (pos=0)
162 for (;en.valid(); ++en)
163 {
164 idx = *en;
165 cout << idx << ": " << str_sv1[idx] << endl;
166 } // for
167 }
168
169 cout << "Prefix search:" << endl;
170 str_scan.find_eq_str_prefix(str_sv1, "301", bv_res);
171 cout << "Found: " << bv_res.count() << endl;
172
173 auto en = bv_res.get_enumerator(0); // from the start (pos=0)
174 for (;en.valid(); ++en)
175 {
176 idx = *en;
177 cout << idx << ": " << str_sv1[idx] << endl;
178 } // for
179
180
181 }
182
183
184
185 }
186 catch(std::exception& ex)
187 {
188 std::cerr << ex.what() << std::endl;
189 return 1;
190 }
191
192
193 return 0;
194}
195
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
Algorithms for bm::sparse_vector.
string sparse vector based on bit-transposed matrix
pre-processor un-defines to avoid global space pollution (internal)
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way)
Definition: bmbmatrix.h:430
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
Definition: bm.h:3515
size_type count() const BMNOEXCEPT
population count (count of ON bits)
Definition: bm.h:2366
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2428
bool any() const BMNOEXCEPT
Returns true if any bits in this bitset are set, and otherwise returns false.
Definition: bm.h:2416
enumerator get_enumerator(size_type pos) const
Returns enumerator pointing on specified or the next available bit.
Definition: bm.h:1861
algorithms for sparse_vector scan/search
bool find_eq_str_prefix(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements with a given prefix (string)
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
void add_null()
add NULL (no-value) to the container
void set_substr(unsigned from, unsigned len=0) BMNOEXCEPT
setup iterator to retrieve a sub-string of a string
succinct sparse vector for strings with compression using bit-slicing ( transposition) method
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
size_type size() const
return size of the vector
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
bvector_type::size_type size_type
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:229
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
bm::str_sparse_vector< char, bvector_type, 32 > str_sv_type
bm::bvector bvector_type
int main(void)