BitMagic-C++
strsvsample06.cpp

Succinct container iterator for (substring), search, mismatch

See also
bm::str_sparse_vector
bm::sparse_vector_find_mismatch
bm::sparse_vector_find_first_mismatch
bm::sparse_vector_scanner
/*
Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
For more information please visit: http://bitmagic.io
*/
/** \example strsvsample06.cpp
Succinct container iterator for (substring), search, mismatch
\sa bm::str_sparse_vector
\sa bm::sparse_vector_find_mismatch
\sa bm::sparse_vector_find_first_mismatch
\sa bm::sparse_vector_scanner
*/
/*! \file strsvsample06.cpp
\brief Example: Succinct container iterator for (substring), search, mismatch
*/
#include <iostream>
#include <string>
#include <vector>
#include <cassert>
#include "bm.h"
#include "bmstrsparsevec.h"
#include "bmundef.h" /* clear the pre-proc defines from BM */
using namespace std;
int main(void)
{
try
{
str_sv_type str_sv(bm::use_null); // construct it as a NULL-able vector
// back inserted is the fastest way to load the container, use it
// where necessary
{
auto iit = str_sv.get_back_inserter();
iit = "240-423-0567";
iit = "420-123-5078";
iit.add_null();
iit = "301-223-1234";
iit = "131-423-1235";
iit.add_null();
iit = "301-113-6535";
iit.flush();
}
str_sv.optimize(tb); // optimize the vector
// print the content using iterator
{
auto it = str_sv.begin();
auto it_end = str_sv.end();
for (; it != it_end; ++it)
{
if (it.is_null())
cout << "NULL";
else
cout << *it;
cout << endl;
}
}
// print all the NULL values in the vector
//
{
const bvector_type* bv_not_null = str_sv.get_null_bvector();
assert(bv_not_null);
bvector_type bv_tmp(*bv_not_null);
bv_tmp.resize(str_sv.size());
bv_tmp.invert();
if (bv_tmp.any())
{
auto en = bv_tmp.get_enumerator(0);
cout << "List of NULL values: ";
for (; en.valid(); ++en)
{
cout << *en << ", ";
}
cout << endl;
}
}
// use substring iterators
// substring performs partial transposition and can be faster
// than the full iterator with extract ans substr
//
{
cout << endl << "--------------------------\n";
auto it1 = str_sv.begin();
it1.set_substr(0, 3);
auto it2 = str_sv.begin();
it2.set_substr(4, 3);
auto it3 = str_sv.begin();
it3.set_substr(8); // from 8 to end
auto it_end = str_sv.end();
for (; it1 != it_end; ++it1, ++it2, ++it3)
{
if (it1.is_null())
cout << "NULL";
else
cout << "(" << *it1 << ")" << *it2 << "." << *it3;
cout << endl;
} // for
}
str_sv_type str_sv1(str_sv); // construct a copy
// see also sparse_vector_find_mismatch
bool b = bm::sparse_vector_find_first_mismatch(str_sv, str_sv1, idx);
assert(!b); // should not find at this point
str_sv1.set(2, "788-125-6789");
b = bm::sparse_vector_find_first_mismatch(str_sv, str_sv1, idx);
if (b)
{
cout << endl;
cout << "Mismatch at: " << idx << endl;
cout << "value=" << str_sv1[idx] << endl;
}
cout << endl;
// use scanner to perform vector search
//
// NOTE: re-use scanner for multiple searches for best performance
//
{
bvector_type bv_res;
str_scan.find_eq_str(str_sv1, "250-113-6535", bv_res);
{
auto en = bv_res.get_enumerator(0); // from the start (pos=0)
for (;en.valid(); ++en)
{
idx = *en;
cout << idx << ": " << str_sv1[idx] << endl;
} // for
}
cout << "Prefix search:" << endl;
str_scan.find_eq_str_prefix(str_sv1, "301", bv_res);
cout << "Found: " << bv_res.count() << endl;
auto en = bv_res.get_enumerator(0); // from the start (pos=0)
for (;en.valid(); ++en)
{
idx = *en;
cout << idx << ": " << str_sv1[idx] << endl;
} // for
}
}
catch(std::exception& ex)
{
std::cerr << ex.what() << std::endl;
return 1;
}
return 0;
}
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
Algorithms for bm::sparse_vector.
string sparse vector based on bit-transposed matrix
pre-processor un-defines to avoid global space pollution (internal)
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way)
Definition: bmbmatrix.h:430
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
bvector< Alloc > & invert()
Invert/NEG all bits It should be noted, invert is affected by size() if size is set - it only inverts...
Definition: bm.h:3515
size_type count() const BMNOEXCEPT
population count (count of ON bits)
Definition: bm.h:2366
void resize(size_type new_size)
Change size of the bvector.
Definition: bm.h:2428
bool any() const BMNOEXCEPT
Returns true if any bits in this bitset are set, and otherwise returns false.
Definition: bm.h:2416
enumerator get_enumerator(size_type pos) const
Returns enumerator pointing on specified or the next available bit.
Definition: bm.h:1861
algorithms for sparse_vector scan/search
bool find_eq_str_prefix(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements with a given prefix (string)
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
void add_null()
add NULL (no-value) to the container
void set_substr(unsigned from, unsigned len=0) BMNOEXCEPT
setup iterator to retrieve a sub-string of a string
succinct sparse vector for strings with compression using bit-slicing ( transposition) method
const_iterator end() const BMNOEXCEPT
Provide const iterator access to the end
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename str_sparse_vector< CharType, BV, STR_SIZE >::statistics *stat=0)
run memory optimization for all vector planes
size_type size() const
return size of the vector
const_iterator begin() const BMNOEXCEPT
Provide const iterator access to container content
back_insert_iterator get_back_inserter()
Provide back insert iterator Back insert iterator implements buffered insertion, which is faster,...
void set(size_type idx, const value_type *str)
set specified element with bounds checking and automatic resize
bvector_type::size_type size_type
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:229
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
bm::str_sparse_vector< char, bvector_type, 32 > str_sv_type
bm::bvector bvector_type
int main(void)