BitMagic-C++
xsample08.cpp
Go to the documentation of this file.
1/*
2Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16For more information please visit: http://bitmagic.io
17*/
18
19/** \example xsample08.cpp
20
21 Example on intervals and how to use it for layout calculation.
22 As a use case this example uses genomics visualization for
23 features mapped into genomic coordinates.
24
25 It is also illustartes vector model using coordinate ranges
26 or feature vectors. Various properties of the initial model acn be
27 dropped (sliced) to improve memory efficiency, better storage
28 or network transfer.
29
30 This example does NOT do serialization of models (which is possible)
31 for the clarity of the sample code.
32
33 \sa bm::bvector::set_range
34 \sa bm::bvector::any_range
35 \sa bm::bvector::copy_range
36 \sa bm::interval_enumerator
37 \sa bm::rsc_sparse_vector
38 \sa bm::rsc_sparse_vector::copy_range
39 \sa bm::find_interval_start
40 \sa bm::find_interval_end
41
42 \sa sample22.cpp
43 \sa sample23.cpp
44 \sa bvintervals
45*/
46
47/*! \file sample23.cpp
48 \brief Example: interval_enumerator<> - interator class for intervals
49*/
50
51#include <iostream>
52#include <utility>
53#include <vector>
54#include <memory>
55#include <cassert>
56
57#include "bm.h"
58#include "bmintervals.h"
59#include "bmsparsevec_compr.h"
60#include "bmundef.h" /* clear the pre-proc defines from BM */
61
62using namespace std;
63
65typedef std::vector<std::unique_ptr<bm::bvector<> > > layout_vector_type;
66
69typedef std::vector<std::unique_ptr<rsc_vector_u8> > starnds_vector_type;
70
71
72// -------------------------------------------------------------------
73
74/// Data frame object, sued to buid succinct data model
75///
76///
78{
79 /// Optimize memory layoput, build index for faster read access
80 ///
81 void optimize();
82
83 void add_layout(size_t plane, bm::bvector<>* bv);
84 void add_strand(size_t plane, rsc_vector_u8* strand);
85
86 layout_vector_type layout_v; ///< layout vector
87 starnds_vector_type strand_v; ///< strand planes vector
88};
89
91{
92 BM_DECLARE_TEMP_BLOCK(tb); // explicit temp for faster optimization
93 for (size_t i = 0; i < layout_v.size(); ++i)
94 {
95 auto bv = layout_v[i].get();
96 if (bv)
97 bv->optimize(tb); // memory optimization
98 } // for i
99 for (size_t i = 0; i < strand_v.size(); ++i)
100 {
101 auto strand_plane = strand_v[i].get();
102 if (strand_plane)
103 {
104 strand_plane->optimize(tb);
105 strand_plane->sync(); // build rank-select idx (faster read access)
106 }
107 } // for i
108}
109
111{
112 unique_ptr<bm::bvector<> > ap(bv);
113 if (layout_v.size() == plane) // push back requested
114 {
115 layout_v.emplace_back(move(ap));
116 }
117 else
118 {
119 while (layout_v.size() < plane) // this is crude resize() but it would do
120 layout_v.emplace_back(new bm::bvector<>(bm::BM_GAP));
121 layout_v[plane] = std::move(ap);
122 }
123}
124
125void data_model::add_strand(size_t plane, rsc_vector_u8* strand)
126{
127 unique_ptr<rsc_vector_u8 > ap(strand);
128 if (strand_v.size() == plane) // push back requested
129 {
130 strand_v.emplace_back(move(ap));
131 }
132 else
133 {
134 while (strand_v.size() < plane) // this is crude resize() but it would do
135 strand_v.emplace_back(new rsc_vector_u8());
136 strand_v[plane] = std::move(ap);
137 }
138}
139
140
141// -------------------------------------------------------------------
142
143static
144void set_feature_strand(data_model& dm, size_t plane,
146 unsigned char strand)
147{
148 if (!strand)
149 return;
150 while (dm.strand_v.size() <= plane) // add planes
151 {
152 std::unique_ptr<rsc_vector_u8> p2(new rsc_vector_u8());
153 dm.strand_v.emplace_back(move(p2));
154 }
155
156 rsc_vector_u8* strand_plane = dm.strand_v[plane].get();
157 if (!strand_plane)
158 {
159 strand_plane = new rsc_vector_u8();
160 dm.strand_v[plane] = unique_ptr<rsc_vector_u8 >(strand_plane);
161 }
162 assert(strand_plane->is_null(pos));
163 strand_plane->set(pos, strand);
164}
165
166/// Register new object in the data model: [start..end] + strand
167///
168static
170 unsigned start, unsigned end,
171 unsigned char strand)
172{
173 assert(start <= end);
174
175 bm::bvector<>* bv; // layout plane vector
176
177 for (size_t i = 0; i < dm.layout_v.size(); ++i)
178 {
179 bv = dm.layout_v[i].get();
180 if (!bv)
181 {
182 bv = new bm::bvector<>(bm::BM_GAP);
183 dm.layout_v[i] = unique_ptr<bm::bvector<> >(bv);
184 // bv just created (empty) no need to do range check
185 bv->set_range(start, end);
186 set_feature_strand(dm, i, start, strand);
187
188 return;
189 }
190 if (!bv->any_range(start, end)) // check if layout space is not used
191 {
192 bv->set_range(start, end); // add [start..end] coordinates
193 // set strand at the start of feature
194 set_feature_strand(dm, i, start, strand);
195
196 return;
197 }
198 } // for i
199
200 // not found, make new plane
201 //
202 bv = new bm::bvector<>(bm::BM_GAP);
203 dm.layout_v.emplace_back(std::unique_ptr<bm::bvector<> >(bv));
204 bv->set_range(start, end);
205 set_feature_strand(dm, dm.layout_v.size()-1, start, strand);
206}
207
208/// Data model splicer
209///
210static
211void splice_model(data_model& dm_target, const data_model& dm,
214 bool copy_strands)
215{
216 const bm::bvector<>* bv; // layout
217 const rsc_vector_u8* strand_plane;
218
219 size_t t_plane = 0;
220 for (size_t i = 0; i < dm.layout_v.size(); ++i)
221 {
222 bv = dm.layout_v[i].get();
223 if (bv)
224 {
225 bm::bvector<>::size_type start_pos;
227
228 bool found = bm::find_interval_start(*bv, start, start_pos);
229 if (!found)
230 start_pos = start;
231 found = bm::find_interval_end(*bv, end, end_pos);
232 if (!found)
233 end_pos = end;
234
235 unique_ptr<bm::bvector<>> bv_ptr(new bm::bvector<>(bm::BM_GAP));
236 bv_ptr->copy_range(*bv, start_pos, end_pos);
237 if (bv_ptr->any()) // copy range may have ended as empty
238 {
239 dm_target.add_layout(t_plane, bv_ptr.release());
240
241 // slice the strands plane (if requested)
242 //
243 if (copy_strands)
244 {
245 if (i < dm.strand_v.size())
246 {
247 strand_plane = dm.strand_v[i].get();
248 if (strand_plane)
249 {
250 unique_ptr<rsc_vector_u8> strand_ptr(new rsc_vector_u8());
251 strand_ptr->copy_range(*strand_plane, start_pos, end_pos);
252 dm_target.add_strand(t_plane, strand_ptr.release());
253 }
254 }
255 }
256 ++t_plane;
257 } // if any()
258
259 } // if bv
260 } // for i
261
262}
263
264
265/// This is ASCII art "renderer" for the data model.
266/// illustrates how to manipulate succinct data model to create graphics
267///
268static
269void print_model(const data_model& dm)
270{
271 const bm::bvector<>* bv; // layout
272 const rsc_vector_u8* strand_plane;
273
274 // Sequence on top is for purely decorative purposes
275 cout <<
276 "-------------------------------------------------------------------------"
277 << endl <<
278 "ATGTTAGCCCGCGCATATTATATATGTAGCGTATTAAGCGDGGAGATTACCCTTGCATTAGGTTANNNNNNNN"
279 << endl <<
280 "-------------------------------------------------------------------------"
281 << endl;
282
283 for (size_t i = 0; i < dm.layout_v.size(); ++i)
284 {
285 bv = dm.layout_v[i].get();
286 if (bv)
287 {
288 strand_plane = i < dm.strand_v.size() ? dm.strand_v[i].get() : nullptr;
290 if (ien.valid())
291 {
292 bm::bvector<>::size_type spaces = 0;
293 do
294 {
295 auto st = ien.start(); auto end = ien.end();
296 char ch_strand = '?';
297 if (strand_plane)
298 {
299 auto strand = strand_plane->get(st);
300 switch (strand)
301 {
302 case 0: ch_strand = '>'; break; // positive
303 case 1: ch_strand = '<'; break; // negative
304 default: break; // unknown strand
305 }
306 }
307 for (; spaces < st; ++spaces)
308 cout << " ";
309 for (bool first = true; st <= end; ++st, first = false)
310 {
311 if (st == end)
312 cout << ch_strand;
313 else
314 cout << (first ? ch_strand : '.');
315 } // for
316 spaces = end+1;
317 } while (ien.advance());
318 cout << endl;
319 }
320 }
321 } // for
322}
323
325
326
327
328int main(void)
329{
330 try
331 {
332 data_model dm;
333
334 // build the data model using succinct vectors
335 //
336 add_object(dm, 0, 0, negative);
337 add_object(dm, 5, 10, positive);
338 add_object(dm, 4, 70, negative);
339 add_object(dm, 15, 20, negative);
340 add_object(dm, 20, 30, positive);
341 add_object(dm, 16, 21, unknown);
342
343 dm.optimize(); // run compression and build access index
344
345 // View the model using toy ASCII art renderer
346 //
347 print_model(dm);
348
349 // create a model splice for [5..10] range
350 // plus drop strand property (renderer will assume unknown)
351 //
352 {
353 data_model dm_splice;
354
355 splice_model(dm_splice, dm, 5, 10, false);
356 dm_splice.optimize();
357
358 cout << endl;
359 print_model(dm_splice);
360 }
361
362 // create a model splice for [5..10] range
363 // now WITH strand property
364 //
365 {
366 data_model dm_splice;
367
368 splice_model(dm_splice, dm, 5, 10, true);
369 dm_splice.optimize();
370
371 cout << endl;
372 print_model(dm_splice);
373 }
374 }
375 catch(std::exception& ex)
376 {
377 std::cerr << ex.what() << std::endl;
378 return 1;
379 }
380
381 return 0;
382}
383
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
Algorithms for bit ranges and intervals.
Compressed sparse container rsc_sparse_vector<> for integer types.
pre-processor un-defines to avoid global space pollution (internal)
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
bool any_range(size_type left, size_type right) const BMNOEXCEPT
Returns true if any bits in the range are 1s (non-empty interval) Function uses closed interval [left...
Definition: bm.h:3387
bvector_size_type size_type
Definition: bm.h:121
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left,right] Interval must be inside the bvector's siz...
Definition: bm.h:2333
forward iterator class to traverse bit-vector as ranges
Definition: bmintervals.h:53
size_type end() const BMNOEXCEPT
Return interval end/right as bit-vector coordinate 011110 [left..right].
Definition: bmintervals.h:560
bool valid() const BMNOEXCEPT
Returns true if enumerator is valid (false if traversal is done)
Definition: bmintervals.h:568
size_type start() const BMNOEXCEPT
Return interval start/left as bit-vector coordinate 011110 [left..right].
Definition: bmintervals.h:551
Rank-Select compressed sparse vector.
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
succinct sparse vector with runtime compression using bit-slicing / transposition method
Definition: bmsparsevec.h:87
@ BM_GAP
GAP compression is ON.
Definition: bmconst.h:147
bool find_interval_end(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Definition: bmintervals.h:438
bool find_interval_start(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Definition: bmintervals.h:315
Data frame object, sued to buid succinct data model.
Definition: xsample08.cpp:78
void optimize()
Optimize memory layoput, build index for faster read access.
Definition: xsample08.cpp:90
void add_strand(size_t plane, rsc_vector_u8 *strand)
Definition: xsample08.cpp:125
void add_layout(size_t plane, bm::bvector<> *bv)
Definition: xsample08.cpp:110
layout_vector_type layout_v
layout vector
Definition: xsample08.cpp:86
starnds_vector_type strand_v
strand planes vector
Definition: xsample08.cpp:87
bm::sparse_vector< unsigned char, bm::bvector<> > sparse_vector_u8
Definition: xsample08.cpp:67
bm::interval_enumerator< bm::bvector<> > interval_enumerator_type
Definition: xsample08.cpp:64
std::vector< std::unique_ptr< rsc_vector_u8 > > starnds_vector_type
Definition: xsample08.cpp:69
bm::rsc_sparse_vector< unsigned char, sparse_vector_u8 > rsc_vector_u8
Definition: xsample08.cpp:68
Strand
Definition: xsample08.cpp:324
@ unknown
Definition: xsample08.cpp:324
@ negative
Definition: xsample08.cpp:324
@ positive
Definition: xsample08.cpp:324
static void add_object(data_model &dm, unsigned start, unsigned end, unsigned char strand)
Register new object in the data model: [start..end] + strand.
Definition: xsample08.cpp:169
static void set_feature_strand(data_model &dm, size_t plane, bm::bvector<>::size_type pos, unsigned char strand)
Definition: xsample08.cpp:144
int main(void)
Definition: xsample08.cpp:328
static void splice_model(data_model &dm_target, const data_model &dm, bm::bvector<>::size_type start, bm::bvector<>::size_type end, bool copy_strands)
Data model splicer.
Definition: xsample08.cpp:211
static void print_model(const data_model &dm)
This is ASCII art "renderer" for the data model.
Definition: xsample08.cpp:269
std::vector< std::unique_ptr< bm::bvector<> > > layout_vector_type
Definition: xsample08.cpp:65