BitMagic-C++
xsample08.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example xsample08.cpp
20 
21  Example on intervals and how to use it for layout calculation.
22  As a use case this example uses genomics visualization for
23  features mapped into genomic coordinates.
24 
25  It is also illustartes vector model using coordinate ranges
26  or feature vectors. Various properties of the initial model acn be
27  dropped (sliced) to improve memory efficiency, better storage
28  or network transfer.
29 
30  This example does NOT do serialization of models (which is possible)
31  for the clarity of the sample code.
32 
33  \sa bm::bvector::set_range
34  \sa bm::bvector::any_range
35  \sa bm::bvector::copy_range
36  \sa bm::interval_enumerator
37  \sa bm::rsc_sparse_vector
38  \sa bm::rsc_sparse_vector::copy_range
39  \sa bm::find_interval_start
40  \sa bm::find_interval_end
41 
42  \sa sample22.cpp
43  \sa sample23.cpp
44  \sa bvintervals
45 */
46 
47 /*! \file sample23.cpp
48  \brief Example: interval_enumerator<> - interator class for intervals
49 */
50 
51 #include <iostream>
52 #include <utility>
53 #include <vector>
54 #include <memory>
55 #include <cassert>
56 
57 #include "bm.h"
58 #include "bmintervals.h"
59 #include "bmsparsevec_compr.h"
60 
61 using namespace std;
62 
64 typedef std::vector<std::unique_ptr<bm::bvector<> > > layout_vector_type;
65 
68 typedef std::vector<std::unique_ptr<rsc_vector_u8> > starnds_vector_type;
69 
70 
71 // -------------------------------------------------------------------
72 
73 /// Data frame object, sued to buid succinct data model
74 ///
75 ///
76 struct data_model
77 {
78  /// Optimize memory layoput, build index for faster read access
79  ///
80  void optimize();
81 
82  void add_layout(size_t plane, bm::bvector<>* bv);
83  void add_strand(size_t plane, rsc_vector_u8* strand);
84 
85  layout_vector_type layout_v; ///< layout vector
86  starnds_vector_type strand_v; ///< strand planes vector
87 };
88 
90 {
91  BM_DECLARE_TEMP_BLOCK(tb); // explicit temp for faster optimization
92  for (size_t i = 0; i < layout_v.size(); ++i)
93  {
94  auto bv = layout_v[i].get();
95  if (bv)
96  bv->optimize(tb); // memory optimization
97  } // for i
98  for (size_t i = 0; i < strand_v.size(); ++i)
99  {
100  auto strand_plane = strand_v[i].get();
101  if (strand_plane)
102  {
103  strand_plane->optimize(tb);
104  strand_plane->sync(); // build rank-select idx (faster read access)
105  }
106  } // for i
107 }
108 
109 void data_model::add_layout(size_t plane, bm::bvector<>* bv)
110 {
111  unique_ptr<bm::bvector<> > ap(bv);
112  if (layout_v.size() == plane) // push back requested
113  {
114  layout_v.emplace_back(move(ap));
115  }
116  else
117  {
118  while (layout_v.size() < plane) // this is crude resize() but it would do
119  layout_v.emplace_back(new bm::bvector<>(bm::BM_GAP));
120  layout_v[plane] = std::move(ap);
121  }
122 }
123 
124 void data_model::add_strand(size_t plane, rsc_vector_u8* strand)
125 {
126  unique_ptr<rsc_vector_u8 > ap(strand);
127  if (strand_v.size() == plane) // push back requested
128  {
129  strand_v.emplace_back(move(ap));
130  }
131  else
132  {
133  while (strand_v.size() < plane) // this is crude resize() but it would do
134  strand_v.emplace_back(new rsc_vector_u8());
135  strand_v[plane] = std::move(ap);
136  }
137 }
138 
139 
140 // -------------------------------------------------------------------
141 
142 void set_feature_strand(data_model& dm, size_t plane,
144  unsigned char strand)
145 {
146  if (!strand)
147  return;
148  while (dm.strand_v.size() <= plane) // add planes
149  {
150  std::unique_ptr<rsc_vector_u8> p2(new rsc_vector_u8());
151  dm.strand_v.emplace_back(move(p2));
152  }
153 
154  rsc_vector_u8* strand_plane = dm.strand_v[plane].get();
155  if (!strand_plane)
156  {
157  strand_plane = new rsc_vector_u8();
158  dm.strand_v[plane] = unique_ptr<rsc_vector_u8 >(strand_plane);
159  }
160  assert(strand_plane->is_null(pos));
161  strand_plane->set(pos, strand);
162 }
163 
164 /// Register new object in the data model: [start..end] + strand
165 ///
167  unsigned start, unsigned end,
168  unsigned char strand)
169 {
170  assert(start <= end);
171 
172  bm::bvector<>* bv; // layout plane vector
173 
174  for (size_t i = 0; i < dm.layout_v.size(); ++i)
175  {
176  bv = dm.layout_v[i].get();
177  if (!bv)
178  {
179  bv = new bm::bvector<>(bm::BM_GAP);
180  dm.layout_v[i] = unique_ptr<bm::bvector<> >(bv);
181  // bv just created (empty) no need to do range check
182  bv->set_range(start, end);
183  set_feature_strand(dm, i, start, strand);
184 
185  return;
186  }
187  if (!bv->any_range(start, end)) // check if layout space is not used
188  {
189  bv->set_range(start, end); // add [start..end] coordinates
190  // set strand at the start of feature
191  set_feature_strand(dm, i, start, strand);
192 
193  return;
194  }
195  } // for i
196 
197  // not found, make new plane
198  //
199  bv = new bm::bvector<>(bm::BM_GAP);
200  dm.layout_v.emplace_back(std::unique_ptr<bm::bvector<> >(bv));
201  bv->set_range(start, end);
202  set_feature_strand(dm, dm.layout_v.size()-1, start, strand);
203 }
204 
205 /// Data model splicer
206 ///
207 
208 void splice_model(data_model& dm_target, const data_model& dm,
211  bool copy_strands)
212 {
213  const bm::bvector<>* bv; // layout
214  const rsc_vector_u8* strand_plane;
215 
216  size_t t_plane = 0;
217  for (size_t i = 0; i < dm.layout_v.size(); ++i)
218  {
219  bv = dm.layout_v[i].get();
220  if (bv)
221  {
222  bm::bvector<>::size_type start_pos;
223  bm::bvector<>::size_type end_pos;
224 
225  bool found = bm::find_interval_start(*bv, start, start_pos);
226  if (!found)
227  start_pos = start;
228  found = bm::find_interval_end(*bv, end, end_pos);
229  if (!found)
230  end_pos = end;
231 
232  unique_ptr<bm::bvector<>> bv_ptr(new bm::bvector<>(bm::BM_GAP));
233  bv_ptr->copy_range(*bv, start_pos, end_pos);
234  if (bv_ptr->any()) // copy range may have ended as empty
235  {
236  dm_target.add_layout(t_plane, bv_ptr.release());
237 
238  // slice the strands plane (if requested)
239  //
240  if (copy_strands)
241  {
242  if (i < dm.strand_v.size())
243  {
244  strand_plane = dm.strand_v[i].get();
245  if (strand_plane)
246  {
247  unique_ptr<rsc_vector_u8> strand_ptr(new rsc_vector_u8());
248  strand_ptr->copy_range(*strand_plane, start_pos, end_pos);
249  dm_target.add_strand(t_plane, strand_ptr.release());
250  }
251  }
252  }
253  ++t_plane;
254  } // if any()
255 
256  } // if bv
257  } // for i
258 
259 }
260 
261 
262 /// This is ASCII art "renderer" for the data model.
263 /// illustrates how to manipulate succinct data model to create graphics
264 ///
265 void print_model(const data_model& dm)
266 {
267  const bm::bvector<>* bv; // layout
268  const rsc_vector_u8* strand_plane;
269 
270  // Sequence on top is for purely decorative purposes
271  cout <<
272  "-------------------------------------------------------------------------"
273  << endl <<
274  "ATGTTAGCCCGCGCATATTATATATGTAGCGTATTAAGCGDGGAGATTACCCTTGCATTAGGTTANNNNNNNN"
275  << endl <<
276  "-------------------------------------------------------------------------"
277  << endl;
278 
279  for (size_t i = 0; i < dm.layout_v.size(); ++i)
280  {
281  bv = dm.layout_v[i].get();
282  if (bv)
283  {
284  strand_plane = i < dm.strand_v.size() ? dm.strand_v[i].get() : nullptr;
285  interval_enumerator_type ien(*bv);
286  if (ien.valid())
287  {
288  bm::bvector<>::size_type spaces = 0;
289  do
290  {
291  auto st = ien.start(); auto end = ien.end();
292  char ch_strand = '?';
293  if (strand_plane)
294  {
295  auto strand = strand_plane->get(st);
296  switch (strand)
297  {
298  case 0: ch_strand = '>'; break; // positive
299  case 1: ch_strand = '<'; break; // negative
300  default: break; // unknown strand
301  }
302  }
303  for (; spaces < st; ++spaces)
304  cout << " ";
305  for (bool first = true; st <= end; ++st, first = false)
306  {
307  if (st == end)
308  cout << ch_strand;
309  else
310  cout << (first ? ch_strand : '.');
311  } // for
312  spaces = end+1;
313  } while (ien.advance());
314  cout << endl;
315  }
316  }
317  } // for
318 }
319 
320 enum Strand { positive=0, negative=1, unknown=2 };
321 
322 
323 
324 int main(void)
325 {
326  try
327  {
328  data_model dm;
329 
330  // build the data model using succinct vectors
331  //
332  add_object(dm, 0, 0, negative);
333  add_object(dm, 5, 10, positive);
334  add_object(dm, 4, 70, negative);
335  add_object(dm, 15, 20, negative);
336  add_object(dm, 20, 30, positive);
337  add_object(dm, 16, 21, unknown);
338 
339  dm.optimize(); // run compression and build access index
340 
341  // View the model using toy ASCII art renderer
342  //
343  print_model(dm);
344 
345  // create a model splice for [5..10] range
346  // plus drop strand property (renderer will assume unknown)
347  //
348  {
349  data_model dm_splice;
350 
351  splice_model(dm_splice, dm, 5, 10, false);
352  dm_splice.optimize();
353 
354  cout << endl;
355  print_model(dm_splice);
356  }
357 
358  // create a model splice for [5..10] range
359  // now WITH strand property
360  //
361  {
362  data_model dm_splice;
363 
364  splice_model(dm_splice, dm, 5, 10, true);
365  dm_splice.optimize();
366 
367  cout << endl;
368  print_model(dm_splice);
369  }
370  }
371  catch(std::exception& ex)
372  {
373  std::cerr << ex.what() << std::endl;
374  return 1;
375  }
376 
377  return 0;
378 }
379 
bm::bvector::set_range
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left,right] Interval must be inside the bvector's siz...
Definition: bm.h:2184
bm::rsc_sparse_vector::set
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
Definition: bmsparsevec_compr.h:1001
data_model::layout_v
layout_vector_type layout_v
layout vector
Definition: xsample08.cpp:85
bm::sparse_vector
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:81
data_model::optimize
void optimize()
Optimize memory layoput, build index for faster read access.
Definition: xsample08.cpp:89
BM_DECLARE_TEMP_BLOCK
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
data_model::add_strand
void add_strand(size_t plane, rsc_vector_u8 *strand)
Definition: xsample08.cpp:124
splice_model
void splice_model(data_model &dm_target, const data_model &dm, bm::bvector<>::size_type start, bm::bvector<>::size_type end, bool copy_strands)
Data model splicer.
Definition: xsample08.cpp:208
bm::find_interval_start
bool find_interval_start(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Definition: bmintervals.h:315
interval_enumerator_type
bm::interval_enumerator< bm::bvector<> > interval_enumerator_type
Definition: xsample08.cpp:63
rsc_vector_u8
bm::rsc_sparse_vector< unsigned char, sparse_vector_u8 > rsc_vector_u8
Definition: xsample08.cpp:67
Strand
Strand
Definition: xsample08.cpp:320
bm::interval_enumerator::end
size_type end() const BMNOEXCEPT
Return interval end/right as bit-vector coordinate 011110 [left..right].
Definition: bmintervals.h:560
data_model
Data frame object, sued to buid succinct data model.
Definition: xsample08.cpp:76
bmsparsevec_compr.h
Compressed sparse container rsc_sparse_vector<> for integer types.
bm::bvector<>
starnds_vector_type
std::vector< std::unique_ptr< rsc_vector_u8 > > starnds_vector_type
Definition: xsample08.cpp:68
bm::interval_enumerator::start
size_type start() const BMNOEXCEPT
Return interval start/left as bit-vector coordinate 011110 [left..right].
Definition: bmintervals.h:551
bm::interval_enumerator::advance
bool advance()
Definition: bmintervals.h:717
data_model::strand_v
starnds_vector_type strand_v
strand planes vector
Definition: xsample08.cpp:86
negative
@ negative
Definition: xsample08.cpp:320
bm::rsc_sparse_vector::is_null
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
Definition: bmsparsevec_compr.h:1229
add_object
void add_object(data_model &dm, unsigned start, unsigned end, unsigned char strand)
Register new object in the data model: [start..end] + strand.
Definition: xsample08.cpp:166
layout_vector_type
std::vector< std::unique_ptr< bm::bvector<> > > layout_vector_type
Definition: xsample08.cpp:64
set_feature_strand
void set_feature_strand(data_model &dm, size_t plane, bm::bvector<>::size_type pos, unsigned char strand)
Definition: xsample08.cpp:142
bm::BM_GAP
@ BM_GAP
GAP compression is ON.
Definition: bmconst.h:144
bm::bvector::any_range
bool any_range(size_type left, size_type right) const BMNOEXCEPT
Returns true if any bits in the range are 1s (non-empty interval) Function uses closed interval [left...
Definition: bm.h:2891
data_model::add_layout
void add_layout(size_t plane, bm::bvector<> *bv)
Definition: xsample08.cpp:109
bmintervals.h
Algorithms for bit ranges and intervals.
sparse_vector_u8
bm::sparse_vector< unsigned char, bm::bvector<> > sparse_vector_u8
Definition: xsample08.cpp:66
bm::rsc_sparse_vector
Rank-Select compressed sparse vector.
Definition: bmsparsevec_compr.h:58
main
int main(void)
Definition: xsample08.cpp:324
print_model
void print_model(const data_model &dm)
This is ASCII art "renderer" for the data model.
Definition: xsample08.cpp:265
bm::interval_enumerator::valid
bool valid() const BMNOEXCEPT
Returns true if enumerator is valid (false if traversal is done)
Definition: bmintervals.h:568
bm::interval_enumerator
forward iterator class to traverse bit-vector as ranges
Definition: bmintervals.h:52
positive
@ positive
Definition: xsample08.cpp:320
unknown
@ unknown
Definition: xsample08.cpp:320
bm::bvector::size_type
bm::id_t size_type
Definition: bm.h:117
bm.h
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
bm::find_interval_end
bool find_interval_end(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Definition: bmintervals.h:438
bm::rsc_sparse_vector::get
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
Definition: bmsparsevec_compr.h:1216