BitMagic-C++
xsample08.cpp
Go to the documentation of this file.
1 /*
2 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 For more information please visit: http://bitmagic.io
17 */
18 
19 /** \example xsample08.cpp
20 
21  Example on intervals and how to use it for layout calculation.
22  As a use case this example uses genomics visualization for
23  features mapped into genomic coordinates.
24 
25  It is also illustartes vector model using coordinate ranges
26  or feature vectors. Various properties of the initial model acn be
27  dropped (sliced) to improve memory efficiency, better storage
28  or network transfer.
29 
30  This example does NOT do serialization of models (which is possible)
31  for the clarity of the sample code.
32 
33  \sa bm::bvector::set_range
34  \sa bm::bvector::any_range
35  \sa bm::bvector::copy_range
36  \sa bm::interval_enumerator
37  \sa bm::rsc_sparse_vector
38  \sa bm::rsc_sparse_vector::copy_range
39  \sa bm::find_interval_start
40  \sa bm::find_interval_end
41 
42  \sa sample22.cpp
43  \sa sample23.cpp
44  \sa bvintervals
45 */
46 
47 /*! \file sample23.cpp
48  \brief Example: interval_enumerator<> - interator class for intervals
49 */
50 
51 #include <iostream>
52 #include <utility>
53 #include <vector>
54 #include <memory>
55 #include <cassert>
56 
57 #include "bm.h"
58 #include "bmintervals.h"
59 #include "bmsparsevec_compr.h"
60 #include "bmundef.h" /* clear the pre-proc defines from BM */
61 
62 using namespace std;
63 
65 typedef std::vector<std::unique_ptr<bm::bvector<> > > layout_vector_type;
66 
69 typedef std::vector<std::unique_ptr<rsc_vector_u8> > starnds_vector_type;
70 
71 
72 // -------------------------------------------------------------------
73 
74 /// Data frame object, sued to buid succinct data model
75 ///
76 ///
77 struct data_model
78 {
79  /// Optimize memory layoput, build index for faster read access
80  ///
81  void optimize();
82 
83  void add_layout(size_t plane, bm::bvector<>* bv);
84  void add_strand(size_t plane, rsc_vector_u8* strand);
85 
86  layout_vector_type layout_v; ///< layout vector
87  starnds_vector_type strand_v; ///< strand planes vector
88 };
89 
91 {
92  BM_DECLARE_TEMP_BLOCK(tb); // explicit temp for faster optimization
93  for (size_t i = 0; i < layout_v.size(); ++i)
94  {
95  auto bv = layout_v[i].get();
96  if (bv)
97  bv->optimize(tb); // memory optimization
98  } // for i
99  for (size_t i = 0; i < strand_v.size(); ++i)
100  {
101  auto strand_plane = strand_v[i].get();
102  if (strand_plane)
103  {
104  strand_plane->optimize(tb);
105  strand_plane->sync(); // build rank-select idx (faster read access)
106  }
107  } // for i
108 }
109 
110 void data_model::add_layout(size_t plane, bm::bvector<>* bv)
111 {
112  unique_ptr<bm::bvector<> > ap(bv);
113  if (layout_v.size() == plane) // push back requested
114  {
115  layout_v.emplace_back(move(ap));
116  }
117  else
118  {
119  while (layout_v.size() < plane) // this is crude resize() but it would do
120  layout_v.emplace_back(new bm::bvector<>(bm::BM_GAP));
121  layout_v[plane] = std::move(ap);
122  }
123 }
124 
125 void data_model::add_strand(size_t plane, rsc_vector_u8* strand)
126 {
127  unique_ptr<rsc_vector_u8 > ap(strand);
128  if (strand_v.size() == plane) // push back requested
129  {
130  strand_v.emplace_back(move(ap));
131  }
132  else
133  {
134  while (strand_v.size() < plane) // this is crude resize() but it would do
135  strand_v.emplace_back(new rsc_vector_u8());
136  strand_v[plane] = std::move(ap);
137  }
138 }
139 
140 
141 // -------------------------------------------------------------------
142 
143 void set_feature_strand(data_model& dm, size_t plane,
145  unsigned char strand)
146 {
147  if (!strand)
148  return;
149  while (dm.strand_v.size() <= plane) // add planes
150  {
151  std::unique_ptr<rsc_vector_u8> p2(new rsc_vector_u8());
152  dm.strand_v.emplace_back(move(p2));
153  }
154 
155  rsc_vector_u8* strand_plane = dm.strand_v[plane].get();
156  if (!strand_plane)
157  {
158  strand_plane = new rsc_vector_u8();
159  dm.strand_v[plane] = unique_ptr<rsc_vector_u8 >(strand_plane);
160  }
161  assert(strand_plane->is_null(pos));
162  strand_plane->set(pos, strand);
163 }
164 
165 /// Register new object in the data model: [start..end] + strand
166 ///
168  unsigned start, unsigned end,
169  unsigned char strand)
170 {
171  assert(start <= end);
172 
173  bm::bvector<>* bv; // layout plane vector
174 
175  for (size_t i = 0; i < dm.layout_v.size(); ++i)
176  {
177  bv = dm.layout_v[i].get();
178  if (!bv)
179  {
180  bv = new bm::bvector<>(bm::BM_GAP);
181  dm.layout_v[i] = unique_ptr<bm::bvector<> >(bv);
182  // bv just created (empty) no need to do range check
183  bv->set_range(start, end);
184  set_feature_strand(dm, i, start, strand);
185 
186  return;
187  }
188  if (!bv->any_range(start, end)) // check if layout space is not used
189  {
190  bv->set_range(start, end); // add [start..end] coordinates
191  // set strand at the start of feature
192  set_feature_strand(dm, i, start, strand);
193 
194  return;
195  }
196  } // for i
197 
198  // not found, make new plane
199  //
200  bv = new bm::bvector<>(bm::BM_GAP);
201  dm.layout_v.emplace_back(std::unique_ptr<bm::bvector<> >(bv));
202  bv->set_range(start, end);
203  set_feature_strand(dm, dm.layout_v.size()-1, start, strand);
204 }
205 
206 /// Data model splicer
207 ///
208 
209 void splice_model(data_model& dm_target, const data_model& dm,
212  bool copy_strands)
213 {
214  const bm::bvector<>* bv; // layout
215  const rsc_vector_u8* strand_plane;
216 
217  size_t t_plane = 0;
218  for (size_t i = 0; i < dm.layout_v.size(); ++i)
219  {
220  bv = dm.layout_v[i].get();
221  if (bv)
222  {
223  bm::bvector<>::size_type start_pos;
224  bm::bvector<>::size_type end_pos;
225 
226  bool found = bm::find_interval_start(*bv, start, start_pos);
227  if (!found)
228  start_pos = start;
229  found = bm::find_interval_end(*bv, end, end_pos);
230  if (!found)
231  end_pos = end;
232 
233  unique_ptr<bm::bvector<>> bv_ptr(new bm::bvector<>(bm::BM_GAP));
234  bv_ptr->copy_range(*bv, start_pos, end_pos);
235  if (bv_ptr->any()) // copy range may have ended as empty
236  {
237  dm_target.add_layout(t_plane, bv_ptr.release());
238 
239  // slice the strands plane (if requested)
240  //
241  if (copy_strands)
242  {
243  if (i < dm.strand_v.size())
244  {
245  strand_plane = dm.strand_v[i].get();
246  if (strand_plane)
247  {
248  unique_ptr<rsc_vector_u8> strand_ptr(new rsc_vector_u8());
249  strand_ptr->copy_range(*strand_plane, start_pos, end_pos);
250  dm_target.add_strand(t_plane, strand_ptr.release());
251  }
252  }
253  }
254  ++t_plane;
255  } // if any()
256 
257  } // if bv
258  } // for i
259 
260 }
261 
262 
263 /// This is ASCII art "renderer" for the data model.
264 /// illustrates how to manipulate succinct data model to create graphics
265 ///
266 void print_model(const data_model& dm)
267 {
268  const bm::bvector<>* bv; // layout
269  const rsc_vector_u8* strand_plane;
270 
271  // Sequence on top is for purely decorative purposes
272  cout <<
273  "-------------------------------------------------------------------------"
274  << endl <<
275  "ATGTTAGCCCGCGCATATTATATATGTAGCGTATTAAGCGDGGAGATTACCCTTGCATTAGGTTANNNNNNNN"
276  << endl <<
277  "-------------------------------------------------------------------------"
278  << endl;
279 
280  for (size_t i = 0; i < dm.layout_v.size(); ++i)
281  {
282  bv = dm.layout_v[i].get();
283  if (bv)
284  {
285  strand_plane = i < dm.strand_v.size() ? dm.strand_v[i].get() : nullptr;
286  interval_enumerator_type ien(*bv);
287  if (ien.valid())
288  {
289  bm::bvector<>::size_type spaces = 0;
290  do
291  {
292  auto st = ien.start(); auto end = ien.end();
293  char ch_strand = '?';
294  if (strand_plane)
295  {
296  auto strand = strand_plane->get(st);
297  switch (strand)
298  {
299  case 0: ch_strand = '>'; break; // positive
300  case 1: ch_strand = '<'; break; // negative
301  default: break; // unknown strand
302  }
303  }
304  for (; spaces < st; ++spaces)
305  cout << " ";
306  for (bool first = true; st <= end; ++st, first = false)
307  {
308  if (st == end)
309  cout << ch_strand;
310  else
311  cout << (first ? ch_strand : '.');
312  } // for
313  spaces = end+1;
314  } while (ien.advance());
315  cout << endl;
316  }
317  }
318  } // for
319 }
320 
321 enum Strand { positive=0, negative=1, unknown=2 };
322 
323 
324 
325 int main(void)
326 {
327  try
328  {
329  data_model dm;
330 
331  // build the data model using succinct vectors
332  //
333  add_object(dm, 0, 0, negative);
334  add_object(dm, 5, 10, positive);
335  add_object(dm, 4, 70, negative);
336  add_object(dm, 15, 20, negative);
337  add_object(dm, 20, 30, positive);
338  add_object(dm, 16, 21, unknown);
339 
340  dm.optimize(); // run compression and build access index
341 
342  // View the model using toy ASCII art renderer
343  //
344  print_model(dm);
345 
346  // create a model splice for [5..10] range
347  // plus drop strand property (renderer will assume unknown)
348  //
349  {
350  data_model dm_splice;
351 
352  splice_model(dm_splice, dm, 5, 10, false);
353  dm_splice.optimize();
354 
355  cout << endl;
356  print_model(dm_splice);
357  }
358 
359  // create a model splice for [5..10] range
360  // now WITH strand property
361  //
362  {
363  data_model dm_splice;
364 
365  splice_model(dm_splice, dm, 5, 10, true);
366  dm_splice.optimize();
367 
368  cout << endl;
369  print_model(dm_splice);
370  }
371  }
372  catch(std::exception& ex)
373  {
374  std::cerr << ex.what() << std::endl;
375  return 1;
376  }
377 
378  return 0;
379 }
380 
void add_strand(size_t plane, rsc_vector_u8 *strand)
Definition: xsample08.cpp:125
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
void add_layout(size_t plane, bm::bvector<> *bv)
Definition: xsample08.cpp:110
bool find_interval_end(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Definition: bmintervals.h:438
bm::sparse_vector< unsigned char, bm::bvector<> > sparse_vector_u8
Definition: xsample08.cpp:67
int main(void)
Definition: xsample08.cpp:325
void print_model(const data_model &dm)
This is ASCII art "renderer" for the data model.
Definition: xsample08.cpp:266
void set_feature_strand(data_model &dm, size_t plane, bm::bvector<>::size_type pos, unsigned char strand)
Definition: xsample08.cpp:143
bm::id_t size_type
Definition: bm.h:117
Rank-Select compressed sparse vector.
Strand
Definition: xsample08.cpp:321
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
bool any_range(size_type left, size_type right) const BMNOEXCEPT
Returns true if any bits in the range are 1s (non-empty interval) Function uses closed interval [left...
Definition: bm.h:2889
pre-processor un-defines to avoid global space pollution (internal)
bm::rsc_sparse_vector< unsigned char, sparse_vector_u8 > rsc_vector_u8
Definition: xsample08.cpp:68
GAP compression is ON.
Definition: bmconst.h:144
void splice_model(data_model &dm_target, const data_model &dm, bm::bvector<>::size_type start, bm::bvector<>::size_type end, bool copy_strands)
Data model splicer.
Definition: xsample08.cpp:209
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
size_type end() const BMNOEXCEPT
Return interval end/right as bit-vector coordinate 011110 [left..right].
Definition: bmintervals.h:560
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
starnds_vector_type strand_v
strand planes vector
Definition: xsample08.cpp:87
sparse vector with runtime compression using bit transposition method
Definition: bmsparsevec.h:81
Algorithms for bit ranges and intervals.
Data frame object, sued to buid succinct data model.
Definition: xsample08.cpp:77
bool is_null(size_type idx) const BMNOEXCEPT
test if specified element is NULL
std::vector< std::unique_ptr< bm::bvector<> > > layout_vector_type
Definition: xsample08.cpp:65
layout_vector_type layout_v
layout vector
Definition: xsample08.cpp:86
void optimize()
Optimize memory layoput, build index for faster read access.
Definition: xsample08.cpp:90
bvector< Alloc > & set_range(size_type left, size_type right, bool value=true)
Sets all bits in the specified closed interval [left,right] Interval must be inside the bvector&#39;s siz...
Definition: bm.h:2184
bm::interval_enumerator< bm::bvector<> > interval_enumerator_type
Definition: xsample08.cpp:64
forward iterator class to traverse bit-vector as ranges
Definition: bmintervals.h:52
bool valid() const BMNOEXCEPT
Returns true if enumerator is valid (false if traversal is done)
Definition: bmintervals.h:568
size_type start() const BMNOEXCEPT
Return interval start/left as bit-vector coordinate 011110 [left..right].
Definition: bmintervals.h:551
bool find_interval_start(const BV &bv, typename BV::size_type from, typename BV::size_type &pos) BMNOEXCEPT
Reverse find index of first 1 bit gap (01110) starting from position Reverse scan for the first 1 in ...
Definition: bmintervals.h:315
Compressed sparse container rsc_sparse_vector<> for integer types.
void add_object(data_model &dm, unsigned start, unsigned end, unsigned char strand)
Register new object in the data model: [start..end] + strand.
Definition: xsample08.cpp:167
std::vector< std::unique_ptr< rsc_vector_u8 > > starnds_vector_type
Definition: xsample08.cpp:69