#include <stdio.h>
#include <time.h>

#include <fstream>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

#include "bmdbg.h"
using namespace std;
// Prints the program banner and the supported command-line options
// (-fa, -j, -timing) to std::cerr.
// NOTE(review): the declarator that belongs between `static` and the opening
// brace is not present in this listing; restore it from the full source.
static
{
std::cerr
<< "BitMagic DNA Index Build Sample (c) 2018" << std::endl
<< "-fa file-name -- input FASTA file" << std::endl
<< "-j number -- number of parallel jobs to run" << std::endl
<< "-timing -- collect timings" << std::endl
;
}
// Scans the command line for the -h, -fa, -j and -timing options.
// Returns 1 (after printing a message to std::cerr) when -fa or -j is the last
// argument and therefore has no value; returns 0 otherwise.
// NOTE(review): the declarator is not present in this listing; the body reads
// `argc` and `argv`, so it is presumably
// `static int parse_args(int argc, char *argv[])` -- confirm against the full source.
static
{
// argv[0] is skipped
for (int i = 1; i < argc; ++i)
{
std::string arg = argv[i];
if ((arg == "-h") || (arg == "--help"))
{
// help request: scanning stops here with a 0 return
return 0;
}
if (arg == "-fa" || arg == "--fa")
{
// the option needs one more argument after it
if (i + 1 < argc)
{
// NOTE(review): empty in this listing -- presumably the next argument is
// consumed as the FASTA file name here; confirm against the full source
}
else
{
std::cerr << "Error: -fa requires file name" << std::endl;
return 1;
}
continue;
}
if (arg == "-j" || arg == "--j")
{
// the option needs one more argument after it
if (i + 1 < argc)
{
// NOTE(review): empty in this listing -- presumably the next argument is
// consumed as the job count here; confirm against the full source
}
else
{
std::cerr << "Error: -j requires number of jobs" << std::endl;
return 1;
}
continue;
}
// NOTE(review): the statement controlled by this condition is not present in
// this listing -- presumably it switches timing collection on
if (arg == "-timing" || arg == "--timing" || arg == "-t" || arg == "--t")
}
return 0;
}
/// Load the sequence letters of a FASTA file into a flat character vector.
///
/// Header lines (first character '>') and empty lines are skipped; the
/// characters of every other line are appended to seq_vect in file order.
/// A carriage return left at the end of a line by CRLF line endings is
/// dropped, so it neither ends up in the sequence nor shifts the positions
/// of the letters that follow it.
///
/// @param fname     path of the FASTA file to read
/// @param seq_vect  output vector; emptied first, then filled
/// @return 0 on success, -1 when the file cannot be opened
static
int load_FASTA(
const std::string& fname, std::vector<char>& seq_vect)
{
    seq_vect.clear();
    std::ifstream fin(fname.c_str(), std::ios::in);
    if (!fin.good())
        return -1;

    std::string line;
    while (std::getline(fin, line))
    {
        // getline removes only '\n'; a file with CRLF endings leaves '\r' behind
        if (!line.empty() && line.back() == '\r')
            line.pop_back();
        if (line.empty() || line.front() == '>')
            continue;
        seq_vect.insert(seq_vect.end(), line.begin(), line.end());
    }
    return 0;
}
{
public:
void Build(
const vector<char>& sequence)
{
for (size_t i = 0; i < sequence.size(); ++i)
{
unsigned pos = unsigned(i);
switch (sequence[i])
{
case 'A':
iA = pos;
break;
case 'C':
iC = pos;
break;
case 'G':
iG = pos;
break;
case 'T':
iT = pos;
break;
case 'N':
iN = pos;
break;
default:
break;
}
}
}
// Body of an index-building member function: one pass over `sequence`,
// assigning the position of every A / C / G / T / N letter to the matching
// inserter iA / iC / iG / iT / iN; other characters are ignored.
// NOTE(review): the declarator and the declarations of iA..iN are not present
// in this listing -- presumably this is
// `void BuildBulk(const vector<char>& sequence)`; confirm against the full source.
{
for (size_t i = 0; i < sequence.size(); ++i)
{
// positions are narrowed from size_t to unsigned
unsigned pos = unsigned(i);
switch (sequence[i])
{
case 'A':
iA = pos;
break;
case 'C':
iC = pos;
break;
case 'G':
iG = pos;
break;
case 'T':
iT = pos;
break;
case 'N':
iN = pos;
break;
default:
// not an indexed letter
break;
}
}
}
/// Build fingerprint bit-vectors using parallel processing.
///
/// The sequence is cut into consecutive position ranges. Each range is handed
/// to a Func task started with std::async(std::launch::async), and the call
/// returns once every task has finished. An exception thrown inside a task is
/// rethrown to the caller.
///
/// @param sequence  input sequence, one letter per element
/// @param threads   number of parallel jobs; at most this many tasks are started
void BuildParallel(
const vector<char>& sequence,
unsigned threads)
{
    // Task functor: indexes the letters at positions [from, to) of the source
    // sequence.
    struct Func
    {
        const std::vector<char>* src_sequence;
        // NOTE(review): the constructor head and the `target_idx` member named
        // in this initializer list are not present in this listing.
        : target_idx(idx), src_sequence(&src) {}
        void operator() (size_t from, size_t to)
        {
            const vector<char>& sequence = *src_sequence;
            {
                // NOTE(review): the declarations of iA..iN are not present in
                // this listing.
                for (size_t i = from; i < sequence.size() && (i < to); ++i)
                {
                    unsigned pos = unsigned(i);
                    switch (sequence[i])
                    {
                    case 'A':
                        iA = pos;
                        break;
                    case 'C':
                        iC = pos;
                        break;
                    case 'G':
                        iG = pos;
                        break;
                    case 'T':
                        iT = pos;
                        break;
                    case 'N':
                        iN = pos;
                        break;
                    default:
                        break;
                    }
                }
                iA.flush();
                iC.flush();
                iT.flush();
                iG.flush();
                iN.flush();
                // NOTE(review): nothing follows the flush calls in this
                // listing -- presumably the task's results are merged into the
                // target index here; confirm against the full source.
            }
        }
    };

    if (threads <= 1)
    {
        // NOTE(review): as listed, nothing is built on this path -- presumably
        // a single-threaded build call is missing here; confirm against the
        // full source.
        return;
    }

    const size_t total = sequence.size();
    // Chunk length rounded up. Floor division would give 0 whenever the
    // sequence has fewer letters than `threads`, and the launch loop below
    // would then never advance. Rounding up also keeps the task count at or
    // below `threads`.
    const size_t range = (total + threads - 1) / threads;

    std::vector<std::future<void> > futures;
    futures.reserve(threads);
    // size_t offsets: `k + range` cannot wrap the way an unsigned sum could
    for (size_t k = 0; k < total; k += range)
    {
        futures.emplace_back(std::async(std::launch::async,
                                        Func(this, sequence), k, k + range));
    }
    for (auto& e : futures)
    {
        // get() instead of wait(): a task's exception is rethrown here rather
        // than silently discarded
        e.get();
    }
}
// Body of a per-letter, mutex-guarded operation: `letter` selects one of five
// mutexes, which is held for the duration of the matching case scope.
// NOTE(review): the declarator is not present in this listing -- presumably
// `void MergeVector(char letter, bm::bvector<> &bv)`; confirm against the full source.
{
// Function-local statics: one mutex per letter
static std::mutex mtx_A;
static std::mutex mtx_T;
static std::mutex mtx_G;
static std::mutex mtx_C;
static std::mutex mtx_N;
switch (letter)
{
case 'A':
{
// released at the closing brace of this scope
std::lock_guard<std::mutex> guard(mtx_A);
// NOTE(review): no statement follows the guard in this listing -- presumably
// the merge into the 'A' vector belongs here
}
break;
case 'C':
{
std::lock_guard<std::mutex> guard(mtx_C);
// NOTE(review): guarded statement not present in this listing
}
break;
case 'G':
{
std::lock_guard<std::mutex> guard(mtx_G);
// NOTE(review): guarded statement not present in this listing
}
break;
case 'T':
{
std::lock_guard<std::mutex> guard(mtx_T);
// NOTE(review): guarded statement not present in this listing
}
break;
case 'N':
{
std::lock_guard<std::mutex> guard(mtx_N);
// NOTE(review): guarded statement not present in this listing
}
break;
default:
// other letters: nothing to do
break;
}
}
// Body of a per-letter lookup that ends by throwing std::runtime_error
// ("Error. Invalid letter!").
// NOTE(review): the declarator is not present in this listing -- presumably
// `const bm::bvector<>& GetVector(char letter) const`; confirm against the full source.
// NOTE(review): as listed, every case label falls through to `break`, so the
// throw is reached for every letter -- presumably a return statement per
// letter is missing from each case.
{
switch (letter)
{
case 'A':
case 'C':
case 'G':
case 'T':
case 'N':
default:
break;
}
throw runtime_error("Error. Invalid letter!");
}
private:
};
// Correctness check run once per letter: throws std::runtime_error naming the
// first letter whose comparison result `cmp` is non-zero.
// NOTE(review): the declarator is not present in this listing -- presumably
// `static void fingerprint_compare(const DNA_FingerprintScanner &idx1,
// const DNA_FingerprintScanner &idx2)`; confirm against the full source.
static
{
// Letters that are checked; 'N' is not in this list
std::vector<char> letters {'A', 'T', 'G', 'C'};
for (char base : letters)
{
// NOTE(review): `cmp` is not declared in this listing -- presumably the result
// of comparing the two indexes' vectors for `base`
if (cmp != 0)
{
throw runtime_error(string("Fingerprint mismatch for:") + string(1, base));
}
}
}
// Program entry point.
// Returns 1 when fewer than two arguments follow the program name or when a
// std::exception is caught (its message is printed to std::cerr); returns a
// non-zero `ret` / `res` code when one is produced; returns 0 otherwise.
// NOTE(review): several statements of this body are not present in this
// listing (see the notes below); confirm against the full source.
int main(
int argc,
char *argv[])
{
// too few arguments to contain an option together with its value
if (argc < 3)
{
return 1;
}
std::vector<char> seq_vect;
try
{
// NOTE(review): `ret` is not declared in this listing -- presumably the
// result of parsing the command line
if (ret != 0)
return ret;
{
// NOTE(review): `res` is not declared in this listing -- presumably the
// result of loading the FASTA file into seq_vect
if (res != 0)
return res;
std::cout << "FASTA sequence size=" << seq_vect.size() << std::endl;
// NOTE(review): the next two scopes are empty in this listing
{
}
{
}
}
{
std::cout << std::endl << "Performance:" << std::endl;
}
}
catch (std::exception& ex)
{
std::cerr << "Error:" << ex.what() << std::endl;
return 1;
}
return 0;
}
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
Timing utilities for benchmarking (internal)
pre-processor un-defines to avoid global space pollution (internal)
Utility for keeping all DNA fingerprint vectors and searching them using various techniques.
void BuildParallel(const vector< char > &sequence, unsigned threads)
Build fingerprint bit-vectors using bulk insert iterator and parallel processing.
void Build(const vector< char > &sequence)
Build fingerprint bit-vectors from the original sequence.
void BuildBulk(const vector< char > &sequence)
Build index using bulk insert iterator.
void MergeVector(char letter, bm::bvector<> &bv)
Thread sync bit-vector merge.
const bm::bvector & GetVector(char letter) const
Return fingerprint bit-vector.
Output iterator designed to set "ON" bits based on an input sequence of integers.
Output iterator designed to set "ON" bits based on an input sequence of integers (bit indices).
Bitvector Bit-vector container with runtime compression of bits.
void merge(bm::bvector< Alloc > &bvect)
Merge/move content from another vector.
insert_iterator inserter()
int compare(const bvector< Alloc > &bvect) const BMNOEXCEPT
Lexicographical comparison with a bitvector.
Utility class to collect performance measurements and statistics.
std::map< std::string, statistics > duration_map_type
test name to duration map
static void print_duration_map(TOut &tout, const duration_map_type &dmap, format fmt=ct_time)
@ BM_SORTED
input set is sorted (ascending order)
int main(int argc, char *argv[])
static int parse_args(int argc, char *argv[])
static void fingerprint_compare(const DNA_FingerprintScanner &idx1, const DNA_FingerprintScanner &idx2)
Check correctness of indexes constructed using different methods.
bm::chrono_taker ::duration_map_type timing_map
static int load_FASTA(const std::string &fname, std::vector< char > &seq_vect)