BitMagic-C++
xsample02.cpp
Go to the documentation of this file.
1/*
2Copyright(c) 2018 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
3
4Licensed under the Apache License, Version 2.0 (the "License");
5you may not use this file except in compliance with the License.
6You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10Unless required by applicable law or agreed to in writing, software
11distributed under the License is distributed on an "AS IS" BASIS,
12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13See the License for the specific language governing permissions and
14limitations under the License.
15
16For more information please visit: http://bitmagic.io
17*/
18
19/** \example xsample02.cpp
20 Counting sort using bit-vector and sparse vector to build histogram of unsigned ints.
21 Benchmark compares different histogram buiding techniques using BitMagic and std::sort()
22
23 Histogram construction, based on integer events is a common problem,
24 this demo studies different approaches, potential for parallelization and other
25 aspects.
26*/
27
28/*! \file xsample02.cpp
29 \brief Example: sparse_vector<> used for counting sort / historgam construction
30*/
31
32
33#include <iostream>
34#include <memory>
35#include <map>
36#include <vector>
37#include <chrono>
38#include <algorithm>
39#include <random>
40#include <stdexcept>
41
42#include <future>
43#include <thread>
44
45using namespace std;
46
47#include "bm.h"
48#include "bmtimer.h"
49#include "bmsparsevec.h"
50#include "bmundef.h" /* clear the pre-proc defines from BM */
51
52// ----------------------------------------------------
53// Global parameters and types
54// ----------------------------------------------------
55
56const unsigned value_max = 1250000; // range of variants of events [0..max]
57const unsigned test_size = 250000000; // number of events (ints) to generate
58
59// -------------------------------------------
60// Random generator
61// -------------------------------------------
62
63std::random_device rand_dev;
64std::mt19937 gen(rand_dev());
65std::uniform_int_distribution<> rand_dis(1, value_max); // generate uniform numebrs for [1, vector_max]
66
67
69typedef std::map<unsigned, unsigned> map_u32;
70
71
72// timing storage for benchmarking
74
75
76// -------------------------------------------
77// Counting sort / histogram construction (std::map)
78// -------------------------------------------
79
80static
81void sort_map(map_u32& hmap, const std::vector<unsigned>& vin)
82{
83 for (auto v : vin)
84 {
85 hmap[v]++;
86 }
87}
88
89
90// -------------------------------------------
91// Counting sort / histogram construction (naive)
92// -------------------------------------------
93
94// This sorting method uses sparse_vector<> as a storage but implements increment
95// as an get-inc-put operations (decoding-encoding every value in the sum)
96//
97static
98void counting_sort_naive(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
99{
100 for (auto v : vin)
101 {
102 auto count = sv_out.get(v);
103 sv_out.set(v, count + 1);
104 }
105}
106
107// -------------------------------------------
108// Counting sort / histogram construction
109// -------------------------------------------
110
111// This sorting method uses sparse_vector<> as a storage but implements increment
112// but increment was implemented as a bm::sparse_vector::inc() method
113// which in turn is based on uses bvector<>::inc()
114//
115// This approach is faster than decoding-encoding used in naive counting sort
116//
117static
118void counting_sort(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
119{
120 for(auto v : vin)
121 sv_out.inc(v);
122}
123
124// --------------------------------------------------
125// Counting sort / histogram construction (parallel)
126// --------------------------------------------------
127
128// parallel subproblem for all even numbers: (v & 1) == 0
129inline
130unsigned counting_sort_subbatch(sparse_vector_u32* sv_out, const std::vector<unsigned>* vin)
131{
132 for (size_t i = 0; i < vin->size(); i++)
133 {
134 auto v = (*vin)[i];
135 if ((v & 1) == 0)
136 sv_out->inc(v);
137 }
138 return 0;
139}
140
141// Parallel histogram construction uses a very simple divide and conquer technique
142// splitting by even/odd numbers, uses std::async() for parallelization
143//
144// (should be possible to do a lot better than that)
145//
146static
147void counting_sort_parallel(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
148{
150 // process evens in parallel
151 std::future<unsigned> f1 = std::async(std::launch::async, counting_sort_subbatch, &sv_out2, &vin);
152
153 // process all odd elements
154 for (size_t i = 0; i < vin.size(); i++)
155 {
156 auto v = vin[i];
157 if (v & 1)
158 sv_out.inc(v);
159 }
160 f1.wait();
161
162 // merge effectively performs logical OR on all plains, only it
163 // borrows memory blocks from the argument vector, so it gets changed
164 // (which is ok here, since sv_out2 is a temporary)
165 //
166 sv_out.merge(sv_out2);
167}
168
169// Test utility. It also illustrates histogram access method
170//
171static
173{
176
177 for (; en.valid(); ++en)
178 {
179 unsigned v = *en;
180 unsigned cnt = sv.get(v);
181 for (unsigned j = 0; j < cnt; ++j)
182 {
183 std::cout << v << ", ";
184 } // for
185 } // for en
186 std::cout << std::endl;
187}
188
189// Test utility for std::map
190//
191static
192void print_sorted(const map_u32& hmap)
193{
194 map_u32::const_iterator it = hmap.begin();
195 map_u32::const_iterator it_end = hmap.end();
196
197 for (; it != it_end; ++it)
198 {
199 unsigned v = it->first;
200 unsigned cnt = it->second;
201 for (unsigned j = 0; j < cnt; ++j)
202 {
203 std::cout << v << ", ";
204 } // for
205 } // for en
206 std::cout << std::endl;
207}
208
209
210// build histogram using sorted vector
211//
212static
213void build_histogram(sparse_vector_u32& sv_out, const std::vector<unsigned>& vin)
214{
215 if (vin.empty())
216 return;
217 unsigned start = vin[0];
218 unsigned count = 0; // histogram counter
219 for (auto v : vin)
220 {
221 if (v == start)
222 {
223 ++count;
224 }
225 else
226 {
227 sv_out.set(start, count);
228 start = v; count = 1;
229 }
230 }
231 if (count)
232 {
233 sv_out.set(start, count);
234 }
235}
236
237
238
239
240int main(void)
241{
242 try
243 {
244 // try simple input vector as a model
245 //
246 {
247 std::vector<unsigned> v {10, 1, 5, 4, 8, 8, 8} ;
248 sparse_vector_u32 r_sv(bm::use_null); // result vector
249
250 counting_sort(r_sv, v);
251
252 print_sorted(r_sv); // 1, 4, 5, 8, 8, 8, 10,
253
255 counting_sort_parallel(p_sv, v);
256 print_sorted(r_sv);
257
258 map_u32 h_map;
259 sort_map(h_map, v);
260 print_sorted(h_map);
261
262
263 std::sort(v.begin(), v.end());
264 sparse_vector_u32 h_sv(bm::use_null); // histogram vector
265 build_histogram(h_sv, v);
266 if (!r_sv.equal(h_sv))
267 {
268 std::cerr << "Error: Histogram comparison failed!" << std::endl;
269 print_sorted(h_sv);
270 return 1;
271 }
272
273 }
274
275 // run benchmarks
276 //
277 std::vector<unsigned> v;
278
279 // generate vector of random numbers
280 for (unsigned i = 0; i < test_size; ++i)
281 {
282 v.push_back(unsigned(rand_dis(gen)));
283 }
284 std::cout << "test vector generation ok" << std::endl;
285
290 map_u32 h_map;
291
292 {
293 bm::chrono_taker tt1(cout, "1. counting sort ", 1, &timing_map);
294 counting_sort(r_sv, v);
295 }
296
297 {
298 bm::chrono_taker tt1(cout, "3. counting sort (naive) ", 1, &timing_map);
299 counting_sort_naive(n_sv, v);
300 }
301
302 {
303 bm::chrono_taker tt1(cout, "4. counting sort (parallel) ", 1, &timing_map);
304 counting_sort_parallel(p_sv, v);
305 }
306
307 {
308 bm::chrono_taker tt1(cout, "5. counting sort (map) ", 1, &timing_map);
309 sort_map(h_map, v);
310 }
311
312 {
313 bm::chrono_taker tt1(cout, "2. std::sort() + histogram", 1, &timing_map);
314 std::sort(v.begin(), v.end());
315 build_histogram(h_sv, v);
316 }
317
318
319 // quality assurance checks
320 //
321 if (!r_sv.equal(h_sv) || !n_sv.equal(h_sv))
322 {
323 std::cerr << "Error: Histogram comparison failed!" << std::endl;
324 return 1;
325 }
326 if (!r_sv.equal(p_sv))
327 {
328 std::cerr << "Error: Histogram comparison failed for parallel sort!" << std::endl;
329 return 1;
330 }
331
332 // compute memory consumption of sparse vector
333 {
334 std::cout << std::endl;
335
337 sparse_vector_u32::statistics st;
339
340 std::cout << "Sparse vector memory usage:" << st.memory_used / (1024*1024)<< "MB" << std::endl;
341 std::cout << "vector<unsigned> usage:" << v.size() * sizeof(v[0]) / (1024 * 1024) << "MB" << std::endl << std::endl;
342 }
343
344
346
347 }
348 catch(std::exception& ex)
349 {
350 std::cerr << ex.what() << std::endl;
351 return 1;
352 }
353
354 return 0;
355}
356
Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
Timing utilities for benchmarking (internal)
pre-processor un-defines to avoid global space pollution (internal)
const bvector_type * get_null_bvector() const BMNOEXCEPT
Get bit-vector of assigned values or NULL (if not constructed that way)
Definition: bmbmatrix.h:430
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
@ opt_compress
compress blocks when possible (GAP/prefix sum)
Definition: bm.h:137
friend class enumerator
Definition: bm.h:793
enumerator first() const
Returns enumerator pointing on the first non-zero bit.
Definition: bm.h:1849
Utility class to collect performance measurements and statistics.
Definition: bmtimer.h:41
std::map< std::string, statistics > duration_map_type
test name to duration map
Definition: bmtimer.h:66
static void print_duration_map(TOut &tout, const duration_map_type &dmap, format fmt=ct_time)
Definition: bmtimer.h:150
succinct sparse vector with runtime compression using bit-slicing / transposition method
Definition: bmsparsevec.h:87
void inc(size_type idx)
increment specified element by one
Definition: bmsparsevec.h:1998
value_type get(size_type idx) const BMNOEXCEPT
get specified element without bounds checking
Definition: bmsparsevec.h:1741
bool equal(const sparse_vector< Val, BV > &sv, bm::null_support null_able=bm::use_null) const BMNOEXCEPT
check if another sparse vector has the same content and size
Definition: bmsparsevec.h:2246
void set(size_type idx, value_type v)
set specified element with bounds checking and automatic resize
Definition: bmsparsevec.h:1804
sparse_vector< Val, BV > & merge(sparse_vector< Val, BV > &sv)
merge with another sparse vector using OR operation Merge is different from join(),...
Definition: bmsparsevec.h:2154
void optimize(bm::word_t *temp_block=0, typename bvector_type::optmode opt_mode=bvector_type::opt_compress, typename sparse_vector< Val, BV >::statistics *stat=0)
run memory optimization for all vector planes
Definition: bmsparsevec.h:2094
@ use_null
support "non-assigned" or "NULL" logic
Definition: bmconst.h:229
const unsigned test_size
Definition: xsample02.cpp:57
static void counting_sort_parallel(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
Definition: xsample02.cpp:147
std::map< unsigned, unsigned > map_u32
Definition: xsample02.cpp:69
static void counting_sort(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
Definition: xsample02.cpp:118
std::random_device rand_dev
Definition: xsample02.cpp:63
static void build_histogram(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
Definition: xsample02.cpp:213
bm::sparse_vector< unsigned, bm::bvector<> > sparse_vector_u32
Definition: xsample02.cpp:68
int main(void)
Definition: xsample02.cpp:240
static void counting_sort_naive(sparse_vector_u32 &sv_out, const std::vector< unsigned > &vin)
Definition: xsample02.cpp:98
static void sort_map(map_u32 &hmap, const std::vector< unsigned > &vin)
Definition: xsample02.cpp:81
unsigned counting_sort_subbatch(sparse_vector_u32 *sv_out, const std::vector< unsigned > *vin)
Definition: xsample02.cpp:130
bm::chrono_taker ::duration_map_type timing_map
Definition: xsample02.cpp:73
std::mt19937 gen(rand_dev())
static void print_sorted(const sparse_vector_u32 &sv)
Definition: xsample02.cpp:172
const unsigned value_max
Definition: xsample02.cpp:56
std::uniform_int_distribution rand_dis(1, value_max)