-
Notifications
You must be signed in to change notification settings - Fork 0
/
05b_branchless.cpp
51 lines (44 loc) · 1.33 KB
/
05b_branchless.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <memory>
#include "benchmark/benchmark.h"
// Input data for the benchmark: two arrays of N pseudo-random values and two
// arrays of N boolean conditions, where the second condition array is the
// element-wise negation of the first. Everything is generated with rand(), so
// the contents are determined by the srand() seed in effect and by N.
class RandData {
public:
    // Allocates the four arrays with N elements each and fills them.
    // For every index the rand() calls are made in this order: first value,
    // second value, condition bit (lowest bit of the rand() result).
    RandData(size_t N)
        : values_a(N),
          values_b(N),
          flags_a(new bool[N]),
          flags_b(new bool[N])
    {
        size_t idx = 0;
        while (idx != N) {
            values_a[idx] = rand();
            values_b[idx] = rand();
            const bool low_bit = rand() & 0x1;
            flags_a[idx] = low_bit;
            flags_b[idx] = !low_bit;  // always the opposite of flags_a[idx]
            ++idx;
        }
    }

    // Read-only pointer to the first value array.
    const unsigned long* data1() const { return values_a.data(); }

    // Read-only pointer to the second value array.
    const unsigned long* data2() const { return values_b.data(); }

    // Read-only pointer to the first condition array.
    const bool* cond1() const { return flags_a.get(); }

    // Read-only pointer to the complementary condition array.
    const bool* cond2() const { return flags_b.get(); }

private:
    std::vector<unsigned long> values_a, values_b;
    std::unique_ptr<bool[]> flags_a, flags_b;
};
// Adds x to the accumulator s when b is true and leaves s unchanged when b is
// false. The choice is made arithmetically, by multiplying x by 0 or 1, rather
// than with an if statement. The noinline attribute asks the compiler to keep
// this as a real function call instead of merging it into the caller.
void __attribute__ ((noinline)) f(bool b, unsigned long x, unsigned long& s)
{
    const unsigned long selector = b;  // false -> 0, true -> 1
    s += selector * x;
}
// Benchmark body: for each timed iteration, walks the first value array and
// the first condition array of a RandData instance and accumulates through
// f(), one call per element. The element count comes from state.range(0).
void BM_branchless(benchmark::State& state) {
    srand(1);  // fixed seed, so RandData is filled from the same rand() sequence every run
    const unsigned int count = state.range(0);
    RandData input(count);
    const unsigned long* const values = input.data1();
    const bool* const flags = input.cond1();

    for (auto _ : state) {
        unsigned long sum = 0;
        for (size_t k = 0; k < count; ++k) {
            f(flags[k], values[k], sum);
        }
        // Hand the result to the benchmark library so the accumulation is
        // treated as used, then force pending memory writes to be considered
        // observable.
        benchmark::DoNotOptimize(sum);
        benchmark::ClobberMemory();
    }

    // Report throughput as elements handled across all timed iterations.
    state.SetItemsProcessed(count * state.iterations());
}
// Register BM_branchless to run with a single argument, 1<<22 (4194304); the
// benchmark function reads it through state.range(0) as its element count.
BENCHMARK(BM_branchless)->Arg(1<<22);
// Google Benchmark macro that provides the program's main() entry point.
BENCHMARK_MAIN();