forked from Nik4053/Templated-Cpp-Tensor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
matmulbench.cpp
89 lines (77 loc) · 2.2 KB
/
matmulbench.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#include <benchmark/benchmark.h>
#include "tensorlib/cpu/tensorhelper.hpp"
// Edge length of the square matrices used by every benchmark in this file.
#define DIM 127
// Operand (lhs, rhs) and output (result) buffers shared by all benchmarks.
// They are namespace-scope arrays without initializers, so they start out
// zero-filled; no code visible in this file fills lhs or rhs with other data.
// NOTE(review): Tensor<float,DIM,DIM>::SIZE is presumably DIM*DIM; the raw-array
// benchmarks index these buffers as [row*DIM + col] and rely on that - confirm.
float lhs[Tensor<float,DIM,DIM>::SIZE];
float rhs[Tensor<float,DIM,DIM>::SIZE];
float result[Tensor<float,DIM,DIM>::SIZE];
/// Baseline benchmark: multiplies the global buffers `lhs` and `rhs`, indexed
/// as [row*DIM + col], with a plain triple loop (row, column, inner index) and
/// stores each dot product into `result`.
///
/// Every timed iteration overwrites all DIM*DIM entries of `result` that it
/// touches, so one iteration does not depend on the previous one.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void BM_Reference(benchmark::State& state) {
    for (auto _ : state) {
        for (size_t row = 0; row < DIM; ++row) {
            // Offset of the current row; shared by the lhs read and the result write.
            const size_t row_base = row * DIM;
            for (size_t col = 0; col < DIM; ++col) {
                // Accumulate the dot product locally and store it once at the end.
                float dot = 0;
                for (size_t inner = 0; inner < DIM; ++inner) {
                    dot += lhs[row_base + inner] * rhs[inner * DIM + col];
                }
                result[row_base + col] = dot;
            }
        }
    }
}
// Hand the baseline kernel to the benchmark runner.
BENCHMARK(BM_Reference);
/// Raw-array benchmark with the loops ordered row / inner / column, so the
/// innermost loop walks consecutive elements of one `rhs` row and one
/// `result` row.
///
/// The kernel only ever adds into `result` and never clears it, so each timed
/// iteration accumulates on top of whatever the buffer already holds.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void BM_Reference_line(benchmark::State& state) {
    for (auto _ : state) {
        for (size_t row = 0; row < DIM; ++row) {
            float* const out_row = &result[row * DIM];
            for (size_t inner = 0; inner < DIM; ++inner) {
                // This lhs element is constant for the whole column sweep below.
                const float scale = lhs[row * DIM + inner];
                const float* const rhs_row = &rhs[inner * DIM];
                for (size_t col = 0; col < DIM; ++col) {
                    out_row[col] += scale * rhs_row[col];
                }
            }
        }
    }
}
// Hand the loop-reordered raw-array kernel to the benchmark runner.
BENCHMARK(BM_Reference_line);
/// Benchmarks the library routine TensorHelper::Matmul on DIM x DIM float
/// tensors constructed from the global `lhs`, `rhs` and `result` buffers.
///
/// The three Tensor objects are built once, before the timed loop, so only the
/// Matmul call itself is measured.
/// NOTE(review): presumably Matmul writes the product of its 2nd and 3rd
/// arguments into its 1st - confirm against tensorhelper.hpp.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void BM_Tensor(benchmark::State& state) {
    Tensor<float,DIM,DIM> left(lhs);
    Tensor<float,DIM,DIM> right(rhs);
    Tensor<float,DIM,DIM> product(result);

    for (auto _ : state) {
        TensorHelper::Matmul(product, left, right);
    }
}
// Hand the library-call benchmark to the benchmark runner.
BENCHMARK(BM_Tensor);
/// Benchmarks the row / inner / column loop order using the Tensor call
/// operator `t(row, col)` for element access, mirroring BM_Reference_line.
///
/// The three Tensor objects are built once, before the timed loop. Like the
/// raw-array variant, the kernel adds into the result tensor without clearing
/// it, so each timed iteration accumulates onto the previous contents.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void BM_Tensor_line(benchmark::State& state) {
    Tensor<float,DIM,DIM> lhs_t(lhs);
    Tensor<float,DIM,DIM> rhs_t(rhs);
    Tensor<float,DIM,DIM> result_t(result);
    for (auto _ : state) {
        for (size_t i = 0; i < DIM; i++) {
            for (size_t k = 0; k < DIM; k++) {
                // lhs(i,k) is constant across the column sweep below.
                const float tmp = lhs_t(i, k);
                for (size_t j = 0; j < DIM; j++) {
                    // The rhs column index must be j: result(i,j) += lhs(i,k) * rhs(k,j).
                    // Reading rhs_t(k,i) here would make the operand independent of j,
                    // so the loop would no longer perform a matrix multiplication and
                    // the timing would not be comparable with the other kernels.
                    result_t(i, j) += tmp * rhs_t(k, j);
                }
            }
        }
    }
}
// Hand the call-operator kernel to the benchmark runner.
BENCHMARK(BM_Tensor_line);
/// Same row / inner / column kernel as BM_Tensor_line, but elements are
/// reached through chained subscripts (`t[row][col]`) instead of the call
/// operator.
///
/// The three Tensor objects are built once, before the timed loop. The kernel
/// adds into the result tensor without clearing it, so each timed iteration
/// accumulates onto the previous contents.
///
/// @param state Google Benchmark state object that drives the timed loop.
static void BM_Tensor_lineV2(benchmark::State& state) {
    Tensor<float,DIM,DIM> lhs_t(lhs);
    Tensor<float,DIM,DIM> rhs_t(rhs);
    Tensor<float,DIM,DIM> result_t(result);

    for (auto _ : state) {
        for (size_t row = 0; row < DIM; ++row) {
            for (size_t inner = 0; inner < DIM; ++inner) {
                // This lhs element is constant for the whole column sweep below.
                float scale = lhs_t[row][inner];
                for (size_t col = 0; col < DIM; ++col) {
                    result_t[row][col] += scale * rhs_t[inner][col];
                }
            }
        }
    }
}
// Hand the subscript-operator kernel to the benchmark runner.
BENCHMARK(BM_Tensor_lineV2);
// Google Benchmark macro that supplies the program's main(), which runs the
// benchmarks registered above with BENCHMARK(...).
BENCHMARK_MAIN();