Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b778064

Browse files
committed Feb 6, 2024
Adding commandline options to select only sparse or dense kernels
1 parent e04479e commit b778064

File tree

8 files changed

+151
-92
lines changed

8 files changed

+151
-92
lines changed
 

‎.idea/.gitignore

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/codeStyles/codeStyleConfig.xml

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/misc.xml

+18
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/vcs.xml

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎DefaultCPU/spGemm.hh

+31-38
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,18 @@ namespace cpu {
2222
using spGemm<T>::n_;
2323

2424
/** Initialise the required data structures. */
25-
virtual void initialise(int n, double sparsity) override {
25+
virtual void initialise(int n, double sparsity, bool binary) override {
2626
n_ = n;
27+
isBinary_ = binary;
2728

2829
A_.assign(n * n, 0.0);
2930
B_.assign(n * n, 0.0);
30-
C_.assign(n * n, 0.0);
31+
C_.reserve(n * n);
3132

3233
// Random number generator objects for use in descent
3334
std::default_random_engine gen;
35+
gen.seed(std::chrono::system_clock::now()
36+
.time_since_epoch().count());
3437
std::uniform_real_distribution<double> dist(0.0, 1.0);
3538

3639
// Work out number of edges needed to achieve target sparsity
@@ -39,67 +42,55 @@ namespace cpu {
3942
// Initialise the matrices
4043
// Using a=0.45 and b=c=0.22 as default probabilities
4144
for (int i = 0; i < edges; i++) {
42-
while (!rMat(A_, n, 0, n-1, 0, n-1,
45+
while (!rMat(&A_, n, 0, n-1, 0, n-1,
4346
0.45, 0.22, 0.22,
44-
gen, dist)) {
45-
gen.seed(std::chrono::system_clock::now()
46-
.time_since_epoch().count());
47-
}
48-
while (!rMat(B_, n, 0, n-1, 0, n-1,
47+
&gen, dist)) {}
48+
while (!rMat(&B_, n, 0, n-1, 0, n-1,
4949
0.45, 0.22, 0.22,
50-
gen, dist)) {}
50+
&gen, dist)) {}
5151
}
52-
}
52+
}
5353

5454
private:
55-
bool rMat(std::vector<T>& M, int n, int x1, int x2, int y1, int y2,
56-
float a, float b, float c, std::default_random_engine gen,
55+
bool rMat(std::vector<T>* M, int n, int x1, int x2, int y1, int y2,
56+
float a, float b, float c, std::default_random_engine* gen,
5757
std::uniform_real_distribution<double> dist) {
5858
// If a 1x1 submatrix, then add an edge and return out
5959
if (x1 >= x2 && y1 >= y2) {
60-
if (abs(M[(y1 * n) + x1]) > 0.5) {
60+
if (abs(M->at((y1 * n) + x1)) > 0.1) {
6161
return false;
6262
} else {
63-
M[(int) (y1 * n) + x1] = 1.0;
63+
// Add 1.0 if this is a binary graph, and a random real number otherwise
64+
M->at((int) (y1 * n) + x1) = (isBinary_) ? 1.0 : (((rand() % 10000) / 100.0) - 50.0);
6465
return true;
6566
}
6667
} else {
6768
// Divide up the matrix
68-
int xMidPoint = x1 + (int)((x2 - x1) / 2);
69-
int yMidPoint = y1 + (int)((y2 - y1) / 2);
70-
71-
// ToDo - consider if need to check for non-square matrices
72-
73-
// Introduce some noise to the quarter probabilities
74-
float newA = a + (-0.01 + (dist(gen) * 0.02));
75-
float newB = b + (-0.01 + (dist(gen) * 0.02));
76-
float newC = c + (-0.01 + (dist(gen) * 0.02));
77-
// Make sure noise doesn't make impossible probabilities
78-
if ((newA + newB + newC) > 0.98 ||
79-
newA < 0.02 || newB < 0.02 || newC < 0.02) {
80-
newA = 0.45;
81-
newB = 0.22;
82-
newC = 0.22;
83-
}
69+
int xMidPoint = x1 + floor((x2 - x1) / 2);
70+
int yMidPoint = y1 + floor((y2 - y1) / 2);
8471

85-
// float newA = a;
86-
// float newB = b;
87-
// float newC = c;
72+
// ToDo -- add some noise to these values between iterations
73+
float newA = a;
74+
float newB = b;
75+
float newC = c;
8876

8977
// Work out which quarter to recurse into
90-
float randomNum = dist(gen);
78+
// There are some ugly ternary operators here to avoid going out of bounds in the edge case
79+
// that we are already at 1 width or 1 height
80+
float randomNum = dist(*gen);
9181
if (randomNum < a) {
9282
return rMat(M, n, x1, xMidPoint, y1, yMidPoint,
9383
newA, newB, newC, gen, dist);
9484
} else if (randomNum < (a + b)) {
95-
return rMat(M, n, xMidPoint + 1, x2, y1, yMidPoint,
85+
return rMat(M, n, ((xMidPoint < x2) ? xMidPoint + 1 : xMidPoint), x2, y1, yMidPoint,
9686
newA, newB, newC, gen, dist);
9787
} else if (randomNum < (a + b + c)) {
98-
return rMat(M, n, x1, xMidPoint, yMidPoint + 1, y2,
88+
return rMat(M, n, x1, xMidPoint, ((yMidPoint < y2) ? yMidPoint + 1 : yMidPoint), y2,
9989
newA, newB, newC, gen, dist);
10090
} else {
101-
return rMat(M, n, xMidPoint + 1, x2, yMidPoint + 1, y2,
102-
newA, newB, newC, gen, dist);
91+
return rMat(M, n, ((xMidPoint < x2) ? xMidPoint + 1 : xMidPoint), x2,
92+
((yMidPoint < y2) ? yMidPoint + 1 : yMidPoint), y2, newA, newB, newC,
93+
gen, dist);
10394
}
10495
}
10596
}
@@ -148,6 +139,8 @@ namespace cpu {
148139
/** Input matrix C. */
149140
std::vector<T> C_;
150141

142+
bool isBinary_;
143+
151144
};
152145

153146
#endif

‎include/CPU/spGemm.hh

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace cpu {
1111
using kernel<T>::kernel;
1212

1313
/** Initialise the required data structures. */
14-
virtual void initialise(int n, double sparsity) = 0;
14+
virtual void initialise(int n, double sparsity, bool binary) = 0;
1515

1616
protected:
1717
/** Matrix size -- matrix will be nxn */

‎include/doGemm.hh

+65-51
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
template <typename T>
1919
class doGemm {
2020
public:
21-
doGemm(const int iters, const int upperLimit)
21+
doGemm(const int iters, const int upperLimit, bool dense, bool sparse)
2222
: iterations_(iters),
2323
upperLimit_(upperLimit),
24+
dense_(dense),
25+
sparse_(sparse),
2426
gemmCpu_(iterations_),
2527
spGemmCpu_(iterations_),
2628
gemmGpu_(iterations_) {
@@ -32,66 +34,74 @@ class doGemm {
3234
/** Run all problem types and write data to CSV files. */
3335
void collectData() {
3436
// Square Problem Sizes...
35-
std::ofstream csvFile = initCSVFile(std::string(CSV_DIR) + "/" +
36-
getKernelName() + "_square.csv");
37-
for (int dim = 1; dim <= upperLimit_; dim++) {
38-
const int M = dim, N = dim, K = dim;
37+
std::ofstream csvFile;
38+
if (dense_) {
39+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" +
40+
getKernelName() + "_square.csv");
41+
for (int dim = 1; dim <= upperLimit_; dim++) {
42+
const int M = dim, N = dim, K = dim;
3943
callDenseKernels(csvFile, M, N, K);
40-
}
41-
// Close file
42-
csvFile.close();
43-
44-
// Rectangular Problem Sizes:
45-
// Tall and thin (16M x K)...
46-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
47-
"_rectangular_16MxK.csv");
48-
for (int dim = 16; dim <= upperLimit_; dim += 16) {
49-
const int M = dim, N = dim, K = (dim / 16);
44+
}
45+
// Close file
46+
csvFile.close();
47+
48+
// Rectangular Problem Sizes:
49+
// Tall and thin (16M x K)...
50+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
51+
"_rectangular_16MxK.csv");
52+
for (int dim = 16; dim <= upperLimit_; dim += 16) {
53+
const int M = dim, N = dim, K = (dim / 16);
5054
callDenseKernels(csvFile, M, N, K);
51-
}
52-
// Close file
53-
csvFile.close();
54-
55-
// Tall and thin (M x 32)...
56-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
57-
"_rectangular_Mx32.csv");
58-
if (upperLimit_ >= 32) {
59-
for (int dim = 1; dim <= upperLimit_; dim++) {
60-
const int M = dim, N = dim, K = 32;
55+
}
56+
// Close file
57+
csvFile.close();
58+
59+
// Tall and thin (M x 32)...
60+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
61+
"_rectangular_Mx32.csv");
62+
if (upperLimit_ >= 32) {
63+
for (int dim = 1; dim <= upperLimit_; dim++) {
64+
const int M = dim, N = dim, K = 32;
6165
callDenseKernels(csvFile, M, N, K);
66+
}
6267
}
63-
}
64-
// Close file
65-
csvFile.close();
66-
67-
// Short and wide (M x 16K)...
68-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
69-
"_rectangular_Mx16K.csv");
70-
for (int dim = 16; dim <= upperLimit_; dim += 16) {
71-
const int M = (dim / 16), N = (dim / 16), K = dim;
68+
// Close file
69+
csvFile.close();
70+
71+
// Short and wide (M x 16K)...
72+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
73+
"_rectangular_Mx16K.csv");
74+
for (int dim = 16; dim <= upperLimit_; dim += 16) {
75+
const int M = (dim / 16), N = (dim / 16), K = dim;
7276
callDenseKernels(csvFile, M, N, K);
77+
}
78+
// Close file
79+
csvFile.close();
80+
81+
// Short and wide (32 x K)...
82+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
83+
"_rectangular_32xK.csv");
84+
if (upperLimit_ >= 32) {
85+
for (int dim = 1; dim <= upperLimit_; dim++) {
86+
const int M = 32, N = 32, K = dim;
87+
callDenseKernels(csvFile, M, N, K);
88+
}
89+
}
90+
// Close file
91+
csvFile.close();
7392
}
74-
// Close file
75-
csvFile.close();
7693

77-
// Short and wide (32 x K)...
78-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
79-
"_rectangular_32xK.csv");
80-
if (upperLimit_ >= 32) {
94+
if (sparse_) {
95+
// Sparse graph matrix (N x N)
96+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
97+
"_sparse_graph.csv");
8198
for (int dim = 1; dim <= upperLimit_; dim++) {
82-
const int M = 32, N = 32, K = dim;
83-
callDenseKernels(csvFile, M, N, K);
99+
callSparseKernels(csvFile, dim, 0.99);
84100
}
101+
// Close file
102+
csvFile.close();
85103
}
86104

87-
// Sparse graph matrix (N x N)
88-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
89-
"_sparse_graph.csv");
90-
for (int dim = 1; dim <= upperLimit_; dim++) {
91-
callSparseKernels(csvFile, dim, 0.999);
92-
}
93-
// Close file
94-
csvFile.close();
95105
}
96106

97107
private:
@@ -127,7 +137,7 @@ class doGemm {
127137
std::string kernelName = getKernelName();
128138

129139
// Perform CPU
130-
spGemmCpu_.initialise(N, sparsity);
140+
spGemmCpu_.initialise(N, sparsity, false);
131141
double cpuTime = spGemmCpu_.compute();
132142
writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, sparsity, probSize,
133143
iterations_, cpuTime,
@@ -179,6 +189,10 @@ class doGemm {
179189
/** The maximum value of the largest problem size dimension. */
180190
const int upperLimit_;
181191

192+
/** Boolean values to keep a track of whether dense/sparse kernels are being run */
193+
bool dense_;
194+
bool sparse_;
195+
182196
cpu::gemm_cpu<T> gemmCpu_;
183197
cpu::spGemm_cpu<T> spGemmCpu_;
184198
gpu::gemm_gpu<T> gemmGpu_;

‎src/main.cc

+17-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
int iters = 10;
44
int upperLimit = 128;
5+
bool sparse = true;
6+
bool dense = true;
57

68
int main(int argc, char** argv) {
79
getParameters(argc, argv);
@@ -20,20 +22,22 @@ int main(int argc, char** argv) {
2022

2123
// SGEMM Comparison
2224
std::cout << std::endl << "Comparing SGEMM Kernels:" << std::endl;
23-
doGemm<float> sgemm(iters, upperLimit);
25+
doGemm<float> sgemm(iters, upperLimit, dense, sparse);
2426
sgemm.collectData();
2527
std::cout << "Finished!" << std::endl;
2628

2729
// DGEMM Comparison
2830
std::cout << std::endl << "Comparing DGEMM Kernels:" << std::endl;
29-
doGemm<double> dgemm(iters, upperLimit);
31+
doGemm<double> dgemm(iters, upperLimit, dense, sparse);
3032
dgemm.collectData();
3133
std::cout << "Finished!" << std::endl;
3234
return 0;
3335
}
3436

3537
void printBenchmarkConfig(const int iters, const int upperLimit) {
3638
std::string gpuEnabledStr = (GPU_ENABLED) ? "True" : "False";
39+
std::string denseStr = dense ? "True" : "False";
40+
std::string sparseStr = sparse ? "True" : "False";
3741
unsigned int ompThreads =
3842
(getenv("OMP_NUM_THREADS") != NULL) ? atoi(getenv("OMP_NUM_THREADS")) : 1;
3943
const char* ompProcBind =
@@ -50,6 +54,9 @@ void printBenchmarkConfig(const int iters, const int upperLimit) {
5054
std::cout << "\tOMP_NUM_THREADS: " << ompThreads << std::endl;
5155
std::cout << "\tOMP_PROC_BIND: " << ompProcBind << std::endl;
5256
std::cout << "\tOMP_PLACES: " << ompPlaces << std::endl;
57+
std::cout << "\tOMP_PLACES: " << ompPlaces << std::endl;
58+
std::cout << "\tRunning dense kernels: " << denseStr << std::endl;
59+
std::cout << "\tRunning sparse kernels: " << sparseStr << std::endl;
5360
std::cout << std::endl;
5461
#ifdef CPU_DEFAULT
5562
std::cout
@@ -97,8 +104,16 @@ void getParameters(int argc, char* argv[]) {
97104
std::cout << " -d --dimension_limit D Max value of M, N, K is D "
98105
"(default: "
99106
<< upperLimit << ")" << std::endl;
107+
std::cout << " --dense Run only the dense matrix kernels "
108+
"(cannot be run in combination with --sparse)" << std::endl;
109+
std::cout << " -s --sparse Run only the sparse matrix kernels "
110+
"(cannot be run in combination with --dense)" << std::endl;
100111
std::cout << std::endl;
101112
exit(0);
113+
} else if (!strcmp(argv[i], "--sparse") || !strcmp(argv[i], "-s")) {
114+
dense = false;
115+
} else if (!strcmp(argv[i], "--dense")) {
116+
sparse = false;
102117
} else {
103118
std::cout << "Unrecognized argument '" << argv[i] << "' (try '--help')"
104119
<< std::endl;

0 commit comments

Comments (0)
Please sign in to comment.