Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit b778064

Browse files
committed Feb 6, 2024
Adding commandline options to select only sparse or dense kernels
1 parent e04479e commit b778064

File tree

8 files changed

+151
-92
lines changed

8 files changed

+151
-92
lines changed
 

‎.idea/.gitignore

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/codeStyles/codeStyleConfig.xml

+5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/misc.xml

+18
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎.idea/vcs.xml

+6
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎DefaultCPU/spGemm.hh

+31-38
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,18 @@ namespace cpu {
2222
using spGemm<T>::n_;
2323

2424
/** Initialise the required data structures. */
25-
virtual void initialise(int n, double sparsity) override {
25+
virtual void initialise(int n, double sparsity, bool binary) override {
2626
n_ = n;
27+
isBinary_ = binary;
2728

2829
A_.assign(n * n, 0.0);
2930
B_.assign(n * n, 0.0);
30-
C_.assign(n * n, 0.0);
31+
C_.reserve(n * n);
3132

3233
// Random number generator objects for use in descent
3334
std::default_random_engine gen;
35+
gen.seed(std::chrono::system_clock::now()
36+
.time_since_epoch().count());
3437
std::uniform_real_distribution<double> dist(0.0, 1.0);
3538

3639
// Work out number of edges needed to achieve target sparsity
@@ -39,67 +42,55 @@ namespace cpu {
3942
// Initialise the matrices
4043
// Using a=0.45 and b=c=0.22 as default probabilities
4144
for (int i = 0; i < edges; i++) {
42-
while (!rMat(A_, n, 0, n-1, 0, n-1,
45+
while (!rMat(&A_, n, 0, n-1, 0, n-1,
4346
0.45, 0.22, 0.22,
44-
gen, dist)) {
45-
gen.seed(std::chrono::system_clock::now()
46-
.time_since_epoch().count());
47-
}
48-
while (!rMat(B_, n, 0, n-1, 0, n-1,
47+
&gen, dist)) {}
48+
while (!rMat(&B_, n, 0, n-1, 0, n-1,
4949
0.45, 0.22, 0.22,
50-
gen, dist)) {}
50+
&gen, dist)) {}
5151
}
52-
}
52+
}
5353

5454
private:
55-
bool rMat(std::vector<T>& M, int n, int x1, int x2, int y1, int y2,
56-
float a, float b, float c, std::default_random_engine gen,
55+
bool rMat(std::vector<T>* M, int n, int x1, int x2, int y1, int y2,
56+
float a, float b, float c, std::default_random_engine* gen,
5757
std::uniform_real_distribution<double> dist) {
5858
// If a 1x1 submatrix, then add an edge and return out
5959
if (x1 >= x2 && y1 >= y2) {
60-
if (abs(M[(y1 * n) + x1]) > 0.5) {
60+
if (abs(M->at((y1 * n) + x1)) > 0.1) {
6161
return false;
6262
} else {
63-
M[(int) (y1 * n) + x1] = 1.0;
63+
// Add 1.0 if this is a binary graph, and a random real number otherwise
64+
M->at((int) (y1 * n) + x1) = (isBinary_) ? 1.0 : (((rand() % 10000) / 100.0) - 50.0);
6465
return true;
6566
}
6667
} else {
6768
// Divide up the matrix
68-
int xMidPoint = x1 + (int)((x2 - x1) / 2);
69-
int yMidPoint = y1 + (int)((y2 - y1) / 2);
70-
71-
// ToDo - consider if need to check for non-square matrices
72-
73-
// Introduce some noise to the quarter probabilities
74-
float newA = a + (-0.01 + (dist(gen) * 0.02));
75-
float newB = b + (-0.01 + (dist(gen) * 0.02));
76-
float newC = c + (-0.01 + (dist(gen) * 0.02));
77-
// Make sure noise doesn't make impossible probabilities
78-
if ((newA + newB + newC) > 0.98 ||
79-
newA < 0.02 || newB < 0.02 || newC < 0.02) {
80-
newA = 0.45;
81-
newB = 0.22;
82-
newC = 0.22;
83-
}
69+
int xMidPoint = x1 + floor((x2 - x1) / 2);
70+
int yMidPoint = y1 + floor((y2 - y1) / 2);
8471

85-
// float newA = a;
86-
// float newB = b;
87-
// float newC = c;
72+
// ToDo -- add some noise to these values between iterations
73+
float newA = a;
74+
float newB = b;
75+
float newC = c;
8876

8977
// Work out which quarter to recurse into
90-
float randomNum = dist(gen);
78+
// There are some ugly ternary operators here to avoid going out of bounds in the edge case
79+
// that we are already at 1 width or 1 height
80+
float randomNum = dist(*gen);
9181
if (randomNum < a) {
9282
return rMat(M, n, x1, xMidPoint, y1, yMidPoint,
9383
newA, newB, newC, gen, dist);
9484
} else if (randomNum < (a + b)) {
95-
return rMat(M, n, xMidPoint + 1, x2, y1, yMidPoint,
85+
return rMat(M, n, ((xMidPoint < x2) ? xMidPoint + 1 : xMidPoint), x2, y1, yMidPoint,
9686
newA, newB, newC, gen, dist);
9787
} else if (randomNum < (a + b + c)) {
98-
return rMat(M, n, x1, xMidPoint, yMidPoint + 1, y2,
88+
return rMat(M, n, x1, xMidPoint, ((yMidPoint < y2) ? yMidPoint + 1 : yMidPoint), y2,
9989
newA, newB, newC, gen, dist);
10090
} else {
101-
return rMat(M, n, xMidPoint + 1, x2, yMidPoint + 1, y2,
102-
newA, newB, newC, gen, dist);
91+
return rMat(M, n, ((xMidPoint < x2) ? xMidPoint + 1 : xMidPoint), x2,
92+
((yMidPoint < y2) ? yMidPoint + 1 : yMidPoint), y2, newA, newB, newC,
93+
gen, dist);
10394
}
10495
}
10596
}
@@ -148,6 +139,8 @@ namespace cpu {
148139
/** Input matrix C. */
149140
std::vector<T> C_;
150141

142+
bool isBinary_;
143+
151144
};
152145

153146
#endif

‎include/CPU/spGemm.hh

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ namespace cpu {
1111
using kernel<T>::kernel;
1212

1313
/** Initialise the required data structures. */
14-
virtual void initialise(int n, double sparsity) = 0;
14+
virtual void initialise(int n, double sparsity, bool binary) = 0;
1515

1616
protected:
1717
/** Matrix size -- matrix will be nxn */

‎include/doGemm.hh

+65-51
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
template <typename T>
1919
class doGemm {
2020
public:
21-
doGemm(const int iters, const int upperLimit)
21+
doGemm(const int iters, const int upperLimit, bool dense, bool sparse)
2222
: iterations_(iters),
2323
upperLimit_(upperLimit),
24+
dense_(dense),
25+
sparse_(sparse),
2426
gemmCpu_(iterations_),
2527
spGemmCpu_(iterations_),
2628
gemmGpu_(iterations_) {
@@ -32,66 +34,74 @@ class doGemm {
3234
/** Run all problem types and write data to CSV files. */
3335
void collectData() {
3436
// Square Problem Sizes...
35-
std::ofstream csvFile = initCSVFile(std::string(CSV_DIR) + "/" +
36-
getKernelName() + "_square.csv");
37-
for (int dim = 1; dim <= upperLimit_; dim++) {
38-
const int M = dim, N = dim, K = dim;
37+
std::ofstream csvFile;
38+
if (dense_) {
39+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" +
40+
getKernelName() + "_square.csv");
41+
for (int dim = 1; dim <= upperLimit_; dim++) {
42+
const int M = dim, N = dim, K = dim;
3943
callDenseKernels(csvFile, M, N, K);
40-
}
41-
// Close file
42-
csvFile.close();
43-
44-
// Rectangular Problem Sizes:
45-
// Tall and thin (16M x K)...
46-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
47-
"_rectangular_16MxK.csv");
48-
for (int dim = 16; dim <= upperLimit_; dim += 16) {
49-
const int M = dim, N = dim, K = (dim / 16);
44+
}
45+
// Close file
46+
csvFile.close();
47+
48+
// Rectangular Problem Sizes:
49+
// Tall and thin (16M x K)...
50+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
51+
"_rectangular_16MxK.csv");
52+
for (int dim = 16; dim <= upperLimit_; dim += 16) {
53+
const int M = dim, N = dim, K = (dim / 16);
5054
callDenseKernels(csvFile, M, N, K);
51-
}
52-
// Close file
53-
csvFile.close();
54-
55-
// Tall and thin (M x 32)...
56-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
57-
"_rectangular_Mx32.csv");
58-
if (upperLimit_ >= 32) {
59-
for (int dim = 1; dim <= upperLimit_; dim++) {
60-
const int M = dim, N = dim, K = 32;
55+
}
56+
// Close file
57+
csvFile.close();
58+
59+
// Tall and thin (M x 32)...
60+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
61+
"_rectangular_Mx32.csv");
62+
if (upperLimit_ >= 32) {
63+
for (int dim = 1; dim <= upperLimit_; dim++) {
64+
const int M = dim, N = dim, K = 32;
6165
callDenseKernels(csvFile, M, N, K);
66+
}
6267
}
63-
}
64-
// Close file
65-
csvFile.close();
66-
67-
// Short and wide (M x 16K)...
68-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
69-
"_rectangular_Mx16K.csv");
70-
for (int dim = 16; dim <= upperLimit_; dim += 16) {
71-
const int M = (dim / 16), N = (dim / 16), K = dim;
68+
// Close file
69+
csvFile.close();
70+
71+
// Short and wide (M x 16K)...
72+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
73+
"_rectangular_Mx16K.csv");
74+
for (int dim = 16; dim <= upperLimit_; dim += 16) {
75+
const int M = (dim / 16), N = (dim / 16), K = dim;
7276
callDenseKernels(csvFile, M, N, K);
77+
}
78+
// Close file
79+
csvFile.close();
80+
81+
// Short and wide (32 x K)...
82+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
83+
"_rectangular_32xK.csv");
84+
if (upperLimit_ >= 32) {
85+
for (int dim = 1; dim <= upperLimit_; dim++) {
86+
const int M = 32, N = 32, K = dim;
87+
callDenseKernels(csvFile, M, N, K);
88+
}
89+
}
90+
// Close file
91+
csvFile.close();
7392
}
74-
// Close file
75-
csvFile.close();
7693

77-
// Short and wide (32 x K)...
78-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
79-
"_rectangular_32xK.csv");
80-
if (upperLimit_ >= 32) {
94+
if (sparse_) {
95+
// Sparse graph matrix (N x N)
96+
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
97+
"_sparse_graph.csv");
8198
for (int dim = 1; dim <= upperLimit_; dim++) {
82-
const int M = 32, N = 32, K = dim;
83-
callDenseKernels(csvFile, M, N, K);
99+
callSparseKernels(csvFile, dim, 0.99);
84100
}
101+
// Close file
102+
csvFile.close();
85103
}
86104

87-
// Sparse graph matrix (N x N)
88-
csvFile = initCSVFile(std::string(CSV_DIR) + "/" + getKernelName() +
89-
"_sparse_graph.csv");
90-
for (int dim = 1; dim <= upperLimit_; dim++) {
91-
callSparseKernels(csvFile, dim, 0.999);
92-
}
93-
// Close file
94-
csvFile.close();
95105
}
96106

97107
private:
@@ -127,7 +137,7 @@ class doGemm {
127137
std::string kernelName = getKernelName();
128138

129139
// Perform CPU
130-
spGemmCpu_.initialise(N, sparsity);
140+
spGemmCpu_.initialise(N, sparsity, false);
131141
double cpuTime = spGemmCpu_.compute();
132142
writeLineToCsv(csvFile, "cpu", kernelName, N, N, N, sparsity, probSize,
133143
iterations_, cpuTime,
@@ -179,6 +189,10 @@ class doGemm {
179189
/** The maximum value of the largest problem size dimension. */
180190
const int upperLimit_;
181191

192+
/** Boolean values to keep a track of whether dense/sparse kernels are being run */
193+
bool dense_;
194+
bool sparse_;
195+
182196
cpu::gemm_cpu<T> gemmCpu_;
183197
cpu::spGemm_cpu<T> spGemmCpu_;
184198
gpu::gemm_gpu<T> gemmGpu_;

‎src/main.cc

+17-2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
int iters = 10;
44
int upperLimit = 128;
5+
bool sparse = true;
6+
bool dense = true;
57

68
int main(int argc, char** argv) {
79
getParameters(argc, argv);
@@ -20,20 +22,22 @@ int main(int argc, char** argv) {
2022

2123
// SGEMM Comparison
2224
std::cout << std::endl << "Comparing SGEMM Kernels:" << std::endl;
23-
doGemm<float> sgemm(iters, upperLimit);
25+
doGemm<float> sgemm(iters, upperLimit, dense, sparse);
2426
sgemm.collectData();
2527
std::cout << "Finished!" << std::endl;
2628

2729
// DGEMM Comparison
2830
std::cout << std::endl << "Comparing DGEMM Kernels:" << std::endl;
29-
doGemm<double> dgemm(iters, upperLimit);
31+
doGemm<double> dgemm(iters, upperLimit, dense, sparse);
3032
dgemm.collectData();
3133
std::cout << "Finished!" << std::endl;
3234
return 0;
3335
}
3436

3537
void printBenchmarkConfig(const int iters, const int upperLimit) {
3638
std::string gpuEnabledStr = (GPU_ENABLED) ? "True" : "False";
39+
std::string denseStr = dense ? "True" : "False";
40+
std::string sparseStr = sparse ? "True" : "False";
3741
unsigned int ompThreads =
3842
(getenv("OMP_NUM_THREADS") != NULL) ? atoi(getenv("OMP_NUM_THREADS")) : 1;
3943
const char* ompProcBind =
@@ -50,6 +54,9 @@ void printBenchmarkConfig(const int iters, const int upperLimit) {
5054
std::cout << "\tOMP_NUM_THREADS: " << ompThreads << std::endl;
5155
std::cout << "\tOMP_PROC_BIND: " << ompProcBind << std::endl;
5256
std::cout << "\tOMP_PLACES: " << ompPlaces << std::endl;
57+
std::cout << "\tOMP_PLACES: " << ompPlaces << std::endl;
58+
std::cout << "\tRunning dense kernels: " << denseStr << std::endl;
59+
std::cout << "\tRunning sparse kernels: " << sparseStr << std::endl;
5360
std::cout << std::endl;
5461
#ifdef CPU_DEFAULT
5562
std::cout
@@ -97,8 +104,16 @@ void getParameters(int argc, char* argv[]) {
97104
std::cout << " -d --dimension_limit D Max value of M, N, K is D "
98105
"(default: "
99106
<< upperLimit << ")" << std::endl;
107+
std::cout << " --dense Run only the dense matrix kernels "
108+
"(cannot be run in combination with --sparse)" << std::endl;
109+
std::cout << " -s --sparse Run only the sparse matrix kernels "
110+
"(cannot be run in combination with --dense)" << std::endl;
100111
std::cout << std::endl;
101112
exit(0);
113+
} else if (!strcmp(argv[i], "--sparse") || !strcmp(argv[i], "-s")) {
114+
dense = false;
115+
} else if (!strcmp(argv[i], "--dense")) {
116+
sparse = false;
102117
} else {
103118
std::cout << "Unrecognized argument '" << argv[i] << "' (try '--help')"
104119
<< std::endl;

0 commit comments

Comments (0)
Please sign in to comment.