Implementation of CUDA Accelerated Passive Crossbar Programming Routines for the 2021 Data-Driven Model (#125)

Implementation of CUDA accelerated passive crossbar programming routines for the 2021 Data-Driven model (#125) as a partial solution to (#53).
coreylammie · Feb 10, 2022 · 4ce968b · 4ce968b
1 parent aec1e25
commit 4ce968b
Show file tree

Hide file tree

Showing 13 changed files with 727 additions and 18 deletions.
diff --git a/memtorch/bh/crossbar/Crossbar.py b/memtorch/bh/crossbar/Crossbar.py
@@ -10,9 +10,15 @@
 import torch.nn as nn
 
 import memtorch
+from memtorch.bh.memristor import Data_Driven2021
+
+if "cpu" not in memtorch.__version__:
+    import memtorch_cuda_bindings
 
 from .Tile import gen_tiles
 
+CUDA_supported_memristor_models = [Data_Driven2021]  # Memristor model classes for which write_conductance_matrix may dispatch to memtorch_cuda_bindings.simulate_passive.
+
 
 @unique
 class Scheme(Enum):
@@ -48,14 +54,17 @@ def __init__(
         shape,
         tile_shape=None,
         use_bindings=True,
+        cuda_malloc_heap_size=50,
         random_crossbar_init=False,
     ):
+        self.memristor_model_params = memristor_model_params
         self.time_series_resolution = memristor_model_params.get(
             "time_series_resolution"
         )
         self.device = torch.device("cpu" if "cpu" in memtorch.__version__ else "cuda")
         self.tile_shape = tile_shape
         self.use_bindings = use_bindings
+        self.cuda_malloc_heap_size = cuda_malloc_heap_size
         if hasattr(memristor_model_params, "r_off"):
             self.r_off_mean = memristor_model_params["r_off"]
             if callable(self.r_off_mean):
@@ -201,7 +210,6 @@ def write_conductance_matrix(
             )
         else:
             raise Exception("Unsupported crossbar shape.")
-
         if self.tile_shape is not None:
             conductance_matrix, tiles_map = gen_tiles(
                 conductance_matrix,
@@ -231,27 +239,53 @@ def write_conductance_matrix(
             )
             self.update(from_devices=False)
         else:
-            if self.tile_shape is not None:
-                for i in range(0, self.devices.shape[0]):
-                    for j in range(0, self.devices.shape[1]):
-                        for k in range(0, self.devices.shape[2]):
+            if (
+                self.use_bindings
+                and type(self.devices.any()) in CUDA_supported_memristor_models
+                and "cpu" not in memtorch.__version__
+            ):
+                device_matrix = torch.FloatTensor(self.g_np(self.devices))
+                device_matrix_aug = device_matrix
+                conductance_matrix_aug = conductance_matrix
+                if (
+                    len(device_matrix.shape) == 2
+                ):  # To ensure compatibility with CUDA code
+                    device_matrix_aug = device_matrix[:, :, None]
+                    conductance_matrix_aug = conductance_matrix[:, :, None]
+
+                self.conductance_matrix = memtorch_cuda_bindings.simulate_passive(
+                    conductance_matrix_aug,
+                    device_matrix_aug,
+                    self.cuda_malloc_heap_size,
+                    **programming_routine_params,
+                    **self.memristor_model_params
+                )
+                self.max_abs_conductance = (
+                    torch.abs(self.conductance_matrix).flatten().max()
+                )
+                self.update(from_devices=False)
+            else:
+                if self.tile_shape is not None:
+                    for i in range(0, self.devices.shape[0]):
+                        for j in range(0, self.devices.shape[1]):
+                            for k in range(0, self.devices.shape[2]):
+                                self.devices = programming_routine(
+                                    self,
+                                    (i, j, k),
+                                    conductance_matrix[i][j][k],
+                                    **programming_routine_params
+                                )
+                else:
+                    for i in range(0, self.rows):
+                        for j in range(0, self.columns):
                             self.devices = programming_routine(
                                 self,
-                                (i, j, k),
-                                conductance_matrix[i][j][k],
+                                (i, j),
+                                conductance_matrix[i][j],
                                 **programming_routine_params
                             )
-            else:
-                for i in range(0, self.rows):
-                    for j in range(0, self.columns):
-                        self.devices = programming_routine(
-                            self,
-                            (i, j),
-                            conductance_matrix[i][j],
-                            **programming_routine_params
-                        )
 
-            self.update(from_devices=True)
+                self.update(from_devices=True)
 
 
 def init_crossbar(
@@ -266,6 +300,7 @@ def init_crossbar(
     scheme=Scheme.DoubleColumn,
     tile_shape=(128, 128),
     use_bindings=True,
+    cuda_malloc_heap_size=50,
     random_crossbar_init=False,
 ):
     """Method to initialise and construct memristive crossbars.
@@ -319,6 +354,7 @@ def init_crossbar(
                         channel_weights.shape,
                         tile_shape,
                         use_bindings=use_bindings,
+                        cuda_malloc_heap_size=cuda_malloc_heap_size,
                         random_crossbar_init=random_crossbar_init,
                     )
                 )
@@ -329,6 +365,7 @@ def init_crossbar(
                         channel_weights.shape,
                         tile_shape,
                         use_bindings=use_bindings,
+                        cuda_malloc_heap_size=cuda_malloc_heap_size,
                         random_crossbar_init=random_crossbar_init,
                     )
                 )
@@ -413,6 +450,7 @@ def out(crossbars, operation, idx=(0, 1), **kwargs):
                         channel_weights.shape,
                         tile_shape,
                         use_bindings=use_bindings,
+                        random_crossbar_init=random_crossbar_init,
                     )
                 )
                 conductance_matrix = mapping_routine(
@@ -437,6 +475,7 @@ def out(crossbars, operation, idx=(0, 1), **kwargs):
                     weights.shape,
                     tile_shape,
                     use_bindings=use_bindings,
+                    random_crossbar_init=random_crossbar_init,
                 )
             )
             conductance_matrix = mapping_routine(

diff --git a/memtorch/bh/memristor/Data_Driven2021.py b/memtorch/bh/memristor/Data_Driven2021.py
@@ -4,6 +4,7 @@
 
 import memtorch
 from memtorch.utils import clip
+
 from .Memristor import Memristor as Memristor
 
 

diff --git a/memtorch/cu/bindings.cpp b/memtorch/cu/bindings.cpp
@@ -6,11 +6,13 @@
 #include "inference.h"
 #include "solve_passive.h"
 #include "tile_matmul.h"
+#include "simulate_passive.h"
 
 
 PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   gen_tiles_bindings_gpu(m);
   tile_matmul_bindings(m);
   inference_bindings(m);
+  simulate_passive_bindings(m);
   solve_passive_bindings(m);
 }
diff --git a/memtorch/cu/simulate_passive.cpp b/memtorch/cu/simulate_passive.cpp
@@ -0,0 +1,109 @@
+#include <ATen/ATen.h>
+#include <cmath>
+#include <torch/extension.h>
+
+#include <Eigen/Core>
+
+#include <Eigen/SparseCore>
+
+#include <Eigen/SparseLU>
+
+#include "simulate_passive_kernels.cuh"
+
+// Default values of the Data_Driven2021 r_p / r_n parameters; used as the keyword defaults for "r_p" and "r_n" in simulate_passive_bindings below.
+std::vector<float> r_p{2699.2336, -672.930205};
+std::vector<float> r_n{649.413746, -1474.32358};
+
+void simulate_passive_bindings(py::module_ &m) { // Registers one "simulate_passive" overload per memristor model on module m.
+  // All overloads share the arguments up to r_on and end with simulate_neighbours; only the model parameters in between differ. NOTE(review): every non-tensor argument has a default, so overload selection presumably relies on the model-specific keywords passed by the caller - confirm.
+  // Data_Driven2021 model: forwards to simulate_passive_dd. The "r_p" / "r_n" keyword defaults are the file-scope vectors of the same name.
+  m.def(
+      "simulate_passive",
+      [&](at::Tensor conductance_matrix, at::Tensor device_matrix,int cuda_malloc_heap_size, float rel_tol,
+          float pulse_duration, float refactory_period, float pos_voltage_level, float neg_voltage_level,
+          float timeout, float force_adjustment, float force_adjustment_rel_tol, float force_adjustment_pos_voltage_threshold,
+          float force_adjustment_neg_voltage_threshold, float time_series_resolution , float r_off, float r_on, float A_p, float A_n, float t_p, float t_n,
+          float k_p, float k_n, std::vector<float>  r_p, std::vector<float> r_n, float a_p, float a_n, float b_p, float b_n, bool sim_neighbors) {
+        return simulate_passive_dd(conductance_matrix, device_matrix,cuda_malloc_heap_size, rel_tol,
+          pulse_duration, refactory_period,  pos_voltage_level,  neg_voltage_level,
+          timeout, force_adjustment, force_adjustment_rel_tol, force_adjustment_pos_voltage_threshold,
+          force_adjustment_neg_voltage_threshold,time_series_resolution,r_off,r_on,A_p,A_n,t_p,t_n,k_p,k_n,r_p,r_n,a_p,a_n,b_p,b_n, sim_neighbors);
+      },
+      py::arg("conductance_matrix"), py::arg("device_matrix"),py::arg("cuda_malloc_heap_size")=50, py::arg("rel_tol")=0.1,
+      py::arg("pulse_duration") = 1e-3, py::arg("refactory_period") = 0, py::arg("pos_voltage_level") = 1.0,
+      py::arg("neg_voltage_level") = -1.0, py::arg("timeout") = 5, py::arg("force_adjustment") = 1e-3,
+      py::arg("force_adjustment_rel_tol") = 1e-1, py::arg("force_adjustment_pos_voltage_threshold") = 0,
+      py::arg("force_adjustment_neg_voltage_threshold") = 0, py::arg("time_series_resolution") = 1e-10, py::arg("r_off") = 10000, py::arg("r_on") = 1000, py::arg("A_p") = 600.10075,
+      py::arg("A_n")=-34.5988399, py::arg("t_p") = -0.0212028, py::arg("t_n") = -0.05343997, py::arg("k_p") = 5.11e-4, py::arg("k_n") = 1.17e-3,
+      py::arg("r_p") = r_p, py::arg("r_n") = r_n, py::arg("a_p")=0.32046175,
+      py::arg("a_n")=0.32046175, py::arg("b_p")=2.71689828, py::arg("b_n")=2.71689828, py::arg("simulate_neighbours") = true); // TODO: consider moving simulate_neighbours ahead of the model-specific arguments. NOTE: the Python keyword is "simulate_neighbours" although the lambda parameter is named sim_neighbors.
+
+  // Linear Ion Drift model: forwards to simulate_passive_linearIonDrift (time_series_resolution defaults to 1e-4 here, 1e-10 in the other overloads).
+  m.def(
+      "simulate_passive",
+      [&](at::Tensor conductance_matrix, at::Tensor device_matrix,int cuda_malloc_heap_size, float rel_tol,
+          float pulse_duration, float refactory_period, float pos_voltage_level, float neg_voltage_level,
+          float timeout, float force_adjustment, float force_adjustment_rel_tol, float force_adjustment_pos_voltage_threshold,
+          float force_adjustment_neg_voltage_threshold, float time_series_resolution , float r_off, float r_on, float u_v,
+          float d,float pos_write_threshold, float neg_write_threshold, float p, bool sim_neighbors) {
+        return simulate_passive_linearIonDrift(conductance_matrix, device_matrix,cuda_malloc_heap_size, rel_tol,
+          pulse_duration, refactory_period,  pos_voltage_level,  neg_voltage_level,
+          timeout, force_adjustment, force_adjustment_rel_tol, force_adjustment_pos_voltage_threshold,
+          force_adjustment_neg_voltage_threshold,time_series_resolution,r_off,r_on, u_v,
+          d, pos_write_threshold, neg_write_threshold, p,sim_neighbors);
+      },
+      py::arg("conductance_matrix"), py::arg("device_matrix"),py::arg("cuda_malloc_heap_size")=50, py::arg("rel_tol")=0.1,
+      py::arg("pulse_duration") = 1e-3, py::arg("refactory_period") = 0, py::arg("pos_voltage_level") = 1.0,
+      py::arg("neg_voltage_level") = -1.0, py::arg("timeout") = 5, py::arg("force_adjustment") = 1e-3,
+      py::arg("force_adjustment_rel_tol") = 1e-1, py::arg("force_adjustment_pos_voltage_threshold") = 0,
+      py::arg("force_adjustment_neg_voltage_threshold") = 0, py::arg("time_series_resolution") = 1e-4, py::arg("r_off") = 10000, py::arg("r_on") = 1000, py::arg("u_v") = 1e-14,
+      py::arg("d") = 10e-9, py::arg("pos_write_threshold") = 0.55, py::arg("neg_write_threshold") = -0.55, py::arg("p") = 1, py::arg("simulate_neighbours") = true);
+
+   // VTEAM model: forwards to simulate_passive_VTEAM.
+   m.def(
+      "simulate_passive",
+      [&](at::Tensor conductance_matrix, at::Tensor device_matrix,int cuda_malloc_heap_size, float rel_tol,
+          float pulse_duration, float refactory_period, float pos_voltage_level, float neg_voltage_level,
+          float timeout, float force_adjustment, float force_adjustment_rel_tol, float force_adjustment_pos_voltage_threshold,
+          float force_adjustment_neg_voltage_threshold, float time_series_resolution , float r_off, float r_on, float d,
+          float k_on, float k_off, float alpha_on,  float alpha_off, float v_on, float v_off, float x_on, float x_off, bool sim_neighbors) {
+        return simulate_passive_VTEAM(conductance_matrix, device_matrix,cuda_malloc_heap_size, rel_tol,
+          pulse_duration, refactory_period,  pos_voltage_level,  neg_voltage_level,
+          timeout, force_adjustment, force_adjustment_rel_tol, force_adjustment_pos_voltage_threshold,
+          force_adjustment_neg_voltage_threshold,time_series_resolution,r_off,r_on,d,
+          k_on, k_off, alpha_on, alpha_off, v_on, v_off, x_on, x_off, sim_neighbors);
+      },
+      py::arg("conductance_matrix"), py::arg("device_matrix"),py::arg("cuda_malloc_heap_size")=50, py::arg("rel_tol")=0.1,
+      py::arg("pulse_duration") = 1e-3, py::arg("refactory_period") = 0, py::arg("pos_voltage_level") = 1.0,
+      py::arg("neg_voltage_level") = -1.0, py::arg("timeout") = 5, py::arg("force_adjustment") = 1e-3,
+      py::arg("force_adjustment_rel_tol") = 1e-1, py::arg("force_adjustment_pos_voltage_threshold") = 0,
+      py::arg("force_adjustment_neg_voltage_threshold") = 0, py::arg("time_series_resolution") = 1e-10, py::arg("r_off") = 10000, py::arg("r_on") = 1000, py::arg("d") = 3e-9,
+      py::arg("k_on") =-10, py::arg("k_off") = 5e-4, py::arg("alpha_on") =3, py::arg("alpha_off") = 1, py::arg("v_on") = 0.2, py::arg("v_off") = 0.02, py::arg("x_on") = 0,
+      py::arg("x_off") = 3e-9, py::arg("simulate_neighbours") = true);
+
+   // Stanford_PKU model: forwards to simulate_passive_Stanford_PKU.
+   m.def(
+      "simulate_passive",
+      [&](at::Tensor conductance_matrix, at::Tensor device_matrix,int cuda_malloc_heap_size, float rel_tol,
+          float pulse_duration, float refactory_period, float pos_voltage_level, float neg_voltage_level,
+          float timeout, float force_adjustment, float force_adjustment_rel_tol, float force_adjustment_pos_voltage_threshold,
+          float force_adjustment_neg_voltage_threshold, float time_series_resolution , float r_off, float r_on, float gap_init,
+          float g_0, float V_0, float I_0, float read_voltage, float T_init, float R_th, float gamma_init,
+          float beta, float t_ox, float F_min, float vel_0, float E_a, float a_0, float delta_g_init,
+          float model_switch, float T_crit, float T_smth, bool sim_neighbors) {
+        return simulate_passive_Stanford_PKU(conductance_matrix, device_matrix,cuda_malloc_heap_size, rel_tol,
+          pulse_duration, refactory_period,  pos_voltage_level,  neg_voltage_level,
+          timeout, force_adjustment, force_adjustment_rel_tol, force_adjustment_pos_voltage_threshold,
+          force_adjustment_neg_voltage_threshold,time_series_resolution,r_off,r_on, gap_init,
+          g_0, V_0, I_0, read_voltage,  T_init, R_th, gamma_init, beta, t_ox, F_min, vel_0, E_a, a_0,
+          delta_g_init, model_switch, T_crit, T_smth, sim_neighbors);
+      },
+      py::arg("conductance_matrix"), py::arg("device_matrix"),py::arg("cuda_malloc_heap_size")=50, py::arg("rel_tol")=0.1,
+      py::arg("pulse_duration") = 1e-3, py::arg("refactory_period") = 0, py::arg("pos_voltage_level") = 1.0,
+      py::arg("neg_voltage_level") = -1.0, py::arg("timeout") = 5, py::arg("force_adjustment") = 1e-3,
+      py::arg("force_adjustment_rel_tol") = 1e-1, py::arg("force_adjustment_pos_voltage_threshold") = 0,
+      py::arg("force_adjustment_neg_voltage_threshold") = 0, py::arg("time_series_resolution") = 1e-10, py::arg("r_off") = 10000, py::arg("r_on") = 1000, py::arg("gap_init") = 2e-10,
+      py::arg("g_0") = 0.25e-9, py::arg("V_0") = 0.25, py::arg("I_0") = 1000e-6, py::arg("read_voltage") = 0.1, py::arg("T_init") = 298, py::arg("R_th") = 2.1e3,
+      py::arg("gamma_init") = 16, py::arg("beta") = 0.8, py::arg("t_ox") = 12e-9,py::arg("F_min") = 1.4e9, py::arg("vel_0") = 10, py::arg("E_a") = 0.6, py::arg("a_0") = 0.25e-9,
+      py::arg("delta_g_init") = 0.02, py::arg("model_switch") = 0, py::arg("T_crit") = 450, py::arg("T_smth") = 500, py::arg("simulate_neighbours") = true);
+}
diff --git a/memtorch/cu/simulate_passive.cuh b/memtorch/cu/simulate_passive.cuh
@@ -0,0 +1 @@
+// Placeholder header: currently unused.
diff --git a/memtorch/cu/simulate_passive.h b/memtorch/cu/simulate_passive.h
@@ -0,0 +1 @@
+void simulate_passive_bindings(py::module_ &m); // Registers the "simulate_passive" overloads on the given pybind11 module; defined in simulate_passive.cpp.