Merge pull request #9 from DavidDiazGuerra/sinc_lut

Add the (default) option to compute the sinc functions using LookUp Tables to improve the computational efficiency. The LUT mode is not compatible with the mixed-precision mode.
DavidDiazGuerra · Feb 3, 2020 · 885c259 · 885c259
2 parents 84ea4bf + 5182b61
commit 885c259
Show file tree

Hide file tree

Showing 8 changed files with 314 additions and 182 deletions.
diff --git a/README.md b/README.md
@@ -10,6 +10,7 @@
   * [`simulateRIR`](#simulaterir)
   * [`simulateTrajectory`](#simulatetrajectory)
   * [`activateMixedPrecision`](#activateMixedPrecision)
+  * [`activateLUT`](#activateLUT)
   * [`beta_SabineEstimation`](#beta_sabineestimation)
   * [`att2t_SabineEstimator`](#att2t_sabineestimator)
   * [`t2n`](#t2n)
@@ -110,7 +111,16 @@ Activate the mixed precision mode, only for Pascal GPU architecture or superior.
 
 * **activate** : *bool, optional.*
         True to activate and False to deactivate. True by default.
-
+
+### `activateLUT`
+
+Activate the lookup table for the sinc computations.
+
+#### Parameters
+
+* **activate** : *bool, optional.*
+        True to activate and False to deactivate. True by default.
+
 ### `beta_SabineEstimation`
 
 Estimation of the reflection coefficients needed to have the desired reverberation time.

diff --git a/examples/example.py b/examples/example.py
@@ -10,6 +10,7 @@
 
 import gpuRIR
 gpuRIR.activateMixedPrecision(False)
+gpuRIR.activateLUT(True)
 
 room_sz = [3,3,2.5]  # Size of the room [m]
 nb_src = 2  # Number of sources

diff --git a/examples/time_vs_T60.py b/examples/time_vs_T60.py
@@ -10,6 +10,7 @@
 
 import gpuRIR
 gpuRIR.activateMixedPrecision(False)
+gpuRIR.activateLUT(False)
 
 T60_vec = np.arange(0.1, 2.2, 0.2) # Reverberation times to measure
 nb_test_per_point = 10 # Number of simulations per T60 to average the runtime

diff --git a/examples/time_vs_nbRIRs.py b/examples/time_vs_nbRIRs.py
@@ -10,6 +10,7 @@
 
 import gpuRIR
 gpuRIR.activateMixedPrecision(False)
+gpuRIR.activateLUT(False)
 
 nb_src_vec = np.concatenate([2**np.arange(12), [4094]]) # Number of RIRs to measure
 nb_test_per_point = 10 # Number of simulations per T60 to average the runtime

diff --git a/gpuRIR/__init__.py b/gpuRIR/__init__.py
@@ -8,7 +8,7 @@
 
 from gpuRIR_bind import gpuRIR_bind
 
-__all__ = ["mic_patterns", "beta_SabineEstimation", "att2t_SabineEstimator", "t2n", "simulateRIR", "simulateTrajectory", "activate_mixed_precision"]
+__all__ = ["mic_patterns", "beta_SabineEstimation", "att2t_SabineEstimator", "t2n", "simulateRIR", "simulateTrajectory", "activate_mixed_precision", "activate_lut"]
 
 mic_patterns =	{
   "omni": 0,
@@ -212,5 +212,16 @@ def activateMixedPrecision(activate=True):
 	'''
 	gpuRIR_bind_simulator.activate_mixed_precision_bind(activate)
 
+def activateLUT(activate=True):
+	''' Activate the lookup table (LUT) for the sinc computations.
+
+	The LUT mode is not compatible with the mixed-precision mode.
+
+	Parameters
+	----------
+	activate : bool, optional
+		True to activate and False to deactivate. True by default.
+
+	'''
+	gpuRIR_bind_simulator.activate_lut_bind(activate)
+
 # Create the simulator object when the module is loaded
 gpuRIR_bind_simulator = gpuRIR_bind()
diff --git a/src/gpuRIR_cuda.cu b/src/gpuRIR_cuda.cu
diff --git a/src/gpuRIR_cuda.h b/src/gpuRIR_cuda.h
@@ -1,8 +1,4 @@
 
-
-typedef float scalar_t;
-//typedef float2 Complex;
-
 // Accepted polar patterns for the receivers:
 typedef int micPattern;
 #define DIR_OMNI 0
@@ -19,20 +15,24 @@ struct cuRandGeneratorWrapper_t;
 class gpuRIR_cuda {
 
 	public:
-		gpuRIR_cuda(bool);
+		gpuRIR_cuda(bool, bool);
 
-		scalar_t* cuda_simulateRIR(scalar_t[3], scalar_t[6], scalar_t*, int, scalar_t*, scalar_t*, micPattern, int, int[3], scalar_t, scalar_t, scalar_t, scalar_t);
-		scalar_t* cuda_convolutions(scalar_t*, int, int,scalar_t*, int, int);
+		float* cuda_simulateRIR(float[3], float[6], float*, int, float*, float*, micPattern, int, int[3], float, float, float, float);
+		float* cuda_convolutions(float*, int, int, float*, int, int);
 		bool activate_mixed_precision(bool);
+		bool activate_lut(bool);
 
 	private:
 		// cuRAND generator
 		static cuRandGeneratorWrapper_t cuRandGenWrap; // I'm not able to compile if I include the cuda headers here... so I have to hide the cuRAND generator
 
 		// Mixed precision flag
 		bool mixed_precision;
+
+		// Lookup table flag
+		bool lookup_table;
 
 		// Auxiliary host functions
-		void cuda_rirGenerator(scalar_t*, scalar_t*, scalar_t*, int, int, int, scalar_t);
-		int PadData(scalar_t*, scalar_t**, int, scalar_t*, scalar_t**, int, int, int);
+		void cuda_rirGenerator(float*, float*, float*, int, int, int, float);
+		int PadData(float*, float**, int, float*, float**, int, int, int);
 };
diff --git a/src/python_bind.cpp b/src/python_bind.cpp
@@ -12,29 +12,31 @@ namespace py = pybind11;
 
 class gpuRIR_bind {
 	public:
-		gpuRIR_bind(bool mPrecision=false) : mixed_precision(mPrecision), gpuRIR_cuda_simulator(mPrecision) {};
+		gpuRIR_bind(bool mPrecision=false, bool lut=true) : mixed_precision(mPrecision), lookup_table(lut), gpuRIR_cuda_simulator(mPrecision, lut) {};
 
-		py::array simulateRIR_bind(std::vector<scalar_t>, std::vector<scalar_t>, py::array_t<scalar_t, py::array::c_style>, py::array_t<scalar_t, py::array::c_style>, py::array_t<scalar_t, py::array::c_style>, micPattern, std::vector<int> ,scalar_t, scalar_t, scalar_t, scalar_t);
-		py::array gpu_conv(py::array_t<scalar_t, py::array::c_style>, py::array_t<scalar_t, py::array::c_style>);
+		py::array simulateRIR_bind(std::vector<float>, std::vector<float>, py::array_t<float, py::array::c_style>, py::array_t<float, py::array::c_style>, py::array_t<float, py::array::c_style>, micPattern, std::vector<int> ,float, float, float, float);
+		py::array gpu_conv(py::array_t<float, py::array::c_style>, py::array_t<float, py::array::c_style>);
 		bool activate_mixed_precision_bind(bool);
+		bool activate_lut_bind(bool);
 
 		bool mixed_precision;
+		bool lookup_table;
 
 	private:
 		gpuRIR_cuda gpuRIR_cuda_simulator;
 };
 
-py::array gpuRIR_bind::simulateRIR_bind(std::vector<scalar_t> room_sz, // Size of the room [m]
-										std::vector<scalar_t> beta, // Reflection coefficients
-										py::array_t<scalar_t, py::array::c_style> pos_src, // positions of the sources [m]
-										py::array_t<scalar_t, py::array::c_style> pos_rcv, // positions of the receivers [m]
-										py::array_t<scalar_t, py::array::c_style> orV_rcv, // orientation of the receivers
+py::array gpuRIR_bind::simulateRIR_bind(std::vector<float> room_sz, // Size of the room [m]
+										std::vector<float> beta, // Reflection coefficients
+										py::array_t<float, py::array::c_style> pos_src, // positions of the sources [m]
+										py::array_t<float, py::array::c_style> pos_rcv, // positions of the receivers [m]
+										py::array_t<float, py::array::c_style> orV_rcv, // orientation of the receivers
 										micPattern mic_pattern, // Polar pattern of the receivers (see gpuRIR_cuda.h)
 										std::vector<int> nb_img, // Number of sources in each dimension
-										scalar_t Tdiff, // Time when the ISM is replaced by a diffusse reverberation model [s]
-										scalar_t Tmax, // RIRs length [s]
-										scalar_t Fs, // Sampling frequency [Hz]
-										scalar_t c=343.0 // Speed of sound [m/s]
+										float Tdiff, // Time when the ISM is replaced by a diffusse reverberation model [s]
+										float Tmax, // RIRs length [s]
+										float Fs, // Sampling frequency [Hz]
+										float c=343.0 // Speed of sound [m/s]
 									   ) 
 {
 	py::buffer_info info_pos_src = pos_src.request();
@@ -55,26 +57,26 @@ py::array gpuRIR_bind::simulateRIR_bind(std::vector<scalar_t> room_sz, // Size o
 	int M_src = info_pos_src.shape[0];
 	int M_rcv = info_pos_rcv.shape[0];
 
-	scalar_t* rir = gpuRIR_cuda_simulator.cuda_simulateRIR(&room_sz[0], &beta[0], 
-														   (scalar_t*) info_pos_src.ptr, M_src, 
-														   (scalar_t*) info_pos_rcv.ptr, (scalar_t*) info_orV_rcv.ptr, mic_pattern, M_rcv, 
+	float* rir = gpuRIR_cuda_simulator.cuda_simulateRIR(&room_sz[0], &beta[0], 
+														   (float*) info_pos_src.ptr, M_src, 
+														   (float*) info_pos_rcv.ptr, (float*) info_orV_rcv.ptr, mic_pattern, M_rcv, 
 														   &nb_img[0], Tdiff, Tmax, Fs, c);
 
 	py::capsule free_when_done(rir, [](void *f) {
-		scalar_t *foo = reinterpret_cast<scalar_t *>(f);
+		float *foo = reinterpret_cast<float *>(f);
 		delete[] foo;
 	});
 
 	int nSamples = ceil(Tmax*Fs);
 	nSamples += nSamples%2; // nSamples must be even
 	std::vector<int> shape = {M_src, M_rcv, nSamples};
-	std::vector<size_t> strides = {M_rcv*nSamples*sizeof(scalar_t), nSamples*sizeof(scalar_t), sizeof(scalar_t)};
-	return py::array_t<scalar_t>(shape, strides, rir, free_when_done);
+	std::vector<size_t> strides = {M_rcv*nSamples*sizeof(float), nSamples*sizeof(float), sizeof(float)};
+	return py::array_t<float>(shape, strides, rir, free_when_done);
 
 }
 
-py::array gpuRIR_bind::gpu_conv(py::array_t<scalar_t, py::array::c_style> source_segments, // Source signal segment for each trajectory point
-								py::array_t<scalar_t, py::array::c_style> RIR // 3D array with the RIR from each point of the trajectory to each receiver
+py::array gpuRIR_bind::gpu_conv(py::array_t<float, py::array::c_style> source_segments, // Source signal segment for each trajectory point
+								py::array_t<float, py::array::c_style> RIR // 3D array with the RIR from each point of the trajectory to each receiver
 							   ) 
 {
 	py::buffer_info info_source_segments = source_segments.request();
@@ -89,25 +91,29 @@ py::array gpuRIR_bind::gpu_conv(py::array_t<scalar_t, py::array::c_style> source
 	int M_rcv = info_RIR.shape[1];
 	int RIR_len = info_RIR.shape[2];
 
-	scalar_t* convolution = gpuRIR_cuda_simulator.cuda_convolutions((scalar_t*)info_source_segments.ptr, M_src, segment_len,
-																	(scalar_t*)info_RIR.ptr, M_rcv, RIR_len);
+	float* convolution = gpuRIR_cuda_simulator.cuda_convolutions((float*)info_source_segments.ptr, M_src, segment_len,
+																	(float*)info_RIR.ptr, M_rcv, RIR_len);
 
 	py::capsule free_when_done(convolution, [](void *f) {
-		scalar_t *foo = reinterpret_cast<scalar_t *>(f);
+		float *foo = reinterpret_cast<float *>(f);
 		delete[] foo;
 	});
 
 	int nSamples = segment_len+RIR_len-1;
 	std::vector<int> shape = {M_src, M_rcv, nSamples};
-	std::vector<size_t> strides = {M_rcv*nSamples*sizeof(scalar_t), nSamples*sizeof(scalar_t), sizeof(scalar_t)};
-	return py::array_t<scalar_t>(shape, strides, convolution, free_when_done);
+	std::vector<size_t> strides = {M_rcv*nSamples*sizeof(float), nSamples*sizeof(float), sizeof(float)};
+	return py::array_t<float>(shape, strides, convolution, free_when_done);
 
 }
 
 // Activate or deactivate the mixed precision mode of the wrapped CUDA simulator.
 // Returns the bool reported by gpuRIR_cuda::activate_mixed_precision
 // (NOTE(review): its exact meaning is defined in gpuRIR_cuda.cu -- confirm there).
 bool gpuRIR_bind::activate_mixed_precision_bind(bool activate) {
   // The result has to be returned: flowing off the end of a non-void function is undefined behavior.
   return gpuRIR_cuda_simulator.activate_mixed_precision(activate);
 }
 
+// Activate or deactivate the sinc lookup table of the wrapped CUDA simulator.
+// Returns the bool reported by gpuRIR_cuda::activate_lut
+// (NOTE(review): its exact meaning is defined in gpuRIR_cuda.cu -- confirm there).
+bool gpuRIR_bind::activate_lut_bind(bool activate) {
+  // The result has to be returned: flowing off the end of a non-void function is undefined behavior.
+  return gpuRIR_cuda_simulator.activate_lut(activate);
+}
+
 
 PYBIND11_MODULE(gpuRIR_bind,m)
 {
@@ -120,6 +126,6 @@ PYBIND11_MODULE(gpuRIR_bind,m)
 			 py::arg("Fs"), py::arg("c")=343.0f )
 		.def("gpu_conv", &gpuRIR_bind::gpu_conv, "Batched convolution using FFTs in GPU", py::arg("source_segments"), py::arg("RIR"))
 		.def("activate_mixed_precision_bind", &gpuRIR_bind::activate_mixed_precision_bind, "Activate the mixed precision mode, only for Pascal GPU architecture or superior",
-			 py::arg("activate"));
-
+			 py::arg("activate"))
+		.def("activate_lut_bind", &gpuRIR_bind::activate_lut_bind, "Activate the lookup table", py::arg("activate"));
 }