From 21e8252ea53f4acd92c7dd24b3f1c6ab2c18299b Mon Sep 17 00:00:00 2001
From: DavidDiazGuerra
Date: Fri, 4 Oct 2019 14:24:07 +0200
Subject: [PATCH] Now the lookup table is optional

---
 README.md                  | 12 +++++-
 examples/example.py        |  1 +
 examples/time_vs_T60.py    |  1 +
 examples/time_vs_nbRIRs.py |  1 +
 gpuRIR/__init__.py         | 13 +++++-
 src/gpuRIR_cuda.cu         | 83 +++++++++++++++++++++++++++++++------
 src/gpuRIR_cuda.h          |  6 ++-
 src/python_bind.cpp        | 12 ++++--
 8 files changed, 108 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index a8e3170..1844e6a 100644
--- a/README.md
+++ b/README.md
@@ -10,6 +10,7 @@
 * [`simulateRIR`](#simulaterir)
 * [`simulateTrajectory`](#simulatetrajectory)
 * [`activateMixedPrecision`](#activateMixedPrecision)
+* [`activateLUT`](#activateLUT)
 * [`beta_SabineEstimation`](#beta_sabineestimation)
 * [`att2t_SabineEstimator`](#att2t_sabineestimator)
 * [`t2n`](#t2n)
@@ -110,7 +111,16 @@ Activate the mixed precision mode, only for Pascal GPU architecture or superior.
 
 * **activate** : *bool, optional.*
 	True for activate and Flase for deactivate. True by default.
-	
+
+### `activateLUT`
+
+Activate the lookup table for the sinc computations.
+
+#### Parameters
+
+* **activate** : *bool, optional.*
+	True to activate and False to deactivate. True by default.
+
 ### `beta_SabineEstimation`
 
 Estimation of the reflection coefficients needed to have the desired reverberation time.
diff --git a/examples/example.py b/examples/example.py
index 8b7a297..5b2aaa9 100755
--- a/examples/example.py
+++ b/examples/example.py
@@ -10,6 +10,7 @@
 
 import gpuRIR
 gpuRIR.activateMixedPrecision(False)
+gpuRIR.activateLUT(True)
 
 room_sz = [3,3,2.5] # Size of the room [m]
 nb_src = 2 # Number of sources
diff --git a/examples/time_vs_T60.py b/examples/time_vs_T60.py
index 5c1eaa7..2f06592 100644
--- a/examples/time_vs_T60.py
+++ b/examples/time_vs_T60.py
@@ -10,6 +10,7 @@
 
 import gpuRIR
 gpuRIR.activateMixedPrecision(False)
+gpuRIR.activateLUT(False)
 
 T60_vec = np.arange(0.1, 2.2, 0.2) # Reverberation times to measure
 nb_test_per_point = 10 # Number of simulations per T60 to average the runtime
diff --git a/examples/time_vs_nbRIRs.py b/examples/time_vs_nbRIRs.py
index 7b950ab..c43f96f 100644
--- a/examples/time_vs_nbRIRs.py
+++ b/examples/time_vs_nbRIRs.py
@@ -10,6 +10,7 @@
 
 import gpuRIR
 gpuRIR.activateMixedPrecision(False)
+gpuRIR.activateLUT(False)
 
 nb_src_vec = np.concatenate([2**np.arange(12), [4094]]) # Number of RIRs to measure
 nb_test_per_point = 10 # Number of simulations per T60 to average the runtime
diff --git a/gpuRIR/__init__.py b/gpuRIR/__init__.py
index cc5a2da..d2616bb 100644
--- a/gpuRIR/__init__.py
+++ b/gpuRIR/__init__.py
@@ -8,7 +8,7 @@
 
 from gpuRIR_bind import gpuRIR_bind
 
-__all__ = ["mic_patterns", "beta_SabineEstimation", "att2t_SabineEstimator", "t2n", "simulateRIR", "simulateTrajectory", "activate_mixed_precision"]
+__all__ = ["mic_patterns", "beta_SabineEstimation", "att2t_SabineEstimator", "t2n", "simulateRIR", "simulateTrajectory", "activate_mixed_precision", "activate_lut"]
 
 mic_patterns = {
 	"omni": 0,
@@ -212,5 +212,16 @@ def activateMixedPrecision(activate=True):
 	'''
 	gpuRIR_bind_simulator.activate_mixed_precision_bind(activate)
 
+def activateLUT(activate=True):
+	''' Activate the lookup table for the sinc computations.
+
+	Parameters
+	----------
+	activate : bool, optional
+		True to activate and False to deactivate. True by default.
+
+	'''
+	gpuRIR_bind_simulator.activate_lut_bind(activate)
+
 # Create the simulator object when the module is loaded
 gpuRIR_bind_simulator = gpuRIR_bind()
diff --git a/src/gpuRIR_cuda.cu b/src/gpuRIR_cuda.cu
index f911df0..5feb223 100644
--- a/src/gpuRIR_cuda.cu
+++ b/src/gpuRIR_cuda.cu
@@ -115,9 +115,9 @@ __device__ __forceinline__ scalar_t sinc(scalar_t x) {
 	return (x==0)? 1 : sinf(x)/x;
 }
 
-__device__ __forceinline__ scalar_t image_sample(scalar_t amp, scalar_t tau, scalar_t t, int Tw_2, cudaTextureObject_t sinc_lut, float lut_center) {
-	scalar_t t_tau = t - tau;
-	return (abs(t_tau)(sinc_lut, __fmaf_rz(t_tau,lut_oversamp,lut_center)) : 0.0f;
+__device__ __forceinline__ scalar_t image_sample(scalar_t amp, scalar_t tau, scalar_t t, scalar_t Tw) {
+	scalar_t t_tau = t - tau;
+	return (abs(t_tau)(sinc_lut, __fmaf_rz(t_tau,lut_oversamp,lut_center)) : 0.0f;
+}
 
 /***********/
 /* KERNELS */
 /***********/
@@ -339,7 +348,7 @@ __global__ void calcAmpTau_kernel(scalar_t* g_amp /*[M_src]M_rcv][nb_img_x][nb_i
 	}
 }
 
-__global__ void generateRIR_kernel(scalar_t* initialRIR, scalar_t* amp, scalar_t* tau, int T, int M, int N, int iniRIR_N, int ini_red, int Tw_2, cudaTextureObject_t sinc_lut, float lut_center) {
+__global__ void generateRIR_kernel(scalar_t* initialRIR, scalar_t* amp, scalar_t* tau, int T, int M, int N, int iniRIR_N, int ini_red, scalar_t Tw) {
 	int t = blockIdx.x * blockDim.x + threadIdx.x;
 	int m = blockIdx.y * blockDim.y + threadIdx.y;
 	int n_ini = blockIdx.z * ini_red;
@@ -348,7 +357,7 @@ __global__ void generateRIR_kernel(scalar_t* initialRIR, scalar_t* amp, scalar_t* tau, int T, int M, int N, int iniRIR_N, int ini_red, scalar_t Tw) {
 	if (m>>( initialRIR, amp, tau, T, M, N, iniRIR_N, initialReduction, Tw/2, sinc_lut, lut_len/2+0.5 );
-	gpuErrchk( cudaDeviceSynchronize() );
-	gpuErrchk( cudaPeekAtLastError() );
-	cudaDestroyTextureObject(sinc_lut);
-	cudaFreeArray(cuArrayLut);
+	if (lookup_table) {
+		int lut_len = Tw * lut_oversamp;
+		lut_len += ((lut_len%2)? 0 : 1); // Must be odd
+		cudaArray* cuArrayLut;
+		cudaTextureObject_t sinc_lut = create_sinc_texture_lut(&cuArrayLut, Tw, lut_len);
+
+		generateRIR_kernel_lut<<>>( initialRIR, amp, tau, T, M, N, iniRIR_N, initialReduction, Tw/2, sinc_lut, lut_len/2+0.5 );
+		gpuErrchk( cudaDeviceSynchronize() );
+		gpuErrchk( cudaPeekAtLastError() );
+
+		cudaDestroyTextureObject(sinc_lut);
+		cudaFreeArray(cuArrayLut);
+	} else {
+		generateRIR_kernel<<>>( initialRIR, amp, tau, T, M, N, iniRIR_N, initialReduction, Tw );
+		gpuErrchk( cudaDeviceSynchronize() );
+		gpuErrchk( cudaPeekAtLastError() );
+	}
+
 	dim3 threadsPerBlockRed(nThreadsRed, 1, 1);
 	scalar_t* intermediateRIR;
@@ -949,14 +986,15 @@ scalar_t* gpuRIR_cuda::cuda_convolutions(scalar_t* source_segments, int M_src, i
 	return convolved_segments;
 }
 
-gpuRIR_cuda::gpuRIR_cuda(bool mPrecision) {
+gpuRIR_cuda::gpuRIR_cuda(bool mPrecision, bool lut) {
 	// Get CUDA architecture
 	cudaDeviceProp prop;
 	cudaGetDeviceProperties(&prop, 0);
 	cuda_arch = prop.major*100 + prop.minor*10;
 
-	// Activate mixed precision if selected
+	// Activate mixed precision and lut if selected
 	activate_mixed_precision(mPrecision);
+	activate_lut(lut);
 
 	// Initiate CUDA runtime API
 	scalar_t* memPtr_warmup;
@@ -975,6 +1013,11 @@
 
 bool gpuRIR_cuda::activate_mixed_precision(bool activate) {
 	if (cuda_arch >= 530) {
+		if (activate && lookup_table) {
+			printf("The mixed precision implementation is not compatible with the lookup table.");
+			printf("Disabling the lookup table.");
+			lookup_table = false;
+		}
 		mixed_precision = activate;
 	} else {
 		if (activate) printf("This feature requires Pascal GPU architecture or higher.\n");
@@ -982,3 +1025,13 @@ bool gpuRIR_cuda::activate_mixed_precision(bool activate) {
 	}
 	return mixed_precision;
 }
+
+bool gpuRIR_cuda::activate_lut(bool activate) {
+	if (activate && mixed_precision) {
+		printf("The lookup table is not compatible with the mixed precision implementation.");
+		printf("Disabling the mixed precision implementation.");
+		mixed_precision = false;
+	}
+	lookup_table = activate;
+	return lookup_table;
+}
diff --git a/src/gpuRIR_cuda.h b/src/gpuRIR_cuda.h
index 82cb232..baa73b7 100644
--- a/src/gpuRIR_cuda.h
+++ b/src/gpuRIR_cuda.h
@@ -19,11 +19,12 @@ struct cuRandGeneratorWrapper_t;
 
 class gpuRIR_cuda {
 	public:
-		gpuRIR_cuda(bool);
+		gpuRIR_cuda(bool, bool);
 
 		scalar_t* cuda_simulateRIR(scalar_t[3], scalar_t[6], scalar_t*, int, scalar_t*, scalar_t*, micPattern, int, int[3], scalar_t, scalar_t, scalar_t, scalar_t);
 		scalar_t* cuda_convolutions(scalar_t*, int, int,scalar_t*, int, int);
 		bool activate_mixed_precision(bool);
+		bool activate_lut(bool);
 
 	private:
 		// cuRAND generator
@@ -31,6 +32,9 @@ class gpuRIR_cuda {
 
 		// Mixed precision flag
 		bool mixed_precision;
+
+		// Lookup table flag
+		bool lookup_table;
 
 		// Auxiliar host functions
 		void cuda_rirGenerator(scalar_t*, scalar_t*, scalar_t*, int, int, int, scalar_t);
diff --git a/src/python_bind.cpp b/src/python_bind.cpp
index 159b274..71b0098 100644
--- a/src/python_bind.cpp
+++ b/src/python_bind.cpp
@@ -12,13 +12,15 @@ namespace py = pybind11;
 
 class gpuRIR_bind {
 	public:
-		gpuRIR_bind(bool mPrecision=false) : mixed_precision(mPrecision), gpuRIR_cuda_simulator(mPrecision) {};
+		gpuRIR_bind(bool mPrecision=false, bool lut=true) : mixed_precision(mPrecision), lookup_table(lut), gpuRIR_cuda_simulator(mPrecision, lut) {};
 
 		py::array simulateRIR_bind(std::vector, std::vector, py::array_t, py::array_t, py::array_t, micPattern, std::vector ,scalar_t, scalar_t, scalar_t, scalar_t);
 		py::array gpu_conv(py::array_t, py::array_t);
 		bool activate_mixed_precision_bind(bool);
+		bool activate_lut_bind(bool);
 
 		bool mixed_precision;
+		bool lookup_table;
 
 	private:
 		gpuRIR_cuda gpuRIR_cuda_simulator;
@@ -108,6 +110,10 @@ bool gpuRIR_bind::activate_mixed_precision_bind(bool activate) {
 	gpuRIR_cuda_simulator.activate_mixed_precision(activate);
 }
 
+bool gpuRIR_bind::activate_lut_bind(bool activate) {
+	return gpuRIR_cuda_simulator.activate_lut(activate);
+}
+
 
 PYBIND11_MODULE(gpuRIR_bind,m)
 {
@@ -120,6 +126,6 @@ PYBIND11_MODULE(gpuRIR_bind,m)
 		py::arg("Fs"), py::arg("c")=343.0f )
 	.def("gpu_conv", &gpuRIR_bind::gpu_conv, "Batched convolution using FFTs in GPU", py::arg("source_segments"), py::arg("RIR"))
 	.def("activate_mixed_precision_bind", &gpuRIR_bind::activate_mixed_precision_bind, "Activate the mixed precision mode, only for Pascal GPU architecture or superior",
-		py::arg("activate"));
-
+		py::arg("activate"))
+	.def("activate_lut_bind", &gpuRIR_bind::activate_lut_bind, "Activate the lookup table", py::arg("activate"));
 }
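
A minimal sketch of how the new flag composes with the existing Python API, following the pattern of the examples touched above. It is illustrative only: the room, source, and receiver values are made up, and the `simulateRIR` call relies on the signature documented in the README rather than on anything added by this patch.

```python
import numpy as np
import gpuRIR

gpuRIR.activateMixedPrecision(False)  # mixed precision and the LUT are mutually exclusive
gpuRIR.activateLUT(True)              # use the sinc lookup table (the default)

room_sz = [3.0, 3.0, 2.5]              # room size [m] (illustrative values)
pos_src = np.array([[1.0, 1.5, 1.2]])  # one source
pos_rcv = np.array([[2.0, 1.5, 1.2]])  # one receiver
T60 = 0.5                              # desired reverberation time [s]
fs = 16000.0                           # sampling frequency [Hz]

beta = gpuRIR.beta_SabineEstimation(room_sz, T60)  # reflection coefficients for the desired T60
Tmax = gpuRIR.att2t_SabineEstimator(40.0, T60)     # RIR duration for 40 dB of attenuation
nb_img = gpuRIR.t2n(Tmax, room_sz)                 # number of image sources per dimension

rirs = gpuRIR.simulateRIR(room_sz, beta, pos_src, pos_rcv, nb_img, Tmax, fs)
print(rirs.shape)  # (number of sources, number of receivers, number of samples)
```

Calling `gpuRIR.activateLUT(False)` instead makes `simulateRIR` compute the sinc directly in the kernel; that is the setting used by the two timing scripts in this patch, while `example.py` enables the table.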