Skip to content

Commit

Permalink
Now the lookup table is optional
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidDiazGuerra committed Oct 4, 2019
1 parent 69199f4 commit 21e8252
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 21 deletions.
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* [`simulateRIR`](#simulaterir)
* [`simulateTrajectory`](#simulatetrajectory)
* [`activateMixedPrecision`](#activateMixedPrecision)
* [`activateLUT`](#activateLUT)
* [`beta_SabineEstimation`](#beta_sabineestimation)
* [`att2t_SabineEstimator`](#att2t_sabineestimator)
* [`t2n`](#t2n)
Expand Down Expand Up @@ -110,7 +111,16 @@ Activate the mixed precision mode, only for Pascal GPU architecture or superior.

* **activate** : *bool, optional.*
True to activate and False to deactivate. True by default.


### `activateLUT`

Activate the lookup table for the sinc computations.

#### Parameters

* **activate** : *bool, optional.*
True to activate and False to deactivate. True by default.

### `beta_SabineEstimation`

Estimation of the reflection coefficients needed to have the desired reverberation time.
Expand Down
1 change: 1 addition & 0 deletions examples/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import gpuRIR
gpuRIR.activateMixedPrecision(False)
gpuRIR.activateLUT(True)

room_sz = [3,3,2.5] # Size of the room [m]
nb_src = 2 # Number of sources
Expand Down
1 change: 1 addition & 0 deletions examples/time_vs_T60.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import gpuRIR
gpuRIR.activateMixedPrecision(False)
gpuRIR.activateLUT(False)

T60_vec = np.arange(0.1, 2.2, 0.2) # Reverberation times to measure
nb_test_per_point = 10 # Number of simulations per T60 to average the runtime
Expand Down
1 change: 1 addition & 0 deletions examples/time_vs_nbRIRs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import gpuRIR
gpuRIR.activateMixedPrecision(False)
gpuRIR.activateLUT(False)

nb_src_vec = np.concatenate([2**np.arange(12), [4094]]) # Number of RIRs to measure
nb_test_per_point = 10 # Number of simulations per T60 to average the runtime
Expand Down
13 changes: 12 additions & 1 deletion gpuRIR/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from gpuRIR_bind import gpuRIR_bind

__all__ = ["mic_patterns", "beta_SabineEstimation", "att2t_SabineEstimator", "t2n", "simulateRIR", "simulateTrajectory", "activate_mixed_precision"]
# Names exported by `from gpuRIR import *`. Every entry must be the name of an object actually defined
# in this module, otherwise the star-import raises AttributeError; the activation helpers are defined
# as `activateMixedPrecision` and `activateLUT`.
__all__ = ["mic_patterns", "beta_SabineEstimation", "att2t_SabineEstimator", "t2n", "simulateRIR", "simulateTrajectory", "activateMixedPrecision", "activateLUT"]

mic_patterns = {
"omni": 0,
Expand Down Expand Up @@ -212,5 +212,16 @@ def activateMixedPrecision(activate=True):
'''
gpuRIR_bind_simulator.activate_mixed_precision_bind(activate)

def activateLUT(activate=True):
    ''' Activate or deactivate the lookup table for the sinc computations.

    The request is forwarded unchanged to the `activate_lut_bind` method of the
    module-level `gpuRIR_bind_simulator` object. This function has no return
    statement, so it returns None.

    NOTE(review): the CUDA backend's `activate_lut` turns the mixed precision
    mode off when the lookup table is activated while mixed precision is on.

    Parameters
    ----------
    activate : bool, optional
        True to activate and False to deactivate. True by default.
    '''
    gpuRIR_bind_simulator.activate_lut_bind(activate)

# Create the simulator object when the module is loaded
gpuRIR_bind_simulator = gpuRIR_bind()
83 changes: 68 additions & 15 deletions src/gpuRIR_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ __device__ __forceinline__ scalar_t sinc(scalar_t x) {
return (x==0)? 1 : sinf(x)/x;
}

__device__ __forceinline__ scalar_t image_sample(scalar_t amp, scalar_t tau, scalar_t t, int Tw_2, cudaTextureObject_t sinc_lut, float lut_center) {
__device__ __forceinline__ scalar_t image_sample(scalar_t amp, scalar_t tau, scalar_t t, scalar_t Tw) {
scalar_t t_tau = t - tau;
return (abs(t_tau)<Tw_2)? amp * tex1D<scalar_t>(sinc_lut, __fmaf_rz(t_tau,lut_oversamp,lut_center)) : 0.0f;
return (abs(t_tau)<Tw/2)? hanning_window(t_tau, Tw) * amp * sinc( (t_tau) * PI ) : 0.0f;
}

__device__ __forceinline__ scalar_t SabineT60( scalar_t room_sz_x, scalar_t room_sz_y, scalar_t room_sz_z,
Expand Down Expand Up @@ -228,6 +228,15 @@ __device__ __forceinline__ half2 image_sample_mp(half2 amp, scalar_t tau, scalar

#endif

/******************************************/
/* Lookup table auxiliar device functions */
/******************************************/

/*
 * Contribution of a single image source to the RIR sample at time t, read from the lookup table.
 *
 *   amp        : amplitude of the image source.
 *   tau        : arrival time of the image source, in the same units as t.
 *   t          : time index of the RIR sample being computed.
 *   Tw_2       : half-length of the window; contributions with |t - tau| >= Tw_2 are zero.
 *   sinc_lut   : 1D texture holding the lookup table (presumably the windowed sinc -- confirm
 *                against the function that builds the texture).
 *   lut_center : texture coordinate that corresponds to t - tau == 0.
 *
 * Returns amp times the table value fetched at (t - tau) * lut_oversamp + lut_center,
 * or 0 when the sample falls outside the window.
 */
__device__ __forceinline__ scalar_t image_sample_lut(scalar_t amp, scalar_t tau, scalar_t t, int Tw_2, cudaTextureObject_t sinc_lut, float lut_center) {
    const scalar_t delay = t - tau;

    // Outside the window (or not comparable, e.g. NaN) there is no contribution.
    if (!(abs(delay) < Tw_2)) {
        return 0.0f;
    }

    // Map the delay to a texture coordinate with a single fused multiply-add (round towards zero).
    const float lut_pos = __fmaf_rz(delay, lut_oversamp, lut_center);
    return amp * tex1D<scalar_t>(sinc_lut, lut_pos);
}

/***********/
/* KERNELS */
/***********/
Expand Down Expand Up @@ -339,7 +348,7 @@ __global__ void calcAmpTau_kernel(scalar_t* g_amp /*[M_src]M_rcv][nb_img_x][nb_i
}
}

__global__ void generateRIR_kernel(scalar_t* initialRIR, scalar_t* amp, scalar_t* tau, int T, int M, int N, int iniRIR_N, int ini_red, int Tw_2, cudaTextureObject_t sinc_lut, float lut_center) {
__global__ void generateRIR_kernel(scalar_t* initialRIR, scalar_t* amp, scalar_t* tau, int T, int M, int N, int iniRIR_N, int ini_red, scalar_t Tw) {
int t = blockIdx.x * blockDim.x + threadIdx.x;
int m = blockIdx.y * blockDim.y + threadIdx.y;
int n_ini = blockIdx.z * ini_red;
Expand All @@ -348,7 +357,7 @@ __global__ void generateRIR_kernel(scalar_t* initialRIR, scalar_t* amp, scalar_t
if (m<M && t<T) {
scalar_t loc_sum = 0;
for (int n=n_ini; n<n_max; n++) {
loc_sum += image_sample(amp[m*N+n], tau[m*N+n], t, Tw_2, sinc_lut, lut_center);
loc_sum += image_sample(amp[m*N+n], tau[m*N+n], t, Tw);
}
initialRIR[m*T*iniRIR_N + t*iniRIR_N + blockIdx.z] = loc_sum;
}
Expand Down Expand Up @@ -517,6 +526,25 @@ __global__ void h2RIR_to_floatRIR_kernel(half2* h2RIR, scalar_t* floatRIR, int M
#endif
}

/************************/
/* Lookup table KERNELS */
/************************/

/*
 * Lookup-table variant of the initial RIR generation kernel.
 *
 * Launch layout (as used by the indexing below):
 *   x : time sample t, guarded by t < T
 *   y : row index m into amp/tau, guarded by m < M
 *   z : group of up to ini_red consecutive image sources, starting at blockIdx.z * ini_red
 *
 * Each thread sums image_sample_lut() over the image sources of its group and stores the partial
 * sum at initialRIR[m][t][blockIdx.z], i.e. initialRIR is indexed as [M][T][iniRIR_N].
 * amp and tau are indexed as [M][N].
 *
 *   Tw_2, sinc_lut, lut_center : forwarded unchanged to image_sample_lut().
 */
__global__ void generateRIR_kernel_lut(scalar_t* initialRIR, scalar_t* amp, scalar_t* tau, int T, int M, int N, int iniRIR_N, int ini_red, int Tw_2, cudaTextureObject_t sinc_lut, float lut_center) {
    int t = blockIdx.x * blockDim.x + threadIdx.x;
    int m = blockIdx.y * blockDim.y + threadIdx.y;
    int n_ini = blockIdx.z * ini_red;
    // Integer min: fminf() would convert both operands to float, which cannot represent
    // every int above 2^24 and could therefore yield a wrong loop bound for large N.
    int n_max = min(n_ini + ini_red, N);

    if (m<M && t<T) {
        // 64-bit offsets so that m*N and m*T*iniRIR_N cannot overflow a 32-bit int.
        const size_t img_row = (size_t)m * N;
        scalar_t loc_sum = 0;
        for (int n=n_ini; n<n_max; n++) {
            loc_sum += image_sample_lut(amp[img_row + n], tau[img_row + n], t, Tw_2, sinc_lut, lut_center);
        }
        initialRIR[((size_t)m*T + t)*iniRIR_N + blockIdx.z] = loc_sum;
    }
}

/***************************/
/* Auxiliar host functions */
/***************************/
Expand Down Expand Up @@ -573,16 +601,25 @@ void gpuRIR_cuda::cuda_rirGenerator(scalar_t* rir, scalar_t* amp, scalar_t* tau,
gpuErrchk( cudaMalloc(&initialRIR, M*T*iniRIR_N*sizeof(scalar_t)) );

int Tw = (int) round(8e-3f * Fs); // Window duration [samples]
int lut_len = Tw * lut_oversamp;
lut_len += ((lut_len%2)? 0 : 1); // Must be odd
cudaArray* cuArrayLut;
cudaTextureObject_t sinc_lut = create_sinc_texture_lut(&cuArrayLut, Tw, lut_len);

generateRIR_kernel<<<numBlocksIni, threadsPerBlockIni>>>( initialRIR, amp, tau, T, M, N, iniRIR_N, initialReduction, Tw/2, sinc_lut, lut_len/2+0.5 );
gpuErrchk( cudaDeviceSynchronize() );
gpuErrchk( cudaPeekAtLastError() );
cudaDestroyTextureObject(sinc_lut);
cudaFreeArray(cuArrayLut);
if (lookup_table) {
int lut_len = Tw * lut_oversamp;
lut_len += ((lut_len%2)? 0 : 1); // Must be odd
cudaArray* cuArrayLut;
cudaTextureObject_t sinc_lut = create_sinc_texture_lut(&cuArrayLut, Tw, lut_len);

generateRIR_kernel_lut<<<numBlocksIni, threadsPerBlockIni>>>( initialRIR, amp, tau, T, M, N, iniRIR_N, initialReduction, Tw/2, sinc_lut, lut_len/2+0.5 );
gpuErrchk( cudaDeviceSynchronize() );
gpuErrchk( cudaPeekAtLastError() );

cudaDestroyTextureObject(sinc_lut);
cudaFreeArray(cuArrayLut);
} else {
generateRIR_kernel<<<numBlocksIni, threadsPerBlockIni>>>( initialRIR, amp, tau, T, M, N, iniRIR_N, initialReduction, Tw );
gpuErrchk( cudaDeviceSynchronize() );
gpuErrchk( cudaPeekAtLastError() );
}


dim3 threadsPerBlockRed(nThreadsRed, 1, 1);
scalar_t* intermediateRIR;
Expand Down Expand Up @@ -949,14 +986,15 @@ scalar_t* gpuRIR_cuda::cuda_convolutions(scalar_t* source_segments, int M_src, i
return convolved_segments;
}

gpuRIR_cuda::gpuRIR_cuda(bool mPrecision) {
gpuRIR_cuda::gpuRIR_cuda(bool mPrecision, bool lut) {
// Get CUDA architecture
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
cuda_arch = prop.major*100 + prop.minor*10;

// Activate mixed precision if selected
// Activate mixed precision and lut if selected
activate_mixed_precision(mPrecision);
activate_lut(lut);

// Initiate CUDA runtime API
scalar_t* memPtr_warmup;
Expand All @@ -975,10 +1013,25 @@ gpuRIR_cuda::gpuRIR_cuda(bool mPrecision) {

/*
 * Enable or disable the mixed precision implementation.
 *
 * Mixed precision is only accepted when cuda_arch >= 530; otherwise the flag is forced to false
 * and a message is printed if activation was requested. Because mixed precision and the lookup
 * table are mutually exclusive, activating mixed precision while the lookup table is on turns
 * the lookup table off (with a message).
 *
 * Returns the resulting value of the mixed_precision flag.
 *
 * NOTE(review): this reads lookup_table; the constructor calls this function before
 * activate_lut(), so confirm lookup_table is initialized before that first call.
 */
bool gpuRIR_cuda::activate_mixed_precision(bool activate) {
    if (cuda_arch >= 530) {
        if (activate && lookup_table) {
            // Newline-terminated so the two messages do not run together on one line.
            printf("The mixed precision implementation is not compatible with the lookup table.\n");
            printf("Disabling the lookup table.\n");
            lookup_table = false;
        }
        mixed_precision = activate;
    } else {
        if (activate) printf("This feature requires Pascal GPU architecture or higher.\n");
        mixed_precision = false;
    }
    return mixed_precision;
}

/*
 * Enable or disable the lookup table for the sinc computations.
 *
 * The lookup table and the mixed precision implementation are mutually exclusive: activating
 * the lookup table while mixed precision is on turns mixed precision off (with a message).
 *
 * Returns the resulting value of the lookup_table flag.
 */
bool gpuRIR_cuda::activate_lut(bool activate) {
    if (activate && mixed_precision) {
        // Newline-terminated so the two messages do not run together on one line.
        printf("The lookup table is not compatible with the mixed precision implementation.\n");
        printf("Disabling the mixed precision implementation.\n");
        mixed_precision = false;
    }
    lookup_table = activate;
    return lookup_table;
}
6 changes: 5 additions & 1 deletion src/gpuRIR_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,22 @@ struct cuRandGeneratorWrapper_t;
class gpuRIR_cuda {

public:
gpuRIR_cuda(bool);
gpuRIR_cuda(bool, bool);

scalar_t* cuda_simulateRIR(scalar_t[3], scalar_t[6], scalar_t*, int, scalar_t*, scalar_t*, micPattern, int, int[3], scalar_t, scalar_t, scalar_t, scalar_t);
scalar_t* cuda_convolutions(scalar_t*, int, int,scalar_t*, int, int);
bool activate_mixed_precision(bool);
bool activate_lut(bool);

private:
// cuRAND generator
static cuRandGeneratorWrapper_t cuRandGenWrap; // I'm not able to compile if I include the cuda headers here... so I have to hide the cuRAND generator

// Mixed precision flag
bool mixed_precision;

// Lookup table flag
bool lookup_table;

// Auxiliar host functions
void cuda_rirGenerator(scalar_t*, scalar_t*, scalar_t*, int, int, int, scalar_t);
Expand Down
12 changes: 9 additions & 3 deletions src/python_bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ namespace py = pybind11;

class gpuRIR_bind {
public:
gpuRIR_bind(bool mPrecision=false) : mixed_precision(mPrecision), gpuRIR_cuda_simulator(mPrecision) {};
gpuRIR_bind(bool mPrecision=false, bool lut=true) : mixed_precision(mPrecision), lookup_table(lut), gpuRIR_cuda_simulator(mPrecision, lut) {};

py::array simulateRIR_bind(std::vector<scalar_t>, std::vector<scalar_t>, py::array_t<scalar_t, py::array::c_style>, py::array_t<scalar_t, py::array::c_style>, py::array_t<scalar_t, py::array::c_style>, micPattern, std::vector<int> ,scalar_t, scalar_t, scalar_t, scalar_t);
py::array gpu_conv(py::array_t<scalar_t, py::array::c_style>, py::array_t<scalar_t, py::array::c_style>);
bool activate_mixed_precision_bind(bool);
bool activate_lut_bind(bool);

bool mixed_precision;
bool lookup_table;

private:
gpuRIR_cuda gpuRIR_cuda_simulator;
Expand Down Expand Up @@ -108,6 +110,10 @@ bool gpuRIR_bind::activate_mixed_precision_bind(bool activate) {
gpuRIR_cuda_simulator.activate_mixed_precision(activate);
}

bool gpuRIR_bind::activate_lut_bind(bool activate) {
gpuRIR_cuda_simulator.activate_lut(activate);
}


PYBIND11_MODULE(gpuRIR_bind,m)
{
Expand All @@ -120,6 +126,6 @@ PYBIND11_MODULE(gpuRIR_bind,m)
py::arg("Fs"), py::arg("c")=343.0f )
.def("gpu_conv", &gpuRIR_bind::gpu_conv, "Batched convolution using FFTs in GPU", py::arg("source_segments"), py::arg("RIR"))
.def("activate_mixed_precision_bind", &gpuRIR_bind::activate_mixed_precision_bind, "Activate the mixed precision mode, only for Pascal GPU architecture or superior",
py::arg("activate"));

py::arg("activate"))
.def("activate_lut_bind", &gpuRIR_bind::activate_lut_bind, "Activate the lookup table", py::arg("activate"));
}

0 comments on commit 21e8252

Please sign in to comment.