From 801b9b5d2c75b3dc8df94f95ba9ff009b2cabe3d Mon Sep 17 00:00:00 2001 From: DavidDiazGuerra Date: Tue, 10 Sep 2019 09:18:25 +0000 Subject: [PATCH] Performance optimization --- src/gpuRIR_cuda.cu | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/gpuRIR_cuda.cu b/src/gpuRIR_cuda.cu index 4b17e43..332233f 100644 --- a/src/gpuRIR_cuda.cu +++ b/src/gpuRIR_cuda.cu @@ -216,9 +216,11 @@ __device__ __forceinline__ half2 my_h2sinc(half2 x) { } -__device__ __forceinline__ half2 image_sample_mp(half2 amp, scalar_t tau, scalar_t t1, scalar_t t2, half2 Tw_2, half2 Tw_inv) { - half2 t_tau = __floats2half2_rn(t1-tau, t2-tau); - if (__hble2(h2abs(t_tau), Tw_2)) { +__device__ __forceinline__ half2 image_sample_mp(half2 amp, scalar_t tau, scalar_t t1, scalar_t t2, scalar_t Tw_2, half2 Tw_inv) { + scalar_t t1_tau = t1-tau; + scalar_t t2_tau = t2-tau; + half2 t_tau = __floats2half2_rn(t1_tau, t2_tau); + if (abs(t1_tau)= 530 int t = blockIdx.x * blockDim.x + threadIdx.x; @@ -466,7 +467,7 @@ __global__ void generateRIR_mp_kernel(half2* initialRIR, scalar_t* amp, scalar_t scalar_t loc_tim_2 = 2*t+1; for (int n=n_ini; n>>( initialRIR, amp, tau, T/2, M, N, iniRIR_N, initialReduction, Fs, Tw_2, Tw_inv );