From 656865a63ccf7e4fdc0b2e0a3385e4a34f438068 Mon Sep 17 00:00:00 2001 From: Carl Johnsen Date: Mon, 9 Sep 2024 15:31:30 +0200 Subject: [PATCH] #5 Correctly dispatch to the optimized diffusion functions when radius < 16 (i.e. kernel size <= 31) --- src/lib/cpp/gpu/diffusion.cc | 15 +++++++++------ src/test/test_diffusion.py | 6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/lib/cpp/gpu/diffusion.cc b/src/lib/cpp/gpu/diffusion.cc index 05dfa8d..e95fe2b 100644 --- a/src/lib/cpp/gpu/diffusion.cc +++ b/src/lib/cpp/gpu/diffusion.cc @@ -538,12 +538,15 @@ namespace gpu { } void diffusion_step(const uint8_t *__restrict__ voxels, float *buf0, float *buf1, const shape_t &N, const shape_t &P, const float *__restrict__ kernel, const int64_t radius) { - diffusion_core(buf0, kernel, buf1, P, 0, radius); - //diffusion_core_z(buf0, kernel, buf1, P, radius); - diffusion_core(buf1, kernel, buf0, P, 1, radius); - //diffusion_core_y(buf1, kernel, buf0, P, radius); - diffusion_core(buf0, kernel, buf1, P, 2, radius); - //diffusion_core_x(buf0, kernel, buf1, P, radius); + if (radius < 16) { + diffusion_core_z(buf0, kernel, buf1, P, radius); + diffusion_core_y(buf1, kernel, buf0, P, radius); + diffusion_core_x(buf0, kernel, buf1, P, radius); + } else { + diffusion_core(buf0, kernel, buf1, P, 0, radius); + diffusion_core(buf1, kernel, buf0, P, 1, radius); + diffusion_core(buf0, kernel, buf1, P, 2, radius); + } std::swap(buf0, buf1); illuminate(voxels, buf0, N, P); diff --git a/src/test/test_diffusion.py b/src/test/test_diffusion.py index 4e32ffb..0646505 100644 --- a/src/test/test_diffusion.py +++ b/src/test/test_diffusion.py @@ -15,9 +15,9 @@ from lib.cpp.cpu.diffusion import diffusion as diffusion_cpu from lib.cpp.gpu.diffusion import diffusion as diffusion_gpu -n = 333 -sigma = 3 # Radius has to be <= 16 for the GPU implementation -reps = 1 +n = 128 +sigma = 3 # Radius has to be < 16 for the faster GPU implementation +reps = 100 plot = False run_py = True