Skip to content

Commit

Permalink
#5 Handled correctly calling the optimized diffusion functions (radius < 16 (i.e. kernel <= 31))
Browse files Browse the repository at this point in the history
  • Loading branch information
carljohnsen committed Sep 9, 2024
1 parent 63dad5d commit 656865a
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
15 changes: 9 additions & 6 deletions src/lib/cpp/gpu/diffusion.cc
Original file line number Diff line number Diff line change
Expand Up @@ -538,12 +538,15 @@ namespace gpu {
}

void diffusion_step(const uint8_t *__restrict__ voxels, float *buf0, float *buf1, const shape_t &N, const shape_t &P, const float *__restrict__ kernel, const int64_t radius) {
diffusion_core(buf0, kernel, buf1, P, 0, radius);
//diffusion_core_z(buf0, kernel, buf1, P, radius);
diffusion_core(buf1, kernel, buf0, P, 1, radius);
//diffusion_core_y(buf1, kernel, buf0, P, radius);
diffusion_core(buf0, kernel, buf1, P, 2, radius);
//diffusion_core_x(buf0, kernel, buf1, P, radius);
if (radius < 16) {
diffusion_core_z(buf0, kernel, buf1, P, radius);
diffusion_core_y(buf1, kernel, buf0, P, radius);
diffusion_core_x(buf0, kernel, buf1, P, radius);
} else {
diffusion_core(buf0, kernel, buf1, P, 0, radius);
diffusion_core(buf1, kernel, buf0, P, 1, radius);
diffusion_core(buf0, kernel, buf1, P, 2, radius);
}
std::swap(buf0, buf1);

illuminate(voxels, buf0, N, P);
Expand Down
6 changes: 3 additions & 3 deletions src/test/test_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@
from lib.cpp.cpu.diffusion import diffusion as diffusion_cpu
from lib.cpp.gpu.diffusion import diffusion as diffusion_gpu

n = 333
sigma = 3 # Radius has to be <= 16 for the GPU implementation
reps = 1
n = 128
sigma = 3 # Radius has to be < 16 for the faster GPU implementation
reps = 100
plot = False

run_py = True
Expand Down

0 comments on commit 656865a

Please sign in to comment.