From ed3c47f321ba7cffa04e5798f2b258f7eceb0de3 Mon Sep 17 00:00:00 2001 From: Carl Johnsen Date: Tue, 10 Sep 2024 09:36:15 +0200 Subject: [PATCH] #5 Added padding for all dimensions, as this is a requirement for the optimized 1d diffusions for each axis. --- src/lib/cpp/gpu/diffusion.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lib/cpp/gpu/diffusion.cc b/src/lib/cpp/gpu/diffusion.cc index be3c621..880f999 100644 --- a/src/lib/cpp/gpu/diffusion.cc +++ b/src/lib/cpp/gpu/diffusion.cc @@ -567,7 +567,11 @@ namespace gpu { void diffusion_in_memory(const uint8_t *__restrict__ voxels, const shape_t &N, const float *__restrict__ kernel, const int64_t kernel_size, const int64_t repititions, uint16_t *__restrict__ output) { constexpr int32_t veclen = 32; // TODO - const shape_t P = { N.z, N.y, (N.x + veclen-1) / veclen * veclen }; + const shape_t P = { + ((N.z + veclen-1) / veclen) * veclen, + ((N.y + veclen-1) / veclen) * veclen, + ((N.x + veclen-1) / veclen) * veclen + }; const int64_t padded_size = P.z*P.y*P.x, total_size = N.z*N.y*N.x,