diff --git a/src/generate_gauss_c.py b/src/generate_gauss_c.py index d1923fc..d5ff127 100644 --- a/src/generate_gauss_c.py +++ b/src/generate_gauss_c.py @@ -12,11 +12,13 @@ from math import pi, sqrt, exp from scipy import ndimage as ndi +# From stack overflow def gauss(n=11,sigma=1): r = range(-int(n/2),int(n/2)+1) g = [1 / (sigma * sqrt(2*pi)) * exp(-float(x)**2/(2*sigma**2)) for x in r] return np.array(g, dtype=np.float32) +# A copy of the scipy implementation def _gaussian_kernel1d(sigma, order, radius): """ Computes a 1-D Gaussian convolution kernel. @@ -54,40 +56,43 @@ def tobyt(arr): if __name__ == '__main__': sample, scale = commandline_args({"sample":"","scale":1}) outpath = 'dummy' - display = 0 - sigma = 5 + display = 500 + sigma = 13 + reps = 20 vf = h5py.File(f'{hdf5_root_fast}/processed/implant-edt/{scale}x/{sample}.h5', 'r') - voxels = vf['voxels'][500:501,:,:] - #vmax = np.max(vf['voxels']) + voxels = vf['voxels'][:,:,:] vf.close() - print (voxels.shape) - Image.fromarray(tobyt(voxels[display,:,:])).save(f"{outpath}/original.png") vmax = voxels.max(); implant_mask = voxels >= vmax implant_mask = implant_mask.astype(np.float32) - print (vmax) - Image.fromarray(tobyt(implant_mask[display,:,:])).save(f"{outpath}/masked.png") - #kernel = gauss(51,13) - radius = int(4.0 * float(sigma) + .5) - kernel = _gaussian_kernel1d(sigma, 0, radius) + radius = int(4.0 * float(sigma) + .5) # stolen from the default scipy parameters + kernel = _gaussian_kernel1d(sigma, 0, radius)[::-1] #kernel = gauss(radius*2+1, sigma) result = np.zeros_like(implant_mask) start = timeit.default_timer() - histograms.gauss_filter_par_cpu(implant_mask, implant_mask.shape, kernel, 1, result) + histograms.gauss_filter_par_cpu(implant_mask, implant_mask.shape, kernel, reps, result) print (f'Parallel C edition: {timeit.default_timer() - start}') + np.save(f'{outpath}/mine', result) Image.fromarray(tobyt(result[display,:,:])).save(f'{outpath}/gauss1.png') + control = implant_mask.copy() start = timeit.default_timer() - control = ndi.gaussian_filter(implant_mask, sigma, mode='constant') + for _ in range(reps): + control = ndi.gaussian_filter(control, sigma, mode='constant') print (f'ndimage edition: {timeit.default_timer() - start}') - + np.save(f'{outpath}/control', control) Image.fromarray(tobyt(control[display,:,:])).save(f'{outpath}/control1.png') + + diff = np.abs(result - control) + print (f'Total difference: {diff.sum()}') + print (f'Max difference: {diff.max()}') + print (f'Mean difference: {diff.mean()}') \ No newline at end of file diff --git a/src/pybind_kernels/histograms.cc b/src/pybind_kernels/histograms.cc index 89215dd..3e6d9f7 100644 --- a/src/pybind_kernels/histograms.cc +++ b/src/pybind_kernels/histograms.cc @@ -56,8 +56,9 @@ void gauss_filter_par_cpu(const py::array_t np_voxels, *tmp1 = (float *) malloc(sizeof(float) * Rz*Ry*Rx); memcpy(tmp0, voxels, Rz*Ry*Rx * sizeof(float)); + uint64_t iters = 3 * reps; // 1 pass for each dimension - for (uint64_t rep = 0; rep < reps; rep++) { + for (uint64_t rep = 0; rep < iters; rep++) { float *tin, *tout; if (rep % 2 == 1) { tin = tmp1; @@ -67,26 +68,34 @@ void gauss_filter_par_cpu(const py::array_t np_voxels, tout = tmp1; } - //#pragma omp parallel for + #pragma omp parallel for collapse(3) for (int64_t z = 0; z < Pz; z++) { for (int64_t y = 0; y < Py; y++) { for (int64_t x = 0; x < Px; x++) { float tmp = 0; - //#pragma omp simd reduction(+:tmp) - for (int64_t i = -padding; i < padding; i++) { - if (z+i > 0 && z+i < Pz) { - uint64_t voxel_index_z = (z+i)*Py*Px + y*Px + x; - tmp += tin[voxel_index_z] * kernel[i+padding]; + uint64_t dim = rep % 3; + + if (dim == 0) { // z dimension + for (int64_t i = -padding; i < padding; i++) { + if (z+i >= 0 && z+i < Pz) { + uint64_t voxel_index_z = (z+i)*Py*Px + y*Px + x; + tmp += tin[voxel_index_z] * kernel[i+padding]; + } } - if (y+i > 0 && y+i < Py) { - uint64_t voxel_index_y = z*Py*Px + (y+i)*Px + x; - tmp += tin[voxel_index_y] * kernel[i+padding]; + } else if (dim == 1) { // y dimension + for (int64_t i = -padding; i < padding; i++) { + if (y+i >= 0 && y+i < Py) { + uint64_t voxel_index_y = z*Py*Px + (y+i)*Px + x; + tmp += tin[voxel_index_y] * kernel[i+padding]; + } } - if (x+i > 0 && x+i < Px) { - uint64_t voxel_index_x = z*Py*Px + y*Px + (x+i); - tmp += tin[voxel_index_x] * kernel[i+padding]; + } else if (dim == 2) { // x dimension + for (int64_t i = -padding; i < padding; i++) { + if (x+i >= 0 && x+i < Px) { + uint64_t voxel_index_x = z*Py*Px + y*Px + (x+i); + tmp += tin[voxel_index_x] * kernel[i+padding]; + } } - //printf("%ld, %ld, %ld, %ld\n", i, z+i, y+i, x+i); } uint64_t flat_index = z*Ry*Rx + y*Rx + x; @@ -96,7 +105,7 @@ void gauss_filter_par_cpu(const py::array_t np_voxels, } } - memcpy(result, reps % 2 == 1 ? tmp1 : tmp0, Rz*Ry*Rx * sizeof(float)); + memcpy(result, iters % 2 == 1 ? tmp1 : tmp0, Rz*Ry*Rx * sizeof(float)); free(tmp0); free(tmp1); }