diff --git a/src/generate_gauss_c.py b/src/generate_gauss_c.py new file mode 100644 index 0000000..d1923fc --- /dev/null +++ b/src/generate_gauss_c.py @@ -0,0 +1,93 @@ +import pybind_kernels.histograms as histograms +#!/usr/bin/env python3 +import sys +sys.path.append(sys.path[0]+"/../") +from matplotlib import image +import pybind_kernels.histograms as histograms +import numpy as np, h5py, timeit +from datetime import datetime +from PIL import Image +from tqdm import tqdm +from config.paths import hdf5_root_fast, commandline_args +from math import pi, sqrt, exp +from scipy import ndimage as ndi + +def gauss(n=11,sigma=1): + r = range(-int(n/2),int(n/2)+1) + g = [1 / (sigma * sqrt(2*pi)) * exp(-float(x)**2/(2*sigma**2)) for x in r] + return np.array(g, dtype=np.float32) + +def _gaussian_kernel1d(sigma, order, radius): + """ + Computes a 1-D Gaussian convolution kernel. + """ + if order < 0: + raise ValueError('order must be non-negative') + exponent_range = np.arange(order + 1) + sigma2 = sigma * sigma + x = np.arange(-radius, radius+1) + phi_x = np.exp(-0.5 / sigma2 * x ** 2) + phi_x = phi_x / phi_x.sum() + + if order == 0: + return phi_x + else: + # f(x) = q(x) * phi(x) = q(x) * exp(p(x)) + # f'(x) = (q'(x) + q(x) * p'(x)) * phi(x) + # p'(x) = -1 / sigma ** 2 + # Implement q'(x) + q(x) * p'(x) as a matrix operator and apply to the + # coefficients of q(x) + q = np.zeros(order + 1) + q[0] = 1 + D = np.diag(exponent_range[1:], 1) # D @ q(x) = q'(x) + P = np.diag(np.ones(order)/-sigma2, -1) # P @ q(x) = q(x) * p'(x) + Q_deriv = D + P + for _ in range(order): + q = Q_deriv.dot(q) + q = (x[:, None] ** exponent_range).dot(q) + return q * phi_x + +def tobyt(arr): + mi, ma = arr.min(), arr.max() + return (((arr - mi) / (ma - mi + 1)) * 255).astype(np.uint8) + +if __name__ == '__main__': + sample, scale = commandline_args({"sample":"","scale":1}) + outpath = 'dummy' + display = 0 + sigma = 5 + + vf = h5py.File(f'{hdf5_root_fast}/processed/implant-edt/{scale}x/{sample}.h5', 'r') + voxels = vf['voxels'][500:501,:,:] + #vmax = np.max(vf['voxels']) + vf.close() + + print (voxels.shape) + + Image.fromarray(tobyt(voxels[display,:,:])).save(f"{outpath}/original.png") + + vmax = voxels.max(); + implant_mask = voxels >= vmax + implant_mask = implant_mask.astype(np.float32) + + print (vmax) + + Image.fromarray(tobyt(implant_mask[display,:,:])).save(f"{outpath}/masked.png") + + #kernel = gauss(51,13) + radius = int(4.0 * float(sigma) + .5) + kernel = _gaussian_kernel1d(sigma, 0, radius) + #kernel = gauss(radius*2+1, sigma) + + result = np.zeros_like(implant_mask) + start = timeit.default_timer() + histograms.gauss_filter_par_cpu(implant_mask, implant_mask.shape, kernel, 1, result) + print (f'Parallel C edition: {timeit.default_timer() - start}') + + Image.fromarray(tobyt(result[display,:,:])).save(f'{outpath}/gauss1.png') + + start = timeit.default_timer() + control = ndi.gaussian_filter(implant_mask, sigma, mode='constant') + print (f'ndimage edition: {timeit.default_timer() - start}') + + Image.fromarray(tobyt(control[display,:,:])).save(f'{outpath}/control1.png') diff --git a/src/pybind_kernels/histograms.cc b/src/pybind_kernels/histograms.cc index d4a1f96..89215dd 100644 --- a/src/pybind_kernels/histograms.cc +++ b/src/pybind_kernels/histograms.cc @@ -20,6 +20,87 @@ typedef uint16_t field_type; #define VALID_VOXEL(voxel) (voxel != 0) /* Voxel not masked */ #define GB_VOXEL ((1024 / sizeof(voxel_type)) * 1024 * 1024) +void gauss_filter_par_cpu(const py::array_t np_voxels, + const tuple shape, + const py::array_t np_kernel, + const uint64_t reps, + py::array_t &np_result) { + auto + voxels_info = np_voxels.request(), + kernel_info = np_kernel.request(), + result_info = np_result.request(); + + const float + *voxels = static_cast(voxels_info.ptr), + *kernel = static_cast(kernel_info.ptr); + + float + *result = static_cast(result_info.ptr); + + auto [Nz, Ny, Nx] = shape; // global shape TODO for blocked edition + + int64_t + kernel_size = kernel_info.size, + padding = kernel_size / 2, // kx should always be odd + // Partial shape + Pz = voxels_info.shape[0], + Py = voxels_info.shape[1], + Px = voxels_info.shape[2], + // Result shape + Rz = result_info.shape[0], + Ry = result_info.shape[1], + Rx = result_info.shape[2]; + + float + *tmp0 = (float *) malloc(sizeof(float) * Rz*Ry*Rx), + *tmp1 = (float *) malloc(sizeof(float) * Rz*Ry*Rx); + + memcpy(tmp0, voxels, Rz*Ry*Rx * sizeof(float)); + + for (uint64_t rep = 0; rep < reps; rep++) { + float *tin, *tout; + if (rep % 2 == 1) { + tin = tmp1; + tout = tmp0; + } else { + tin = tmp0; + tout = tmp1; + } + + //#pragma omp parallel for + for (int64_t z = 0; z < Pz; z++) { + for (int64_t y = 0; y < Py; y++) { + for (int64_t x = 0; x < Px; x++) { + float tmp = 0; + //#pragma omp simd reduction(+:tmp) + for (int64_t i = -padding; i < padding; i++) { + if (z+i > 0 && z+i < Pz) { + uint64_t voxel_index_z = (z+i)*Py*Px + y*Px + x; + tmp += tin[voxel_index_z] * kernel[i+padding]; + } + if (y+i > 0 && y+i < Py) { + uint64_t voxel_index_y = z*Py*Px + (y+i)*Px + x; + tmp += tin[voxel_index_y] * kernel[i+padding]; + } + if (x+i > 0 && x+i < Px) { + uint64_t voxel_index_x = z*Py*Px + y*Px + (x+i); + tmp += tin[voxel_index_x] * kernel[i+padding]; + } + //printf("%ld, %ld, %ld, %ld\n", i, z+i, y+i, x+i); + } + + uint64_t flat_index = z*Ry*Rx + y*Rx + x; + tout[flat_index] = tmp; + } + } + } + } + + memcpy(result, reps % 2 == 1 ? tmp1 : tmp0, Rz*Ry*Rx * sizeof(float)); + free(tmp0); + free(tmp1); +} + pair masked_minmax(const py::array_t np_voxels) { // Extract NumPy array basearray-pointer and length auto voxels_info = np_voxels.request(); @@ -790,4 +871,5 @@ PYBIND11_MODULE(histograms, m) { m.def("field_histogram_resample_par_cpu", &field_histogram_resample_par_cpu); m.def("masked_minmax", &masked_minmax); m.def("float_minmax", &float_minmax); + m.def("gauss_filter_par_cpu", &gauss_filter_par_cpu); }