Skip to content

Commit

Permalink
Preliminary work on faster gaussian filter #5
Browse files Browse the repository at this point in the history
  • Loading branch information
carljohnsen committed May 4, 2022
1 parent c27b48e commit e600be3
Show file tree
Hide file tree
Showing 2 changed files with 175 additions and 0 deletions.
93 changes: 93 additions & 0 deletions src/generate_gauss_c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import pybind_kernels.histograms as histograms
#!/usr/bin/env python3
import sys
sys.path.append(sys.path[0]+"/../")
from matplotlib import image
import pybind_kernels.histograms as histograms
import numpy as np, h5py, timeit
from datetime import datetime
from PIL import Image
from tqdm import tqdm
from config.paths import hdf5_root_fast, commandline_args
from math import pi, sqrt, exp
from scipy import ndimage as ndi

def gauss(n=11,sigma=1):
r = range(-int(n/2),int(n/2)+1)
g = [1 / (sigma * sqrt(2*pi)) * exp(-float(x)**2/(2*sigma**2)) for x in r]
return np.array(g, dtype=np.float32)

def _gaussian_kernel1d(sigma, order, radius):
"""
Computes a 1-D Gaussian convolution kernel.
"""
if order < 0:
raise ValueError('order must be non-negative')
exponent_range = np.arange(order + 1)
sigma2 = sigma * sigma
x = np.arange(-radius, radius+1)
phi_x = np.exp(-0.5 / sigma2 * x ** 2)
phi_x = phi_x / phi_x.sum()

if order == 0:
return phi_x
else:
# f(x) = q(x) * phi(x) = q(x) * exp(p(x))
# f'(x) = (q'(x) + q(x) * p'(x)) * phi(x)
# p'(x) = -1 / sigma ** 2
# Implement q'(x) + q(x) * p'(x) as a matrix operator and apply to the
# coefficients of q(x)
q = np.zeros(order + 1)
q[0] = 1
D = np.diag(exponent_range[1:], 1) # D @ q(x) = q'(x)
P = np.diag(np.ones(order)/-sigma2, -1) # P @ q(x) = q(x) * p'(x)
Q_deriv = D + P
for _ in range(order):
q = Q_deriv.dot(q)
q = (x[:, None] ** exponent_range).dot(q)
return q * phi_x

def tobyt(arr):
mi, ma = arr.min(), arr.max()
return (((arr - mi) / (ma - mi + 1)) * 255).astype(np.uint8)

if __name__ == '__main__':
sample, scale = commandline_args({"sample":"<required>","scale":1})
outpath = 'dummy'
display = 0
sigma = 5

vf = h5py.File(f'{hdf5_root_fast}/processed/implant-edt/{scale}x/{sample}.h5', 'r')
voxels = vf['voxels'][500:501,:,:]
#vmax = np.max(vf['voxels'])
vf.close()

print (voxels.shape)

Image.fromarray(tobyt(voxels[display,:,:])).save(f"{outpath}/original.png")

vmax = voxels.max();
implant_mask = voxels >= vmax
implant_mask = implant_mask.astype(np.float32)

print (vmax)

Image.fromarray(tobyt(implant_mask[display,:,:])).save(f"{outpath}/masked.png")

#kernel = gauss(51,13)
radius = int(4.0 * float(sigma) + .5)
kernel = _gaussian_kernel1d(sigma, 0, radius)
#kernel = gauss(radius*2+1, sigma)

result = np.zeros_like(implant_mask)
start = timeit.default_timer()
histograms.gauss_filter_par_cpu(implant_mask, implant_mask.shape, kernel, 1, result)
print (f'Parallel C edition: {timeit.default_timer() - start}')

Image.fromarray(tobyt(result[display,:,:])).save(f'{outpath}/gauss1.png')

start = timeit.default_timer()
control = ndi.gaussian_filter(implant_mask, sigma, mode='constant')
print (f'ndimage edition: {timeit.default_timer() - start}')

Image.fromarray(tobyt(control[display,:,:])).save(f'{outpath}/control1.png')
82 changes: 82 additions & 0 deletions src/pybind_kernels/histograms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,87 @@ typedef uint16_t field_type;
#define VALID_VOXEL(voxel) (voxel != 0) /* Voxel not masked */
#define GB_VOXEL ((1024 / sizeof(voxel_type)) * 1024 * 1024)

void gauss_filter_par_cpu(const py::array_t<float> np_voxels,
const tuple<uint64_t, uint64_t, uint64_t> shape,
const py::array_t<float> np_kernel,
const uint64_t reps,
py::array_t<float> &np_result) {
auto
voxels_info = np_voxels.request(),
kernel_info = np_kernel.request(),
result_info = np_result.request();

const float
*voxels = static_cast<const float*>(voxels_info.ptr),
*kernel = static_cast<const float*>(kernel_info.ptr);

float
*result = static_cast<float*>(result_info.ptr);

auto [Nz, Ny, Nx] = shape; // global shape TODO for blocked edition

int64_t
kernel_size = kernel_info.size,
padding = kernel_size / 2, // kx should always be odd
// Partial shape
Pz = voxels_info.shape[0],
Py = voxels_info.shape[1],
Px = voxels_info.shape[2],
// Result shape
Rz = result_info.shape[0],
Ry = result_info.shape[1],
Rx = result_info.shape[2];

float
*tmp0 = (float *) malloc(sizeof(float) * Rz*Ry*Rx),
*tmp1 = (float *) malloc(sizeof(float) * Rz*Ry*Rx);

memcpy(tmp0, voxels, Rz*Ry*Rx * sizeof(float));

for (uint64_t rep = 0; rep < reps; rep++) {
float *tin, *tout;
if (rep % 2 == 1) {
tin = tmp1;
tout = tmp0;
} else {
tin = tmp0;
tout = tmp1;
}

//#pragma omp parallel for
for (int64_t z = 0; z < Pz; z++) {
for (int64_t y = 0; y < Py; y++) {
for (int64_t x = 0; x < Px; x++) {
float tmp = 0;
//#pragma omp simd reduction(+:tmp)
for (int64_t i = -padding; i < padding; i++) {
if (z+i > 0 && z+i < Pz) {
uint64_t voxel_index_z = (z+i)*Py*Px + y*Px + x;
tmp += tin[voxel_index_z] * kernel[i+padding];
}
if (y+i > 0 && y+i < Py) {
uint64_t voxel_index_y = z*Py*Px + (y+i)*Px + x;
tmp += tin[voxel_index_y] * kernel[i+padding];
}
if (x+i > 0 && x+i < Px) {
uint64_t voxel_index_x = z*Py*Px + y*Px + (x+i);
tmp += tin[voxel_index_x] * kernel[i+padding];
}
//printf("%ld, %ld, %ld, %ld\n", i, z+i, y+i, x+i);
}

uint64_t flat_index = z*Ry*Rx + y*Rx + x;
tout[flat_index] = tmp;
}
}
}
}

memcpy(result, reps % 2 == 1 ? tmp1 : tmp0, Rz*Ry*Rx * sizeof(float));
free(tmp0);
free(tmp1);
}

pair<int,int> masked_minmax(const py::array_t<voxel_type> np_voxels) {
// Extract NumPy array basearray-pointer and length
auto voxels_info = np_voxels.request();
Expand Down Expand Up @@ -790,4 +871,5 @@ PYBIND11_MODULE(histograms, m) {
m.def("field_histogram_resample_par_cpu", &field_histogram_resample_par_cpu);
m.def("masked_minmax", &masked_minmax);
m.def("float_minmax", &float_minmax);
m.def("gauss_filter_par_cpu", &gauss_filter_par_cpu);
}

0 comments on commit e600be3

Please sign in to comment.