Skip to content

Commit

Permalink
Seems close enough #5
Browse files Browse the repository at this point in the history
  • Loading branch information
carljohnsen committed May 8, 2022
1 parent 2416c66 commit 4f1755c
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 84 deletions.
84 changes: 25 additions & 59 deletions src/generate_gauss_c.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,7 @@
from math import pi, sqrt, exp
from scipy import ndimage as ndi

# From stack overflow
def gauss(n=11,sigma=1):
r = range(-int(n/2),int(n/2)+1)
g = [1 / (sigma * sqrt(2*pi)) * exp(-float(x)**2/(2*sigma**2)) for x in r]
return np.array(g, dtype=np.float32)

# A copy of the scipy implementation
def _gaussian_kernel1d(sigma, order, radius):
"""
Computes a 1-D Gaussian convolution kernel.
"""
if order < 0:
raise ValueError('order must be non-negative')
exponent_range = np.arange(order + 1)
sigma2 = sigma * sigma
x = np.arange(-radius, radius+1)
phi_x = np.exp(-0.5 / sigma2 * x ** 2)
phi_x = phi_x / phi_x.sum()

if order == 0:
return phi_x
else:
# f(x) = q(x) * phi(x) = q(x) * exp(p(x))
# f'(x) = (q'(x) + q(x) * p'(x)) * phi(x)
# p'(x) = -1 / sigma ** 2
# Implement q'(x) + q(x) * p'(x) as a matrix operator and apply to the
# coefficients of q(x)
q = np.zeros(order + 1)
q[0] = 1
D = np.diag(exponent_range[1:], 1) # D @ q(x) = q'(x)
P = np.diag(np.ones(order)/-sigma2, -1) # P @ q(x) = q(x) * p'(x)
Q_deriv = D + P
for _ in range(order):
q = Q_deriv.dot(q)
q = (x[:, None] ** exponent_range).dot(q)
return q * phi_x
impl_type = np.float32

def tobyt(arr):
mi, ma = arr.min(), arr.max()
Expand All @@ -56,47 +21,48 @@ def tobyt(arr):
if __name__ == '__main__':
sample, scale = commandline_args({"sample":"<required>","scale":1})
outpath = 'dummy'
display = 10
sigma = 13
reps =20
sigma = 5
reps = 5

vf = h5py.File(f'{hdf5_root_fast}/processed/implant-edt/{scale}x/{sample}.h5', 'r')
voxels = vf['voxels'][512:576,:,:]
vf.close()

Image.fromarray(tobyt(voxels[display,:,:])).save(f"{outpath}/original.png")
nz,ny,nx = voxels.shape

Image.fromarray(tobyt(voxels[nz//2,:,:])).save(f"{outpath}/original.png")

vmax = voxels.max();
vmax = voxels.max()
implant_mask = voxels >= vmax
implant_mask = implant_mask.astype(np.float32)

Image.fromarray(tobyt(implant_mask[display,:,:])).save(f"{outpath}/masked.png")
Image.fromarray(tobyt(implant_mask[nz//2,:,:].astype(impl_type))).save(f"{outpath}/masked.png")

radius = int(4.0 * float(sigma) + .5) # stolen from the default scipy parameters
kernel = _gaussian_kernel1d(sigma, 0, radius)[::-1]
#kernel = gauss(radius*2+1, sigma)
kernel = ndi.filters._gaussian_kernel1d(sigma, 0, radius).astype(impl_type)

result = np.zeros_like(implant_mask)
result = np.empty(implant_mask.shape,dtype=impl_type)
start = timeit.default_timer()
histograms.gauss_filter_par_cpu(implant_mask, implant_mask.shape, kernel, reps, result)
print (f'Parallel C edition: {timeit.default_timer() - start}')
np.save(f'{outpath}/mine', result)

nz,ny,nx = result.shape
Image.fromarray(tobyt(result[nz//2,:,:])).save(f'{outpath}/gauss-xy.png')
Image.fromarray(tobyt(result[:,ny//2,:])).save(f'{outpath}/gauss-xz.png')
Image.fromarray(tobyt(result[:,:,nx//2])).save(f'{outpath}/gauss-yz.png')
Image.fromarray(tobyt((np.max(np.abs(result),axis=0)!=0).astype(float))).save(f'{outpath}/gauss-xy-nonzero.png')
Image.fromarray(tobyt((np.max(np.abs(result),axis=1)!=0).astype(float))).save(f'{outpath}/gauss-xz-nonzero.png')
Image.fromarray(tobyt((np.max(np.abs(result),axis=2)!=0).astype(float))).save(f'{outpath}/gauss-yz-nonzero.png')

control = implant_mask.copy()
start = timeit.default_timer()
#for _ in range(reps):
# control = ndi.gaussian_filter(control, sigma, mode='constant')
print (f'ndimage edition: {timeit.default_timer() - start}')
#np.save(f'{outpath}/control', control)
Image.fromarray(tobyt(control[display,:,:])).save(f'{outpath}/control1.png')

diff = np.abs(result - control)
print (f'Total difference: {diff.sum()}')
print (f'Max difference: {diff.max()}')
print (f'Mean difference: {diff.mean()}')
if True: # generate ndimage comparison
control = implant_mask.astype(impl_type)
start = timeit.default_timer()
for _ in range(reps):
control[:] = ndi.gaussian_filter(control, sigma, mode='constant')
#control[implant_mask] = 1
print (f'ndimage edition: {timeit.default_timer() - start}')
np.save(f'{outpath}/control', control)
Image.fromarray(tobyt(control[nz//2,:,:])).save(f'{outpath}/control1.png')
diff = np.abs(result - control)
print (f'Total difference: {diff.sum()}')
print (f'Max difference: {diff.max()}')
print (f'Mean difference: {diff.mean()}')
58 changes: 33 additions & 25 deletions src/pybind_kernels/histograms.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ typedef uint16_t voxel_type;
//typedef float field_type;
typedef uint16_t field_type;
typedef uint8_t mask_type;
typedef float gauss_type;

#define INLINE __attribute__((always_inline)) inline

Expand All @@ -23,84 +24,91 @@ typedef uint8_t mask_type;

void gauss_filter_par_cpu(const py::array_t<mask_type> np_voxels,
const tuple<uint64_t, uint64_t, uint64_t> shape,
const py::array_t<float> np_kernel,
const py::array_t<gauss_type> np_kernel,
const uint64_t reps,
py::array_t<float> &np_result) {
py::array_t<gauss_type> &np_result) {
auto
voxels_info = np_voxels.request(),
kernel_info = np_kernel.request(),
result_info = np_result.request();

const mask_type *voxels = static_cast<const mask_type*>(voxels_info.ptr);
const float *kernel = static_cast<const float*>(kernel_info.ptr);
const mask_type *voxels = static_cast<const mask_type*>(voxels_info.ptr);
const gauss_type *kernel = static_cast<const gauss_type*>(kernel_info.ptr);

float *result = static_cast<float*>(result_info.ptr);
gauss_type *result = static_cast<gauss_type*>(result_info.ptr);

auto [Nz, Ny, Nx] = shape; // global shape TODO for blocked edition

int64_t
kernel_size = kernel_info.size,
padding = kernel_size / 2, // kx should always be odd
// Partial shape
// Partial shape
Pz = voxels_info.shape[0],
Py = voxels_info.shape[1],
Px = voxels_info.shape[2],
// Result shape
Rz = result_info.shape[0],
Ry = result_info.shape[1],
Rx = result_info.shape[2];

float
*tmp0 = (float *) calloc(Rz*Ry*Rx, sizeof(float)),
*tmp1 = (float *) calloc(Rz*Ry*Rx, sizeof(float));


assert(kernel_size % 2 == 1);

gauss_type
*tmp0 = (gauss_type *) calloc(Rz*Ry*Rx, sizeof(gauss_type)),
*tmp1 = (gauss_type *) calloc(Rz*Ry*Rx, sizeof(gauss_type));

#pragma omp parallel for
for (int64_t i = 0; i < voxels_info.size; i++) {
tmp0[i] = voxels[i];
tmp0[i] = voxels[i] ? 1 : 0;
}

uint64_t iters = 3 * reps; // 1 pass for each dimension
const int64_t strides[3] = {Py*Px,Px,1};
const int64_t N[3] = {Pz,Py,Px};

for (uint64_t rep = 0; rep < iters; rep++) {
float *tin, *tout;
gauss_type *tin, *tout;
if (rep % 2 == 1) {
tin = tmp1;
tout = tmp0;
} else {
tin = tmp0;
tout = tmp1;
}
int64_t dim = rep % 3;

#pragma omp parallel for collapse(3)
#pragma omp parallel for// collapse(3)
for (int64_t z = 0; z < Pz; z++) {
for (int64_t y = 0; y < Py; y++) {
for (int64_t x = 0; x < Px; x++) {
float sum = 0;
int64_t dim = rep % 3;
int64_t output_index = z*strides[0] + y*strides[1] + x*strides[2];
int64_t X[3] = {z,y,x};

const int64_t output_index = z*Ry*Rx + y*Rx + x;
const int64_t strides[3] = {Py*Px,Px,1};
const int64_t X[3] = {z,y,x};
const int64_t N[3] = {Pz,Py,Px};

const int64_t stride = strides[dim], i_start = -min(padding,X[dim]-1), i_end = min(padding,N[dim]-X[dim]-1);
int64_t
stride = strides[dim],
i_start = -min(padding,X[dim]),
i_end = min(padding,N[dim]-X[dim]-1);

auto mask_value = voxels[output_index];
if (mask_value) {
if (0 && dim % 3 == 2 && mask_value) {
tout[output_index] = 1;
} else {
gauss_type sum = 0;

#pragma omp simd reduction(+:sum)
for (int64_t i = i_start; i <= i_end; i++) {
uint64_t voxel_index = output_index + stride*i;
int64_t voxel_index = output_index + stride*i;
sum += tin[voxel_index] * kernel[i+padding];
}

tout[output_index] = sum;
}
}
}
}
}

memcpy(result, iters % 2 == 1 ? tmp1 : tmp0, Rz*Ry*Rx * sizeof(float));
memcpy(result, iters % 2 == 1 ? tmp1 : tmp0, Rz*Ry*Rx * sizeof(gauss_type));
free(tmp0);
free(tmp1);
}
Expand Down

0 comments on commit 4f1755c

Please sign in to comment.