Skip to content

Commit

Permalink
#37 Bugfixes related to the relayed shapes
Browse files Browse the repository at this point in the history
  • Loading branch information
carljohnsen committed Mar 18, 2024
1 parent a49067a commit f07d308
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 38 deletions.
2 changes: 1 addition & 1 deletion src/lib/cpp/cpu/connected_components.cc
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ void canonical_names_and_size(const std::string &path, int64_t *__restrict__ out
fclose(file);
}

int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose) {
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) {
auto cc_start = std::chrono::high_resolution_clock::now();
// Check if the call is well-formed
int64_t chunks = n_labels.size();
Expand Down
2 changes: 1 addition & 1 deletion src/lib/cpp/cpu_seq/connected_components.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace cpu_seq {

#pragma GCC diagnostic ignored "-Wunused-parameter"
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose) {
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) {
throw std::runtime_error("Not implemented");
}

Expand Down
2 changes: 1 addition & 1 deletion src/lib/cpp/gpu/connected_components.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace gpu {

#pragma GCC diagnostic ignored "-Wunused-parameter"
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose) {
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) {
throw std::runtime_error("Not implemented");
}

Expand Down
2 changes: 1 addition & 1 deletion src/lib/cpp/include/connected_components.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace NS {

// External Functions
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose = false);
int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose = false);

// Internal Functions
void apply_renaming(std::vector<int64_t> &img, const std::vector<int64_t> &to_rename);
Expand Down
106 changes: 76 additions & 30 deletions src/processing_steps/0600_segment_implant_cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
sys.path.append(sys.path[0]+"/../")
from config.constants import *
from config.paths import hdf5_root, binary_root
from functools import partial
from lib.py.helpers import commandline_args, update_hdf5_mask
from lib.cpp.cpu.io import load_slice
from lib.cpp.cpu.connected_components import connected_components
import multiprocessing as mp

NA = np.newaxis

sample, scale, chunk_size, verbose = commandline_args({"sample" : "<required>",
"scale" : 8,
"chunk_size" : 256,
"verbose" : 1})
sample, scale, verbose = commandline_args({"sample" : "<required>",
"scale" : 8,
"verbose" : 1})

# Load metadata. TODO: Clean up, make automatic function.
meta_filename = f"{hdf5_root}/hdf5-byte/msb/{sample}.h5"
Expand All @@ -19,51 +21,95 @@
full_Nz, Ny, Nx = h5meta['voxels'].shape # Full image resolution
Nz = full_Nz - np.sum(vm_shifts) # Full volume matched image resolution
nz,ny,nx = np.array([Nz,Ny,Nx])//scale # Volume matched image resolution at chosen scale
intermediate_folder = f"{binary_root}/labels/{scale}x/"
os.makedirs(intermediate_folder, exist_ok=True)

voxel_size = h5meta['voxels'].attrs['voxelsize'] * scale
global_vmin = np.min(h5meta['subvolume_range'][:,0])
global_vmax = np.max(h5meta['subvolume_range'][:,1])
values = np.linspace(global_vmin,global_vmax,2**16)
implant_threshold_u16 = np.argmin(np.abs(values-implant_threshold))

if verbose >= 1: print(f"Reading metadata from {meta_filename}.\n"+
f"volume_matching_shifts = {vm_shifts}\n"+
f"full_Nz,Ny,Nx = {full_Nz,Ny,Nx}\n"+
f"Nz = {Nz}\n"+
f"nz,ny,nx = {nz,ny,nx}\n"+
f"voxel_size = {voxel_size}\n"+
f"vmin,vmax = {global_vmin,global_vmax}\n"+
f"Implant threshold {implant_threshold} -> {implant_threshold_u16} as uint16")
# Automatic chunk size calculation.
# Should be that fmod(log2(n_chunks),1.0) == 0 and chunk_size * n_cores < available memory
layer_size = ny*nx
hyperthreading = True # TODO check if hyperthreading is enabled
n_cores = mp.cpu_count() // (2 if hyperthreading else 1) # Only count physical cores
available_memory = 1024**3 * 64 # 64 GB
memory_per_core = available_memory // n_cores
elements_per_core = memory_per_core // 8 # 8 bytes per element
layers_per_core = elements_per_core // layer_size
n_chunks = int(2**np.ceil(np.log2(nz // layers_per_core)))
layers_per_chunk = nz // n_chunks
chunk_size_elements = layers_per_chunk * layer_size
chunk_size_bytes = chunk_size_elements * 8

if verbose >= 1: print(f"""
Reading metadata from {meta_filename}.
volume_matching_shifts = {vm_shifts}
full_Nz,Ny,Nx = {full_Nz,Ny,Nx}
Nz = {Nz}
nz,ny,nx = {nz,ny,nx}
voxel_size = {voxel_size}
vmin,vmax = {global_vmin,global_vmax}
Implant threshold {implant_threshold} -> {implant_threshold_u16} as uint16
Layers per core = {layers_per_core}
Chunk size = {chunk_size_bytes / 1024**2} MB
Layers per chunk = {layers_per_chunk}
Number of chunks = {n_chunks}
Number of cores = {n_cores}
""")
h5meta.close()

noisy_implant = np.empty((nz,ny,nx),dtype=bool)
voxel_chunk = np.empty((chunk_size,ny,nx),dtype=np.uint16)
if layers_per_core > nz:
voxels = np.empty((nz,ny,nx),dtype=np.uint16)
load_slice(voxels, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (0,0,0), (nz,ny,nx))
noisy_implant = (voxels > implant_threshold_u16)
del voxels
label, n_features = ndi.label(noisy_implant, output=np.int64)
bincnts = np.bincount(label[label>0],minlength=n_features+1)
largest_cc_ix = np.argmax(bincnts)
implant_mask = (label == largest_cc_ix)
else:
use_cache = False

if use_cache:
n_labels = np.fromfile(f"{intermediate_folder}/{sample}_n_labels.int64", dtype=np.int64)
else:
def label_chunk(i, chunk_size, chunk_prefix, implant_threshold_u16, global_shape):
start = i*chunk_size
end = (i+1)*chunk_size if i < n_chunks-1 else nz # Last chunk gets the rest
chunk_length = end-start
voxel_chunk = np.empty((chunk_length,ny,nx),dtype=np.uint16)
load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (start,0,0), global_shape)
noisy_implant = (voxel_chunk > implant_threshold_u16)
del voxel_chunk
label, n_features = ndi.label(noisy_implant, output=np.int64)
label.tofile(f'{chunk_prefix}{i}.int64')
del label
return n_features

for z in tqdm.tqdm(range(0,nz,chunk_size),"Loading and thresholding voxels"):
chunk_length = min(chunk_size,nz-z)
load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16",
(z,0,0), (nz,ny,nx))
noisy_implant[z:z+chunk_length] = (voxel_chunk[:chunk_length] > implant_threshold_u16)
with mp.Pool(n_cores) as pool:
label_chunk_partial = partial(label_chunk, chunk_size=layers_per_chunk, chunk_prefix=f"{intermediate_folder}/{sample}_", implant_threshold_u16=implant_threshold_u16, global_shape=(nz,ny,nx))
n_labels = pool.map(label_chunk_partial, range(n_chunks))

if verbose >= 1: print(f"Computing connected components")
np.array(n_labels, dtype=np.int64).tofile(f"{intermediate_folder}/{sample}_n_labels.int64")

label, n_features = ndi.label(noisy_implant)
if verbose >= 1: print(f"Counting component volumes")
bincnts = np.bincount(label[label>0],minlength=n_features+1)
new_labels = connected_components(f"{intermediate_folder}/{sample}_", n_labels, (nz,ny,nx), (layers_per_chunk,ny,nx), True)

largest_cc_ix = np.argmax(bincnts)
implant_mask=(label==largest_cc_ix)
print (new_labels)

quit()

output_dir = f"{hdf5_root}/masks/{scale}x/"
pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
if verbose >= 1: print(f"Writing largest connected component to {output_dir}/{sample}.h5")

update_hdf5_mask(f"{output_dir}/{sample}.h5",
group_name="implant",
datasets={'mask':implant_mask},
attributes={'scale':scale,'voxel_size':voxel_size,
'sample':sample, 'name':"implant_mask"})

group_name="implant",
datasets={'mask':implant_mask},
attributes={'scale':scale,'voxel_size':voxel_size,
'sample':sample, 'name':"implant_mask"})

# np.savez_compressed(f"{output_dir}/{sample}",mask=mask, scale=scale,voxel_size=voxel_size,
# sample=sample, name="implant_mask")
10 changes: 6 additions & 4 deletions src/pybind/connected_components-pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,23 @@

namespace python_api {

void connected_components(const std::string &base_path, np_array<int64_t> &py_n_labels, const std::tuple<int64_t, int64_t, int64_t> &py_global_shape, const bool verbose = false) {
int64_t connected_components(const std::string &base_path, np_array<int64_t> &py_n_labels, const std::tuple<int64_t, int64_t, int64_t> &py_total_shape, const std::tuple<int64_t, int64_t, int64_t> &py_global_shape, const bool verbose = false) {
auto n_labels_info = py_n_labels.request();
int64_t *n_labels = static_cast<int64_t*>(n_labels_info.ptr);

std::vector<int64_t> n_labels_vec(n_labels, n_labels + n_labels_info.size);

const idx3d global_shape = {std::get<0>(py_global_shape), std::get<1>(py_global_shape), std::get<2>(py_global_shape)};
const idx3d
total_shape = {std::get<0>(py_total_shape), std::get<1>(py_total_shape), std::get<2>(py_total_shape)},
global_shape = {std::get<0>(py_global_shape), std::get<1>(py_global_shape), std::get<2>(py_global_shape)};

NS::connected_components(base_path, n_labels_vec, global_shape, verbose);
return NS::connected_components(base_path, n_labels_vec, total_shape, global_shape, verbose);
}

}

PYBIND11_MODULE(connected_components, m) {
m.doc() = "Connected Components"; // optional module docstring

m.def("connected_components", &python_api::connected_components, py::arg("base_path"), py::arg("np_n_labels"), py::arg("global_shape"), py::arg("verbose") = false);
m.def("connected_components", &python_api::connected_components, py::arg("base_path"), py::arg("np_n_labels"), py::arg("total_shape"), py::arg("global_shape"), py::arg("verbose") = false);
}

0 comments on commit f07d308

Please sign in to comment.