diff --git a/src/lib/cpp/cpu/connected_components.cc b/src/lib/cpp/cpu/connected_components.cc index 49a1af2..41ed68f 100644 --- a/src/lib/cpp/cpu/connected_components.cc +++ b/src/lib/cpp/cpu/connected_components.cc @@ -277,7 +277,7 @@ void canonical_names_and_size(const std::string &path, int64_t *__restrict__ out fclose(file); } -int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &global_shape, const bool verbose) { +int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) { auto cc_start = std::chrono::high_resolution_clock::now(); // Check if the call is well-formed int64_t chunks = n_labels.size(); diff --git a/src/lib/cpp/cpu_seq/connected_components.cc b/src/lib/cpp/cpu_seq/connected_components.cc index 93bcf16..f54d1fd 100644 --- a/src/lib/cpp/cpu_seq/connected_components.cc +++ b/src/lib/cpp/cpu_seq/connected_components.cc @@ -3,7 +3,7 @@ namespace cpu_seq { #pragma GCC diagnostic ignored "-Wunused-parameter" -int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &global_shape, const bool verbose) { +int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) { throw std::runtime_error("Not implemented"); } diff --git a/src/lib/cpp/gpu/connected_components.cc b/src/lib/cpp/gpu/connected_components.cc index 512dae0..f220f0f 100644 --- a/src/lib/cpp/gpu/connected_components.cc +++ b/src/lib/cpp/gpu/connected_components.cc @@ -3,7 +3,7 @@ namespace gpu { #pragma GCC diagnostic ignored "-Wunused-parameter" -int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &global_shape, const bool verbose) { +int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) { throw std::runtime_error("Not implemented"); } diff --git a/src/lib/cpp/include/connected_components.hh b/src/lib/cpp/include/connected_components.hh index 27e5bb3..3f2a872 100644 --- a/src/lib/cpp/include/connected_components.hh +++ b/src/lib/cpp/include/connected_components.hh @@ -6,7 +6,7 @@ namespace NS { // External Functions - int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &global_shape, const bool verbose = false); + int64_t connected_components(const std::string &base_path, std::vector &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose = false); // Internal Functions void apply_renaming(std::vector &img, const std::vector &to_rename); diff --git a/src/processing_steps/0600_segment_implant_cc.py b/src/processing_steps/0600_segment_implant_cc.py index 1fbe7b6..43b9299 100644 --- a/src/processing_steps/0600_segment_implant_cc.py +++ b/src/processing_steps/0600_segment_implant_cc.py @@ -2,15 +2,17 @@ sys.path.append(sys.path[0]+"/../") from config.constants import * from config.paths import hdf5_root, binary_root +from functools import partial from lib.py.helpers import commandline_args, update_hdf5_mask from lib.cpp.cpu.io import load_slice +from lib.cpp.cpu.connected_components import connected_components +import multiprocessing as mp NA = np.newaxis -sample, scale, chunk_size, verbose = commandline_args({"sample" : "", - "scale" : 8, - "chunk_size" : 256, - "verbose" : 1}) +sample, scale, verbose = commandline_args({"sample" : "", + "scale" : 8, + "verbose" : 1}) # Load metadata. TODO: Clean up, make automatic function. meta_filename = f"{hdf5_root}/hdf5-byte/msb/{sample}.h5" @@ -19,6 +21,8 @@ full_Nz, Ny, Nx = h5meta['voxels'].shape # Full image resolution Nz = full_Nz - np.sum(vm_shifts) # Full volume matched image resolution nz,ny,nx = np.array([Nz,Ny,Nx])//scale # Volume matched image resolution at chosen scale +intermediate_folder = f"{binary_root}/labels/{scale}x/" +os.makedirs(intermediate_folder, exist_ok=True) voxel_size = h5meta['voxels'].attrs['voxelsize'] * scale global_vmin = np.min(h5meta['subvolume_range'][:,0]) @@ -26,44 +30,86 @@ values = np.linspace(global_vmin,global_vmax,2**16) implant_threshold_u16 = np.argmin(np.abs(values-implant_threshold)) -if verbose >= 1: print(f"Reading metadata from {meta_filename}.\n"+ - f"volume_matching_shifts = {vm_shifts}\n"+ - f"full_Nz,Ny,Nx = {full_Nz,Ny,Nx}\n"+ - f"Nz = {Nz}\n"+ - f"nz,ny,nx = {nz,ny,nx}\n"+ - f"voxel_size = {voxel_size}\n"+ - f"vmin,vmax = {global_vmin,global_vmax}\n"+ - f"Implant threshold {implant_threshold} -> {implant_threshold_u16} as uint16") +# Automatic chunk size calculation. +# Should be that fmod(log2(n_chunks),1.0) == 0 and chunk_size * n_cores < available memory +layer_size = ny*nx +hyperthreading = True # TODO check if hyperthreading is enabled +n_cores = mp.cpu_count() // (2 if hyperthreading else 1) # Only count physical cores +available_memory = 1024**3 * 64 # 64 GB +memory_per_core = available_memory // n_cores +elements_per_core = memory_per_core // 8 # 8 bytes per element +layers_per_core = elements_per_core // layer_size +n_chunks = int(2**np.ceil(np.log2(nz // layers_per_core))) +layers_per_chunk = nz // n_chunks +chunk_size_elements = layers_per_chunk * layer_size +chunk_size_bytes = chunk_size_elements * 8 + +if verbose >= 1: print(f""" + Reading metadata from {meta_filename}. + volume_matching_shifts = {vm_shifts} + full_Nz,Ny,Nx = {full_Nz,Ny,Nx} + Nz = {Nz} + nz,ny,nx = {nz,ny,nx} + voxel_size = {voxel_size} + vmin,vmax = {global_vmin,global_vmax} + Implant threshold {implant_threshold} -> {implant_threshold_u16} as uint16 + Layers per core = {layers_per_core} + Chunk size = {chunk_size_bytes / 1024**2} MB + Layers per chunk = {layers_per_chunk} + Number of chunks = {n_chunks} + Number of cores = {n_cores} +""") h5meta.close() -noisy_implant = np.empty((nz,ny,nx),dtype=bool) -voxel_chunk = np.empty((chunk_size,ny,nx),dtype=np.uint16) +if layers_per_core > nz: + voxels = np.empty((nz,ny,nx),dtype=np.uint16) + load_slice(voxels, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (0,0,0), (nz,ny,nx)) + noisy_implant = (voxels > implant_threshold_u16) + del voxels + label, n_features = ndi.label(noisy_implant, output=np.int64) + bincnts = np.bincount(label[label>0],minlength=n_features+1) + largest_cc_ix = np.argmax(bincnts) + implant_mask = (label == largest_cc_ix) +else: + use_cache = False + + if use_cache: + n_labels = np.fromfile(f"{intermediate_folder}/{sample}_n_labels.int64", dtype=np.int64) + else: + def label_chunk(i, chunk_size, chunk_prefix, implant_threshold_u16, global_shape): + start = i*chunk_size + end = (i+1)*chunk_size if i < n_chunks-1 else nz # Last chunk gets the rest + chunk_length = end-start + voxel_chunk = np.empty((chunk_length,ny,nx),dtype=np.uint16) + load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (start,0,0), global_shape) + noisy_implant = (voxel_chunk > implant_threshold_u16) + del voxel_chunk + label, n_features = ndi.label(noisy_implant, output=np.int64) + label.tofile(f'{chunk_prefix}{i}.int64') + del label + return n_features -for z in tqdm.tqdm(range(0,nz,chunk_size),"Loading and thresholding voxels"): - chunk_length = min(chunk_size,nz-z) - load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16", - (z,0,0), (nz,ny,nx)) - noisy_implant[z:z+chunk_length] = (voxel_chunk[:chunk_length] > implant_threshold_u16) + with mp.Pool(n_cores) as pool: + label_chunk_partial = partial(label_chunk, chunk_size=layers_per_chunk, chunk_prefix=f"{intermediate_folder}/{sample}_", implant_threshold_u16=implant_threshold_u16, global_shape=(nz,ny,nx)) + n_labels = pool.map(label_chunk_partial, range(n_chunks)) -if verbose >= 1: print(f"Computing connected components") + np.array(n_labels, dtype=np.int64).tofile(f"{intermediate_folder}/{sample}_n_labels.int64") -label, n_features = ndi.label(noisy_implant) -if verbose >= 1: print(f"Counting component volumes") -bincnts = np.bincount(label[label>0],minlength=n_features+1) + new_labels = connected_components(f"{intermediate_folder}/{sample}_", n_labels, (nz,ny,nx), (layers_per_chunk,ny,nx), True) -largest_cc_ix = np.argmax(bincnts) -implant_mask=(label==largest_cc_ix) + print (new_labels) + + quit() output_dir = f"{hdf5_root}/masks/{scale}x/" pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) if verbose >= 1: print(f"Writing largest connected component to {output_dir}/{sample}.h5") update_hdf5_mask(f"{output_dir}/{sample}.h5", - group_name="implant", - datasets={'mask':implant_mask}, - attributes={'scale':scale,'voxel_size':voxel_size, - 'sample':sample, 'name':"implant_mask"}) - + group_name="implant", + datasets={'mask':implant_mask}, + attributes={'scale':scale,'voxel_size':voxel_size, + 'sample':sample, 'name':"implant_mask"}) # np.savez_compressed(f"{output_dir}/{sample}",mask=mask, scale=scale,voxel_size=voxel_size, # sample=sample, name="implant_mask") diff --git a/src/pybind/connected_components-pybind.cc b/src/pybind/connected_components-pybind.cc index 98d927d..edb18ce 100644 --- a/src/pybind/connected_components-pybind.cc +++ b/src/pybind/connected_components-pybind.cc @@ -2,15 +2,17 @@ namespace python_api { - void connected_components(const std::string &base_path, np_array &py_n_labels, const std::tuple &py_global_shape, const bool verbose = false) { + int64_t connected_components(const std::string &base_path, np_array &py_n_labels, const std::tuple &py_total_shape, const std::tuple &py_global_shape, const bool verbose = false) { auto n_labels_info = py_n_labels.request(); int64_t *n_labels = static_cast(n_labels_info.ptr); std::vector n_labels_vec(n_labels, n_labels + n_labels_info.size); - const idx3d global_shape = {std::get<0>(py_global_shape), std::get<1>(py_global_shape), std::get<2>(py_global_shape)}; + const idx3d + total_shape = {std::get<0>(py_total_shape), std::get<1>(py_total_shape), std::get<2>(py_total_shape)}, + global_shape = {std::get<0>(py_global_shape), std::get<1>(py_global_shape), std::get<2>(py_global_shape)}; - NS::connected_components(base_path, n_labels_vec, global_shape, verbose); + return NS::connected_components(base_path, n_labels_vec, total_shape, global_shape, verbose); } } @@ -18,5 +20,5 @@ namespace python_api { PYBIND11_MODULE(connected_components, m) { m.doc() = "Connected Components"; // optional module docstring - m.def("connected_components", &python_api::connected_components, py::arg("base_path"), py::arg("np_n_labels"), py::arg("global_shape"), py::arg("verbose") = false); + m.def("connected_components", &python_api::connected_components, py::arg("base_path"), py::arg("np_n_labels"), py::arg("total_shape"), py::arg("global_shape"), py::arg("verbose") = false); } \ No newline at end of file