diff --git a/src/lib/cpp/cpu/connected_components.cc b/src/lib/cpp/cpu/connected_components.cc
index 49a1af2..41ed68f 100644
--- a/src/lib/cpp/cpu/connected_components.cc
+++ b/src/lib/cpp/cpu/connected_components.cc
@@ -277,7 +277,7 @@ void canonical_names_and_size(const std::string &path, int64_t *__restrict__ out
     fclose(file);
 }
 
-int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose) {
+int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) {
     auto cc_start = std::chrono::high_resolution_clock::now();
     // Check if the call is well-formed
     int64_t chunks = n_labels.size();
diff --git a/src/lib/cpp/cpu_seq/connected_components.cc b/src/lib/cpp/cpu_seq/connected_components.cc
index 93bcf16..f54d1fd 100644
--- a/src/lib/cpp/cpu_seq/connected_components.cc
+++ b/src/lib/cpp/cpu_seq/connected_components.cc
@@ -3,7 +3,7 @@
 namespace cpu_seq {
 
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose) {
+int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) {
     throw std::runtime_error("Not implemented");
 }
 
diff --git a/src/lib/cpp/gpu/connected_components.cc b/src/lib/cpp/gpu/connected_components.cc
index 512dae0..f220f0f 100644
--- a/src/lib/cpp/gpu/connected_components.cc
+++ b/src/lib/cpp/gpu/connected_components.cc
@@ -3,7 +3,7 @@
 namespace gpu {
 
 #pragma GCC diagnostic ignored "-Wunused-parameter"
-int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose) {
+int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose) {
     throw std::runtime_error("Not implemented");
 }
 
diff --git a/src/lib/cpp/include/connected_components.hh b/src/lib/cpp/include/connected_components.hh
index 27e5bb3..3f2a872 100644
--- a/src/lib/cpp/include/connected_components.hh
+++ b/src/lib/cpp/include/connected_components.hh
@@ -6,7 +6,7 @@
 namespace NS {
 
     // External Functions
-    int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &global_shape, const bool verbose = false);
+    int64_t connected_components(const std::string &base_path, std::vector<int64_t> &n_labels, const idx3d &total_shape, const idx3d &global_shape, const bool verbose = false);
 
     // Internal Functions
     void apply_renaming(std::vector<int64_t> &img, const std::vector<int64_t> &to_rename);
diff --git a/src/processing_steps/0600_segment_implant_cc.py b/src/processing_steps/0600_segment_implant_cc.py
index 1fbe7b6..43b9299 100644
--- a/src/processing_steps/0600_segment_implant_cc.py
+++ b/src/processing_steps/0600_segment_implant_cc.py
@@ -2,15 +2,17 @@
 sys.path.append(sys.path[0]+"/../")
 from config.constants import *
 from config.paths import hdf5_root, binary_root
+from functools import partial
 from lib.py.helpers import commandline_args, update_hdf5_mask
 from lib.cpp.cpu.io import load_slice
+from lib.cpp.cpu.connected_components import connected_components
+import multiprocessing as mp
 
 NA = np.newaxis
 
-sample, scale, chunk_size, verbose = commandline_args({"sample" : "<required>",
-                                                       "scale" : 8,
-                                                       "chunk_size" : 256,
-                                                       "verbose" : 1})
+sample, scale, verbose = commandline_args({"sample"  : "<required>",
+                                           "scale"   : 8,
+                                           "verbose" : 1})
 
 # Load metadata. TODO: Clean up, make automatic function.
 meta_filename = f"{hdf5_root}/hdf5-byte/msb/{sample}.h5"
@@ -19,6 +21,8 @@
 full_Nz, Ny, Nx = h5meta['voxels'].shape    # Full image resolution
 Nz         = full_Nz - np.sum(vm_shifts)    # Full volume matched image resolution
 nz,ny,nx   = np.array([Nz,Ny,Nx])//scale    # Volume matched image resolution at chosen scale
+intermediate_folder = f"{binary_root}/labels/{scale}x/"
+os.makedirs(intermediate_folder, exist_ok=True)
 
 voxel_size   = h5meta['voxels'].attrs['voxelsize'] * scale
 global_vmin = np.min(h5meta['subvolume_range'][:,0])
@@ -26,44 +30,86 @@
 values      = np.linspace(global_vmin,global_vmax,2**16)
 implant_threshold_u16 = np.argmin(np.abs(values-implant_threshold))
 
-if verbose >= 1: print(f"Reading metadata from {meta_filename}.\n"+
-      f"volume_matching_shifts = {vm_shifts}\n"+
-      f"full_Nz,Ny,Nx = {full_Nz,Ny,Nx}\n"+
-      f"Nz            = {Nz}\n"+
-      f"nz,ny,nx      = {nz,ny,nx}\n"+
-      f"voxel_size    = {voxel_size}\n"+
-      f"vmin,vmax     = {global_vmin,global_vmax}\n"+
-      f"Implant threshold {implant_threshold} -> {implant_threshold_u16} as uint16")
+# Automatic chunk size calculation.
+# Should be that fmod(log2(n_chunks),1.0) == 0 and chunk_size * n_cores < available memory
+layer_size = ny*nx
+hyperthreading = True # TODO check if hyperthreading is enabled
+n_cores = mp.cpu_count() // (2 if hyperthreading else 1) # Only count physical cores
+available_memory = 1024**3 * 64 # 64 GB
+memory_per_core  = available_memory // n_cores
+elements_per_core = memory_per_core // 8 # 8 bytes per element
+layers_per_core = elements_per_core // layer_size
+n_chunks = int(2**np.ceil(np.log2(nz // layers_per_core)))
+layers_per_chunk = nz // n_chunks
+chunk_size_elements = layers_per_chunk * layer_size
+chunk_size_bytes = chunk_size_elements * 8
+
+if verbose >= 1: print(f"""
+    Reading metadata from {meta_filename}.
+    volume_matching_shifts = {vm_shifts}
+    full_Nz,Ny,Nx = {full_Nz,Ny,Nx}
+    Nz            = {Nz}
+    nz,ny,nx      = {nz,ny,nx}
+    voxel_size    = {voxel_size}
+    vmin,vmax     = {global_vmin,global_vmax}
+    Implant threshold {implant_threshold} -> {implant_threshold_u16} as uint16
+    Layers per core = {layers_per_core}
+    Chunk size     = {chunk_size_bytes / 1024**2} MB
+    Layers per chunk = {layers_per_chunk}
+    Number of chunks = {n_chunks}
+    Number of cores = {n_cores}
+""")
 h5meta.close()
 
-noisy_implant = np.empty((nz,ny,nx),dtype=bool)
-voxel_chunk   = np.empty((chunk_size,ny,nx),dtype=np.uint16)
+if layers_per_core > nz:
+    voxels = np.empty((nz,ny,nx),dtype=np.uint16)
+    load_slice(voxels, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (0,0,0), (nz,ny,nx))
+    noisy_implant = (voxels > implant_threshold_u16)
+    del voxels
+    label, n_features = ndi.label(noisy_implant, output=np.int64)
+    bincnts           = np.bincount(label[label>0],minlength=n_features+1)
+    largest_cc_ix     = np.argmax(bincnts)
+    implant_mask      = (label == largest_cc_ix)
+else:
+    use_cache = False
+
+    if use_cache:
+        n_labels = np.fromfile(f"{intermediate_folder}/{sample}_n_labels.int64", dtype=np.int64)
+    else:
+        def label_chunk(i, chunk_size, chunk_prefix, implant_threshold_u16, global_shape):
+            start = i*chunk_size
+            end   = (i+1)*chunk_size if i < n_chunks-1 else nz # Last chunk gets the rest
+            chunk_length = end-start
+            voxel_chunk   = np.empty((chunk_length,ny,nx),dtype=np.uint16)
+            load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16", (start,0,0), global_shape)
+            noisy_implant = (voxel_chunk > implant_threshold_u16)
+            del voxel_chunk
+            label, n_features = ndi.label(noisy_implant, output=np.int64)
+            label.tofile(f'{chunk_prefix}{i}.int64')
+            del label
+            return n_features
 
-for z in tqdm.tqdm(range(0,nz,chunk_size),"Loading and thresholding voxels"):
-    chunk_length = min(chunk_size,nz-z)
-    load_slice(voxel_chunk, f"{binary_root}/voxels/{scale}x/{sample}.uint16",
-               (z,0,0), (nz,ny,nx))
-    noisy_implant[z:z+chunk_length] = (voxel_chunk[:chunk_length] > implant_threshold_u16)
+        with mp.Pool(n_cores) as pool:
+            label_chunk_partial = partial(label_chunk, chunk_size=layers_per_chunk, chunk_prefix=f"{intermediate_folder}/{sample}_", implant_threshold_u16=implant_threshold_u16, global_shape=(nz,ny,nx))
+            n_labels = pool.map(label_chunk_partial, range(n_chunks))
 
-if verbose >= 1: print(f"Computing connected components")
+        np.array(n_labels, dtype=np.int64).tofile(f"{intermediate_folder}/{sample}_n_labels.int64")
 
-label, n_features = ndi.label(noisy_implant)
-if verbose >= 1: print(f"Counting component volumes")
-bincnts           = np.bincount(label[label>0],minlength=n_features+1)
+    new_labels = connected_components(f"{intermediate_folder}/{sample}_", n_labels, (nz,ny,nx), (layers_per_chunk,ny,nx), True)
 
-largest_cc_ix     = np.argmax(bincnts)
-implant_mask=(label==largest_cc_ix)
+    print (new_labels)
+
+    quit()
 
 output_dir = f"{hdf5_root}/masks/{scale}x/"
 pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
 if verbose >= 1: print(f"Writing largest connected component to {output_dir}/{sample}.h5")
 
 update_hdf5_mask(f"{output_dir}/{sample}.h5",
-                 group_name="implant",
-                 datasets={'mask':implant_mask},
-                 attributes={'scale':scale,'voxel_size':voxel_size,
-                             'sample':sample, 'name':"implant_mask"})
-
+                group_name="implant",
+                datasets={'mask':implant_mask},
+                attributes={'scale':scale,'voxel_size':voxel_size,
+                            'sample':sample, 'name':"implant_mask"})
 
 # np.savez_compressed(f"{output_dir}/{sample}",mask=mask, scale=scale,voxel_size=voxel_size,
 #                     sample=sample, name="implant_mask")
diff --git a/src/pybind/connected_components-pybind.cc b/src/pybind/connected_components-pybind.cc
index 98d927d..edb18ce 100644
--- a/src/pybind/connected_components-pybind.cc
+++ b/src/pybind/connected_components-pybind.cc
@@ -2,15 +2,17 @@
 
 namespace python_api {
 
-    void connected_components(const std::string &base_path, np_array<int64_t> &py_n_labels, const std::tuple<int64_t, int64_t, int64_t> &py_global_shape, const bool verbose = false) {
+    int64_t connected_components(const std::string &base_path, np_array<int64_t> &py_n_labels, const std::tuple<int64_t, int64_t, int64_t> &py_total_shape, const std::tuple<int64_t, int64_t, int64_t> &py_global_shape, const bool verbose = false) {
         auto n_labels_info = py_n_labels.request();
         int64_t *n_labels = static_cast<int64_t*>(n_labels_info.ptr);
 
         std::vector<int64_t> n_labels_vec(n_labels, n_labels + n_labels_info.size);
 
-        const idx3d global_shape = {std::get<0>(py_global_shape), std::get<1>(py_global_shape), std::get<2>(py_global_shape)};
+        const idx3d
+            total_shape = {std::get<0>(py_total_shape), std::get<1>(py_total_shape), std::get<2>(py_total_shape)},
+            global_shape = {std::get<0>(py_global_shape), std::get<1>(py_global_shape), std::get<2>(py_global_shape)};
 
-        NS::connected_components(base_path, n_labels_vec, global_shape, verbose);
+        return NS::connected_components(base_path, n_labels_vec, total_shape, global_shape, verbose);
     }
 
 }
@@ -18,5 +20,5 @@ namespace python_api {
 PYBIND11_MODULE(connected_components, m) {
     m.doc() = "Connected Components"; // optional module docstring
 
-    m.def("connected_components", &python_api::connected_components, py::arg("base_path"), py::arg("np_n_labels"), py::arg("global_shape"), py::arg("verbose") = false);
+    m.def("connected_components", &python_api::connected_components, py::arg("base_path"), py::arg("np_n_labels"), py::arg("total_shape"), py::arg("global_shape"), py::arg("verbose") = false);
 }
\ No newline at end of file