# %% Import packages

import itertools
import math
from pathlib import Path
import time
import numpy as np
from cloudvolume import CloudVolume
from cloudvolume.lib import touch, Vec
import zarr
from neuroglancer.downsample import downsample_with_averaging

# %% Define the path to the files

HERE = Path(__file__).parent

# Paths to change
INPUTFOLDER = Path("")
OUTPUT_PATH = Path("")

# Other settings
OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
OVERWRITE = False
NUM_MIPS = 5
MIP_CUTOFF = 4  # To save time, you can start at the lowest-resolution mip levels and work up

# %% Load the data
OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
# Need to figure out some sizes by checking the data
all_files = list(INPUTFOLDER.glob("**/*.zarr"))


def load_zarr_data(file_path):
    zarr_store = zarr.open(file_path, mode="r")
    return zarr_store


def load_zarr_and_permute(file_path):
    zarr_store = zarr.open(file_path, mode="r")
    # Input is in Z, T, C, Y, X order
    # Want XYTCZ order
    data = zarr_store[:]
    data = np.transpose(data, (4, 3, 1, 2, 0))  # Permute to XYTCZ
    return zarr_store, data
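

# Minimal sanity check of the permutation above (a sketch on a dummy array, not the
# real data): an array in Z, T, C, Y, X order should come out in X, Y, T, C, Z order.
_dummy = np.zeros((2, 3, 4, 5, 6))  # Z=2, T=3, C=4, Y=5, X=6
assert np.transpose(_dummy, (4, 3, 1, 2, 0)).shape == (6, 5, 3, 4, 2)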


def load_chunk_from_zarr_store(
    zarr_store, x_start, x_end, y_start, y_end, z_start, z_end, channel=0
):
    # Input is in Z, T, C, Y, X order
    data = zarr_store[
        :,  # T
        z_start:z_end,  # Z
        channel,  # C
        y_start:y_end,  # Y
        x_start:x_end,  # X
    ]
    # The original timestamp was 65535 and can be filtered out
    data = np.where(data == 65535, 0, data)
    print("Loaded data shape before squeeze:", data.shape)
    data = np.squeeze(data)  # Remove any singleton dimensions
    print("Loaded data shape:", data.shape)
    # Then permute so the axes are in X, Y, Z order
    data = np.transpose(data, (-1, -2, 0))
    return data


zarr_store = load_zarr_data(all_files[0])

# It may take too long to load an entire file at once, so it might be necessary to process in chunks
# %% Check how long it takes to load a single chunk
start_time = time.time()
data = load_chunk_from_zarr_store(zarr_store, 0, 256, 0, 256, 0, 128, channel=0)
print("Time to load a single chunk:", time.time() - start_time)
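
# If loading whole files is too slow, one option (a sketch, not used below) is to walk
# the store in blocks matching the chunk size from the timing test above. This only
# computes the block grid; it does not load any data.
_z, _y, _x = zarr_store.shape[1], zarr_store.shape[3], zarr_store.shape[4]
_blocks = [
    (x0, y0, z0)
    for z0 in range(0, _z, 128)
    for y0 in range(0, _y, 256)
    for x0 in range(0, _x, 256)
]
print("Number of 256 x 256 x 128 blocks in one file:", len(_blocks))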

# %% Inspect the data
shape = zarr_store.shape
# Input is in Z, T, C, Y, X order
# Want XYTCZ order
# single_file_shape = [shape[4], shape[3], shape[1], shape[2], shape[0]]
single_file_dims_shape = [shape[4], shape[3], shape[1]]
size_x = 1
size_y = 1
size_z = 1

num_channels = shape[2]
data_type = "uint16"
chunk_size = [256, 256, 128]

# You can provide a subset here also
num_rows = 16
num_cols = 24
volume_size = [
    single_file_dims_shape[0] * num_cols,
    single_file_dims_shape[1] * num_rows,
    single_file_dims_shape[2],
]  # XYZ (T)
print("Volume size:", volume_size)
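
# Rough sanity check (a sketch, assuming the isotropic factor-of-2-per-level
# downsampling configured in the next cell): expected volume extent at each mip level.
for mip in range(NUM_MIPS):
    print(f"mip {mip} extent:", [int(math.ceil(s / 2**mip)) for s in volume_size])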

# %% Setup the cloudvolume info
info = CloudVolume.create_new_info(
    num_channels=num_channels,
    layer_type="image",
    data_type=data_type,
    encoding="raw",
    resolution=[size_x, size_y, size_z],
    voxel_offset=[0, 0, 0],
    chunk_size=chunk_size,
    volume_size=volume_size,
    max_mip=NUM_MIPS - 1,
    factor=Vec(2, 2, 2),
)
vol = CloudVolume(
    "file://" + str(OUTPUT_PATH),
    info=info,
    mip=0,
)
vol.commit_info()
vol.provenance.description = "Example data conversion"
vol.commit_provenance()
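# Optional check (a sketch): the committed info should contain one scale per mip level.
print("Scales in info:", len(vol.info["scales"]))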
del vol

# %% Create the volumes for each mip level and set up progress tracking
vols = [
    CloudVolume("file://" + str(OUTPUT_PATH), mip=i, compress=False)
    for i in range(NUM_MIPS)
]
progress_dir = OUTPUT_PATH / "progress"
progress_dir.mkdir(exist_ok=True)
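
# Small helper (a sketch, not part of the original workflow): count how many chunks are
# already marked done, which is useful when resuming an interrupted run.
print("Chunks already marked done:", len(list(progress_dir.glob("*.done"))))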

# %% Functions for moving data
read_shape = single_file_dims_shape  # this is for reading data


def process(args):
    x_i, y_i = args
    # Location of this tile within the full volume, in mip-0 voxels
    start = [x_i * read_shape[0], y_i * read_shape[1], 0]
    end = [
        (x_i + 1) * read_shape[0],
        (y_i + 1) * read_shape[1],
        read_shape[2],
    ]
    # Progress marker so interrupted runs can resume without redoing finished tiles
    f_name = progress_dir / f"{start[0]}-{end[0]}_{start[1]}-{end[1]}.done"
    if f_name.exists() and not OVERWRITE:
        return
    flat_index = x_i * num_cols + y_i
    path = all_files[flat_index]
    rawdata = load_zarr_and_permute(path)[1]
    print("Working on", f_name)
    for mip_level in reversed(range(MIP_CUTOFF, NUM_MIPS)):
        if mip_level == 0:
            downsampled = rawdata
            ds_start = start
            ds_end = end
        else:
            # With factor=Vec(2, 2, 2), the per-axis downsampling factor at mip m is 2**m
            factor = 2**mip_level
            downsampled = downsample_with_averaging(
                rawdata, [factor, factor, factor, 1]
            )
            ds_start = [int(math.ceil(s / factor)) for s in start]
            ds_end = [int(math.ceil(e / factor)) for e in end]

        vols[mip_level][
            ds_start[0] : ds_end[0], ds_start[1] : ds_end[1], ds_start[2] : ds_end[2]
        ] = downsampled
    touch(f_name)


# %% Try with a single chunk to see if it works
x_i, y_i = 0, 0
process((x_i, y_i))

# %% Loop over all the chunks

coords = itertools.product(range(num_rows), range(num_cols))
# Process in reverse order because the last chunks are the most likely to fail
reversed_coords = list(coords)
reversed_coords.reverse()

# %% Move the data across with multiple workers
# from concurrent.futures import ProcessPoolExecutor
# max_workers = 8

# with ProcessPoolExecutor(max_workers=max_workers) as executor:
#     executor.map(process, reversed_coords)

# %% Move the data across with a single worker
for coord in reversed_coords:
    process(coord)

# %% Serve the dataset to be used in neuroglancer
vols[0].viewer(port=1337)
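
# The viewer above serves the precomputed files over local HTTP. In Neuroglancer the
# dataset can then be added as an image layer with a source such as (assumed pattern;
# the port must match the one passed to viewer()):
#   precomputed://http://localhost:1337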

# %%