Commit 00813c5

feat: downsample in progress
1 parent c850e3d commit 00813c5

1 file changed: +194 −0 lines changed

examples/create_downampled.py

@@ -0,0 +1,194 @@
# %% Import packages

import itertools
import math
import time
from pathlib import Path

import numpy as np
import zarr
from cloudvolume import CloudVolume
from cloudvolume.lib import touch, Vec
from neuroglancer.downsample import downsample_with_averaging

# %% Define the paths to the files

HERE = Path(__file__).parent

# Paths to change
INPUTFOLDER = Path("")
OUTPUT_PATH = Path("")

# Other settings
OUTPUT_PATH.mkdir(exist_ok=True, parents=True)
OVERWRITE = False
NUM_MIPS = 5
MIP_CUTOFF = 4  # To save time, you can start at the lowest resolution (highest mip) and work up
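
# Quick sanity check (illustrative): with the factor-2 downsampling configured
# below, mip m is scaled by 2**m, and only mips >= MIP_CUTOFF are written.
for m in range(MIP_CUTOFF, NUM_MIPS):
    print(f"Will write mip {m} at downsample factor {2 ** m}")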

# %% Load the data
# Need to figure out some sizes by checking the data
all_files = list(INPUTFOLDER.glob("**/*.zarr"))


def load_zarr_data(file_path):
    zarr_store = zarr.open(file_path, mode="r")
    return zarr_store


def load_zarr_and_permute(file_path):
    zarr_store = zarr.open(file_path, mode="r")
    # Input is in Z, T, C, Y, X order
    # Want XYTCZ order
    data = zarr_store[:]
    data = np.transpose(data, (4, 3, 1, 2, 0))  # Permute to XYTCZ
    return zarr_store, data

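
# Illustrative check of the permutation above on a dummy array:
# (Z, T, C, Y, X) -> (X, Y, T, C, Z)
_dummy = np.zeros((2, 3, 4, 5, 6))  # Z, T, C, Y, X
print(np.transpose(_dummy, (4, 3, 1, 2, 0)).shape)  # (6, 5, 3, 4, 2)
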
def load_chunk_from_zarr_store(
    zarr_store, x_start, x_end, y_start, y_end, z_start, z_end, channel=0
):
    # NB: the slicing below indexes the store as T, Z, C, Y, X
    data = zarr_store[
        :,  # T
        z_start:z_end,  # Z
        channel,  # C
        y_start:y_end,  # Y
        x_start:x_end,  # X
    ]
    # The original timestamp value was 65535; it can be filtered out
    data = np.where(data == 65535, 0, data)
    print("Loaded data shape before squeeze:", data.shape)
    data = np.squeeze(data)  # Remove any singleton dimensions
    print("Loaded data shape:", data.shape)
    # Then permute the squeezed (Z, Y, X) data to (X, Y, Z)
    data = np.transpose(data, (-1, -2, 0))
    return data


zarr_store = load_zarr_data(all_files[0])

# It may take too long to load a whole file at once; it might need to be processed in chunks
# %% Check how long it takes to load a single chunk
start_time = time.time()
data = load_chunk_from_zarr_store(zarr_store, 0, 256, 0, 256, 0, 128, channel=0)
print("Time to load a single chunk:", time.time() - start_time)
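
# Rough extrapolation (a sketch, assuming the chunk load rate is representative).
# Channels sit at index 2 of the store shape, so this estimates one full channel.
elapsed = time.time() - start_time  # re-measured after the print, so slightly pessimistic
voxels_per_chunk = 256 * 256 * 128
voxels_per_channel = np.prod(zarr_store.shape) // zarr_store.shape[2]
print(
    "Estimated time to load one full channel:",
    elapsed * voxels_per_channel / voxels_per_chunk,
)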

# %% Inspect the data
shape = zarr_store.shape
# Input is in Z, T, C, Y, X order
# Want XYTCZ order
# single_file_shape = [shape[4], shape[3], shape[1], shape[2], shape[0]]
single_file_dims_shape = [shape[4], shape[3], shape[1]]  # X, Y, T
size_x = 1
size_y = 1
size_z = 1

num_channels = shape[2]
data_type = "uint16"
chunk_size = [256, 256, 128]

# You can also provide a subset here
num_rows = 16
num_cols = 24
volume_size = [
    single_file_dims_shape[0] * num_cols,
    single_file_dims_shape[1] * num_rows,
    single_file_dims_shape[2],
]  # XYZ (T is used as Z)
print("Volume size:", volume_size)
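
# Quick consistency check (illustrative): the mosaic is num_cols x num_rows
# tiles with one zarr file per tile, so the file count should match.
print(f"Expecting {num_rows * num_cols} files, found {len(all_files)}")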

# %% Set up the CloudVolume info
info = CloudVolume.create_new_info(
    num_channels=num_channels,
    layer_type="image",
    data_type=data_type,
    encoding="raw",
    resolution=[size_x, size_y, size_z],
    voxel_offset=[0, 0, 0],
    chunk_size=chunk_size,
    volume_size=volume_size,
    max_mip=NUM_MIPS - 1,
    factor=Vec(2, 2, 2),
)
vol = CloudVolume(
    "file://" + str(OUTPUT_PATH),
    info=info,
    mip=0,
)
vol.commit_info()
vol.provenance.description = "Example data conversion"
vol.commit_provenance()
del vol
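
# Illustrative: with factor=Vec(2, 2, 2), each scale in the generated info
# halves the voxel counts of the previous one.
for i, scale in enumerate(info["scales"]):
    print("mip", i, "size", scale["size"], "resolution", scale["resolution"])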

# %% Create the volumes for each mip level and hold progress
vols = [
    CloudVolume("file://" + str(OUTPUT_PATH), mip=i, compress=False)
    for i in range(NUM_MIPS)
]
progress_dir = OUTPUT_PATH / "progress"
progress_dir.mkdir(exist_ok=True)
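# The .done marker files in progress_dir make the conversion resumable:
# process() below skips any tile whose marker exists unless OVERWRITE is set.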

# %% Functions for moving data
read_shape = single_file_dims_shape  # this is the shape for reading data


def process(args):
    # coords below yields (row, col); rows index Y tiles, columns index X tiles
    y_i, x_i = args
    start = [x_i * read_shape[0], y_i * read_shape[1], 0]
    end = [
        (x_i + 1) * read_shape[0],
        (y_i + 1) * read_shape[1],
        read_shape[2],
    ]
    f_name = progress_dir / f"{start[0]}-{end[0]}_{start[1]}-{end[1]}.done"
    if f_name.exists() and not OVERWRITE:
        return
    # Assumes all_files is ordered row-major (one row of tiles at a time)
    flat_index = y_i * num_cols + x_i
    path = all_files[flat_index]
    rawdata = load_zarr_and_permute(path)[1]
    print("Working on", f_name)
    for mip_level in reversed(range(MIP_CUTOFF, NUM_MIPS)):
        if mip_level == 0:
            downsampled = rawdata
            ds_start = start
            ds_end = end
        else:
            # Each mip level halves the resolution, so mip m is scaled by 2**m
            factor = 2**mip_level
            downsampled = downsample_with_averaging(
                rawdata, [factor, factor, factor, 1]
            )
            ds_start = [int(math.ceil(s / factor)) for s in start]
            ds_end = [int(math.ceil(e / factor)) for e in end]

        vols[mip_level][
            ds_start[0] : ds_end[0], ds_start[1] : ds_end[1], ds_start[2] : ds_end[2]
        ] = downsampled
    touch(f_name)
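
# Illustrative: the per-tile extent written at each mip level, matching the
# ceil-division in process() above.
for m in range(MIP_CUTOFF, NUM_MIPS):
    print("mip", m, [int(math.ceil(s / 2**m)) for s in read_shape])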

# %% Try with a single chunk to see if it works
y_i, x_i = 0, 0
process((y_i, x_i))

# %% Loop over all the chunks

coords = itertools.product(range(num_rows), range(num_cols))
# Do it in reverse order because the last chunks are most likely to error
reversed_coords = list(coords)
reversed_coords.reverse()

# %% Move the data across with multiple workers
# from concurrent.futures import ProcessPoolExecutor
#
# max_workers = 8
# with ProcessPoolExecutor(max_workers=max_workers) as executor:
#     executor.map(process, reversed_coords)

# %% Move the data across with a single worker
for coord in reversed_coords:
    process(coord)
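
# %% Optional sanity check (a sketch): count the completed tiles
done = len(list(progress_dir.glob("*.done")))
print(f"{done} / {num_rows * num_cols} tiles done")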

# %% Serve the dataset to be used in neuroglancer
vols[0].viewer(port=1337)
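# The precomputed data should now be served at http://localhost:1337; in
# neuroglancer it can be added as a layer with source
# precomputed://http://localhost:1337 (assuming CloudVolume's default viewer).
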
# %%
