Skip to content

Commit 27d23bf

Browse files
committed
feat: more upload download changes
1 parent 97d9a3c commit 27d23bf

File tree

1 file changed

+37
-22
lines changed

1 file changed

+37
-22
lines changed

examples/create_downampled.py

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -679,14 +679,6 @@ def compute_optimal_chunk_size(single_file_shape, num_mips, max_chunk_size=None)
679679
def process(args):
680680
x_i, y_i, z_i = args
681681

682-
# Use the new load_file function that handles download/caching
683-
print(f"Loading file for coordinates ({x_i}, {y_i}, {z_i})")
684-
loaded_zarr_store = load_file(x_i, y_i)
685-
686-
if loaded_zarr_store is None:
687-
print(f"Warning: Could not load file for row {x_i}, col {y_i}. Skipping...")
688-
return
689-
690682
start = [
691683
x_i * single_file_shape[0],
692684
y_i * single_file_shape[1],
@@ -702,6 +694,14 @@ def process(args):
702694
if f_name.exists() and not overwrite_output:
703695
return (start, end)
704696

697+
# Use the new load_file function that handles download/caching
698+
print(f"Loading file for coordinates ({x_i}, {y_i}, {z_i})")
699+
loaded_zarr_store = load_file(x_i, y_i)
700+
701+
if loaded_zarr_store is None:
702+
print(f"Warning: Could not load file for row {x_i}, col {y_i}. Skipping...")
703+
return
704+
705705
rawdata = load_data_from_zarr_store(loaded_zarr_store)
706706

707707
# Process all mip levels
@@ -778,7 +778,6 @@ def process(args):
778778
touch(f_name)
779779

780780
# Clean up cached file to save disk space
781-
# (you can comment this out if you want to keep files cached)
782781
delete_cached_zarr_file(x_i, y_i)
783782

784783
# Return the bounds of the processed chunk
@@ -791,14 +790,18 @@ def process(args):
791790

792791

793792
# %% Loop over all the chunks
793+
# Can do it in reverse order because the last chunks are most likely to error
794+
in_reverse = False
794795
coords = itertools.product(
795796
range(num_chunks_per_dim[0]),
796797
range(num_chunks_per_dim[1]),
797798
range(num_chunks_per_dim[2]),
798799
)
799-
# Do it in reverse order because the last chunks are most likely to error
800-
reversed_coords = list(coords)
801-
reversed_coords.reverse()
800+
if in_reverse:
801+
iter_coords = list(coords)
802+
iter_coords.reverse()
803+
else:
804+
iter_coords = coords
802805

803806
# %% Move the data across with multiple workers
804807
# TODO because we are using non-aligned writes, we can't use multiple workers
@@ -971,10 +974,15 @@ def upload_any_remaining_chunks():
971974

972975
# %% Move the data across with a single worker
973976
total_uploaded_files = 0
974-
for coord in reversed_coords:
977+
# TEMP early quit for testing
978+
max_iters = 4
979+
for coord in iter_coords:
975980
bounds = process(coord)
976981
start, end = bounds
977982
processed_chunks_bounds.append((start, end))
983+
if max_iters and len(processed_chunks_bounds) >= max_iters:
984+
print("Reached max iterations for testing, stopping early")
985+
break
978986

979987
# Periodically check and upload completed chunks to save disk space
980988
# This is done every 10 chunks to balance upload frequency vs overhead
@@ -986,15 +994,22 @@ def upload_any_remaining_chunks():
986994
for local_path, gcs_path in uploaded_files:
987995
f.write(f"{local_path} -> {gcs_path}\n")
988996

989-
# Final upload of any remaining chunks - hopefully should be none here, but maybe some failed
990-
print("Processing complete, uploading any remaining chunks...")
991-
total_uploaded_files += upload_any_remaining_chunks()
992-
print(f"Final upload completed: {total_uploaded_files} chunks uploaded")
993-
994-
# Write the list of uploaded files to a text file for reference
995-
with open(output_path / "uploaded_files.txt", "w") as f:
996-
for local_path, gcs_path in uploaded_files:
997-
f.write(f"{local_path} -> {gcs_path}\n")
997+
# %% Final upload of any remaining chunks - hopefully should be none here, but maybe some failed
998+
# This is not something we always want to run, so putting an input prompt here just in case
999+
continue_upload = input(
1000+
"Do you want to upload any remaining chunks in the output directory to GCS? (y/n): "
1001+
)
1002+
if continue_upload.lower() != "y":
1003+
print("Skipping final upload of remaining chunks.")
1004+
else:
1005+
print("Processing complete, uploading any remaining chunks...")
1006+
total_uploaded_files += upload_any_remaining_chunks()
1007+
print(f"Final upload completed: {total_uploaded_files} chunks uploaded")
1008+
1009+
# Write the list of uploaded files to a text file for reference
1010+
with open(output_path / "uploaded_files.txt", "w") as f:
1011+
for local_path, gcs_path in uploaded_files:
1012+
f.write(f"{local_path} -> {gcs_path}\n")
9981013

9991014
# %% Serve the dataset to be used in neuroglancer
10001015
vols[0].viewer(port=1337)

0 commit comments

Comments
 (0)