@@ -679,14 +679,6 @@ def compute_optimal_chunk_size(single_file_shape, num_mips, max_chunk_size=None)
 def process(args):
     x_i, y_i, z_i = args
 
-    # Use the new load_file function that handles download/caching
-    print(f"Loading file for coordinates ({x_i}, {y_i}, {z_i})")
-    loaded_zarr_store = load_file(x_i, y_i)
-
-    if loaded_zarr_store is None:
-        print(f"Warning: Could not load file for row {x_i}, col {y_i}. Skipping...")
-        return
-
     start = [
         x_i * single_file_shape[0],
         y_i * single_file_shape[1],
@@ -702,6 +694,14 @@ def process(args):
     if f_name.exists() and not overwrite_output:
         return (start, end)
 
+    # Use the new load_file function that handles download/caching
+    print(f"Loading file for coordinates ({x_i}, {y_i}, {z_i})")
+    loaded_zarr_store = load_file(x_i, y_i)
+
+    if loaded_zarr_store is None:
+        print(f"Warning: Could not load file for row {x_i}, col {y_i}. Skipping...")
+        return
+
     rawdata = load_data_from_zarr_store(loaded_zarr_store)
 
     # Process all mip levels
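
load_file and delete_cached_zarr_file are defined earlier in the script and are not shown in this section. For context, a minimal sketch of the contract that process() relies on, with hypothetical names (cache_dir and download_remote_zarr are assumptions for illustration, not the script's actual helpers):

import zarr
from pathlib import Path

cache_dir = Path("cache")  # assumed location of the local download cache

def load_file(x_i, y_i):
    # Download the (row, col) zarr file into the cache if it is missing, then open it.
    # Returns None when the file cannot be fetched, which process() checks for above.
    cache_path = cache_dir / f"file_{x_i}_{y_i}.zarr"
    try:
        if not cache_path.exists():
            download_remote_zarr(x_i, y_i, cache_path)  # hypothetical downloader
        return zarr.open(str(cache_path), mode="r")
    except Exception as e:
        print(f"load_file failed for ({x_i}, {y_i}): {e}")
        return None

Note that the reordering in this commit matters: the f_name.exists() check now runs before load_file(), so chunks whose output already exists are skipped without downloading anything.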
@@ -778,7 +778,6 @@ def process(args):
     touch(f_name)
 
     # Clean up cached file to save disk space
-    # (you can comment this out if you want to keep files cached)
     delete_cached_zarr_file(x_i, y_i)
 
     # Return the bounds of the processed chunk
@@ -791,14 +790,18 @@ def process(args):
 
 
 # %% Loop over all the chunks
+# Can do it in reverse order because the last chunks are most likely to error
+in_reverse = False
 coords = itertools.product(
     range(num_chunks_per_dim[0]),
     range(num_chunks_per_dim[1]),
     range(num_chunks_per_dim[2]),
 )
-# Do it in reverse order because the last chunks are most likely to error
-reversed_coords = list(coords)
-reversed_coords.reverse()
+if in_reverse:
+    iter_coords = list(coords)
+    iter_coords.reverse()
+else:
+    iter_coords = coords
 
 # %% Move the data across with multiple workers
 # TODO because we are using non-aligned writes, we can't use multiple workers
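
One detail worth noting in the in_reverse branch above: itertools.product returns a one-shot iterator, which is why the reverse path has to materialize it with list() first; reversed() cannot be applied to a generator directly. A minimal illustration:

import itertools

coords = itertools.product(range(2), range(2))
# reversed(coords) would raise a TypeError: generators are not reversible sequences
iter_coords = list(coords)
iter_coords.reverse()  # equivalently: iter_coords = list(coords)[::-1]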
@@ -971,10 +974,15 @@ def upload_any_remaining_chunks():
 
 # %% Move the data across with a single worker
 total_uploaded_files = 0
-for coord in reversed_coords:
+# TEMP early quit for testing
+max_iters = 4
+for coord in iter_coords:
     bounds = process(coord)
     start, end = bounds
     processed_chunks_bounds.append((start, end))
+    if max_iters and len(processed_chunks_bounds) >= max_iters:
+        print("Reached max iterations for testing, stopping early")
+        break
 
     # Periodically check and upload completed chunks to save disk space
     # This is done every 10 chunks to balance upload frequency vs overhead
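
Two notes on this loop. First, after the reordering in the earlier hunk, process() returns None when load_file() fails, so unpacking bounds directly would raise a TypeError; a guard is needed. Second, the "every 10 chunks" upload code sits outside this hunk. A hedged sketch of both, assuming upload_any_remaining_chunks() returns the number of chunks it uploaded (as its usage elsewhere in the diff suggests):

bounds = process(coord)
if bounds is None:
    continue  # process() returned early: the source file could not be loaded
start, end = bounds
processed_chunks_bounds.append((start, end))

# Assumed shape of the periodic upload step described in the comment above:
if len(processed_chunks_bounds) % 10 == 0:
    total_uploaded_files += upload_any_remaining_chunks()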
@@ -986,15 +994,22 @@ def upload_any_remaining_chunks():
         for local_path, gcs_path in uploaded_files:
             f.write(f"{local_path} -> {gcs_path}\n")
 
-# Final upload of any remaining chunks - hopefully should be none here, but maybe some failed
-print("Processing complete, uploading any remaining chunks...")
-total_uploaded_files += upload_any_remaining_chunks()
-print(f"Final upload completed: {total_uploaded_files} chunks uploaded")
-
-# Write the list of uploaded files to a text file for reference
-with open(output_path / "uploaded_files.txt", "w") as f:
-    for local_path, gcs_path in uploaded_files:
-        f.write(f"{local_path} -> {gcs_path}\n")
+# %% Final upload of any remaining chunks - hopefully none remain, but some may have failed
+# This is not something we always want to run, so putting an input prompt here just in case
+continue_upload = input(
+    "Do you want to upload any remaining chunks in the output directory to GCS? (y/n): "
+)
+if continue_upload.lower() != "y":
+    print("Skipping final upload of remaining chunks.")
+else:
+    print("Processing complete, uploading any remaining chunks...")
+    total_uploaded_files += upload_any_remaining_chunks()
+    print(f"Final upload completed: {total_uploaded_files} chunks uploaded")
+
+    # Write the list of uploaded files to a text file for reference
+    with open(output_path / "uploaded_files.txt", "w") as f:
+        for local_path, gcs_path in uploaded_files:
+            f.write(f"{local_path} -> {gcs_path}\n")
 
 # %% Serve the dataset to be used in neuroglancer
 vols[0].viewer(port=1337)
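
Assuming vols[0] is a cloudvolume.CloudVolume, viewer() serves the local precomputed data over HTTP on the given port (the call blocks while serving). The layer source to paste into neuroglancer would then be along these lines (the exact URL form is an assumption about this setup, not taken from the diff):

# In the neuroglancer UI, add a layer with source:
#   precomputed://http://localhost:1337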