@@ -751,7 +751,7 @@ def process(args):
751751
752752# %% Function to check the output directory for completed chunks and upload them to GCS
753753
754- processed_chunks_bounds = None
754+ processed_chunks_bounds = []
755755uploaded_files = []
756756
757757
@@ -773,6 +773,8 @@ def check_and_upload_completed_chunks():
773773 # For each file in the output dir check if it is fully covered by the already processed bounds
774774 # First, we loop over all the files in the output directory
775775 for chunk_file in output_path_for_mip .glob ("**/*" ):
776+ if chunk_file in [uf [0 ] for uf in uploaded_files ]:
777+ continue
776778 # 1. Pull out the bounds of the chunk from the filename
777779 # filename format is x0-x1_y0-y1_z0-z1
778780 match = re .search (r"(\d+)-(\d+)_(\d+)-(\d+)_(\d+)-(\d+)" , str (chunk_file ))
@@ -790,15 +792,10 @@ def check_and_upload_completed_chunks():
790792 min (cb , vs ) for cb , vs in zip (chunk_bounds [1 ], volume_size )
791793 ]
792794 # 2. Check if the chunk is fully covered by the processed bounds
793- if all (
794- pb0 <= cb0 and pb1 >= cb1
795- for pb0 , pb1 , cb0 , cb1 in zip (
796- processed_chunks_bounds [0 ],
797- processed_chunks_bounds [1 ],
798- chunk_bounds [0 ],
799- chunk_bounds [1 ],
800- )
801- ):
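795+ # TODO: implement the coverage test against the (start, end) entries collected in processed_chunks_bounds; until then every chunk is treated as covered and uploaded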
796+ covered = True
797+
798+ if covered :
802799 # 3. If it is, upload it to GCS
803800 relative_path = chunk_file .relative_to (output_path )
804801 gcs_chunk_path = (
@@ -853,16 +850,7 @@ def upload_any_remaining_chunks():
853850for coord in reversed_coords :
854851 bounds = process (coord )
855852 start , end = bounds
856- if processed_chunks_bounds is None :
857- processed_chunks_bounds = [start , end ]
858- else :
859- processed_chunks_bounds [0 ] = [
860- min (ps , s ) for ps , s in zip (processed_chunks_bounds [0 ], start )
861- ]
862- processed_chunks_bounds [1 ] = [
863- max (pe , e ) for pe , e in zip (processed_chunks_bounds [1 ], end )
864- ]
865- print (f"Updated processed bounds: { processed_chunks_bounds } " )
853+ processed_chunks_bounds .append ((start , end ))
866854
867855 # Periodically check and upload completed chunks to save disk space
868856 # This is done every 10 chunks to balance upload frequency vs overhead
0 commit comments