Skip to content

Commit e8ca0dd

Browse files
committed
feat: add option to not overwrite gcs files
1 parent 3c08b7e commit e8ca0dd

File tree

2 files changed

+24
-20
lines changed

2 files changed

+24
-20
lines changed

examples/.env.example

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,25 +7,19 @@ GCS_BUCKET_NAME=your-bucket-name
77
GCS_PREFIX=path/to/zarr/files/
88
GCS_FILE_EXTENSION=.zarr
99

10-
# Local paths (used when USE_GCS_BUCKET=false)
11-
INPUTFOLDER=/media/xyz
10+
# Output GCS bucket configuration (for uploading processed results)
11+
USE_GCS_OUTPUT=false
12+
GCS_OUTPUT_BUCKET_NAME=your-output-bucket-name
13+
GCS_OUTPUT_PREFIX=processed/
14+
15+
# Local paths (used to store GCS data, or when USE_GCS_BUCKET=false)
16+
INPUT_PATH=/media/xyz
1217
OUTPUT_PATH=/media/zyx
1318

1419
# Processing settings
1520
OVERWRITE=false
1621
NUM_MIPS=5
1722
MIP_CUTOFF=0
1823
CHANNEL_LIMIT=4
19-
NUM_ROWS=3
20-
NUM_COLS=6
2124
ALLOW_NON_ALIGNED_WRITE=false
22-
23-
# Optional resolution settings
24-
SIZE_X=1
25-
SIZE_Y=1
26-
SIZE_Z=1
27-
28-
# Optional chunk settings
29-
CHUNK_SIZE_X=64
30-
CHUNK_SIZE_Y=64
31-
CHUNK_SIZE_Z=32
25+
MANUAL_CHUNK_SIZE=64,64,32

examples/create_downampled.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def load_env_config():
7272
"OUTPUT_PATH": Path(os.getenv("OUTPUT_PATH", "/temp/out")),
7373
# Processing settings
7474
"OVERWRITE": parse_bool(os.getenv("OVERWRITE", "false")),
75+
"OVERWRITE_GCS": parse_bool(os.getenv("OVERWRITE_GCS", "false")),
7576
"NUM_MIPS": int(os.getenv("NUM_MIPS", "5")),
7677
"MIP_CUTOFF": int(os.getenv("MIP_CUTOFF", "0")),
7778
"CHANNEL_LIMIT": int(os.getenv("CHANNEL_LIMIT", "4")),
@@ -103,6 +104,7 @@ def load_env_config():
103104
input_path = config["INPUT_PATH"]
104105
output_path = config["OUTPUT_PATH"]
105106
overwrite_output = config["OVERWRITE"]
107+
overwrite_gcs = config["OVERWRITE_GCS"]
106108
num_mips = config["NUM_MIPS"]
107109
mip_cutoff = config["MIP_CUTOFF"]
108110
channel_limit = config["CHANNEL_LIMIT"]
@@ -401,13 +403,14 @@ def sync_info_to_gcs_output():
401403
upload_file_to_gcs(local_info_path, gcs_info_path)
402404

403405

404-
def upload_file_to_gcs(local_file_path, gcs_file_path):
406+
def upload_file_to_gcs(local_file_path, gcs_file_path, overwrite=True):
405407
"""
406408
Upload a single file (chunk or info file) to the GCS output bucket.
407409
408410
Args:
409-
local_chunk_path: Path to local chunk file
410-
gcs_chunk_path: GCS blob path for the chunk
411+
local_file_path: Path to local chunk file
412+
gcs_file_path: GCS blob path for the chunk
413+
overwrite: If False, skip the upload when the blob already exists in GCS (the function still returns True in that case)
411414
412415
Returns:
413416
bool: True if successful, False otherwise
@@ -420,6 +423,12 @@ def upload_file_to_gcs(local_file_path, gcs_file_path):
420423
bucket = client.bucket(gcs_output_bucket_name)
421424

422425
blob = bucket.blob(gcs_file_path)
426+
427+
# Check if file already exists and overwrite is False
428+
if not overwrite and blob.exists():
429+
print(f"File {gcs_file_path} already exists in GCS, skipping upload")
430+
return True
431+
423432
blob.upload_from_filename(str(local_file_path))
424433

425434
return True
@@ -858,7 +867,8 @@ def check_and_upload_completed_chunks():
858867
+ "/"
859868
+ str(relative_path).replace("\\", "/")
860869
)
861-
if upload_file_to_gcs(chunk_file, gcs_chunk_path):
870+
# When overwrite_gcs is False, files that already exist in GCS are skipped (still counted as uploaded)
871+
if upload_file_to_gcs(chunk_file, gcs_chunk_path, overwrite=overwrite_gcs):
862872
uploaded_count += 1
863873
# Remove local chunk to save space
864874
if use_gcs_output:
@@ -892,7 +902,7 @@ def upload_any_remaining_chunks():
892902
+ "/"
893903
+ str(relative_path).replace("\\", "/")
894904
)
895-
if upload_file_to_gcs(chunk_file, gcs_chunk_path):
905+
if upload_file_to_gcs(chunk_file, gcs_chunk_path, overwrite=overwrite_gcs):
896906
uploaded_count += 1
897907
# Remove local chunk to save space
898908
if use_gcs_output:
@@ -930,6 +940,6 @@ def upload_any_remaining_chunks():
930940
f.write(f"{local_path} -> {gcs_path}\n")
931941

932942
# %% Serve the dataset to be used in neuroglancer
933-
# vols[0].viewer(port=1337)
943+
vols[0].viewer(port=1337)
934944

935945
# %%

0 commit comments

Comments
 (0)