From fcf07f21ad30bf6b9c97f4888ea5671f6ae10b94 Mon Sep 17 00:00:00 2001 From: EddieLF <34049565+EddieLF@users.noreply.github.com> Date: Wed, 17 Jan 2024 10:52:29 +1100 Subject: [PATCH] Update md5 creating script for requester pays buckets (#651) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update create_md5s.py script to use billing project in gsutil commands * Bump version: 6.6.2 → 6.6.3 * Fix missing line * Use GCP billing project not Hail billing project * Linting * Revert bumpversion, ignore mypy errors in api/server.py --- api/server.py | 4 ++-- scripts/create_md5s.py | 21 +++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/api/server.py b/api/server.py index 574b341f5..652aa4322 100644 --- a/api/server.py +++ b/api/server.py @@ -140,11 +140,11 @@ async def exception_handler(request: Request, e: Exception): cors_middleware = middlewares[0] request_origin = request.headers.get('origin', '') - if cors_middleware and '*' in cors_middleware.options['allow_origins']: + if cors_middleware and '*' in cors_middleware.options['allow_origins']: # type: ignore response.headers['Access-Control-Allow-Origin'] = '*' elif ( cors_middleware - and request_origin in cors_middleware.options['allow_origins'] + and request_origin in cors_middleware.options['allow_origins'] # type: ignore ): response.headers['Access-Control-Allow-Origin'] = request_origin diff --git a/scripts/create_md5s.py b/scripts/create_md5s.py index 488ec8297..f99f64906 100644 --- a/scripts/create_md5s.py +++ b/scripts/create_md5s.py @@ -12,24 +12,25 @@ def create_md5s_for_files_in_directory(skip_filetypes: tuple[str, str], force_re if not gs_dir.startswith('gs://'): raise ValueError(f'Expected GS directory, got: {gs_dir}') - billing_project = get_config()['hail']['billing_project'] + billing_project = get_config()['workflow']['gcp_billing_project'] driver_image = get_config()['workflow']['driver_image'] bucket_name, *components = gs_dir[5:].split('/') client = storage.Client() - blobs = client.list_blobs(bucket_name, prefix='/'.join(components)) + bucket = client.bucket(bucket_name, user_project=billing_project) + blobs = bucket.list_blobs(prefix='/'.join(components)) files: set[str] = {f'gs://{bucket_name}/{blob.name}' for blob in blobs} - for obj in files: - if obj.endswith('.md5') or obj.endswith(skip_filetypes): + for filepath in files: + if filepath.endswith('.md5') or filepath.endswith(skip_filetypes): continue - if f'{obj}.md5' in files and not force_recreate: - print(f'{obj}.md5 already exists, skipping') + if f'{filepath}.md5' in files and not force_recreate: + print(f'{filepath}.md5 already exists, skipping') continue - print('Creating md5 for', obj) - job = b.new_job(f'Create {os.path.basename(obj)}.md5') - create_md5(job, obj, billing_project, driver_image) + print('Creating md5 for', filepath) + job = b.new_job(f'Create {os.path.basename(filepath)}.md5') + create_md5(job, filepath, billing_project, driver_image) b.run(wait=False) @@ -46,7 +47,7 @@ def create_md5(job, file, billing_project, driver_image): f"""\ set -euxo pipefail gcloud -q auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS - gsutil cat {file} | md5sum | cut -d " " -f1 > /tmp/uploaded.md5 + gsutil -u {billing_project} cat {file} | md5sum | cut -d " " -f1 > /tmp/uploaded.md5 gsutil -u {billing_project} cp /tmp/uploaded.md5 {md5} """ )