Make image for update-egis-db (#1055)
The current Lambda uses AWS Lambda layers to manage its dependencies. This PR
switches to a container image-based Lambda for better environment
isolation and management.

ref: #1029
groutr authored Feb 11, 2025
1 parent 11acdbe commit 3b17070
Showing 11 changed files with 483 additions and 36 deletions.
43 changes: 43 additions & 0 deletions Core/LAMBDA/viz_functions/image_based/main.tf
@@ -87,6 +87,14 @@ variable "nwm_dataflow_version" {
type = string
}

variable "viz_cache_bucket" {
type = string
}

variable "lambda_role" {
type = string
}

locals {
viz_optimize_rasters_lambda_name = "hv-vpp-${var.environment}-viz-optimize-rasters"
viz_hand_fim_processing_lambda_name = "hv-vpp-${var.environment}-viz-hand-fim-processing"
@@ -527,6 +535,37 @@ module "schism-fim" {
viz_db_user_secret_string = var.viz_db_user_secret_string
}


############
# update_egis_data
#############
module "update-egis-data" {
source = "./viz_update_egis_data"
providers = {
aws = aws
aws.sns = aws.sns
aws.no_tags = aws.no_tags
}
environment = var.environment
account_id = var.account_id
region = var.region
ecr_repository_image_tag = var.ecr_repository_image_tag
lambda_role = var.lambda_role
security_groups = var.hand_fim_processing_sgs
subnets = var.hand_fim_processing_subnets
deployment_bucket = var.deployment_bucket
viz_db_name = var.viz_db_name
viz_db_host = var.viz_db_host
viz_db_user_secret_string = var.viz_db_user_secret_string
egis_db_host = var.egis_db_host
egis_db_name = var.egis_db_name
egis_db_user_secret_string = var.egis_db_user_secret_string
viz_cache_bucket = var.viz_cache_bucket
default_tags = var.default_tags
}

####################### OUTPUTS ###################

output "hand_fim_processing" {
value = data.aws_lambda_function.viz_hand_fim_processing
}
@@ -535,6 +574,10 @@ output "schism_fim" {
value = module.schism-fim
}

output "update_egis_data" {
value = module.update-egis-data.update_egis_data
}

output "optimize_rasters" {
value = data.aws_lambda_function.viz_optimize_rasters
}
@@ -0,0 +1,21 @@
version: 0.2

phases:
  pre_build:
    commands:
      - echo Logging in to Amazon ECR...
      - aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com
  build:
    commands:
      - echo Build started on `date`
      - echo Building the Docker image...
      - docker buildx build -t $IMAGE_REPO_NAME:$IMAGE_TAG .
      - docker tag $IMAGE_REPO_NAME:$IMAGE_TAG $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$IMAGE_REPO_NAME:$IMAGE_TAG
  post_build:
    commands:
      - echo Build completed on `date`
      - echo Pushing the Docker image...
      - docker push $AWS_ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/$IMAGE_REPO_NAME:$IMAGE_TAG
      - echo Updating lambda
      - npm install -g [email protected]
      - sls deploy
@@ -0,0 +1,42 @@
FROM mambaorg/micromamba AS micromamba

# Create conda environment for hydrovis
RUN --mount=type=bind,source=conda-linux-64.lock,target=/tmp/conda.lock \
micromamba install -y --file /tmp/conda.lock && \
micromamba clean -afy

RUN find /opt/conda -regex '^.*\(__pycache__\|\.py[co]\)$' -delete && \
find /opt/conda -follow -type f -name '*.a' -delete
RUN rm -rf /opt/conda/man /opt/conda/share/man /opt/conda/share/doc && \
rm -rf /opt/conda/include && \
rm -rf /opt/conda/share/bash-completion && \
rm -rf /opt/conda/share/info && \
rm -rf /opt/conda/share/examples && \
rm -rf /opt/conda/conda-meta


FROM public.ecr.aws/amazonlinux/amazonlinux:2023-minimal AS alinux

ENV MAMBA_ROOT_PREFIX="/opt/conda"
ENV MAMBA_EXE="/bin/micromamba"
# Prevent Python from writing bytecode files
ENV PYTHONDONTWRITEBYTECODE=1
# Prevent python from buffering stdout
ENV PYTHONUNBUFFERED=1
COPY --from=micromamba --chown=root "$MAMBA_EXE" "$MAMBA_EXE"
COPY --from=micromamba --chown=root /opt/conda /opt/conda
COPY --from=micromamba --chown=root /usr/local/bin/_*.sh /usr/local/bin/

WORKDIR /home/code

# Copy function code
COPY viz_classes.py \
lambda_function.py \
./

SHELL ["/usr/local/bin/_dockerfile_shell.sh"]

ENTRYPOINT [ "/usr/local/bin/_entrypoint.sh" ]
CMD [ \
"python", "-m", "awslambdaric", "lambda_function.lambda_handler" \
]
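
The ENTRYPOINT/CMD above start the handler through awslambdaric, the AWS Lambda Runtime Interface Client, so the image speaks the standard Lambda runtime API. As a minimal local smoke-test sketch, assuming the container is run behind the AWS Lambda Runtime Interface Emulator on port 9000 (none of this is part of the commit, and the event payload below is purely illustrative):

# local_invoke.py -- hypothetical smoke test; event contents and image name are made up.
# Assumes something like: docker run -p 9000:8080 <account>.dkr.ecr.<region>.amazonaws.com/<repo>:<tag>
# with the Lambda Runtime Interface Emulator fronting the awslambdaric entrypoint.
import json
import urllib.request

# Illustrative payload only; real events come from the viz pipeline and carry DB/S3 details.
event = {"step": "unstage_db_tables", "args": {"product": {"product": "example_product"}}}

req = urllib.request.Request(
    "http://localhost:9000/2015-03-31/functions/function/invocations",
    data=json.dumps(event).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    # The handler needs AWS credentials and database access to do real work;
    # this only exercises the image's invocation path.
    print(resp.read().decode("utf-8"))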
@@ -0,0 +1,62 @@
# Generated by conda-lock.
# platform: linux-64
# input_hash: 6ca447a0e1fc3b0aad87fef4a6dac05474f62be72dcb9193472d8d688724546b
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2024.12.14-hbcca054_0.conda#720523eb0d6a9b0f6120c16b2aa4e7de
https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.12-5_cp312.conda#0424ae29b104430108f5218a66db7260
https://conda.anaconda.org/conda-forge/noarch/tzdata-2025a-h78e105d_0.conda#dbcace4706afdfb7eb891f7b37d07c04
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.43-h712a8e2_2.conda#048b02e3962f066da18efe3a21b77672
https://conda.anaconda.org/conda-forge/linux-64/libgomp-14.2.0-h77fa898_1.conda#cc3573974587f12dda90d96e3e55a702
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
https://conda.anaconda.org/conda-forge/linux-64/libgcc-14.2.0-h77fa898_1.conda#3cb76c3f10d3bc7f1105b2fc9db984df
https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.6.4-h5888daf_0.conda#db833e03127376d461e1e13e76f09b6c
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-14.2.0-h69a702a_1.conda#e39480b9ca41323497b05492a63bc35b
https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.6.3-hb9d3cd8_1.conda#2ecf2f1c7e4e21fcfe6423a51a992d84
https://conda.anaconda.org/conda-forge/linux-64/libntlm-1.8-hb9d3cd8_0.conda#7c7927b404672409d9917d49bff5f2d6
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-14.2.0-hc0a3c3a_1.conda#234a5554c53625688d51062645337328
https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_2.conda#04b34b9a40cdc48cfdab261ab176ff74
https://conda.anaconda.org/conda-forge/linux-64/openssl-3.4.0-h7b32b05_1.conda#4ce6875f75469b2757a65e10a5d05e31
https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h4bc722e_7.conda#62ee74e96c5ebb0af99386de58cf9553
https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3
https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20240808-pl5321h7949ede_0.conda#8247f80f3dc464d9322e85007e307fe8
https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3
https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.1-hd590300_0.conda#30fd6e37fe21f86f4bd26d6ee73eeec7
https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.48.0-hee588c1_1.conda#3fa05c528d8a1e2a67bbf1e36f22d3bc
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-14.2.0-h4852527_1.conda#8371ac6457591af2cf6159439c1fd051
https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b
https://conda.anaconda.org/conda-forge/linux-64/libxcrypt-4.4.36-hd590300_1.conda#5aa797f8787fe7a17d1b0821485b5adc
https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4
https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_h4845f30_101.conda#d453b98d9c83e71da0741bb0ff4d76bc
https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda#8b189310083baabfb622af68fd9d3ae3
https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
https://conda.anaconda.org/conda-forge/linux-64/python-3.12.8-h9e4cc4f_1_cpython.conda#7fd2fd79436d9b473812f14e86746844
https://conda.anaconda.org/conda-forge/noarch/aiohappyeyeballs-2.4.4-pyhd8ed1ab_1.conda#296b403617bafa89df4971567af79013
https://conda.anaconda.org/conda-forge/noarch/attrs-24.3.0-pyh71513ae_0.conda#356927ace43302bf6f5926e2a58dae6a
https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py312h2ec8cdc_2.conda#b0b867af6fc74b2a0aa206da29c0f3cf
https://conda.anaconda.org/conda-forge/linux-64/cyrus-sasl-2.1.27-h54b06d7_7.conda#dce22f70b4e5a407ce88f2be046f4ceb
https://conda.anaconda.org/conda-forge/linux-64/frozenlist-1.5.0-py312h66e93f0_0.conda#f98e36c96b2c66d9043187179ddb04f4
https://conda.anaconda.org/conda-forge/noarch/fsspec-2024.12.0-pyhd8ed1ab_0.conda#e041ad4c43ab5e10c74587f95378ebc7
https://conda.anaconda.org/conda-forge/noarch/idna-3.10-pyhd8ed1ab_1.conda#39a4f67be3286c86d696df570b1201b7
https://conda.anaconda.org/conda-forge/noarch/jmespath-1.0.1-pyhd8ed1ab_1.conda#972bdca8f30147135f951847b30399ea
https://conda.anaconda.org/conda-forge/linux-64/multidict-6.1.0-py312h178313f_2.conda#5b5e3267d915a107eca793d52e1b780a
https://conda.anaconda.org/conda-forge/linux-64/propcache-0.2.1-py312h66e93f0_0.conda#55d5742a696d7da1c1262e99b6217ceb
https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha55dd90_7.conda#461219d1a5bd61342293efa2c0c90eac
https://conda.anaconda.org/conda-forge/linux-64/simplejson-3.19.3-py312h66e93f0_1.conda#c8d1a609d5f3358d715c2273011d9f4d
https://conda.anaconda.org/conda-forge/noarch/six-1.17.0-pyhd8ed1ab_0.conda#a451d576819089b0d672f18768be0f65
https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.12.2-pyha770c72_1.conda#d17f13df8b65464ca316cbc000a3cb64
https://conda.anaconda.org/conda-forge/linux-64/wrapt-1.17.2-py312h66e93f0_0.conda#669e63af87710f8d52fdec9d4d63b404
https://conda.anaconda.org/conda-forge/noarch/aioitertools-0.12.0-pyhd8ed1ab_1.conda#3eb47adbffac44483f59e580f8600a1e
https://conda.anaconda.org/conda-forge/noarch/aiosignal-1.3.2-pyhd8ed1ab_0.conda#1a3981115a398535dbe3f6d5faae3d36
https://conda.anaconda.org/conda-forge/linux-64/awslambdaric-2.2.0-py312hca68cad_0.conda#f2c87d6a5b21b95c550efb1c6e0ad9f7
https://conda.anaconda.org/conda-forge/linux-64/openldap-2.6.9-he970967_0.conda#ca2de8bbdc871bce41dbf59e51324165
https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.9.0.post0-pyhff2d567_1.conda#5ba79d7c71f03c678c8ead841f347d6e
https://conda.anaconda.org/conda-forge/noarch/urllib3-1.26.19-pyhd8ed1ab_0.conda#6bb37c314b3cc1515dcf086ffe01c46e
https://conda.anaconda.org/conda-forge/linux-64/yarl-1.18.3-py312h66e93f0_0.conda#91df2efaa08730416bec2a4502309275
https://conda.anaconda.org/conda-forge/linux-64/aiohttp-3.11.11-py312h178313f_0.conda#8219afa093757bbe07b9825eb1973ed9
https://conda.anaconda.org/conda-forge/noarch/botocore-1.36.1-pyge310_1234567_0.conda#8be0efb498e635e845c2239437c7eaf3
https://conda.anaconda.org/conda-forge/linux-64/libpq-17.2-h3b95a9b_1.conda#37724d8bae042345a19ca1a25dde786b
https://conda.anaconda.org/conda-forge/noarch/aiobotocore-2.18.0-pyhd8ed1ab_0.conda#6fee4b76792b913ba69c62e4899ea130
https://conda.anaconda.org/conda-forge/linux-64/psycopg2-2.9.9-py312hfaedaf9_2.conda#4a195f7a305e5b6ece2e2e93c4795d77
https://conda.anaconda.org/conda-forge/noarch/s3fs-2024.12.0-pyhd8ed1ab_0.conda#d91e140ebbb494372695d7b5ac829c09
@@ -1,8 +1,23 @@
import boto3
import fsspec
import os
from viz_classes import database
from viz_lambda_shared_funcs import gen_dict_extract
from datetime import datetime
from itertools import chain


def find_target_tables(mapping):
    if isinstance(mapping, dict):
        for k, v in mapping.items():
            if k == 'target_table':
                if isinstance(v, str):
                    yield [v]
                else:
                    yield v
            else:
                yield from find_target_tables(v)
    elif isinstance(mapping, list):
        for item in mapping:
            yield from find_target_tables(item)
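
# Hypothetical usage sketch, not part of this commit: find_target_tables walks the
# nested event args and yields each 'target_table' value as a list, so
# itertools.chain.from_iterable can flatten string and list values uniformly.
#
#   args = {
#       "postprocess_sql": [{"target_table": "publish.foo"}],
#       "postprocess_summary": {"target_table": ["publish.bar", "publish.baz"]},
#   }
#   list(chain.from_iterable(find_target_tables(args)))
#   # -> ['publish.foo', 'publish.bar', 'publish.baz']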

###################################
def lambda_handler(event, context):
@@ -24,21 +39,16 @@ def lambda_handler(event, context):
if "unstage" in step:
if step == "unstage_db_tables":
print(f"Unstaging tables for {event['args']['product']['product']}")
target_tables = list(gen_dict_extract("target_table", event['args']))
all_single_tables = [table for table in target_tables if type(table) is not list]
all_list_tables = [table for table in target_tables if type(table) is list]
all_list_tables = [table for table_list in all_list_tables for table in table_list]

all_tables = all_single_tables + all_list_tables
publish_tables = [table for table in all_tables if table.startswith("publish")]
dest_tables = [f"services.{table.split('.')[1]}" for table in publish_tables]
target_tables = chain.from_iterable(find_target_tables(event['args']))
publish_tables = (x for x in target_tables if x.startswith("publish"))
dest_tables = [table.replace("publish", "services", 1) for table in publish_tables]

egis_db = database(db_type="egis")
unstage_db_tables(egis_db, dest_tables)
elif step == "unstage_rasters":
################### Move Rasters ###################
print(f"Moving and caching rasters for {event['args']['product']['product']}")
s3 = boto3.resource('s3')
s3 = fsspec.filesystem('s3')
s3_bucket = event['args']['raster_output_bucket']
output_raster_workspace = list(event['args']['raster_output_workspace'].values())[0]

@@ -61,18 +71,18 @@

workspace_rasters = list_s3_files(s3_bucket, output_raster_workspace)
for s3_key in workspace_rasters:
s3_object = {"Bucket": s3_bucket, "Key": s3_key}
s3_object = f"{s3_bucket}/{s3_key}"
s3_filename = os.path.basename(s3_key)
s3_extension = os.path.splitext(s3_filename)[1]
cache_key = f"{cache_path}/{s3_filename}"

print(f"Caching {s3_key} at {cache_key}")
s3.meta.client.copy(s3_object, s3_bucket, cache_key)
s3.copy(s3_object, f"{s3_bucket}/{cache_key}")

if published_format == 'tif' and job_type == 'auto':
tif_published_key = f"{processing_prefix}/published/{s3_filename}"
print(f"Moving {s3_object} to published location at {tif_published_key}")
s3.meta.client.copy(s3_object, s3_bucket, tif_published_key)
s3.copy(s3_object, f"{s3_bucket}/{tif_published_key}")
elif published_format == 'mrf':
raster_name = s3_filename.replace(s3_extension, "")
mrf_workspace_prefix = s3_key.replace("/tif/", "/mrf/").replace(s3_extension, "")
@@ -84,21 +94,21 @@
process_extensions = [s3_extension[1:]]

for extension in process_extensions:
mrf_workspace_raster = {"Bucket": s3_bucket, "Key": f"{mrf_workspace_prefix}.{extension}"}
mrf_workspace_raster = f"{s3_bucket}/{mrf_workspace_prefix}.{extension}"
mrf_published_raster = f"{published_prefix}.{extension}"

if job_type == 'auto':
print(f"Moving {mrf_workspace_prefix}.{extension} to published location at {mrf_published_raster}")
s3.meta.client.copy(mrf_workspace_raster, s3_bucket, mrf_published_raster)
s3.copy(mrf_workspace_raster, f"{s3_bucket}/{mrf_published_raster}")

print("Deleting a mrf workspace raster")
s3.Object(s3_bucket, f"{mrf_workspace_prefix}.{extension}").delete()
s3.rm_file(mrf_workspace_raster)

return True

################### Stage EGIS Tables ###################
elif "summary_data" in step:
tables = event['args']['postprocess_summary']['target_table']
elif "fim_config_data" in step:
if not event['args']['fim_config'].get('postprocess'):
return
Expand All @@ -114,7 +124,7 @@ def lambda_handler(event, context):
viz_schema = 'archive'

# Get the table names without the schemas
tables = [table.split(".")[1] for table in tables if table.split(".")[0]==viz_schema]
tables = [tn[1] for table in tables if (tn := table.split("."))[0]==viz_schema]

## For Staging and Caching - Loop through all the tables relevant to the current step
for table in tables:
@@ -218,18 +228,6 @@ def refresh_fdw_schema(db, local_schema, remote_server, remote_schema):

##################################
def list_s3_files(bucket, prefix):
s3 = boto3.client('s3')
files = []
paginator = s3.get_paginator('list_objects_v2')
for result in paginator.paginate(Bucket=bucket, Prefix=prefix):
if not result['KeyCount']:
continue

for key in result['Contents']:
# Skip folders
if not key['Key'].endswith('/'):
files.append(key['Key'])

if not files:
print(f"No files found at {bucket}/{prefix}")
return files
fs = fsspec.filesystem('s3')
files = fs.ls(f"{bucket}/{prefix}", detail=True)
# fs.ls returns bucket-prefixed paths; return bare keys so callers can keep building
# f"{bucket}/{key}" paths and calling os.path.basename on them
return [f['name'].split('/', 1)[1] for f in files if f['type'] == 'file']
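
The changes in this file swap boto3 resource/client calls for fsspec's S3 filesystem. A hedged, standalone sketch of the same operations (bucket and key names below are placeholders, not taken from this commit):

# fsspec_s3_sketch.py -- illustrative only; assumes s3fs is installed and AWS credentials
# are available in the environment.
import fsspec

fs = fsspec.filesystem("s3")  # backed by s3fs

# List objects under a prefix; detail=True returns dicts with 'name', 'type', 'size', ...
objects = [o for o in fs.ls("example-bucket/workspace/tif", detail=True) if o["type"] == "file"]

# Copy within S3 (analogous to the boto3 client copy used previously)
fs.copy("example-bucket/workspace/tif/raster.tif", "example-bucket/cache/raster.tif")

# Delete a single object (analogous to boto3's Object(...).delete())
fs.rm_file("example-bucket/workspace/mrf/raster.mrf")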
@@ -0,0 +1,10 @@
name: update_egis_data
channels:
  - conda-forge
  - nodefaults
dependencies:
  - python=3.12
  - awslambdaric
  - fsspec
  - s3fs
  - psycopg2