Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sdlf-foundations][cdk] data_bucket method to handle raw/stage/analytics buckets creation #421

Merged
merged 1 commit into from
Oct 22, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 59 additions & 112 deletions sdlf-foundations/src/foundations.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,29 +135,29 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
],
)

lakeformationdataaccess_role = iam.Role(
self.lakeformationdataaccess_role = iam.Role(
self,
"rLakeFormationDataAccessRole",
assumed_by=iam.CompositePrincipal(
iam.ServicePrincipal("lakeformation.amazonaws.com"),
iam.ServicePrincipal("glue.amazonaws.com"),
),
)
lakeformationdataaccess_role.attach_inline_policy(lakeformationdataaccess_role_policy)
self.lakeformationdataaccess_role.attach_inline_policy(lakeformationdataaccess_role_policy)

ssm.StringParameter(
self,
"rLakeFormationDataAccessRoleSsm",
description="Lake Formation Data Access Role",
parameter_name="/SDLF/IAM/LakeFormationDataAccessRoleArn",
string_value=lakeformationdataaccess_role.role_arn,
string_value=self.lakeformationdataaccess_role.role_arn,
)
ssm.StringParameter(
self,
"rLakeFormationDataAccessRoleNameSsm",
description="Lake Formation Data Access Role",
parameter_name="/SDLF/IAM/LakeFormationDataAccessRole",
string_value=lakeformationdataaccess_role.role_name,
string_value=self.lakeformationdataaccess_role.role_name,
)

######## KMS #########
Expand Down Expand Up @@ -264,36 +264,36 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
sid="Allow LakeFormation access",
effect=iam.Effect.ALLOW,
principals=[
iam.ArnPrincipal(lakeformationdataaccess_role.role_arn),
iam.ArnPrincipal(self.lakeformationdataaccess_role.role_arn),
],
actions=["kms:Encrypt*", "kms:Decrypt*", "kms:ReEncrypt*", "kms:GenerateDataKey*", "kms:Describe*"],
resources=["*"],
),
]
)

kms_key = kms.Key(
self.kms_key = kms.Key(
self,
"rKMSKey",
removal_policy=RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE,
description="SDLF Foundations KMS Key",
enable_key_rotation=True,
policy=kms_key_policy,
)
kms_key.add_alias("alias/sdlf-kms-key").apply_removal_policy(RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE)
self.kms_key.add_alias("alias/sdlf-kms-key").apply_removal_policy(RemovalPolicy.RETAIN_ON_UPDATE_OR_DELETE)

ssm.StringParameter(
self,
"rKMSKeySsm",
description="ARN of the KMS key",
parameter_name="/SDLF/KMS/KeyArn",
string_value=kms_key.key_arn,
string_value=self.kms_key.key_arn,
)

######## S3 #########
####### Access Logging Bucket ######
access_logs_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-s3logs"
access_logs_bucket = s3.Bucket(
self.access_logs_bucket = s3.Bucket(
self,
"rS3AccessLogsBucket",
bucket_name=access_logs_bucket_name, # TODO
Expand All @@ -318,7 +318,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
),
],
encryption=s3.BucketEncryption.KMS,
encryption_key=kms_key,
encryption_key=self.kms_key,
bucket_key_enabled=True,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
)
Expand All @@ -327,18 +327,18 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
"rS3AccessLogsBucketSsm",
description="S3 Access Logs Bucket",
parameter_name="/SDLF/S3/AccessLogsBucket",
string_value=access_logs_bucket.bucket_name,
string_value=self.access_logs_bucket.bucket_name,
)

artifacts_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-artifacts"
artifacts_bucket = s3.Bucket(
self,
"rArtifactsBucket",
bucket_name=artifacts_bucket_name, # TODO
server_access_logs_bucket=access_logs_bucket, # automatically add policy statement to access logs bucket policy
server_access_logs_bucket=self.access_logs_bucket, # automatically add policy statement to access logs bucket policy
server_access_logs_prefix=artifacts_bucket_name,
encryption=s3.BucketEncryption.KMS,
encryption_key=kms_key,
encryption_key=self.kms_key,
bucket_key_enabled=True,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
enforce_ssl=True,
Expand All @@ -351,105 +351,19 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
string_value=artifacts_bucket.bucket_name,
)

raw_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-raw"
raw_bucket = s3.Bucket(
self,
"rRawBucket",
bucket_name=raw_bucket_name, # TODO
server_access_logs_bucket=access_logs_bucket,
server_access_logs_prefix=raw_bucket_name,
encryption=s3.BucketEncryption.KMS,
encryption_key=kms_key,
bucket_key_enabled=True,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
enforce_ssl=True,
versioned=True,
event_bridge_enabled=True,
)
lakeformation.CfnResource(
self,
"rRawBucketLakeFormationS3Registration",
resource_arn=f"{raw_bucket.bucket_arn}/", # the trailing slash is important to Lake Formation somehow
use_service_linked_role=False,
role_arn=lakeformationdataaccess_role.role_arn,
)
ssm.StringParameter(
self,
"rS3RawBucketSsm",
description="Name of the Raw S3 bucket",
parameter_name="/SDLF/S3/RawBucket",
string_value=raw_bucket.bucket_name,
)

stage_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-stage"
stage_bucket = s3.Bucket(
self,
"rStageBucket",
bucket_name=stage_bucket_name, # TODO
server_access_logs_bucket=access_logs_bucket,
server_access_logs_prefix=stage_bucket_name,
encryption=s3.BucketEncryption.KMS,
encryption_key=kms_key,
bucket_key_enabled=True,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
enforce_ssl=True,
versioned=True,
event_bridge_enabled=True,
)
lakeformation.CfnResource(
self,
"rStageBucketLakeFormationS3Registration",
resource_arn=f"{stage_bucket.bucket_arn}/", # the trailing slash is important to Lake Formation somehow
use_service_linked_role=False,
role_arn=lakeformationdataaccess_role.role_arn,
)
ssm.StringParameter(
self,
"rS3StageBucketSsm",
description="Name of the Stage S3 bucket",
parameter_name="/SDLF/S3/StageBucket",
string_value=stage_bucket.bucket_name,
)

analytics_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-analytics"
analytics_bucket = s3.Bucket(
self,
"rAnalyticsBucket",
bucket_name=analytics_bucket_name, # TODO
server_access_logs_bucket=access_logs_bucket,
server_access_logs_prefix=analytics_bucket_name,
encryption=s3.BucketEncryption.KMS,
encryption_key=kms_key,
bucket_key_enabled=True,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
enforce_ssl=True,
versioned=True,
event_bridge_enabled=True,
)
lakeformation.CfnResource(
self,
"rAnalyticsBucketLakeFormationS3Registration",
resource_arn=f"{analytics_bucket.bucket_arn}/", # the trailing slash is important to Lake Formation somehow
use_service_linked_role=False,
role_arn=lakeformationdataaccess_role.role_arn,
)
ssm.StringParameter(
self,
"rS3AnalyticsBucketSsm",
description="Name of the Analytics S3 bucket",
parameter_name="/SDLF/S3/AnalyticsBucket",
string_value=analytics_bucket.bucket_name,
)
raw_bucket = self.data_bucket(p_org.value_as_string, p_domain.value_as_string, p_environment.value_as_string, scope.region, scope.account, "raw")
stage_bucket = self.data_bucket(p_org.value_as_string, p_domain.value_as_string, p_environment.value_as_string, scope.region, scope.account, "stage")
analytics_bucket = self.data_bucket(p_org.value_as_string, p_domain.value_as_string, p_environment.value_as_string, scope.region, scope.account, "analytics")

athena_bucket_name = f"{p_org.value_as_string}-{p_domain.value_as_string}-{p_environment.value_as_string}-{scope.region}-{scope.account}-athena"
athena_bucket = s3.Bucket(
self,
"rAthenaBucket",
bucket_name=athena_bucket_name, # TODO
server_access_logs_bucket=access_logs_bucket,
server_access_logs_bucket=self.access_logs_bucket,
server_access_logs_prefix=athena_bucket_name,
encryption=s3.BucketEncryption.KMS,
encryption_key=kms_key,
encryption_key=self.kms_key,
bucket_key_enabled=True,
block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
enforce_ssl=True,
Expand Down Expand Up @@ -507,7 +421,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
),
],
)
lakeformationdataaccess_role.attach_inline_policy(s3_lakeformationdataaccess_role_policy)
self.lakeformationdataaccess_role.attach_inline_policy(s3_lakeformationdataaccess_role_policy)

######## Lambda & SQS #########
catalog_dlq = sqs.Queue(
Expand All @@ -517,7 +431,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
queue_name="sdlf-catalog-dlq",
retention_period=Duration.days(14),
visibility_timeout=Duration.seconds(60),
encryption_master_key=kms_key,
encryption_master_key=self.kms_key,
)

catalog_queue = sqs.Queue(
Expand All @@ -527,7 +441,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
queue_name="sdlf-catalog-queue",
retention_period=Duration.days(7),
visibility_timeout=Duration.seconds(60),
encryption_master_key=kms_key,
encryption_master_key=self.kms_key,
dead_letter_queue=sqs.DeadLetterQueue(
max_receive_count=1,
queue=catalog_dlq,
Expand Down Expand Up @@ -633,7 +547,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
"kms:GenerateDataKey*",
"kms:ReEncrypt*",
],
resources=[kms_key.key_arn],
resources=[self.kms_key.key_arn],
),
iam.PolicyStatement(
actions=[
Expand Down Expand Up @@ -671,7 +585,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
timeout=Duration.seconds(60),
role=lambdaexecution_role,
environment={"ENV": p_environment.value_as_string},
environment_encryption=kms_key,
environment_encryption=self.kms_key,
# vpcconfig TODO
)

Expand All @@ -687,7 +601,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
timeout=Duration.seconds(60),
role=lambdaexecution_role,
environment={"QUEUE": catalog_queue.queue_name, "DLQ": catalog_dlq.queue_name},
environment_encryption=kms_key,
environment_encryption=self.kms_key,
# vpcconfig TODO
)
logs.LogGroup(
Expand All @@ -697,7 +611,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
log_group_name=f"/aws/lambda/{catalog_redrive_function.function_name}",
retention=logs.RetentionDays.ONE_MONTH,
# retention=Duration.days(p_cloudwatchlogsretentionindays.value_as_number),
encryption_key=kms_key,
encryption_key=self.kms_key,
)

catalog_function.add_event_source(eventsources.SqsEventSource(catalog_queue, batch_size=10))
Expand All @@ -715,7 +629,7 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
table_name=f"octagon-ObjectMetadata-{p_environment.value_as_string}",
stream=ddb.StreamViewType.NEW_AND_OLD_IMAGES,
encryption=ddb.TableEncryption.CUSTOMER_MANAGED,
encryption_key=kms_key,
encryption_key=self.kms_key,
point_in_time_recovery=True,
)
ssm.StringParameter(
Expand All @@ -740,3 +654,36 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None:
description="Name of the domain's Artifacts S3 bucket",
value=artifacts_bucket.bucket_name,
)

def data_bucket(self, org, domain, environment, region, account, bucket_layer):
    """Create the S3 bucket for one data lake layer, plus its companions.

    Three constructs are created in this scope, in this order:

    1. an ``s3.Bucket`` named
       ``{org}-{domain}-{environment}-{region}-{account}-{bucket_layer}``,
       encrypted with ``self.kms_key`` and logging to
       ``self.access_logs_bucket`` under a prefix equal to the bucket name;
    2. a ``lakeformation.CfnResource`` registering the bucket ARN (with a
       trailing slash) using ``self.lakeformationdataaccess_role``;
    3. an ``ssm.StringParameter`` at ``/SDLF/S3/<Layer>Bucket`` holding the
       bucket name.

    ``<Layer>`` is ``bucket_layer.capitalize()`` (e.g. ``"raw"`` -> ``"Raw"``)
    and is also used to build the construct ids.

    Requires ``self.access_logs_bucket``, ``self.kms_key`` and
    ``self.lakeformationdataaccess_role`` to be set before the call.

    Returns the created ``s3.Bucket``.
    """
    # Capitalized once; every construct id and the SSM path derive from it.
    layer_label = bucket_layer.capitalize()
    full_bucket_name = f"{org}-{domain}-{environment}-{region}-{account}-{bucket_layer}"

    bucket_construct_id = f"r{layer_label}Bucket"
    registration_construct_id = f"r{layer_label}BucketLakeFormationS3Registration"
    ssm_construct_id = f"rS3{layer_label}BucketSsm"

    layer_bucket = s3.Bucket(
        self,
        bucket_construct_id,
        bucket_name=full_bucket_name,  # TODO
        server_access_logs_bucket=self.access_logs_bucket,
        server_access_logs_prefix=full_bucket_name,
        encryption=s3.BucketEncryption.KMS,
        encryption_key=self.kms_key,
        bucket_key_enabled=True,
        block_public_access=s3.BlockPublicAccess.BLOCK_ALL,
        enforce_ssl=True,
        versioned=True,
        event_bridge_enabled=True,
    )

    # the trailing slash is important to Lake Formation somehow
    registered_arn = f"{layer_bucket.bucket_arn}/"
    lakeformation.CfnResource(
        self,
        registration_construct_id,
        resource_arn=registered_arn,
        use_service_linked_role=False,
        role_arn=self.lakeformationdataaccess_role.role_arn,
    )

    ssm.StringParameter(
        self,
        ssm_construct_id,
        description=f"Name of the {layer_label} S3 bucket",
        parameter_name=f"/SDLF/S3/{layer_label}Bucket",
        string_value=layer_bucket.bucket_name,
    )

    return layer_bucket
Loading