-
Notifications
You must be signed in to change notification settings - Fork 24
/
generative_ai_demo_web_stack.py
132 lines (115 loc) · 5.1 KB
/
generative_ai_demo_web_stack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from aws_cdk import (
Duration,
Stack,
aws_lambda as _lambda,
aws_apigateway as apigw,
aws_ec2 as ec2,
aws_iam as iam,
aws_ssm as ssm,
aws_ecs as ecs,
aws_ecs_patterns as ecs_patterns,
)
from constructs import Construct
class GenerativeAiDemoWebStack(Stack):
    """CDK stack for the web tier of the generative AI demo.

    The stack creates:

    * one IAM role shared by the Lambda functions, allowed to invoke
      SageMaker endpoints;
    * for each service in ``_SERVICES``, a VPC-attached Lambda function
      fronted by an API Gateway ``LambdaRestApi``;
    * an ECS cluster with a load-balanced Fargate service built from the
      local ``web-app`` directory, with CPU-based task auto-scaling;
    * one SSM string parameter per service holding the URL of its API.
    """

    # Service keys. Each key drives the construct ids, the Lambda asset
    # directory (``code/lambda_<key>``), the handler module
    # (``<key>.lambda_handler``) and the SSM parameter name
    # (``<key>_api_endpoint``). Order is the construct creation order.
    _SERVICES = ("txt2img", "txt2nlu")

    def __init__(self, scope: Construct, construct_id: str, vpc: ec2.IVpc, **kwargs) -> None:
        """Define every resource of the stack.

        Args:
            scope: Parent construct.
            construct_id: Id of this stack within ``scope``.
            vpc: VPC hosting the Lambda functions and the ECS cluster.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, construct_id, **kwargs)

        lambda_role = self._create_lambda_role()

        # One Lambda + REST API pair per service, created in declaration order.
        api_endpoints = {
            service: self._create_lambda_rest_api(service, lambda_role, vpc)
            for service in self._SERVICES
        }

        self._create_web_application(vpc)

        # Store each API URL in SSM under "<service>_api_endpoint".
        for service, endpoint in api_endpoints.items():
            parameter_name = f"{service}_api_endpoint"
            ssm.StringParameter(
                self, parameter_name,
                parameter_name=parameter_name,
                string_value=endpoint.url,
            )

    def _create_lambda_role(self) -> iam.Role:
        """Create the execution role shared by all Lambda functions."""
        role = iam.Role(
            self, "Gen-AI-Lambda-Policy",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        )
        for policy_name in (
            "service-role/AWSLambdaBasicExecutionRole",
            "service-role/AWSLambdaVPCAccessExecutionRole",
        ):
            role.add_managed_policy(
                iam.ManagedPolicy.from_aws_managed_policy_name(policy_name)
            )
        # NOTE(review): InvokeEndpoint is granted on every SageMaker
        # endpoint ("*"); consider scoping it to the demo's endpoints.
        role.attach_inline_policy(iam.Policy(
            self, "sm-invoke-policy",
            statements=[iam.PolicyStatement(
                effect=iam.Effect.ALLOW,
                actions=["sagemaker:InvokeEndpoint"],
                resources=["*"],
            )],
        ))
        return role

    def _create_lambda_rest_api(self, service: str, role: iam.Role, vpc: ec2.IVpc) -> apigw.LambdaRestApi:
        """Create the Lambda function for ``service`` and its API Gateway endpoint.

        Args:
            service: Service key, e.g. ``"txt2img"``.
            role: Execution role assigned to the function.
            vpc: VPC the function is attached to.

        Returns:
            The ``LambdaRestApi`` that uses the new function as its handler.
        """
        function = _lambda.Function(
            self, f"lambda_{service}",
            runtime=_lambda.Runtime.PYTHON_3_9,
            code=_lambda.Code.from_asset(f"code/lambda_{service}"),
            handler=f"{service}.lambda_handler",
            role=role,
            timeout=Duration.seconds(180),
            memory_size=512,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS
            ),
        )
        return apigw.LambdaRestApi(
            self, f"{service}_apigw_endpoint",
            handler=function,
        )

    def _create_web_application(self, vpc: ec2.IVpc) -> None:
        """Create the ECS cluster and the load-balanced Fargate web service."""
        cluster = ecs.Cluster(self, "WebDemoCluster", vpc=vpc)

        # Add an AutoScalingGroup with spot instances to the cluster.
        # NOTE(review): the service below runs on Fargate, so nothing defined
        # in this stack appears to use this EC2 capacity -- confirm it is needed.
        cluster.add_capacity(
            "AsgSpot",
            max_capacity=2,
            min_capacity=1,
            desired_capacity=2,
            instance_type=ec2.InstanceType("c5.xlarge"),
            spot_price="0.0735",
            # Enable the Automated Spot Draining support for Amazon ECS
            spot_instance_draining=True,
        )

        # Build the Dockerfile from the local folder and push it to ECR.
        image = ecs.ContainerImage.from_asset("web-app")

        fargate_service = ecs_patterns.ApplicationLoadBalancedFargateService(
            self, "WebApplication",
            cluster=cluster,  # Required
            cpu=2048,  # Default is 256 (512 is 0.5 vCPU, 2048 is 2 vCPU)
            desired_count=1,  # Default is 1
            task_image_options=ecs_patterns.ApplicationLoadBalancedTaskImageOptions(
                image=image,
                container_port=8501,
            ),
            # load_balancer_name="gen-ai-demo",
            memory_limit_mib=4096,  # Default is 512
            public_load_balancer=True,  # Default is True
        )

        # Task role permissions: read SSM parameters and invoke API Gateway.
        # NOTE(review): both statements use wildcard resources; consider
        # scoping them to the parameters and APIs created by this stack.
        task_definition = fargate_service.task_definition
        task_definition.add_to_task_role_policy(iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["ssm:GetParameter"],
            resources=["arn:aws:ssm:*"],
        ))
        task_definition.add_to_task_role_policy(iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["execute-api:Invoke", "execute-api:ManageConnections"],
            resources=["*"],
        ))

        # Task auto-scaling: up to 10 tasks, targeting 50% CPU utilization.
        scaling = fargate_service.service.auto_scale_task_count(
            max_capacity=10
        )
        scaling.scale_on_cpu_utilization(
            "CpuScaling",
            target_utilization_percent=50,
            scale_in_cooldown=Duration.seconds(60),
            scale_out_cooldown=Duration.seconds(60),
        )