From bbfd0df5b21855ac42e27725ac5e51d353a9890c Mon Sep 17 00:00:00 2001 From: Alis Akers <94012653+alismx@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:14:03 -0800 Subject: [PATCH] add basic autoscaling based on mem and cpu usage (#13) * add basic autoscaling based on mem and cpu usage * remove ecr-viewer base path option * update readme * update based on load testing * allow for app repo configuration --- README.md | 7 +++++-- _data.tf | 1 + _local.tf | 38 +++++++++++++++++++++----------------- _variable.tf | 13 +++++++------ autoscaling.tf | 44 ++++++++++++++++++++++++++++++++++++++++++++ enable_ecr.tf | 2 +- 6 files changed, 79 insertions(+), 26 deletions(-) create mode 100644 autoscaling.tf diff --git a/README.md b/README.md index da91c05..75b05c7 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,9 @@ No modules. | [aws_alb_listener_rule.http](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/alb_listener_rule) | resource | | [aws_alb_listener_rule.https](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/alb_listener_rule) | resource | | [aws_alb_target_group.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/alb_target_group) | resource | +| [aws_appautoscaling_policy.cpu](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appautoscaling_policy) | resource | +| [aws_appautoscaling_policy.memory](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appautoscaling_policy) | resource | +| [aws_appautoscaling_target.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appautoscaling_target) | resource | | [aws_appmesh_mesh.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appmesh_mesh) | resource | | [aws_appmesh_virtual_node.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/appmesh_virtual_node) | resource | | [aws_cloudwatch_log_group.ecs_cloudwatch_logs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_log_group) | resource | @@ -177,13 +180,13 @@ No modules. | [disable\_ecr](#input\_disable\_ecr) | Flag to disable the aws ecr service for docker image storage, defaults to false | `bool` | `false` | no | | [ecr\_viewer\_app\_env](#input\_ecr\_viewer\_app\_env) | The current environment that is running. This may modify behavior of auth between dev and prod. | `string` | `"prod"` | no | | [ecr\_viewer\_auth\_pub\_key](#input\_ecr\_viewer\_auth\_pub\_key) | The public key used to validate the incoming authenication for the eCR Viewer. | `string` | `"-----BEGIN PUBLIC KEY-----\nMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAqjrH9PprQCB5dX15zYfd\nS6K2ezNi/ZOu8vKEhQuLqwHACy1iUt1Yyp2PZLIV7FVDgBHMMVWPVx3GJ2wEyaJw\nMHkv6XNpUpWLhbs0V1T7o/OZfEIqcNua07OEoBxX9vhKIHtaksWdoMyKRXQJz0js\noWpawfOWxETnLqGvybT4yvY2RJhquTXLcLu90L4LdvIkADIZshaOtAU/OwI5ATcb\nfE3ip15E6jIoUm7FAtfRiuncpI5l/LJPP6fvwf8QCbbUJBZklLqcUuf4qe/L/nIq\npIONb8KZFWPhnGeRZ9bwIcqYWt3LAAshQLSGEYl2PGXaqbkUD2XLETSKDjisxd0g\n9j8bIMPgBKi+dBYcmBZnR7DxJe+vEDDw8prHG/+HRy5fim/BcibTKnIl8PR5yqHa\nmWQo7N+xXhILdD9e33KLRgbg97+erHqvHlNMdwDhAfrBT+W6GCdPwp3cePPsbhsc\noGSHOUDhzyAujr0J8h5WmZDGUNWjGzWqubNZD8dBXB8x+9dDoWhfM82nw0pvAeKf\nwJodvn3Qo8/S5hxJ6HyGkUTANKN8IxWh/6R5biET5BuztZP6jfPEaOAnt6sq+C38\nhR9rUr59dP2BTlcJ19ZXobLwuJEa81S5BrcbDwYNOAzC8jl2EV1i4bQIwJJaY27X\nIynom6unaheZpS4DFIh2w9UCAwEAAQ==\n-----END PUBLIC KEY-----\n"` | no | -| [ecr\_viewer\_basepath](#input\_ecr\_viewer\_basepath) | The basepath for the ecr-viewer | `string` | `"/ecr-viewer"` | no | | [ecs\_alb\_name](#input\_ecs\_alb\_name) | Name of the Application Load Balancer (ALB) | `string` | `""` | no | | [ecs\_alb\_tg\_name](#input\_ecs\_alb\_tg\_name) | Name of the ALB Target Group | `string` | `""` | no | | [ecs\_cloudwatch\_group](#input\_ecs\_cloudwatch\_group) | Name of the AWS CloudWatch Log Group for ECS | `string` | `""` | no | | [ecs\_cluster\_name](#input\_ecs\_cluster\_name) | Name of the ECS Cluster | `string` | `""` | no | | [ecs\_task\_execution\_role\_name](#input\_ecs\_task\_execution\_role\_name) | Name of the ECS Task Execution Role | `string` | `""` | no | | [ecs\_task\_role\_name](#input\_ecs\_task\_role\_name) | Name of the ECS Task Role | `string` | `""` | no | +| [enable\_autoscaling](#input\_enable\_autoscaling) | Flag to enable autoscaling for the ECS services | `bool` | `true` | no | | [internal](#input\_internal) | Flag to determine if the several AWS resources are public (intended for external access, public internet) or private (only intended to be accessed within your AWS VPC or avaiable with other means, a transit gateway for example). | `bool` | `true` | no | | [owner](#input\_owner) | Owner of the resources | `string` | `"CDC"` | no | | [phdi\_version](#input\_phdi\_version) | Version of the PHDI application | `string` | `"v1.6.9"` | no | @@ -194,7 +197,7 @@ No modules. | [region](#input\_region) | The AWS region where resources are created | `string` | n/a | yes | | [s3\_viewer\_bucket\_name](#input\_s3\_viewer\_bucket\_name) | Name of the S3 bucket for the viewer | `string` | `""` | no | | [s3\_viewer\_bucket\_role\_name](#input\_s3\_viewer\_bucket\_role\_name) | Name of the IAM role for the ecr-viewer bucket | `string` | `""` | no | -| [service\_data](#input\_service\_data) | Data for the DIBBS services |
map(object({| `{}` | no | +| [service\_data](#input\_service\_data) | Data for the DIBBS services |
short_name = string
fargate_cpu = number
fargate_memory = number
min_capacity = number
max_capacity = number
app_image = string
app_version = string
container_port = number
host_port = number
public = bool
registry_url = string
env_vars = list(object({
name = string
value = string
}))
}))
map(object({| `{}` | no | | [sqlserver\_database\_data](#input\_sqlserver\_database\_data) | n/a |
short_name = string
fargate_cpu = number
fargate_memory = number
min_capacity = number
max_capacity = number
app_repo = string
app_image = string
app_version = string
container_port = number
host_port = number
public = bool
registry_url = string
env_vars = list(object({
name = string
value = string
}))
}))
object({|
non_integrated_viewer = string
metadata_database_type = string
metadata_database_schema = string
secrets_manager_sqlserver_user_name = string
secrets_manager_sqlserver_password_name = string
secrets_manager_sqlserver_host_name = string
})
{| no | | [tags](#input\_tags) | Tags to apply to resources | `map(string)` | `{}` | no | | [vpc\_id](#input\_vpc\_id) | ID of the VPC | `string` | n/a | yes | diff --git a/_data.tf b/_data.tf index c358fef..9e0e068 100644 --- a/_data.tf +++ b/_data.tf @@ -18,6 +18,7 @@ data "aws_iam_policy_document" "ecr_viewer_s3" { "s3:PutObjectAcl", "s3:GetObject", "s3:GetObjectAcl", + "s3:ListBucket", ] resources = [ aws_s3_bucket.ecr_viewer.arn, diff --git a/_local.tf b/_local.tf index fad1ef2..b91c4e3 100644 --- a/_local.tf +++ b/_local.tf @@ -8,15 +8,17 @@ locals { registry_url = var.disable_ecr == false ? "${data.aws_caller_identity.current.account_id}.dkr.ecr.${var.region}.amazonaws.com" : "ghcr.io/cdcgov/phdi" registry_username = data.aws_ecr_authorization_token.this.user_name registry_password = data.aws_ecr_authorization_token.this.password + phdi_repo = "ghcr.io/cdcgov/phdi" database_data = var.postgres_database_data.non_integrated_viewer == "true" ? var.postgres_database_data : var.sqlserver_database_data service_data = length(var.service_data) > 0 ? var.service_data : { ecr-viewer = { short_name = "ecrv", - fargate_cpu = 1024, - fargate_memory = 2048, + fargate_cpu = 512, + fargate_memory = 1024, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-ecr-viewer" : "ecr-viewer", app_version = var.phdi_version, container_port = 3000, @@ -52,10 +54,6 @@ locals { name = "NBS_PUB_KEY", value = var.ecr_viewer_auth_pub_key }, - { - name = "NEXT_PUBLIC_BASEPATH", - value = var.ecr_viewer_basepath - }, { name = "METADATA_DATABASE_TYPE", value = local.database_data.non_integrated_viewer == "true" ? local.database_data.metadata_database_type : "" @@ -88,6 +86,7 @@ locals { fargate_memory = 2048, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-fhir-converter" : "fhir-converter", app_version = var.phdi_version, container_port = 8080, @@ -98,10 +97,11 @@ locals { }, ingestion = { short_name = "inge", - fargate_cpu = 1024, - fargate_memory = 2048, + fargate_cpu = 512, + fargate_memory = 1024, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-ingestion" : "ingestion", app_version = var.phdi_version, container_port = 8080, @@ -112,10 +112,11 @@ locals { }, validation = { short_name = "vali", - fargate_cpu = 1024, - fargate_memory = 2048, + fargate_cpu = 512, + fargate_memory = 1024, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-validation" : "validation", app_version = var.phdi_version, container_port = 8080, @@ -126,10 +127,11 @@ locals { }, trigger-code-reference = { short_name = "trigcr", - fargate_cpu = 1024, - fargate_memory = 2048, + fargate_cpu = 512, + fargate_memory = 1024, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-trigger-code-reference" : "trigger-code-reference", app_version = var.phdi_version, container_port = 8080, @@ -140,10 +142,11 @@ locals { }, message-parser = { short_name = "msgp", - fargate_cpu = 1024, - fargate_memory = 2048, + fargate_cpu = 512, + fargate_memory = 1024, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-message-parser" : "message-parser", app_version = var.phdi_version, container_port = 8080, @@ -154,10 +157,11 @@ locals { }, orchestration = { short_name = "orch", - fargate_cpu = 1024, - fargate_memory = 2048, + fargate_cpu = 512, + fargate_memory = 1024, min_capacity = 1, max_capacity = 5, + app_repo = local.phdi_repo, app_image = var.disable_ecr == false ? "${terraform.workspace}-orchestration" : "orchestration", app_version = var.phdi_version, container_port = 8080, @@ -187,7 +191,7 @@ locals { }, { name = "ECR_VIEWER_URL", - value = "http://ecr-viewer:3000${var.ecr_viewer_basepath}" + value = "http://ecr-viewer:3000/ecr-viewer" }, { name = "MESSAGE_PARSER_URL", diff --git a/_variable.tf b/_variable.tf index dc5b9d8..03e5c20 100644 --- a/_variable.tf +++ b/_variable.tf @@ -58,6 +58,12 @@ variable "ecs_task_role_name" { default = "" } +variable "enable_autoscaling" { + type = bool + description = "Flag to enable autoscaling for the ECS services" + default = true +} + variable "private_subnet_ids" { type = list(string) description = "List of private subnet IDs" @@ -98,6 +104,7 @@ variable "service_data" { fargate_memory = number min_capacity = number max_capacity = number + app_repo = string app_image = string app_version = string container_port = number @@ -182,12 +189,6 @@ variable "tags" { default = {} } -variable "ecr_viewer_basepath" { - type = string - description = "The basepath for the ecr-viewer" - default = "/ecr-viewer" -} - variable "ecr_viewer_app_env" { type = string description = "The current environment that is running. This may modify behavior of auth between dev and prod." diff --git a/autoscaling.tf b/autoscaling.tf new file mode 100644 index 0000000..f2d1021 --- /dev/null +++ b/autoscaling.tf @@ -0,0 +1,44 @@ + + +resource "aws_appautoscaling_target" "this" { + for_each = var.enable_autoscaling ? aws_ecs_service.this : {} + max_capacity = local.service_data[each.key].max_capacity + min_capacity = local.service_data[each.key].min_capacity + resource_id = "service/${aws_ecs_cluster.dibbs_app_cluster.name}/${each.key}" + scalable_dimension = "ecs:service:DesiredCount" + service_namespace = "ecs" +} + +resource "aws_appautoscaling_policy" "memory" { + for_each = var.enable_autoscaling ? aws_ecs_service.this : {} + name = "${each.key}_memory" + policy_type = "TargetTrackingScaling" + resource_id = aws_appautoscaling_target.this[each.key].resource_id + scalable_dimension = aws_appautoscaling_target.this[each.key].scalable_dimension + service_namespace = aws_appautoscaling_target.this[each.key].service_namespace + + target_tracking_scaling_policy_configuration { + predefined_metric_specification { + predefined_metric_type = "ECSServiceAverageMemoryUtilization" + } + + target_value = 80 + } +} + +resource "aws_appautoscaling_policy" "cpu" { + for_each = var.enable_autoscaling ? aws_ecs_service.this : {} + name = "${each.key}_cpu" + policy_type = "TargetTrackingScaling" + resource_id = aws_appautoscaling_target.this[each.key].resource_id + scalable_dimension = aws_appautoscaling_target.this[each.key].scalable_dimension + service_namespace = aws_appautoscaling_target.this[each.key].service_namespace + + target_tracking_scaling_policy_configuration { + predefined_metric_specification { + predefined_metric_type = "ECSServiceAverageCPUUtilization" + } + + target_value = 50 + } +} diff --git a/enable_ecr.tf b/enable_ecr.tf index 4567823..ca3e4c0 100644 --- a/enable_ecr.tf +++ b/enable_ecr.tf @@ -1,6 +1,6 @@ resource "dockerless_remote_image" "dibbs" { for_each = var.disable_ecr == false ? local.service_data : {} - source = "ghcr.io/cdcgov/phdi/${each.key}:${each.value.app_version}" + source = "${each.value.app_repo}/${each.key}:${each.value.app_version}" target = "${each.value.registry_url}/${each.value.app_image}:${each.value.app_version}" }
"metadata_database_schema": "",
"metadata_database_type": "",
"non_integrated_viewer": "false",
"secrets_manager_sqlserver_host_name": "",
"secrets_manager_sqlserver_password_name": "",
"secrets_manager_sqlserver_user_name": ""
}