From e78a9f2080f200ee13912851f9ca0ef8cf6aba82 Mon Sep 17 00:00:00 2001 From: Alex Benn Date: Thu, 5 Dec 2024 14:03:50 -0500 Subject: [PATCH] Add AWS/EKS to sched performance comparison scenario. --- .../cluster-churn-nodes1000-pods50k-sched.yml | 26 +++++ .../terraform-inputs/aws.tfvars | 100 ++++++++++++++++++ .../terraform-test-inputs/aws.json | 4 + 3 files changed, 130 insertions(+) create mode 100644 scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-inputs/aws.tfvars create mode 100644 scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-test-inputs/aws.json diff --git a/pipelines/perf-eval/Scheduler Benchmark/cluster-churn-nodes1000-pods50k-sched.yml b/pipelines/perf-eval/Scheduler Benchmark/cluster-churn-nodes1000-pods50k-sched.yml index c26634728..21e0d1f57 100644 --- a/pipelines/perf-eval/Scheduler Benchmark/cluster-churn-nodes1000-pods50k-sched.yml +++ b/pipelines/perf-eval/Scheduler Benchmark/cluster-churn-nodes1000-pods50k-sched.yml @@ -13,6 +13,32 @@ variables: SCENARIO_VERSION: main stages: + - stage: aws_eastus1_sched_baseline + dependsOn: [] + jobs: + - template: /jobs/competitive-test.yml + parameters: + cloud: aws + regions: + - us-east-1 + engine: clusterloader2 + engine_input: + image: "ghcr.io/azure/clusterloader2:v20241022" + topology: slo + matrix: + aws_vpc_cni: + cpu_per_node: 4 + node_count: 1000 + node_per_step: 100 + max_pods: 110 + repeats: 1 + scale_timeout: "30m" + cl2_config_file: cluster-scale-config.yaml + service_test: False + max_parallel: 1 + timeout_in_minutes: 720 + credential_type: service_connection + ssh_key_enabled: false - stage: azure_eastus2_sched_upstream_default dependsOn: [] variables: diff --git a/scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-inputs/aws.tfvars b/scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-inputs/aws.tfvars new file mode 100644 index 000000000..93c54f842 --- /dev/null +++ b/scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-inputs/aws.tfvars @@ -0,0 +1,100 @@ +scenario_type = "perf-eval" +scenario_name = "cluster-churn-n1000p50k-sched" +deletion_delay = "12h" +owner = "aks" + +network_config_list = [ + { + role = "slo" + vpc_name = "slo-vpc" + vpc_cidr_block = "10.0.0.0/16" + secondary_ipv4_cidr_blocks = ["10.1.0.0/16"] + subnet = [ + { + name = "slo-subnet-1" + cidr_block = "10.0.0.0/16" + zone_suffix = "a" + map_public_ip_on_launch = true + }, + { + name = "slo-subnet-2" + cidr_block = "10.1.0.0/17" + zone_suffix = "b" + map_public_ip_on_launch = true + }, + { + name = "slo-subnet-3" + cidr_block = "10.1.128.0/17" + zone_suffix = "c" + map_public_ip_on_launch = true + } + ] + security_group_name = "slo-sg" + route_tables = [ + { + name = "internet-rt" + cidr_block = "0.0.0.0/0" + } + ], + route_table_associations = [ + { + name = "slo-subnet-rt-assoc-1" + subnet_name = "slo-subnet-1" + route_table_name = "internet-rt" + }, + { + name = "slo-subnet-rt-assoc-2" + subnet_name = "slo-subnet-2" + route_table_name = "internet-rt" + }, + { + name = "slo-subnet-rt-assoc-3" + subnet_name = "slo-subnet-3" + route_table_name = "internet-rt" + } + ] + sg_rules = { + ingress = [] + egress = [ + { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_block = "0.0.0.0/0" + } + ] + } + } +] + +eks_config_list = [{ + role = "slo" + eks_name = "slo" + enable_karpenter = true + vpc_name = "slo-vpc" + policy_arns = ["AmazonEKSClusterPolicy", "AmazonEKSVPCResourceController", "AmazonEKSWorkerNodePolicy", "AmazonEKS_CNI_Policy", "AmazonEC2ContainerRegistryReadOnly"] + eks_managed_node_groups = [ + { + name = "default" + ami_type = "AL2_x86_64" + instance_types = ["m4.4xlarge"] + min_size = 5 + max_size = 5 + desired_size = 5 + capacity_type = "ON_DEMAND" + }, + { + name = "prompool" + ami_type = "AL2_x86_64" + instance_types = ["m4.16xlarge"] + min_size = 1 + max_size = 1 + desired_size = 1 + capacity_type = "ON_DEMAND" + labels = { "prometheus" = "true" } + } + ] + + eks_addons = [] + kubernetes_version = "1.30" +}] diff --git a/scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-test-inputs/aws.json b/scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-test-inputs/aws.json new file mode 100644 index 000000000..873be3589 --- /dev/null +++ b/scenarios/perf-eval/cluster-churn-n1000p50k-sched/terraform-test-inputs/aws.json @@ -0,0 +1,4 @@ +{ + "run_id" : "123456789", + "region" : "us-east-1" +}