Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion e2e-tests/installer/auto_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -975,7 +975,7 @@ def makefile_upgrade(self):
self.installer_session.expect("orchestrator-admin:~")

self.installer_session.sendline(f"{self.internal_makefile_params} make upgrade")
self.installer_session.expect("orchestrator-admin:~", timeout=1800)
self.installer_session.expect("orchestrator-admin:~", timeout=2700)

def deprovision(self):
"""
Expand Down
16 changes: 13 additions & 3 deletions installer/fix-external-secrets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,6 @@ EOF
echo "Syncing root app"
kubectl patch -n "$TARGET_ENV" application root-app --patch-file /tmp/argo-cd/sync-patch.yaml --type merge

# argo has trouble replacing this secret so manually remove it
echo "Deleting TLS Boots..."
kubectl delete secret tls-boots -n orch-boots

# force vault to reload
echo "Deleting Vault..."
Expand All @@ -114,3 +111,16 @@ kubectl patch -n "$TARGET_ENV" application root-app --patch-file /tmp/argo-cd/sy
echo "Deleting and Syncing for Cluster Templates"
restart_and_wait_pod "orch-cluster" "cluster-manager"
restart_and_wait_pod "orch-cluster" "cluster-manager-template-controller"

# Pause so the applications have time to sync before the cleanup below runs.
echo "Sleep for 10 minutes to allow all apps to sync and come up"
sleep 600

# Delete infra-external vault related jobs (only those reporting one
# successful completion are selected).
kubectl delete jobs -n orch-infra --field-selector status.successful=1

# Delete ns-label related jobs (same selector as above).
kubectl delete jobs -n ns-label --field-selector status.successful=1

# argo has trouble replacing this secret so manually remove it.
# --ignore-not-found keeps the delete from failing when the secret is already
# absent; NOTE(review): this appears to be the script's final command, so its
# exit status would otherwise become the script's exit status.
echo "Deleting TLS Boots..."
kubectl delete secret tls-boots -n orch-boots --ignore-not-found
51 changes: 48 additions & 3 deletions pod-configs/utils/upgrades/10-upgrade-to-2025.2.0.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ get_eks_node_ami() {
echo $ami
}

#######################################
# Print the EBS volume size recorded in the launch template version that the
# EKS node group "nodegroup-${ENV_NAME}" currently uses.
# Globals:   ENV_NAME (read)
# Arguments: none
# Outputs:   the volume size on stdout; diagnostics on stderr
# Returns:   0 on success, 1 when either AWS lookup fails or yields no value
#######################################
get_eks_vol_size() {
  local lt_ref lt_id lt_ver vol_size

  # Resolve the launch template id/version attached to the node group.
  lt_ref=$(aws eks describe-nodegroup \
    --cluster-name "${ENV_NAME}" \
    --nodegroup-name "nodegroup-${ENV_NAME}" \
    --query "nodegroup.launchTemplate.[id,version]" \
    --output text) || lt_ref=""
  read -r lt_id lt_ver <<< "$lt_ref"

  # With --output text a missing value is rendered as the literal "None".
  if [[ -z "$lt_id" || "$lt_id" == "None" || -z "$lt_ver" || "$lt_ver" == "None" ]]; then
    echo "get_eks_vol_size: no launch template found for nodegroup-${ENV_NAME}" >&2
    return 1
  fi

  # Read the size of the first block device mapping of that template version.
  vol_size=$(aws ec2 describe-launch-template-versions \
    --launch-template-id "$lt_id" \
    --versions "$lt_ver" \
    --query 'LaunchTemplateVersions[0].LaunchTemplateData.BlockDeviceMappings[0].Ebs.VolumeSize' \
    --output text) || vol_size=""

  if [[ -z "$vol_size" || "$vol_size" == "None" ]]; then
    echo "get_eks_vol_size: no volume size in launch template ${lt_id} version ${lt_ver}" >&2
    return 1
  fi

  printf '%s\n' "$vol_size"
}

get_aurora_ins_azs() {
aurora_azs=($1)
Expand Down Expand Up @@ -124,10 +133,10 @@ vpc_terraform_backend_key = "${VPC_TERRAFORM_BACKEND_KEY}"
vpc_terraform_backend_region = "${BUCKET_REGION}" # region of the S3 bucket to store the TF state
eks_cluster_name = "$ENV_NAME"
aws_account_number = "$AWS_ACCOUNT"
eks_volume_size = 128
eks_desired_size = $EKS_DESIRED_SIZE
eks_volume_size = $(get_eks_vol_size)
eks_desired_size = $((EKS_DESIRED_SIZE + 1))
eks_min_size = $EKS_MIN_SIZE
eks_max_size = $EKS_MAX_SIZE
eks_max_size = $((EKS_MAX_SIZE + 1))
eks_node_ami_id = "$(get_eks_node_ami)"
eks_volume_type = "gp3"
aws_region = "${AWS_REGION}"
Expand Down Expand Up @@ -238,6 +247,16 @@ else
exit 1
fi

# Apply only the EKS module, then wait for the node group's nodes to report
# Ready. The upgrade stops if either the apply or the wait fails.
echo "Applying changes for EKS module..."
if terraform apply -target=module.eks -var-file="environments/${ENV_NAME}/variable.tfvar" -auto-approve; then
  echo "✅ Terraform apply for EKS module succeeded."
  # wait_for_nodegroup_ready_nodes returns non-zero on timeout; do not carry
  # on with the remaining modules in that case.
  if ! wait_for_nodegroup_ready_nodes; then
    exit 1
  fi
else
  echo "❌ Terraform apply for EKS module failed!"
  exit 1
fi


echo "Applying changes for KMS module..."
if terraform apply -target=module.kms -var-file="environments/${ENV_NAME}/variable.tfvar" -auto-approve; then
echo "✅ Terraform apply for KMS module succeeded."
Expand Down Expand Up @@ -288,6 +307,32 @@ aws ec2 describe-security-groups --group-ids "$LB_SG_ID_T3" --query "SecurityGro
return 0
}

#######################################
# Poll `kubectl get nodes` until at least EKS_DESIRED_SIZE + 1 nodes show a
# Ready status, giving up after 15 minutes.
# Globals:   EKS_DESIRED_SIZE (read)
# Arguments: none
# Outputs:   one progress line per poll plus a final result line on stdout
# Returns:   0 once enough nodes are Ready, 1 on timeout
#######################################
wait_for_nodegroup_ready_nodes() {
  local -r target=$((EKS_DESIRED_SIZE + 1))
  local -r timeout=900   # 15 minutes = 900 seconds
  local -r interval=15   # check every 15 seconds
  local elapsed=0
  local ready

  while (( elapsed < timeout )); do
    # Count rows whose STATUS column starts with "Ready" (this includes
    # "Ready,SchedulingDisabled" but not "NotReady"). awk always prints a
    # number and exits 0, so a poll with zero Ready nodes cannot trip
    # `set -e`; a failing kubectl simply counts as 0 for this round.
    ready=$(kubectl get nodes --no-headers 2>/dev/null \
      | awk '$2 ~ /^Ready(,|$)/ { n++ } END { print n + 0 }') || ready=0
    ready=${ready:-0}

    echo "[$elapsed sec] Ready nodes: $ready / Waiting for: $target"

    if (( ready >= target )); then
      echo "✅ Node group reached $target Ready nodes"
      return 0
    fi

    sleep "$interval"
    elapsed=$((elapsed + interval))
  done

  echo "❌ Timeout after 15 minutes. Node group did not reach $target Ready nodes."
  return 1
}

# Main

if [[ ${COMMAND:-""} != upgrade ]]; then
Expand Down
Loading