156 changes: 156 additions & 0 deletions on-prem-installers/onprem/after_upgrade_sync.sh
@@ -0,0 +1,156 @@
#!/bin/bash
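# Post-upgrade helper: log in to ArgoCD, then repeatedly sync every application in
# the target namespace that is not both Healthy and Synced, walking the apps in
# sync-wave order, until everything is green.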

NS="onprem"

# -----------------------------
# Check & install ArgoCD CLI
# -----------------------------
if ! command -v argocd >/dev/null 2>&1; then
  echo "[INFO] argocd CLI not found. Installing..."
  VERSION=$(curl -L -s https://raw.githubusercontent.com/argoproj/argo-cd/stable/VERSION)
  echo "[INFO] Latest version: $VERSION"
  curl -sSL -o argocd-linux-amd64 \
    https://github.com/argoproj/argo-cd/releases/download/v${VERSION}/argocd-linux-amd64
  sudo install -m 555 argocd-linux-amd64 /usr/local/bin/argocd
  rm -f argocd-linux-amd64
  echo "[INFO] argocd CLI installed successfully."
else
  echo "[INFO] argocd CLI already installed: $(argocd version --client | head -1)"
fi

# -----------------------------
# ADMIN PASSWORD
# -----------------------------
echo "[INFO] Fetching ArgoCD admin password..."
if command -v yq >/dev/null 2>&1; then
  ADMIN_PASSWD=$(kubectl get secret -n argocd argocd-initial-admin-secret -o yaml | yq '.data.password' | base64 -d)
else
  ADMIN_PASSWD=$(kubectl get secret -n argocd argocd-initial-admin-secret -o jsonpath='{.data.password}' | base64 -d)
fi
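# NOTE: assumes argocd-initial-admin-secret still exists; ArgoCD recommends deleting it
# after the first login, in which case the admin password must be supplied another way.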

# -----------------------------
# ArgoCD Server IP (LB or NodePort)
# -----------------------------
echo "[INFO] Detecting ArgoCD server IP..."
ARGO_IP=$(kubectl get svc argocd-server -n argocd -o jsonpath="{.status.loadBalancer.ingress[0].ip}")
if [[ -z "$ARGO_IP" ]]; then
NODEPORT=$(kubectl get svc argocd-server -n argocd -o jsonpath='{.spec.ports[0].nodePort}')
NODEIP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[0].address}')
ARGO_IP="${NODEIP}:${NODEPORT}"
echo "[INFO] LoadBalancer IP not found, using NodePort: ${ARGO_IP}"
else
echo "[INFO] LoadBalancer IP: ${ARGO_IP}"
fi

# -----------------------------
# Login
# -----------------------------
echo "[INFO] Logging in to ArgoCD..."
argocd login "${ARGO_IP}" --username admin --password "${ADMIN_PASSWD}" --insecure
echo "[INFO] ArgoCD login successful."

# ------------------------------------------------------------
# Return NOT GREEN apps (health != Healthy OR sync != Synced)
# ------------------------------------------------------------
get_not_green_apps() {
  kubectl get applications.argoproj.io -n "$NS" -o json \
    | jq -r '
        .items[] | {
          name: .metadata.name,
          wave: (.metadata.annotations["argocd.argoproj.io/sync-wave"] // "0"),
          health: .status.health.status,
          sync: .status.sync.status
        }
        | select(.health != "Healthy" or .sync != "Synced")
        | "\(.wave) \(.name) \(.health) \(.sync)"
      '
}
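# Output format: one line per app, "<wave> <name> <health> <sync>", e.g.
#   "10 orchestrator-observability Degraded OutOfSync" (illustrative values)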

# ------------------------------------------------------------
# Main sync logic: Sync apps not green in wave order
# ------------------------------------------------------------
sync_not_green_apps_once() {

  mapfile -t apps < <(get_not_green_apps | sort -n -k1)

  if [[ ${#apps[@]} -eq 0 ]]; then
    echo "🎉 All apps are GREEN. Nothing to sync."
    return 0
  fi

  echo "---------------------------------------------------------"
  echo "Syncing ${#apps[@]} NOT-GREEN apps..."
  echo "---------------------------------------------------------"

  for entry in "${apps[@]}"; do

    read -r wave name health sync <<< "$entry"

    full_app="${NS}/${name}"

    echo "---------------------------------------------------------"
    echo "App: $full_app"
    echo "Wave: $wave"
    echo "Current Health: $health"
    echo "Current Sync: $sync"
    echo "Syncing...."
    echo

    # -----------------------------
    # Graceful sync with retry handling
    # -----------------------------
    if ! argocd app sync "$full_app" --grpc-web 2>/tmp/argocd_sync.log; then
      if grep -q "application is deleting" /tmp/argocd_sync.log; then
        echo "⚠️ App $full_app is deleting. Skipping for now..."
      elif grep -q "another operation is already in progress" /tmp/argocd_sync.log; then
        echo "⚠️ Another operation in progress for $full_app. Will retry in next loop..."
      else
        echo "❌ Sync FAILED for $full_app. Error logged. Will retry next loop."
        cat /tmp/argocd_sync.log
      fi
    else
      echo "✔ Sync OK for $full_app"
    fi

    echo
  done
}

# ------------------------------------------------------------
# LOOP UNTIL ALL APPS ARE GREEN
# ------------------------------------------------------------
sync_until_green() {
  echo "========================================================="
  echo "Starting continuous sync loop until ALL apps are GREEN"
  echo "Namespace: $NS"
  echo "========================================================="

  while true; do
    echo
    echo "Checking app statuses..."

    # If all are green → exit
    if [[ -z "$(get_not_green_apps)" ]]; then
      echo
      echo "🎉🎉🎉 ALL APPLICATIONS ARE GREEN (Healthy + Synced) 🎉🎉🎉"
      break
    fi

    # Sync apps that are not green
    sync_not_green_apps_once
    kubectl get application -A

    echo "Waiting 10 seconds before next check..."
    sleep 10
  done
}

# ------------------------------------------------------------
# MAIN
# ------------------------------------------------------------
# Disable exit on error: individual sync failures are handled and retried inside the loop
set +e
sync_until_green
68 changes: 64 additions & 4 deletions on-prem-installers/onprem/onprem_upgrade.sh
@@ -1198,6 +1198,37 @@ if kubectl get crd externalsecrets.external-secrets.io >/dev/null 2>&1; then
kubectl patch crd/externalsecrets.external-secrets.io -p '{"metadata":{"finalizers":[]}}' --type=merge
fi

cleanup_job() {

  local job_name=$1

  if [[ -z "$job_name" ]]; then
    echo "No job name provided to cleanup_job"
    return
  fi

  echo "===== Starting Job Cleanup for: $job_name ====="

  # Find the namespace/pod pairs left behind by this job
  # (job pods carry a generated suffix, so match by name prefix rather than exact name)
  pods=$(kubectl get pod -A --no-headers 2>/dev/null | grep "$job_name" | awk '{print $1 "/" $2}')

  if [[ -z "$pods" ]]; then
    echo "✅ Job '$job_name' not found in any namespace"
    return
  fi

  # Delete each leftover job pod in its namespace
  for entry in $pods; do
    ns=${entry%%/*}
    pod=${entry#*/}
    echo "Deleting pod: $pod (job: $job_name) in namespace: $ns"
    kubectl delete pod "$pod" -n "$ns" --ignore-not-found=true --cascade=foreground
    echo "------------------------------------------------------"
  done

  echo "===== Job Cleanup Completed for: $job_name ====="
  sleep 5
}
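# Assumption: the pods removed here belong to one-shot upgrade jobs and, if still needed,
# are expected to be re-created by their owning ArgoCD applications on the next sync.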


# Apply External Secrets CRDs with server-side apply
echo "Applying external-secrets CRDs with server-side apply..."
kubectl apply --server-side=true --force-conflicts -f https://raw.githubusercontent.com/external-secrets/external-secrets/refs/tags/v0.20.4/deploy/crds/bundle.yaml || true
@@ -1216,6 +1247,9 @@ check_and_force_sync_app copy-ca-cert-gitea-to-cluster "$apps_ns"
check_and_force_sync_app copy-cluster-gitea-cred-to-fleet "$apps_ns"
check_and_force_sync_app copy-keycloak-admin-to-infra "$apps_ns"

cleanup_job namespace-label
cleanup_job wait-istio-job

# Unseal vault after external-secrets is ready
echo "Unsealing vault..."
vault_unseal
@@ -1227,7 +1261,11 @@ wait_for_app_synced_healthy platform-keycloak "$apps_ns"

kubectl patch -n "$apps_ns" application cluster-manager --patch-file /tmp/argo-cd/sync-patch.yaml --type merge


# Stop the stale root-app sync operation, which would otherwise remain stuck.
kubectl patch application root-app -n "$apps_ns" --type merge -p '{"operation":null}'
kubectl patch application root-app -n "$apps_ns" --type json -p '[{"op": "remove", "path": "/status/operationState"}]'
# Apply the root-app sync patch
kubectl patch application root-app -n "$apps_ns" --patch-file /tmp/argo-cd/sync-patch.yaml --type merge
kubectl delete secret tls-boots -n orch-boots

# Observability Minio PVC ignoreDifferences patching and job cleanup
@@ -1267,7 +1305,24 @@ check_and_patch_sync_app orchestrator-observability "$apps_ns"
# Cleanup infra-external jobs
kubectl delete jobs setup-databases-mps setup-databases-rps amt-dbpassword-secret-job init-amt-vault-job -n orch-infra --force --grace-period=0 --ignore-not-found

#process_unsynced_leftovers "$apps_ns"
kubectl patch application wait-istio-job -n "$apps_ns" --patch-file /tmp/argo-cd/sync-patch.yaml --type merge || true
kubectl patch application namespace-label -n "$apps_ns" --patch-file /tmp/argo-cd/sync-patch.yaml --type merge || true
kubectl patch application infra-external -n "$apps_ns" --patch-file /tmp/argo-cd/sync-patch.yaml --type merge || true
sleep 20
kubectl delete application namespace-label -n "$apps_ns" || true
kubectl delete application wait-istio-job -n "$apps_ns" || true
# Stop the stale root-app sync operation, which would otherwise remain stuck.
kubectl patch application root-app -n "$apps_ns" --type merge -p '{"operation":null}'
kubectl patch application root-app -n "$apps_ns" --type json -p '[{"op": "remove", "path": "/status/operationState"}]'
# Apply the root-app sync patch
kubectl patch application root-app -n "$apps_ns" --patch-file /tmp/argo-cd/sync-patch.yaml --type merge


# Handle any remaining unsynced applications using the patch-based sync

process_unsynced_leftovers() {
apps_ns=$1
# Collect and display syncwave information for OutOfSync applications
echo "OutOfSync applications by syncwave:"
outofsync_apps=$(kubectl get applications -n "$apps_ns" -o json | \
@@ -1325,10 +1380,15 @@ echo "$outofsync_apps" | while read -r wave app_name; do
fi
done

}

kubectl patch -n "$apps_ns" application root-app --patch-file /tmp/argo-cd/sync-patch.yaml --type merge

wait_for_app_synced_healthy root-app "$apps_ns"
#process_unsynced_leftovers "$apps_ns"
# Stop the stale root-app sync operation, which would otherwise remain stuck.
kubectl patch application root-app -n "$apps_ns" --type merge -p '{"operation":null}'
kubectl patch application root-app -n "$apps_ns" --type json -p '[{"op": "remove", "path": "/status/operationState"}]'
# Apply the root-app sync patch
kubectl patch application root-app -n "$apps_ns" --patch-file /tmp/argo-cd/sync-patch.yaml --type merge

#wait_for_app_synced_healthy root-app "$apps_ns"

echo "Upgrade completed! Wait for ArgoCD applications to be in 'Synced' and 'Healthy' state"