From c7672d5cf667348d729cad454325172fe47effc8 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Wed, 8 Oct 2025 10:32:23 +0300 Subject: [PATCH 1/6] Sync prod base to staging Signed-off-by: Radoslav Dimitrov --- .github/workflows/sync-db.yml | 219 ++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 .github/workflows/sync-db.yml diff --git a/.github/workflows/sync-db.yml b/.github/workflows/sync-db.yml new file mode 100644 index 00000000..b6006bfa --- /dev/null +++ b/.github/workflows/sync-db.yml @@ -0,0 +1,219 @@ +name: Sync Production DB to Staging + +on: + schedule: + # Run daily at 2 AM UTC (during low-traffic hours) + - cron: '0 2 * * *' + workflow_dispatch: # Allow manual triggering + +permissions: + contents: read + +jobs: + sync-database: + name: Sync Prod DB to Staging + runs-on: ubuntu-latest + environment: staging + concurrency: + group: sync-staging-database + cancel-in-progress: false + steps: + - name: Authenticate to Google Cloud (Production) + uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 + with: + credentials_json: ${{ secrets.GCP_PROD_SERVICE_ACCOUNT_KEY }} + + - name: Setup Google Cloud SDK + uses: google-github-actions/setup-gcloud@aa5489c8933f4cc7a4f7d45035b3b1440c9c10db + with: + project_id: mcp-registry-prod + install_components: gke-gcloud-auth-plugin + + - name: Get prod GKE credentials + run: | + gcloud container clusters get-credentials mcp-registry-prod \ + --zone=us-central1-b \ + --project=mcp-registry-prod + + - name: Dump production database + run: | + # Create a job to dump the database from within the prod cluster + kubectl apply -f - < pg_backend_pid();" || true + + echo "Dropping and recreating database..." + PGPASSWORD=\$POSTGRES_PASSWORD psql -h registry-pg-rw -U postgres -d postgres \ + -c "DROP DATABASE IF EXISTS app;" + PGPASSWORD=\$POSTGRES_PASSWORD psql -h registry-pg-rw -U postgres -d postgres \ + -c "CREATE DATABASE app;" + + echo "Restoring database from backup..." + PGPASSWORD=\$POSTGRES_PASSWORD pg_restore \ + -h registry-pg-rw \ + -U postgres \ + -d app \ + --verbose \ + --no-owner \ + --no-acl \ + /backup/prod-backup.dump + + echo "✓ Database restore completed successfully" + env: + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: registry-pg-superuser + key: password + volumeMounts: + - name: backup-data + mountPath: /backup + volumes: + - name: backup-data + configMap: + name: prod-backup + EOF + + # Wait for restore to complete + kubectl wait --for=condition=complete job -l job-name --timeout=600s -n default || { + echo "Restore job failed" + POD_NAME=$(kubectl get pods -l job-name -n default --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[-1].metadata.name}') + kubectl logs $POD_NAME -n default --tail=100 + exit 1 + } + + echo "✓ Restore completed successfully" + + - name: Verify staging DB is functional + if: always() + run: | + # Create a verification pod + kubectl run pg-verify-$(date +%s) \ + --image=postgres:15 \ + --rm -i --restart=Never \ + --env="PGPASSWORD=$(kubectl get secret registry-pg-superuser -n default -o jsonpath='{.data.password}' | base64 -d)" \ + -- bash -c ' + TABLE_COUNT=$(psql -h registry-pg-rw -U postgres -d app -tAc "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '\''public'\'';") + + if [ "$TABLE_COUNT" -lt 1 ]; then + echo "ERROR: Staging DB has no tables!" 
+ exit 1 + fi + + echo "✓ Staging DB is healthy with $TABLE_COUNT tables" + + echo "Top 10 tables by row count:" + psql -h registry-pg-rw -U postgres -d app \ + -c "SELECT schemaname, tablename, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;" + ' + + - name: Cleanup + if: always() + run: | + # Remove dump file + rm -f /tmp/prod-backup.dump + + # Remove configmap + kubectl delete configmap prod-backup -n default || true + + # Clean up old jobs (keep last 3) + kubectl get jobs -n default --sort-by=.metadata.creationTimestamp -o name | grep -E 'pg-dump-|pg-restore-' | head -n -3 | xargs -r kubectl delete -n default || true From ec6aa7375595ffb2f6ae80f81f547d028cc33166 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Wed, 8 Oct 2025 14:25:47 +0300 Subject: [PATCH 2/6] Use the existing backups of prod instead Signed-off-by: Radoslav Dimitrov --- .github/workflows/sync-db.yml | 391 ++++++++++++++++++++++++---------- 1 file changed, 273 insertions(+), 118 deletions(-) diff --git a/.github/workflows/sync-db.yml b/.github/workflows/sync-db.yml index b6006bfa..7d7c7386 100644 --- a/.github/workflows/sync-db.yml +++ b/.github/workflows/sync-db.yml @@ -1,4 +1,4 @@ -name: Sync Production DB to Staging +name: Sync Production DB to Staging (from backups) on: schedule: @@ -11,7 +11,7 @@ permissions: jobs: sync-database: - name: Sync Prod DB to Staging + name: Sync Prod DB to Staging from k8up Backups runs-on: ubuntu-latest environment: staging concurrency: @@ -29,94 +29,177 @@ jobs: project_id: mcp-registry-prod install_components: gke-gcloud-auth-plugin - - name: Get prod GKE credentials + - name: Get backup credentials from prod cluster + id: backup-creds run: | + # Connect to prod cluster to get backup credentials gcloud container clusters get-credentials mcp-registry-prod \ --zone=us-central1-b \ --project=mcp-registry-prod - - name: Dump production database + # Extract backup credentials from prod cluster + ACCESS_KEY=$(kubectl get secret k8up-backup-credentials -n default -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d) + SECRET_KEY=$(kubectl get secret k8up-backup-credentials -n default -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d) + + # Store in outputs (GitHub Actions encrypts these automatically) + echo "access_key=$ACCESS_KEY" >> $GITHUB_OUTPUT + echo "secret_key=$SECRET_KEY" >> $GITHUB_OUTPUT + + echo "✓ Backup credentials extracted from prod" + + - name: Switch to staging cluster + uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 + with: + credentials_json: ${{ secrets.GCP_STAGING_SERVICE_ACCOUNT_KEY }} + + - name: Configure staging cluster access + run: | + gcloud config set project mcp-registry-staging + gcloud container clusters get-credentials mcp-registry-staging \ + --zone=us-central1-b \ + --project=mcp-registry-staging + + echo "✓ Connected to staging cluster" + + - name: Create secret for prod backup bucket access + run: | + # Create/update secret in staging with read-only access to prod backups + kubectl create secret generic prod-backup-credentials \ + --from-literal=AWS_ACCESS_KEY_ID="${{ steps.backup-creds.outputs.access_key }}" \ + --from-literal=AWS_SECRET_ACCESS_KEY="${{ steps.backup-creds.outputs.secret_key }}" \ + --dry-run=client -o yaml | kubectl apply -f - + + echo "✓ Backup credentials configured in staging" + + - name: Create restore PVC run: | - # Create a job to dump the database from within the prod cluster kubectl apply -f - <> $GITHUB_OUTPUT + + # Create a k8up Restore resource to restore from 
prod backups + kubectl apply -f - </dev/null) + if [ -n "$JOB_NAME" ]; then + echo "Found restore job: $JOB_NAME" + break + fi + echo "Waiting for job to be created... ($i/30)" + sleep 2 + done - - name: Switch to staging cluster + if [ -z "$JOB_NAME" ]; then + echo "ERROR: Restore job not found" + kubectl get restore $RESTORE_NAME -n default -o yaml + exit 1 + fi + + # Wait for the restore job to complete (max 15 minutes) + kubectl wait --for=condition=complete \ + job/$JOB_NAME \ + --timeout=900s -n default || { + echo "Restore job failed or timed out" + kubectl describe job/$JOB_NAME -n default + kubectl logs job/$JOB_NAME -n default --tail=100 + exit 1 + } + + echo "✓ k8up restore completed successfully" + + - name: Find staging PostgreSQL PVC + id: pgdata-pvc run: | - gcloud config set project mcp-registry-staging - gcloud container clusters get-credentials mcp-registry-staging \ - --zone=us-central1-b \ - --project=mcp-registry-staging + # Find the PVC used by the PostgreSQL cluster + PVC_NAME=$(kubectl get pvc -n default -l cnpg.io/cluster=registry-pg -o jsonpath='{.items[0].metadata.name}') + + if [ -z "$PVC_NAME" ]; then + echo "ERROR: Could not find PostgreSQL PVC" + kubectl get pvc -n default -l cnpg.io/cluster=registry-pg + exit 1 + fi + + echo "pvc_name=$PVC_NAME" >> $GITHUB_OUTPUT + echo "✓ Found PostgreSQL PVC: $PVC_NAME" + + - name: Scale down staging PostgreSQL + run: | + echo "Scaling down PostgreSQL cluster..." + kubectl patch cluster registry-pg -n default \ + --type merge \ + --patch '{"spec":{"instances":0}}' + + # Wait for pods to terminate + echo "Waiting for pods to terminate..." + kubectl wait --for=delete pod -l cnpg.io/cluster=registry-pg -n default --timeout=300s || true - - name: Restore to staging database + echo "✓ PostgreSQL scaled down" + + - name: Replace staging database with restored backup + id: copy-job run: | - # Create a configmap with the dump file - kubectl create configmap prod-backup --from-file=/tmp/prod-backup.dump -n default --dry-run=client -o yaml | kubectl apply -f - + JOB_NAME="copy-pgdata-$(date +%Y%m%d-%H%M%S)" + echo "job_name=$JOB_NAME" >> $GITHUB_OUTPUT - # Create a job to restore the database + # Create a job to copy the restored backup data to the staging PVC kubectl apply -f - </dev/null | head -20 - echo "Waiting for database to be ready..." - until pg_isready -h registry-pg-rw -U postgres; do - sleep 2 + # Try different possible paths for pgdata + PGDATA_SOURCE="" + for path in \$(find /restore -type d -name "pgdata" 2>/dev/null); do + if [ -f "\$path/PG_VERSION" ]; then + PGDATA_SOURCE="\$path" + break + fi done - echo "Terminating existing connections..." - PGPASSWORD=\$POSTGRES_PASSWORD psql -h registry-pg-rw -U postgres -d postgres \ - -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = 'app' AND pid <> pg_backend_pid();" || true - - echo "Dropping and recreating database..." - PGPASSWORD=\$POSTGRES_PASSWORD psql -h registry-pg-rw -U postgres -d postgres \ - -c "DROP DATABASE IF EXISTS app;" - PGPASSWORD=\$POSTGRES_PASSWORD psql -h registry-pg-rw -U postgres -d postgres \ - -c "CREATE DATABASE app;" - - echo "Restoring database from backup..." 
- PGPASSWORD=\$POSTGRES_PASSWORD pg_restore \ - -h registry-pg-rw \ - -U postgres \ - -d app \ - --verbose \ - --no-owner \ - --no-acl \ - /backup/prod-backup.dump - - echo "✓ Database restore completed successfully" - env: - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: registry-pg-superuser - key: password + if [ -z "\$PGDATA_SOURCE" ]; then + echo "ERROR: Could not find valid pgdata directory with PG_VERSION" + echo "Searched paths:" + find /restore -type d -name "pgdata" 2>/dev/null + exit 1 + fi + + echo "Found pgdata at: \$PGDATA_SOURCE" + echo "Contents:" + ls -lah \$PGDATA_SOURCE/ | head -10 + + echo "Backing up existing staging data..." + mkdir -p /pgdata-backup + if [ "\$(ls -A /pgdata)" ]; then + cp -a /pgdata/. /pgdata-backup/ || echo "Warning: Could not backup existing data" + fi + + echo "Clearing existing data..." + rm -rf /pgdata/* + + echo "Copying backup data to staging PVC..." + cp -a \$PGDATA_SOURCE/. /pgdata/ + + echo "Setting correct permissions..." + chmod 700 /pgdata + + echo "✓ Data copy completed" + ls -lah /pgdata/ | head -20 + echo "PostgreSQL version: \$(cat /pgdata/PG_VERSION)" volumeMounts: - - name: backup-data - mountPath: /backup + - name: restore-data + mountPath: /restore + - name: staging-pgdata + mountPath: /pgdata volumes: - - name: backup-data - configMap: - name: prod-backup + - name: restore-data + persistentVolumeClaim: + claimName: restore-data-pvc + - name: staging-pgdata + persistentVolumeClaim: + claimName: ${{ steps.pgdata-pvc.outputs.pvc_name }} EOF - # Wait for restore to complete - kubectl wait --for=condition=complete job -l job-name --timeout=600s -n default || { - echo "Restore job failed" - POD_NAME=$(kubectl get pods -l job-name -n default --sort-by=.metadata.creationTimestamp -o jsonpath='{.items[-1].metadata.name}') - kubectl logs $POD_NAME -n default --tail=100 + echo "✓ Copy job created: $JOB_NAME" + + - name: Wait for data copy to complete + run: | + JOB_NAME="${{ steps.copy-job.outputs.job_name }}" + + # Wait for copy to complete + kubectl wait --for=condition=complete job/$JOB_NAME --timeout=600s -n default || { + echo "Data copy job failed" + kubectl describe job/$JOB_NAME -n default + kubectl logs job/$JOB_NAME -n default --tail=100 exit 1 } - echo "✓ Restore completed successfully" + echo "✓ Database data replaced successfully" + + - name: Scale up staging PostgreSQL + run: | + echo "Scaling up PostgreSQL cluster..." + kubectl patch cluster registry-pg -n default \ + --type merge \ + --patch '{"spec":{"instances":1}}' + + # Wait for PostgreSQL pod to be created + echo "Waiting for PostgreSQL pod to be created..." + for i in {1..60}; do + POD_COUNT=$(kubectl get pods -l cnpg.io/cluster=registry-pg -n default --no-headers 2>/dev/null | wc -l) + if [ "$POD_COUNT" -gt 0 ]; then + echo "Pod created" + break + fi + echo "Waiting... ($i/60)" + sleep 2 + done + + # Wait for PostgreSQL to be ready + echo "Waiting for PostgreSQL to be ready..." 
+ kubectl wait --for=condition=ready pod -l cnpg.io/cluster=registry-pg -n default --timeout=300s + + echo "✓ PostgreSQL is running" - name: Verify staging DB is functional - if: always() run: | # Create a verification pod kubectl run pg-verify-$(date +%s) \ @@ -192,7 +319,23 @@ jobs: --rm -i --restart=Never \ --env="PGPASSWORD=$(kubectl get secret registry-pg-superuser -n default -o jsonpath='{.data.password}' | base64 -d)" \ -- bash -c ' - TABLE_COUNT=$(psql -h registry-pg-rw -U postgres -d app -tAc "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '\''public'\'';") + echo "Waiting for database to accept connections..." + for i in {1..30}; do + if pg_isready -h registry-pg-rw -U postgres 2>/dev/null; then + break + fi + echo "Waiting... ($i/30)" + sleep 2 + done + + echo "Querying database..." + TABLE_COUNT=$(psql -h registry-pg-rw -U postgres -d app -tAc "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = '\''public'\'';" 2>&1) + + if [ $? -ne 0 ]; then + echo "ERROR: Could not query database" + echo "$TABLE_COUNT" + exit 1 + fi if [ "$TABLE_COUNT" -lt 1 ]; then echo "ERROR: Staging DB has no tables!" @@ -203,17 +346,29 @@ jobs: echo "Top 10 tables by row count:" psql -h registry-pg-rw -U postgres -d app \ - -c "SELECT schemaname, tablename, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;" + -c "SELECT schemaname, tablename, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;" || true ' + echo "✓ Database verification completed" + - name: Cleanup if: always() run: | - # Remove dump file - rm -f /tmp/prod-backup.dump + # Clean up jobs first + if [ -n "${{ steps.copy-job.outputs.job_name }}" ]; then + kubectl delete job ${{ steps.copy-job.outputs.job_name }} -n default || true + fi + + # Remove restore PVC (will wait for jobs to finish) + kubectl delete pvc restore-data-pvc -n default || true + + # Remove prod backup credentials (for security) + kubectl delete secret prod-backup-credentials -n default || true + + # Clean up old restore resources (keep last 3) + kubectl get restore -n default --sort-by=.metadata.creationTimestamp -o name | head -n -3 | xargs -r kubectl delete || true - # Remove configmap - kubectl delete configmap prod-backup -n default || true + # Clean up old copy jobs (keep last 3) + kubectl get jobs -n default --sort-by=.metadata.creationTimestamp -o name | grep 'copy-pgdata-' | head -n -3 | xargs -r kubectl delete -n default || true - # Clean up old jobs (keep last 3) - kubectl get jobs -n default --sort-by=.metadata.creationTimestamp -o name | grep -E 'pg-dump-|pg-restore-' | head -n -3 | xargs -r kubectl delete -n default || true + echo "✓ Cleanup completed" From d51f3eab6fe7b049d13f1673399cca436f5efc32 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Wed, 8 Oct 2025 15:32:08 +0300 Subject: [PATCH 3/6] Add additional checks to ensure we are in staging Signed-off-by: Radoslav Dimitrov --- .github/workflows/sync-db.yml | 111 +++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sync-db.yml b/.github/workflows/sync-db.yml index 7d7c7386..d4bbdfae 100644 --- a/.github/workflows/sync-db.yml +++ b/.github/workflows/sync-db.yml @@ -13,11 +13,24 @@ jobs: sync-database: name: Sync Prod DB to Staging from k8up Backups runs-on: ubuntu-latest - environment: staging + environment: staging # CRITICAL: This ensures we're targeting staging and requires staging environment approval concurrency: group: sync-staging-database 
cancel-in-progress: false steps: + - name: Initial safety banner + run: | + echo "╔════════════════════════════════════════════════════════════╗" + echo "║ ⚠️ SAFETY NOTICE ⚠️ ║" + echo "║ ║" + echo "║ This workflow will REPLACE staging database with ║" + echo "║ production backup data. This is IRREVERSIBLE. ║" + echo "║ ║" + echo "║ Target: STAGING ONLY ║" + echo "║ Source: Production backups (read-only) ║" + echo "║ ║" + echo "║ Multiple safety checks will verify we're in staging. ║" + echo "╚════════════════════════════════════════════════════════════╝" - name: Authenticate to Google Cloud (Production) uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 with: @@ -160,6 +173,51 @@ jobs: echo "✓ k8up restore completed successfully" + - name: Verify we are in staging cluster (SAFETY CHECK) + run: | + echo "=== SAFETY CHECK: Verifying cluster context ===" + + # Get current cluster context + CURRENT_CONTEXT=$(kubectl config current-context) + CURRENT_PROJECT=$(gcloud config get-value project) + + echo "Current kubectl context: $CURRENT_CONTEXT" + echo "Current GCP project: $CURRENT_PROJECT" + + # CRITICAL: Abort if we're in prod + if echo "$CURRENT_CONTEXT" | grep -qi "prod"; then + echo "❌ SAFETY CHECK FAILED: Currently connected to PRODUCTION cluster!" + echo "Context: $CURRENT_CONTEXT" + echo "ABORTING to prevent data loss" + exit 1 + fi + + if [ "$CURRENT_PROJECT" = "mcp-registry-prod" ]; then + echo "❌ SAFETY CHECK FAILED: Currently in PRODUCTION project!" + echo "Project: $CURRENT_PROJECT" + echo "ABORTING to prevent data loss" + exit 1 + fi + + # Verify we're in staging + if ! echo "$CURRENT_CONTEXT" | grep -qi "staging"; then + echo "❌ SAFETY CHECK FAILED: Not in staging cluster" + echo "Context: $CURRENT_CONTEXT" + echo "Expected: staging cluster" + exit 1 + fi + + if [ "$CURRENT_PROJECT" != "mcp-registry-staging" ]; then + echo "❌ SAFETY CHECK FAILED: Not in staging project" + echo "Project: $CURRENT_PROJECT" + echo "Expected: mcp-registry-staging" + exit 1 + fi + + echo "✅ SAFETY CHECK PASSED: Confirmed we are in STAGING" + echo " Context: $CURRENT_CONTEXT" + echo " Project: $CURRENT_PROJECT" + - name: Find staging PostgreSQL PVC id: pgdata-pvc run: | @@ -175,6 +233,11 @@ jobs: echo "pvc_name=$PVC_NAME" >> $GITHUB_OUTPUT echo "✓ Found PostgreSQL PVC: $PVC_NAME" + # Additional safety: Verify PVC is in staging by checking labels or annotations + PVC_INFO=$(kubectl get pvc $PVC_NAME -n default -o yaml) + echo "PVC Details:" + echo "$PVC_INFO" | grep -E "name:|namespace:|labels:" | head -10 + - name: Scale down staging PostgreSQL run: | echo "Scaling down PostgreSQL cluster..." @@ -188,6 +251,52 @@ jobs: echo "✓ PostgreSQL scaled down" + - name: Final safety check before data replacement + run: | + echo "=== FINAL SAFETY CHECK BEFORE DATA REPLACEMENT ===" + + # Re-verify we're in staging (paranoid check) + CURRENT_CONTEXT=$(kubectl config current-context) + CURRENT_PROJECT=$(gcloud config get-value project) + + if echo "$CURRENT_CONTEXT" | grep -qi "prod" || [ "$CURRENT_PROJECT" = "mcp-registry-prod" ]; then + echo "❌ FINAL SAFETY CHECK FAILED: Detected production environment!" 
+ echo "Context: $CURRENT_CONTEXT" + echo "Project: $CURRENT_PROJECT" + echo "ABORTING IMMEDIATELY" + exit 1 + fi + + # Verify the PVC we're about to modify + PVC_NAME="${{ steps.pgdata-pvc.outputs.pvc_name }}" + echo "About to modify PVC: $PVC_NAME" + + # Check if PVC has any production indicators + if echo "$PVC_NAME" | grep -qi "prod"; then + echo "❌ SAFETY CHECK FAILED: PVC name contains 'prod'" + echo "PVC: $PVC_NAME" + echo "This might be a production PVC. ABORTING." + exit 1 + fi + + # Verify PostgreSQL is scaled down (safety measure) + POD_COUNT=$(kubectl get pods -l cnpg.io/cluster=registry-pg -n default --no-headers 2>/dev/null | wc -l) + if [ "$POD_COUNT" -gt 0 ]; then + echo "❌ SAFETY CHECK FAILED: PostgreSQL pods are still running!" + echo "Expected 0 pods, found: $POD_COUNT" + echo "Database must be scaled down before data replacement" + exit 1 + fi + + echo "✅ FINAL SAFETY CHECK PASSED" + echo " Environment: STAGING" + echo " Context: $CURRENT_CONTEXT" + echo " Project: $CURRENT_PROJECT" + echo " Target PVC: $PVC_NAME" + echo " PostgreSQL pods: 0 (scaled down)" + echo "" + echo "Proceeding with data replacement..." + - name: Replace staging database with restored backup id: copy-job run: | From d4119e91f47963bd9d21995c4804a16392d6ad86 Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Wed, 8 Oct 2025 15:35:08 +0300 Subject: [PATCH 4/6] Update wrong comment Signed-off-by: Radoslav Dimitrov --- .github/workflows/sync-db.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-db.yml b/.github/workflows/sync-db.yml index d4bbdfae..d629d41a 100644 --- a/.github/workflows/sync-db.yml +++ b/.github/workflows/sync-db.yml @@ -76,7 +76,7 @@ jobs: - name: Create secret for prod backup bucket access run: | - # Create/update secret in staging with read-only access to prod backups + # Create/update secret in staging with access to prod backups kubectl create secret generic prod-backup-credentials \ --from-literal=AWS_ACCESS_KEY_ID="${{ steps.backup-creds.outputs.access_key }}" \ --from-literal=AWS_SECRET_ACCESS_KEY="${{ steps.backup-creds.outputs.secret_key }}" \ From b94b056a7f4aeaea827742d4f053517ac4f7e5ef Mon Sep 17 00:00:00 2001 From: Radoslav Dimitrov Date: Wed, 8 Oct 2025 15:39:25 +0300 Subject: [PATCH 5/6] Log out from prod once we got the backup credentials Signed-off-by: Radoslav Dimitrov --- .github/workflows/sync-db.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/.github/workflows/sync-db.yml b/.github/workflows/sync-db.yml index d629d41a..4e0eecae 100644 --- a/.github/workflows/sync-db.yml +++ b/.github/workflows/sync-db.yml @@ -60,6 +60,35 @@ jobs: echo "✓ Backup credentials extracted from prod" + - name: Remove all production access (SAFETY MEASURE) + run: | + echo "=== REMOVING ALL PRODUCTION ACCESS ===" + + # Remove production cluster from kubeconfig + kubectl config delete-context gke_mcp-registry-prod_us-central1-b_mcp-registry-prod 2>/dev/null || true + + # Revoke gcloud credentials + gcloud auth revoke --all 2>/dev/null || true + + # Clear gcloud configuration + gcloud config unset project 2>/dev/null || true + gcloud config unset account 2>/dev/null || true + + # Verify no contexts contain "prod" + if kubectl config get-contexts | grep -i prod; then + echo "❌ ERROR: Production context still exists!" 
+ kubectl config get-contexts + exit 1 + fi + + echo "✅ Production access completely removed" + echo " - Kubeconfig cleared" + echo " - GCloud credentials revoked" + echo " - No production contexts remaining" + echo "" + echo "Remaining contexts:" + kubectl config get-contexts || echo "No contexts (expected)" + - name: Switch to staging cluster uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 with: From f39faa62a55c6bd27e5fbc27ec8a28d39cf57b29 Mon Sep 17 00:00:00 2001 From: Adam Jones Date: Thu, 9 Oct 2025 10:53:00 +0000 Subject: [PATCH 6/6] Cleanup workflow a bit --- .github/workflows/sync-db.yml | 178 ++++++---------------------------- 1 file changed, 27 insertions(+), 151 deletions(-) diff --git a/.github/workflows/sync-db.yml b/.github/workflows/sync-db.yml index 4e0eecae..419bfee5 100644 --- a/.github/workflows/sync-db.yml +++ b/.github/workflows/sync-db.yml @@ -13,24 +13,11 @@ jobs: sync-database: name: Sync Prod DB to Staging from k8up Backups runs-on: ubuntu-latest - environment: staging # CRITICAL: This ensures we're targeting staging and requires staging environment approval + environment: staging concurrency: group: sync-staging-database cancel-in-progress: false steps: - - name: Initial safety banner - run: | - echo "╔════════════════════════════════════════════════════════════╗" - echo "║ ⚠️ SAFETY NOTICE ⚠️ ║" - echo "║ ║" - echo "║ This workflow will REPLACE staging database with ║" - echo "║ production backup data. This is IRREVERSIBLE. ║" - echo "║ ║" - echo "║ Target: STAGING ONLY ║" - echo "║ Source: Production backups (read-only) ║" - echo "║ ║" - echo "║ Multiple safety checks will verify we're in staging. ║" - echo "╚════════════════════════════════════════════════════════════╝" - name: Authenticate to Google Cloud (Production) uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 with: @@ -45,25 +32,18 @@ jobs: - name: Get backup credentials from prod cluster id: backup-creds run: | - # Connect to prod cluster to get backup credentials gcloud container clusters get-credentials mcp-registry-prod \ --zone=us-central1-b \ --project=mcp-registry-prod - # Extract backup credentials from prod cluster - ACCESS_KEY=$(kubectl get secret k8up-backup-credentials -n default -o jsonpath='{.data.AWS_ACCESS_KEY_ID}' | base64 -d) - SECRET_KEY=$(kubectl get secret k8up-backup-credentials -n default -o jsonpath='{.data.AWS_SECRET_ACCESS_KEY}' | base64 -d) - # Store in outputs (GitHub Actions encrypts these automatically) - echo "access_key=$ACCESS_KEY" >> $GITHUB_OUTPUT - echo "secret_key=$SECRET_KEY" >> $GITHUB_OUTPUT - - echo "✓ Backup credentials extracted from prod" + kubectl get secret k8up-backup-credentials -n default -o json | jq -r ' + "access_key=" + (.data.AWS_ACCESS_KEY_ID | @base64d), + "secret_key=" + (.data.AWS_SECRET_ACCESS_KEY | @base64d) + ' >> $GITHUB_OUTPUT - name: Remove all production access (SAFETY MEASURE) run: | - echo "=== REMOVING ALL PRODUCTION ACCESS ===" - # Remove production cluster from kubeconfig kubectl config delete-context gke_mcp-registry-prod_us-central1-b_mcp-registry-prod 2>/dev/null || true @@ -74,21 +54,14 @@ jobs: gcloud config unset project 2>/dev/null || true gcloud config unset account 2>/dev/null || true - # Verify no contexts contain "prod" - if kubectl config get-contexts | grep -i prod; then - echo "❌ ERROR: Production context still exists!" 
+ # Verify no contexts remain + CONTEXT_COUNT=$(kubectl config get-contexts -o name 2>/dev/null | wc -l) + if [ "$CONTEXT_COUNT" -gt 0 ]; then + echo "❌ ERROR: $CONTEXT_COUNT context(s) still exist after cleanup!" kubectl config get-contexts exit 1 fi - echo "✅ Production access completely removed" - echo " - Kubeconfig cleared" - echo " - GCloud credentials revoked" - echo " - No production contexts remaining" - echo "" - echo "Remaining contexts:" - kubectl config get-contexts || echo "No contexts (expected)" - - name: Switch to staging cluster uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 with: @@ -101,18 +74,14 @@ jobs: --zone=us-central1-b \ --project=mcp-registry-staging - echo "✓ Connected to staging cluster" - - name: Create secret for prod backup bucket access run: | # Create/update secret in staging with access to prod backups - kubectl create secret generic prod-backup-credentials \ + kubectl create secret generic prod-to-staging-sync-credentials \ --from-literal=AWS_ACCESS_KEY_ID="${{ steps.backup-creds.outputs.access_key }}" \ --from-literal=AWS_SECRET_ACCESS_KEY="${{ steps.backup-creds.outputs.secret_key }}" \ --dry-run=client -o yaml | kubectl apply -f - - echo "✓ Backup credentials configured in staging" - - name: Create restore PVC run: | kubectl apply -f - <> $GITHUB_OUTPUT - echo "✓ Found PostgreSQL PVC: $PVC_NAME" - - # Additional safety: Verify PVC is in staging by checking labels or annotations - PVC_INFO=$(kubectl get pvc $PVC_NAME -n default -o yaml) - echo "PVC Details:" - echo "$PVC_INFO" | grep -E "name:|namespace:|labels:" | head -10 - name: Scale down staging PostgreSQL run: | @@ -278,54 +190,30 @@ jobs: echo "Waiting for pods to terminate..." kubectl wait --for=delete pod -l cnpg.io/cluster=registry-pg -n default --timeout=300s || true - echo "✓ PostgreSQL scaled down" - - - name: Final safety check before data replacement + - name: Verify we are in staging cluster (SAFETY CHECK) run: | - echo "=== FINAL SAFETY CHECK BEFORE DATA REPLACEMENT ===" - - # Re-verify we're in staging (paranoid check) + # Get current cluster context CURRENT_CONTEXT=$(kubectl config current-context) CURRENT_PROJECT=$(gcloud config get-value project) - if echo "$CURRENT_CONTEXT" | grep -qi "prod" || [ "$CURRENT_PROJECT" = "mcp-registry-prod" ]; then - echo "❌ FINAL SAFETY CHECK FAILED: Detected production environment!" - echo "Context: $CURRENT_CONTEXT" - echo "Project: $CURRENT_PROJECT" - echo "ABORTING IMMEDIATELY" - exit 1 - fi - - # Verify the PVC we're about to modify - PVC_NAME="${{ steps.pgdata-pvc.outputs.pvc_name }}" - echo "About to modify PVC: $PVC_NAME" + echo "Current kubectl context: $CURRENT_CONTEXT" + echo "Current GCP project: $CURRENT_PROJECT" - # Check if PVC has any production indicators - if echo "$PVC_NAME" | grep -qi "prod"; then - echo "❌ SAFETY CHECK FAILED: PVC name contains 'prod'" - echo "PVC: $PVC_NAME" - echo "This might be a production PVC. ABORTING." + # Verify we're in staging + if ! echo "$CURRENT_CONTEXT" | grep -qi "staging"; then + echo "❌ SAFETY CHECK FAILED: Not in staging cluster" + echo "Context: $CURRENT_CONTEXT" + echo "Expected: staging cluster" exit 1 fi - # Verify PostgreSQL is scaled down (safety measure) - POD_COUNT=$(kubectl get pods -l cnpg.io/cluster=registry-pg -n default --no-headers 2>/dev/null | wc -l) - if [ "$POD_COUNT" -gt 0 ]; then - echo "❌ SAFETY CHECK FAILED: PostgreSQL pods are still running!" 
- echo "Expected 0 pods, found: $POD_COUNT" - echo "Database must be scaled down before data replacement" + if [ "$CURRENT_PROJECT" != "mcp-registry-staging" ]; then + echo "❌ SAFETY CHECK FAILED: Not in staging project" + echo "Project: $CURRENT_PROJECT" + echo "Expected: mcp-registry-staging" exit 1 fi - echo "✅ FINAL SAFETY CHECK PASSED" - echo " Environment: STAGING" - echo " Context: $CURRENT_CONTEXT" - echo " Project: $CURRENT_PROJECT" - echo " Target PVC: $PVC_NAME" - echo " PostgreSQL pods: 0 (scaled down)" - echo "" - echo "Proceeding with data replacement..." - - name: Replace staging database with restored backup id: copy-job run: | @@ -391,7 +279,6 @@ jobs: echo "Setting correct permissions..." chmod 700 /pgdata - echo "✓ Data copy completed" ls -lah /pgdata/ | head -20 echo "PostgreSQL version: \$(cat /pgdata/PG_VERSION)" volumeMounts: @@ -408,8 +295,6 @@ jobs: claimName: ${{ steps.pgdata-pvc.outputs.pvc_name }} EOF - echo "✓ Copy job created: $JOB_NAME" - - name: Wait for data copy to complete run: | JOB_NAME="${{ steps.copy-job.outputs.job_name }}" @@ -422,8 +307,6 @@ jobs: exit 1 } - echo "✓ Database data replaced successfully" - - name: Scale up staging PostgreSQL run: | echo "Scaling up PostgreSQL cluster..." @@ -447,8 +330,6 @@ jobs: echo "Waiting for PostgreSQL to be ready..." kubectl wait --for=condition=ready pod -l cnpg.io/cluster=registry-pg -n default --timeout=300s - echo "✓ PostgreSQL is running" - - name: Verify staging DB is functional run: | # Create a verification pod @@ -480,15 +361,12 @@ jobs: exit 1 fi - echo "✓ Staging DB is healthy with $TABLE_COUNT tables" - + echo "Staging DB has $TABLE_COUNT tables" echo "Top 10 tables by row count:" psql -h registry-pg-rw -U postgres -d app \ -c "SELECT schemaname, tablename, n_live_tup FROM pg_stat_user_tables ORDER BY n_live_tup DESC LIMIT 10;" || true ' - echo "✓ Database verification completed" - - name: Cleanup if: always() run: | @@ -501,12 +379,10 @@ jobs: kubectl delete pvc restore-data-pvc -n default || true # Remove prod backup credentials (for security) - kubectl delete secret prod-backup-credentials -n default || true + kubectl delete secret prod-to-staging-sync-credentials -n default || true # Clean up old restore resources (keep last 3) kubectl get restore -n default --sort-by=.metadata.creationTimestamp -o name | head -n -3 | xargs -r kubectl delete || true # Clean up old copy jobs (keep last 3) kubectl get jobs -n default --sort-by=.metadata.creationTimestamp -o name | grep 'copy-pgdata-' | head -n -3 | xargs -r kubectl delete -n default || true - - echo "✓ Cleanup completed"
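
A minimal sketch of how the finished series might be exercised once applied, assuming the six patches are saved locally as individual .patch files and the GitHub CLI is authenticated; the repository slug below is a placeholder:

    # Apply the series onto a local checkout, one commit per patch
    git am sync-db-*.patch
    git push origin main

    # Besides the nightly 02:00 UTC schedule, the workflow_dispatch hook
    # allows triggering the sync by hand once the workflow file is on the
    # default branch
    gh workflow run sync-db.yml --repo example-org/registry

    # Inspect recent runs of the sync workflow
    gh run list --workflow=sync-db.yml --repo example-org/registry

Because the job is pinned to the staging environment, a manually dispatched run will still wait for any approval rules configured on that environment before the sync starts.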