feat(cd): deploy instances with attached cached states #8868

Merged
5 commits merged on Sep 19, 2024
Changes from all commits
107 changes: 69 additions & 38 deletions .github/workflows/cd-deploy-nodes-gcp.yml
@@ -42,27 +42,26 @@ on:
type: boolean
default: false

# TODO: Temporarily disabled to reduce network load, see #6894.
#push:
# # Skip main branch updates where Rust code and dependencies aren't modified.
# branches:
# - main
# paths:
# # code and tests
# - '**/*.rs'
# # hard-coded checkpoints and proptest regressions
# - '**/*.txt'
# # dependencies
# - '**/Cargo.toml'
# - '**/Cargo.lock'
# # configuration files
# - '.cargo/config.toml'
# - '**/clippy.toml'
# # workflow definitions
# - 'docker/**'
# - '.dockerignore'
# - '.github/workflows/cd-deploy-nodes-gcp.yml'
# - '.github/workflows/sub-build-docker-image.yml'
push:
# Skip main branch updates where Rust code and dependencies aren't modified.
branches:
- main
paths:
# code and tests
- '**/*.rs'
# hard-coded checkpoints and proptest regressions
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# configuration files
- '.cargo/config.toml'
- '**/clippy.toml'
# workflow definitions
- 'docker/**'
- '.dockerignore'
- '.github/workflows/cd-deploy-nodes-gcp.yml'
- '.github/workflows/sub-build-docker-image.yml'

# Only runs the Docker image tests, doesn't deploy any instances
pull_request:
@@ -176,6 +175,19 @@ jobs:
test_variables: '-e NETWORK -e ZEBRA_CONF_PATH="zebrad/tests/common/configs/v1.0.0-rc.2.toml"'
network: ${{ inputs.network || vars.ZCASH_NETWORK }}

# Finds a `tip` cached state disk for zebra from the main branch
#
# Passes the disk name to subsequent jobs using `cached_disk_name` output
#
get-disk-name:
name: Get disk name
uses: ./.github/workflows/sub-find-cached-disks.yml
with:
network: ${{ inputs.network || vars.ZCASH_NETWORK }}
disk_prefix: zebrad-cache
disk_suffix: tip
prefer_main_cached_state: true
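As a rough guide, the image names these inputs select on follow the cache naming scheme used in gcp-get-cached-disks.sh further down; a minimal sketch of that shape, with a made-up state version and commit hash:

# Hedged sketch: shows the image-name shape only; the state version and
# commit hash below are illustrative, not real values.
DISK_PREFIX="zebrad-cache"   # from `disk_prefix` above
DISK_SUFFIX="tip"            # from `disk_suffix` above
NETWORK="Mainnet"
LOCAL_STATE_VERSION="26"     # illustrative
SHORT_SHA="abc1234"          # illustrative
echo "${DISK_PREFIX}-main-${SHORT_SHA}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
# -> zebrad-cache-main-abc1234-v26-Mainnet-tip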

# Deploy Managed Instance Groups (MiGs) for Mainnet and Testnet,
# with one node in the configured GCP region.
#
@@ -196,9 +208,11 @@ jobs:
matrix:
network: [Mainnet, Testnet]
name: Deploy ${{ matrix.network }} nodes
needs: [ build, versioning, test-configuration-file, test-zebra-conf-path ]
needs: [ build, versioning, test-configuration-file, test-zebra-conf-path, get-disk-name ]
runs-on: ubuntu-latest
timeout-minutes: 60
env:
CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
permissions:
contents: 'read'
id-token: 'write'
@@ -240,24 +254,31 @@ jobs:
# but the implementation is failing as it's requiring the disk names, contrary to what is stated in the official documentation
- name: Create instance template for ${{ matrix.network }}
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
fi
gcloud compute instance-templates create-with-container zebrad-${{ needs.versioning.outputs.major_version || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK} \
--boot-disk-size 300GB \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--boot-disk-size 50GB \
--boot-disk-type=pd-ssd \
--image-project=cos-cloud \
--image-family=cos-stable \
--user-output-enabled \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
--container-env "NETWORK=${{ matrix.network }},LOG_FILE=${{ vars.CD_LOG_FILE }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \
--create-disk=name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},device-name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},auto-delete=yes,size=300GB,type=pd-ssd,mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},mode=rw \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
--scopes cloud-platform \
--labels=app=zebrad,environment=prod,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--labels=app=zebrad,environment=staging,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--tags zebrad
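
In short, attaching the cached state comes down to appending an image= key to the --create-disk parameter string; a minimal sketch with placeholder names (the real ones are built from the branch slug, short SHA and network):

# Sketch only: placeholder names, not the values generated in CI.
NAME="zebrad-cache-my-branch-abc1234-Mainnet"
CACHED_DISK_NAME="zebrad-cache-main-def5678-v26-Mainnet-tip"   # output of get-disk-name
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
DISK_PARAMS+=",image=${CACHED_DISK_NAME}"
echo "${DISK_PARAMS}"
# Passed as --create-disk="${DISK_PARAMS}", gcloud seeds the data disk from
# that image instead of creating an empty 400GB disk.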

# Check if our destination instance group exists already
@@ -297,9 +318,11 @@ jobs:
# Note: these instances are not automatically replaced or deleted
deploy-instance:
name: Deploy single ${{ inputs.network }} instance
needs: [ build, test-configuration-file, test-zebra-conf-path ]
needs: [ build, test-configuration-file, test-zebra-conf-path, get-disk-name ]
runs-on: ubuntu-latest
timeout-minutes: 30
env:
CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }}
permissions:
contents: 'read'
id-token: 'write'
@@ -340,22 +363,30 @@ jobs:
# Create instance template from container image
- name: Manual deploy of a single ${{ inputs.network }} instance running zebrad
run: |
NAME="zebrad-cache-${{ env.GITHUB_HEAD_REF_SLUG_URL || env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}"
DISK_PARAMS="name=${NAME},device-name=${NAME},size=400GB,type=pd-ssd"
if [ -n "${{ env.CACHED_DISK_NAME }}" ]; then
DISK_PARAMS+=",image=${{ env.CACHED_DISK_NAME }}"
else
echo "No cached disk found for ${{ matrix.network }} in main branch"
exit 1
fi
gcloud compute instances create-with-container "zebrad-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-${NETWORK}" \
--boot-disk-size 300GB \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--boot-disk-size 50GB \
--boot-disk-type=pd-ssd \
--image-project=cos-cloud \
--image-family=cos-stable \
--user-output-enabled \
--metadata google-logging-enabled=true,google-logging-use-fluentbit=true,google-monitoring-enabled=true \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--create-disk="${DISK_PARAMS}" \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=${NAME},mode=rw \
--container-stdin \
--container-tty \
--container-image ${{ vars.GAR_BASE }}/zebrad@${{ needs.build.outputs.image_digest }} \
--container-env "NETWORK=${{ inputs.network }},LOG_FILE=${{ inputs.log_file }},LOG_COLOR=false,SENTRY_DSN=${{ vars.SENTRY_DSN }}" \
--create-disk=name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},device-name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},auto-delete=yes,size=300GB,type=pd-ssd,mode=rw \
--container-mount-disk=mount-path='/var/cache/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${NETWORK},mode=rw \
--machine-type ${{ vars.GCP_SMALL_MACHINE }} \
--network-interface=subnet=${{ vars.GCP_SUBNETWORK }} \
--service-account ${{ vars.GCP_DEPLOYMENTS_SA }} \
--scopes cloud-platform \
--metadata google-logging-enabled=true,google-monitoring-enabled=true \
--labels=app=zebrad,environment=qa,network=${NETWORK},github_ref=${{ env.GITHUB_REF_SLUG_URL }} \
--tags zebrad \
--zone ${{ vars.GCP_ZONE }}
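
Because this job is driven by workflow inputs, a run can also be started from the CLI; a hedged sketch, assuming network is exposed as a workflow_dispatch input and the remaining inputs keep their defaults:

# Assumption: cd-deploy-nodes-gcp.yml accepts a `network` dispatch input;
# other inputs are left at their defaults.
gh workflow run cd-deploy-nodes-gcp.yml -f network=Testnet
# Optionally follow the run that was just started:
gh run watch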
42 changes: 0 additions & 42 deletions .github/workflows/scripts/gcp-get-available-disks.sh

This file was deleted.

114 changes: 76 additions & 38 deletions .github/workflows/scripts/gcp-get-cached-disks.sh
@@ -1,20 +1,33 @@
#!/usr/bin/env bash

# Description:
# This script finds a cached Google Cloud Compute image based on specific criteria.
# It prioritizes images from the current commit, falls back to the main branch,
# and finally checks other branches if needed. The selected image is used for
# setting up the environment in a CI/CD pipeline.
#
# If there are multiple disks:
# - prefer images generated from the same commit, then
# - if prefer_main_cached_state is true, prefer images from the `main` branch, then
# - use any images from any other branch or commit.
#
# Within each of these categories:
# - prefer newer images to older images
#
# The selected image is used for setting up the environment in a CI/CD pipeline.
# It also checks if specific disk types are available for subsequent jobs.

set -eo pipefail

# Function to find and report a cached disk image
# Extract local state version
echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"

# Function to find a cached disk image based on the git pattern (commit, main, or any branch)
find_cached_disk_image() {
local search_pattern="${1}"
local git_pattern="${1}"
local git_source="${2}"
local disk_name
local disk_search_pattern="${DISK_PREFIX}-${git_pattern}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"

disk_name=$(gcloud compute images list --filter="status=READY AND name~${search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)

# Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
if [[ -n "${disk_name}" ]]; then
@@ -27,46 +40,71 @@ find_cached_disk_image() {
fi
}

# Extract local state version
echo "Extracting local state version..."
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: ${LOCAL_STATE_VERSION}"
# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image
if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then
# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
CACHED_DISK_NAME=""

# First, try to find a cached disk image from the current commit
CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit")

# Define DISK_PREFIX based on the requiring state directory
if [[ "${NEEDS_LWD_STATE}" == "true" ]]; then
DISK_PREFIX="${LWD_STATE_DIR}"
# If no cached disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Check if main branch images are preferred
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch")
# Else, try to find one from any branch
else
CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch")
fi
fi

# Handle case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Cached state test jobs must depend on the cached state rebuild job."
exit 1
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"
else
DISK_PREFIX="${ZEBRA_STATE_DIR:-${DISK_PREFIX}}"
echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search."
fi

# Find the most suitable cached disk image
echo "Finding the most suitable cached disk image..."
if [[ -z "${CACHED_DISK_NAME}" ]]; then
# Try to find a cached disk image from the current commit
COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${GITHUB_SHA_SHORT}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${COMMIT_DISK_PREFIX}" "commit")
# If no cached disk image is found, try to find one from the main branch
if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then
MAIN_DISK_PREFIX="${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${MAIN_DISK_PREFIX}" "main branch")
# Else, try to find one from any branch
# Function to find and output available disk image types (e.g., lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
find_available_disk_type() {
local base_name="${1}"
local disk_type="${2}"
local disk_pattern="${base_name}-cache"
local output_var="${base_name}_${disk_type}_disk"
local disk_name

disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)

# Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout
if [[ -n "${disk_name}" ]]; then
echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2
disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)")
echo "Description: ${disk_description}" >&2
echo "true" # This is the actual return value when a disk is found
else
ANY_DISK_PREFIX="${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}"
CACHED_DISK_NAME=$(find_cached_disk_image "${ANY_DISK_PREFIX}" "any branch")
echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2
echo "false" # This is the actual return value when no disk is found
fi
}
if [[ -n "${NETWORK}" ]]; then
# Check for specific disk images (lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk)
echo "Checking for specific disk images..."
LWD_TIP_DISK=$(find_available_disk_type "lwd" "tip")
ZEBRA_TIP_DISK=$(find_available_disk_type "zebrad" "tip")
ZEBRA_CHECKPOINT_DISK=$(find_available_disk_type "zebrad" "checkpoint")
fi

# Handle case where no suitable disk image is found
if [[ -z "${CACHED_DISK_NAME}" ]]; then
echo "No suitable cached state disk available."
echo "Expected pattern: ${COMMIT_DISK_PREFIX}"
echo "Cached state test jobs must depend on the cached state rebuild job."
exit 1
fi

echo "Selected Disk: ${CACHED_DISK_NAME}"

# Exporting variables for subsequent steps
echo "Exporting variables for subsequent steps..."
export CACHED_DISK_NAME="${CACHED_DISK_NAME}"
export LOCAL_STATE_VERSION="${LOCAL_STATE_VERSION}"
export LWD_TIP_DISK="${LWD_TIP_DISK}"
export ZEBRA_TIP_DISK="${ZEBRA_TIP_DISK}"
export ZEBRA_CHECKPOINT_DISK="${ZEBRA_CHECKPOINT_DISK}"
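
Putting the selection logic above together: the script first looks for an image built from the current commit, then falls back to either the main branch or any branch depending on PREFER_MAIN_CACHED_STATE, always taking the newest READY image that matches. A sketch of the resulting queries, with made-up values for the state version, network and commit:

# Illustrative only: the state version, network and short SHA are example values.
STATE=26; NET=Mainnet; SHA=abc1234
# 1. Images built from the current commit:
gcloud compute images list \
  --filter="status=READY AND name~zebrad-cache-.+-${SHA}-v${STATE}-${NET}-tip" \
  --sort-by=~creationTimestamp --limit=1 --format="value(NAME)"
# 2. If nothing matched and PREFER_MAIN_CACHED_STATE=true, main-branch images:
#      name~zebrad-cache-main-[0-9a-f]+-v${STATE}-${NET}-tip
# 3. Otherwise, images from any branch or commit:
#      name~zebrad-cache-.+-[0-9a-f]+-v${STATE}-${NET}-tip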