continuous-delivery #4084

Workflow file for this run

.github/workflows/continuous-delivery.yml at f00544d

	# This workflow executes the E2E Test Suite for a series of combinations that
	# represent different execution environments
	name: continuous-delivery
	# Events that start this workflow: ChatOps comments, manual/programmatic
	# dispatch, and a nightly schedule.
	on:
	  # React only to newly created comments. The activity-type filter key is
	  # `types` (plural); a `type` key is not a recognised filter.
	  issue_comment:
	    types: [created]
	  # Manually or triggered by another workflow
	  workflow_dispatch:
	    inputs:
	      depth:
	        description: 'Depth (push, pull_request, main (default), schedule)'
	        required: true
	        default: 'main'
	      limit:
	        description: 'Limit to the specified engines list (local, eks, aks, gke, openshift)'
	        required: false
	      test_level:
	        description: 'Test level: 0(highest) to 4(lowest). Default is 4.'
	        required: false
	        default: '4'
	      feature_type:
	        description: >
	          Feature Type (disruptive, performance, upgrade, smoke, basic, service-connectivity, self-healing,
	          backup-restore, snapshot, operator, observability, replication, plugin, postgres-configuration,
	          pod-scheduling, cluster-metadata, recovery, importing-databases, storage, security, maintenance,
	          tablespaces)
	        required: false
	      log_level:
	        description: 'Log level for operator (error, warning, info, debug(default), trace)'
	        required: false
	        default: 'debug'
	  # Once a day, at minute 0 of hour 1
	  schedule:
	    - cron: '0 1 * * *'

	# set up environment variables to be used across all the jobs
	# Workflow-wide environment, visible to every job below.
	env:
	  # Tool and dependency versions
	  GOLANG_VERSION: '1.23.x'
	  KUBEBUILDER_VERSION: '2.3.1'
	  KIND_VERSION: 'v0.26.0'
	  ROOK_VERSION: 'v1.16.0'
	  EXTERNAL_SNAPSHOTTER_VERSION: 'v8.2.0'

	  # Container registry coordinates and credentials
	  REGISTRY: 'ghcr.io'
	  REGISTRY_USER: ${{ github.actor }}
	  REGISTRY_PASSWORD: ${{ secrets.GITHUB_TOKEN }}
	  REPOSITORY_OWNER: 'cloudnative-pg'
	  OPERATOR_IMAGE_NAME: 'ghcr.io/${{ github.repository }}-testing'

	  # Image build settings (empty values are passed to the build-push steps as-is)
	  BUILD_PUSH_PROVENANCE: ''
	  BUILD_PUSH_CACHE_FROM: ''
	  BUILD_PUSH_CACHE_TO: ''
	  BUILD_MANAGER_RELEASE_ARGS: 'build --skip=validate --clean --id manager'
	  # Every extra platform (architecture) lengthens the build, even when the
	  # GitHub cache is used for the building process.
	  PLATFORMS: 'linux/amd64,linux/arm64'

	  # Miscellaneous
	  SLACK_USERNAME: 'cnpg-bot'
	  E2E_SUFFIX: 'cnpge2e'

	# Defaults applied to every 'run' step unless the step sets its own shell.
	defaults:
	  run:
	    # Strict bash: abort on errors, unset variables and failed pipeline
	    # members (-Eeuo pipefail), and trace every command (-x).
	    shell: "bash -Eeuo pipefail -x {0}"

	jobs:
	# Trigger the workflow on release-* branches for smoke testing whenever it's a scheduled run.
	# Note: this is a workaround since we can't directly schedule-run a workflow from a non default branch
	smoke_test_release_branches:
	  name: smoke test release-* branches when it's a scheduled run
	  # Only scheduled runs fan out to the release branches.
	  if: github.event_name == 'schedule'
	  runs-on: ubuntu-24.04
	  strategy:
	    # One branch failing to dispatch must not stop the others.
	    fail-fast: false
	    matrix:
	      branch:
	        - release-1.22
	        - release-1.23
	        - release-1.24
	  steps:
	    # Re-dispatch this same workflow on the matrix branch with fixed inputs.
	    - name: Invoke workflow with inputs
	      uses: benc-uk/workflow-dispatch@v1
	      with:
	        workflow: continuous-delivery
	        ref: ${{ matrix.branch }}
	        inputs: '{ "depth": "push", "limit": "local", "test_level": "4", "log_level": "debug" }'

	# ChatOps entry point: runs when a pull request comment starts with '/test',
	# parses its key=value arguments and exposes them as job outputs.
	check_commenter:
	  if: |
	    github.event_name == 'issue_comment' &&
	    github.event.issue.pull_request &&
	    startsWith(github.event.comment.body, '/test')
	  name: Retrieve command
	  runs-on: ubuntu-24.04
	  outputs:
	    github_ref: ${{ steps.refs.outputs.head_sha }}
	    # The values below are written to GITHUB_ENV by the 'Process arguments' step
	    depth: ${{ env.DEPTH }}
	    limit: ${{ env.LIMIT }}
	    test_level: ${{ env.TEST_LEVEL }}
	    feature_type: ${{ env.FEATURE_TYPE }}
	    log_level: ${{ env.LOG_LEVEL }}
	  steps:
	    - name: Check for Command
	      id: command
	      uses: xt0rted/slash-command-action@v2
	      continue-on-error: false
	      with:
	        command: test
	        reaction: "true"
	        reaction-type: "eyes"
	        allow-edits: "false"
	        permission-level: write

	    - name: Process arguments
	      id: args
	      env:
	        # The comment text is user-controlled: hand it to the script through
	        # the environment instead of expanding it into the script source,
	        # so it can never be interpreted as shell code.
	        ARGS: ${{ steps.command.outputs.command-arguments }}
	      run: |
	        # Set the defaults
	        DEPTH="main"
	        LIMIT="local"
	        TEST_LEVEL="4"
	        FEATURE_TYPE=""
	        LOG_LEVEL="debug"

	        # Arguments are whitespace-separated key=value pairs. Word splitting
	        # of ${ARGS} is intentional; globbing is disabled so that a '*' in
	        # the comment is not expanded against the workspace.
	        set -o noglob
	        for ARG in ${ARGS}; do
	          IFS='=' read -r name value <<< "${ARG}"
	          case "${name}" in
	            "depth"|"d")
	              DEPTH="${value}"
	              ;;
	            "limit"|"l")
	              LIMIT="${value}"
	              ;;
	            "test_level"|"level"|"tl")
	              TEST_LEVEL="${value}"
	              ;;
	            "feature_type"|"type"|"ft")
	              FEATURE_TYPE="${value}"
	              ;;
	            "log_level"|"ll")
	              LOG_LEVEL="${value}"
	              ;;
	            *)
	              # Unknown keys are silently ignored
	              ;;
	          esac
	        done
	        set +o noglob

	        echo "DEPTH=${DEPTH}" >> "$GITHUB_ENV"
	        echo "LIMIT=${LIMIT}" >> "$GITHUB_ENV"
	        echo "TEST_LEVEL=${TEST_LEVEL}" >> "$GITHUB_ENV"
	        echo "FEATURE_TYPE=${FEATURE_TYPE}" >> "$GITHUB_ENV"
	        echo "LOG_LEVEL=${LOG_LEVEL}" >> "$GITHUB_ENV"

	    - name: Resolve Git reference
	      uses: xt0rted/pull-request-comment-branch@v3
	      id: refs

	    # Reply on the pull request with a link to this workflow run
	    - name: Create comment
	      uses: peter-evans/create-or-update-comment@v4
	      with:
	        token: ${{ secrets.GITHUB_TOKEN }}
	        repository: ${{ github.repository }}
	        issue-number: ${{ github.event.issue.number }}
	        body: |
	          @${{ github.actor }}, here's the link to the E2E on CNPG workflow run: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}

	# Entry point for manual dispatches and scheduled runs: turns the dispatch
	# inputs (or the fixed schedule defaults) into job outputs.
	test_arguments:
	  name: Parse arguments
	  if: |
	    github.event_name == 'workflow_dispatch' || github.event_name == 'schedule'
	  runs-on: ubuntu-24.04
	  outputs:
	    github_ref: ${{ github.ref }}
	    # The values below are written to GITHUB_ENV by the step of this job
	    depth: ${{ env.DEPTH }}
	    limit: ${{ env.LIMIT }}
	    test_level: ${{ env.TEST_LEVEL }}
	    feature_type: ${{ env.FEATURE_TYPE }}
	    log_level: ${{ env.LOG_LEVEL }}
	  steps:
	    - name: Parse input to env
	      env:
	        # Dispatch inputs are free text: pass them through the environment
	        # rather than expanding them into the script source, so spaces or
	        # shell metacharacters cannot alter the script.
	        DISPATCH_DEPTH: ${{ github.event.inputs.depth }}
	        DISPATCH_LIMIT: ${{ github.event.inputs.limit }}
	        DISPATCH_TEST_LEVEL: ${{ github.event.inputs.test_level }}
	        DISPATCH_FEATURE_TYPE: ${{ github.event.inputs.feature_type }}
	        DISPATCH_LOG_LEVEL: ${{ github.event.inputs.log_level }}
	      run: |
	        # Set the defaults for workflow dispatch
	        if [[ ${{ github.event_name }} == 'workflow_dispatch' ]]; then
	          DEPTH="${DISPATCH_DEPTH}"
	          LIMIT="${DISPATCH_LIMIT}"
	          TEST_LEVEL="${DISPATCH_TEST_LEVEL}"
	          FEATURE_TYPE="${DISPATCH_FEATURE_TYPE}"
	          LOG_LEVEL="${DISPATCH_LOG_LEVEL}"
	        fi
	        # Set the defaults for schedule dispatch
	        if [[ ${{ github.event_name }} == 'schedule' ]]; then
	          DEPTH="schedule"
	          LIMIT=""
	          TEST_LEVEL="4"
	          FEATURE_TYPE=""
	          LOG_LEVEL="debug"
	        fi
	        echo "DEPTH=${DEPTH}" >> "$GITHUB_ENV"
	        echo "LIMIT=${LIMIT}" >> "$GITHUB_ENV"
	        echo "TEST_LEVEL=${TEST_LEVEL}" >> "$GITHUB_ENV"
	        echo "FEATURE_TYPE=${FEATURE_TYPE}" >> "$GITHUB_ENV"
	        echo "LOG_LEVEL=${LOG_LEVEL}" >> "$GITHUB_ENV"

	# Merges the options coming from whichever entry job ran (check_commenter
	# for ChatOps, test_arguments for dispatch/schedule) into one set of outputs
	# consumed by all the following jobs.
	evaluate_options:
	  name: Evaluate workflow options
	  needs:
	    - check_commenter
	    - test_arguments
	  runs-on: ubuntu-24.04
	  # One of the two entry jobs is always skipped, so require only that at
	  # least one of them succeeded.
	  if: |
	    (
	      needs.check_commenter.result == 'success' ||
	      needs.test_arguments.result == 'success'
	    ) &&
	    !cancelled()
	  outputs:
	    git_ref: ${{ env.GITHUB_REF }}
	    depth: ${{ env.DEPTH }}
	    limit: ${{ env.LIMIT }}
	    test_level: ${{ env.TEST_LEVEL }}
	    feature_type: ${{ env.FEATURE_TYPE }}
	    log_level: ${{ env.LOG_LEVEL }}
	  steps:
	    - name: From command
	      env:
	        # Upstream outputs ultimately come from user input (comment text or
	        # dispatch inputs). They are passed through the environment so that
	        # a value containing a quote cannot break out of the script.
	        DISPATCH_GITHUB_REF: ${{ needs.test_arguments.outputs.github_ref }}
	        DISPATCH_DEPTH: ${{ needs.test_arguments.outputs.depth }}
	        DISPATCH_LIMIT: ${{ needs.test_arguments.outputs.limit }}
	        DISPATCH_TEST_LEVEL: ${{ needs.test_arguments.outputs.test_level }}
	        DISPATCH_FEATURE_TYPE: ${{ needs.test_arguments.outputs.feature_type }}
	        DISPATCH_LOG_LEVEL: ${{ needs.test_arguments.outputs.log_level }}
	        COMMENT_GITHUB_REF: ${{ needs.check_commenter.outputs.github_ref }}
	        COMMENT_DEPTH: ${{ needs.check_commenter.outputs.depth }}
	        COMMENT_LIMIT: ${{ needs.check_commenter.outputs.limit }}
	        COMMENT_TEST_LEVEL: ${{ needs.check_commenter.outputs.test_level }}
	        COMMENT_FEATURE_TYPE: ${{ needs.check_commenter.outputs.feature_type }}
	        COMMENT_LOG_LEVEL: ${{ needs.check_commenter.outputs.log_level }}
	      run: |
	        # NOTE(review): GITHUB_REF is also the name of a runner default
	        # variable; here it is only read back through the `env` context in
	        # this job's outputs - confirm this keeps working as intended.
	        if [[ ${{ github.event_name }} == 'workflow_dispatch' ]] || [[ ${{ github.event_name }} == 'schedule' ]]; then
	          echo "GITHUB_REF=${DISPATCH_GITHUB_REF}" >> "$GITHUB_ENV"
	          echo "DEPTH=${DISPATCH_DEPTH}" >> "$GITHUB_ENV"
	          echo "LIMIT=${DISPATCH_LIMIT}" >> "$GITHUB_ENV"
	          echo "TEST_LEVEL=${DISPATCH_TEST_LEVEL}" >> "$GITHUB_ENV"
	          echo "FEATURE_TYPE=${DISPATCH_FEATURE_TYPE}" >> "$GITHUB_ENV"
	          echo "LOG_LEVEL=${DISPATCH_LOG_LEVEL}" >> "$GITHUB_ENV"
	        fi
	        if [[ ${{ github.event_name }} == 'issue_comment' ]]; then
	          echo "GITHUB_REF=${COMMENT_GITHUB_REF}" >> "$GITHUB_ENV"
	          echo "DEPTH=${COMMENT_DEPTH}" >> "$GITHUB_ENV"
	          echo "LIMIT=${COMMENT_LIMIT}" >> "$GITHUB_ENV"
	          echo "TEST_LEVEL=${COMMENT_TEST_LEVEL}" >> "$GITHUB_ENV"
	          echo "FEATURE_TYPE=${COMMENT_FEATURE_TYPE}" >> "$GITHUB_ENV"
	          echo "LOG_LEVEL=${COMMENT_LOG_LEVEL}" >> "$GITHUB_ENV"
	        fi

	# Builds the operator binary with GoReleaser, builds and pushes the operator
	# images (regular, UBI8 and, for test level 4, the "-prime" upgrade image),
	# and uploads the generated operator manifest as a workflow artifact.
	buildx:
	  name: Build containers
	  needs:
	    - check_commenter
	    - test_arguments
	    - evaluate_options
	  if: |
	    always() && !cancelled() &&
	    needs.evaluate_options.result == 'success'
	  runs-on: ubuntu-24.04
	  permissions:
	    contents: read
	    packages: write
	    pull-requests: read
	  outputs:
	    image: ${{ steps.image-meta.outputs.image }}
	    # 'branch_name' is used in 'GetMostRecentReleaseTag' in the Go code
	    branch_name: ${{ steps.build-meta.outputs.branch_name }}
	    upload_artifacts: ${{ steps.build-meta.outputs.upload_artifacts }}
	    commit_msg: ${{ steps.build-meta.outputs.commit_msg }}
	    commit_sha: ${{ steps.build-meta.outputs.commit_sha }}
	    author_name: ${{ steps.build-meta.outputs.author_name }}
	    author_email: ${{ steps.build-meta.outputs.author_email }}
	    # The four values below are written to GITHUB_ENV by 'Output images'
	    controller_img: ${{ env.CONTROLLER_IMG }}
	    controller_img_ubi8: ${{ env.CONTROLLER_IMG_UBI8 }}
	    bundle_img: ${{ env.BUNDLE_IMG }}
	    catalog_img: ${{ env.CATALOG_IMG }}
	  steps:
	    - name: Checkout
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ needs.evaluate_options.outputs.git_ref }}
	        # To identify the commit we need the history and all the tags.
	        fetch-depth: 0

	    - name: Install Go
	      uses: actions/setup-go@v5
	      with:
	        go-version: ${{ env.GOLANG_VERSION }}
	        check-latest: true

	    - name: Build meta
	      id: build-meta
	      env:
	        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        # Passed through the environment so the ref is never expanded into
	        # the script source.
	        GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
	      run: |
	        images='${{ env.OPERATOR_IMAGE_NAME }}'
	        tags=''
	        labels=''
	        commit_sha="${GIT_REF}"
	        commit_date=$(git log -1 --pretty=format:'%ad' --date short "${commit_sha}" || : )
	        # use git describe to get the nearest tag and use that to build the version (e.g. 1.4.0-dev24 or 1.4.0)
	        # The sed program: strip the leading 'v', strip the '-g<sha>' suffix,
	        # then turn the trailing '-<N>' commit count into '-dev<N>'. The
	        # capture group \(...\) is required for the \1 back-reference.
	        commit_version=$(git describe --tags --match 'v*' "${commit_sha}" | sed -e 's/^v//; s/-g[0-9a-f]\+$//; s/-\([0-9]\+\)$/-dev\1/')

	        # shortened commit sha
	        commit_short=$(git rev-parse --short "${commit_sha}")

	        # multiline strings are weird
	        # NOTE(review): this percent-encoding is what the legacy
	        # '::set-output' command decoded; confirm that values appended to
	        # GITHUB_OUTPUT are still decoded before 'commit_msg' is consumed.
	        commit_message=$(git show -s --format=%B "${commit_sha}")
	        commit_message=${commit_message//$'%'/'%25'}
	        commit_message=${commit_message//$'\n'/'%0A'}
	        commit_message=${commit_message//$'\r'/'%0D'}

	        # get git user and email
	        author_name=$(git show -s --format='%an' "${commit_sha}")
	        author_email=$(git show -s --format='%ae' "${commit_sha}")

	        # extract branch name
	        if [[ ${{ github.event_name }} == 'workflow_dispatch' ]] || [[ ${{ github.event_name }} == 'schedule' ]]
	        then
	          branch_name=${GITHUB_REF#refs/heads/}
	        fi
	        if [[ ${{ github.event_name }} == 'issue_comment' ]]
	        then
	          branch_name=$(gh pr view "${{ github.event.issue.number }}" --json headRefName -q '.headRefName' 2>/dev/null)
	        fi

	        # extract tag from branch name
	        tag_name=$(echo "$branch_name" | sed 's/[^a-zA-Z0-9]/-/g')

	        # Artifacts are published only for main and release-* branches
	        upload_artifacts=false
	        if [[ ${branch_name} == main || ${branch_name} =~ ^release- ]]; then
	          upload_artifacts=true
	        fi

	        echo "IMAGES=${images}" >> "$GITHUB_ENV"
	        echo "TAGS=${tags}" >> "$GITHUB_ENV"
	        echo "LABELS=${labels}" >> "$GITHUB_ENV"
	        echo "DATE=${commit_date}" >> "$GITHUB_ENV"
	        echo "VERSION=${commit_version}" >> "$GITHUB_ENV"
	        echo "COMMIT=${commit_short}" >> "$GITHUB_ENV"
	        echo "commit_sha=${commit_sha}" >> "$GITHUB_OUTPUT"
	        echo "commit_msg=${commit_message}" >> "$GITHUB_OUTPUT"
	        echo "author_name=${author_name}" >> "$GITHUB_OUTPUT"
	        echo "author_email=${author_email}" >> "$GITHUB_OUTPUT"
	        echo "branch_name=${branch_name}" >> "$GITHUB_OUTPUT"
	        echo "tag_name=${tag_name,,}" >> "$GITHUB_OUTPUT"
	        echo "upload_artifacts=${upload_artifacts}" >> "$GITHUB_OUTPUT"

	    - name: Set GoReleaser environment
	      run: |
	        echo GOPATH=$(go env GOPATH) >> $GITHUB_ENV
	        echo PWD=$(pwd) >> $GITHUB_ENV

	    - name: Run GoReleaser
	      uses: goreleaser/goreleaser-action@v6
	      with:
	        distribution: goreleaser
	        version: v2
	        args: ${{ env.BUILD_MANAGER_RELEASE_ARGS }}
	      env:
	        DATE: ${{ env.DATE }}
	        COMMIT: ${{ env.COMMIT }}
	        VERSION: ${{ env.VERSION }}

	    - name: Docker meta
	      id: docker-meta
	      uses: docker/metadata-action@v5
	      with:
	        images: ${{ env.IMAGES }}
	        tags: |
	          type=raw,value=${{ steps.build-meta.outputs.tag_name }}

	    - name: Docker meta UBI8
	      id: docker-meta-ubi8
	      uses: docker/metadata-action@v5
	      with:
	        images: ${{ env.IMAGES }}
	        flavor: |
	          suffix=-ubi8
	        tags: |
	          type=raw,value=${{ steps.build-meta.outputs.tag_name }}

	    - name: Set up QEMU
	      uses: docker/setup-qemu-action@v3
	      with:
	        platforms: ${{ env.PLATFORMS }}

	    - name: Set up Docker Buildx
	      uses: docker/setup-buildx-action@v3

	    - name: Login into docker registry
	      uses: docker/login-action@v3
	      with:
	        registry: ${{ env.REGISTRY }}
	        username: ${{ env.REGISTRY_USER }}
	        password: ${{ env.REGISTRY_PASSWORD }}

	    - name: Build and push
	      uses: docker/build-push-action@v6
	      with:
	        platforms: ${{ env.PLATFORMS }}
	        context: .
	        file: Dockerfile
	        push: true
	        build-args: |
	          VERSION=${{ env.VERSION }}
	        tags: ${{ steps.docker-meta.outputs.tags }}
	        labels: ${{ env.LABELS }}
	        provenance: ${{ env.BUILD_PUSH_PROVENANCE }}
	        cache-from: ${{ env.BUILD_PUSH_CACHE_FROM }}
	        cache-to: ${{ env.BUILD_PUSH_CACHE_TO }}

	    - name: Build and push UBI8
	      uses: docker/build-push-action@v6
	      with:
	        platforms: ${{ env.PLATFORMS }}
	        context: .
	        file: Dockerfile-ubi8
	        push: true
	        build-args: |
	          VERSION=${{ env.VERSION }}
	        tags: ${{ steps.docker-meta-ubi8.outputs.tags }}
	        labels: ${{ env.LABELS }}
	        provenance: ${{ env.BUILD_PUSH_PROVENANCE }}
	        cache-from: ${{ env.BUILD_PUSH_CACHE_FROM }}
	        cache-to: ${{ env.BUILD_PUSH_CACHE_TO }}

	    - name: Image Meta
	      id: image-meta
	      env:
	        TAGS: ${{ steps.docker-meta.outputs.tags }}
	      run: |
	        # If there is more than one tag, take the first one
	        # TAGS could be separated by newlines or commas
	        image=$(sed -n '1{s/,.*//; p}' <<< "$TAGS")
	        echo "image=${image}" >> $GITHUB_OUTPUT

	    - name: Output images
	      env:
	        TAGS: ${{ steps.docker-meta.outputs.tags }}
	        TAGS_UBI8: ${{ steps.docker-meta-ubi8.outputs.tags }}
	      run: |
	        LOWERCASE_OPERATOR_IMAGE_NAME=${OPERATOR_IMAGE_NAME,,}
	        # Keep only what follows the first ':' (the tag part)
	        TAG=${TAGS#*:}
	        TAG_UBI=${TAGS_UBI8#*:}
	        echo "CONTROLLER_IMG=${LOWERCASE_OPERATOR_IMAGE_NAME}:${TAG}" >> $GITHUB_ENV
	        echo "CONTROLLER_IMG_UBI8=${LOWERCASE_OPERATOR_IMAGE_NAME}:${TAG_UBI}" >> $GITHUB_ENV
	        echo "BUNDLE_IMG=${LOWERCASE_OPERATOR_IMAGE_NAME}:bundle-${TAG}" >> $GITHUB_ENV
	        echo "CATALOG_IMG=${LOWERCASE_OPERATOR_IMAGE_NAME}:catalog-${TAG}" >> $GITHUB_ENV

	    - name: Generate manifest for operator deployment
	      id: generate-manifest
	      env:
	        CONTROLLER_IMG: ${{ steps.image-meta.outputs.image }}
	      run: |
	        make generate-manifest

	    - name: Upload the operator manifest as artifact in workflow
	      uses: actions/upload-artifact@v4
	      with:
	        name: operator-manifest.yaml
	        path: dist/operator-manifest.yaml
	        retention-days: 7

	    # In order to test the case of upgrading from the current operator
	    # to a future one, we build and push an image with a different VERSION
	    # to force a different hash for the manager binary.
	    # (Otherwise the ONLINE upgrade won't trigger)
	    #
	    # NOTE: we only fire this in TEST DEPTH = 4, as that is the level of the
	    # upgrade test
	    - name: Build binary for upgrade test
	      uses: goreleaser/goreleaser-action@v6
	      if: |
	        always() && !cancelled() &&
	        needs.evaluate_options.outputs.test_level == '4'
	      with:
	        distribution: goreleaser
	        version: v2
	        args: ${{ env.BUILD_MANAGER_RELEASE_ARGS }}
	      env:
	        DATE: ${{ env.DATE }}
	        COMMIT: ${{ env.COMMIT }}
	        VERSION: ${{ env.VERSION }}-prime

	    # In order to test the case of upgrading from the current operator
	    # to a future one, we build and push an image with a different VERSION
	    # to force a different hash for the manager binary.
	    # (Otherwise the ONLINE upgrade won't trigger)
	    #
	    # We push the "prime" binary using a tag with the suffix "-prime"
	    # NOTE: we only fire this in TEST DEPTH = 4, as that is the level of the
	    # upgrade test
	    - name: Build and push image for upgrade test
	      uses: docker/build-push-action@v6
	      if: |
	        always() && !cancelled() &&
	        needs.evaluate_options.outputs.test_level == '4'
	      with:
	        platforms: ${{ env.PLATFORMS }}
	        context: .
	        file: Dockerfile
	        push: true
	        build-args: |
	          VERSION=${{ env.VERSION }}-prime
	        tags: ${{ steps.docker-meta.outputs.tags }}-prime
	        labels: ${{ env.LABELS }}
	        provenance: ${{ env.BUILD_PUSH_PROVENANCE }}
	        cache-from: ${{ env.BUILD_PUSH_CACHE_FROM }}
	        cache-to: ${{ env.BUILD_PUSH_CACHE_TO }}

	# This will only execute in cloudnative-pg org
	# Commits the freshly generated operator manifest to the
	# cloudnative-pg/artifacts repository, on a branch named after the source
	# branch, attributing the commit to the author of the source commit.
	publish-artifacts:
	  name: Publish artifacts
	  needs:
	    - buildx
	  if: |
	    (always() && !cancelled()) &&
	    needs.buildx.result == 'success' &&
	    needs.buildx.outputs.upload_artifacts == 'true' &&
	    github.repository_owner == 'cloudnative-pg'
	  runs-on: ubuntu-24.04
	  steps:
	    - name: Checkout artifact
	      uses: actions/checkout@v4
	      with:
	        repository: cloudnative-pg/artifacts
	        token: ${{ secrets.REPO_GHA_PAT }}
	        ref: main
	        fetch-depth: 0

	    - name: Configure git user
	      env:
	        # Author name and email come from commit metadata: pass them through
	        # the environment so quotes or '$' in them cannot alter the script.
	        AUTHOR_EMAIL: ${{ needs.buildx.outputs.author_email }}
	        AUTHOR_NAME: ${{ needs.buildx.outputs.author_name }}
	      run: |
	        git config user.email "${AUTHOR_EMAIL}"
	        git config user.name "${AUTHOR_NAME}"

	    - name: Switch to or create the right branch
	      env:
	        BRANCH: ${{ needs.buildx.outputs.branch_name }}
	      run: |
	        git checkout "${BRANCH}" 2>/dev/null || git checkout -b "${BRANCH}"

	        # Remove the previous operator manifest if present because the next
	        # step doesn't overwrite existing files
	        rm -fr manifests/operator-manifest.yaml

	    - name: Prepare the operator manifest
	      uses: actions/download-artifact@v4
	      with:
	        name: operator-manifest.yaml
	        path: manifests

	    - name: Prepare the commit
	      env:
	        COMMIT_MESSAGE: |
	          ${{ needs.buildx.outputs.commit_msg }}

	          https://github.com/cloudnative-pg/cloudnative-pg/commit/${{ needs.buildx.outputs.commit_sha }}
	      run: |
	        # Skip creating the commit if there are no changes
	        [ -n "$(git status -s)" ] || exit 0

	        git add .
	        git commit -m "${COMMIT_MESSAGE}"

	    # NOTE(review): the pinned version was lost to e-mail obfuscation in the
	    # captured page ("[email protected]"); v0.8.0 is assumed - confirm
	    # against the repository before merging.
	    - name: Push changes
	      uses: ad-m/github-push-action@v0.8.0
	      with:
	        github_token: ${{ secrets.REPO_GHA_PAT }}
	        repository: cloudnative-pg/artifacts
	        branch: ${{ needs.buildx.outputs.branch_name }}

	# Runs the matrix generator script and republishes its per-engine outputs
	# (matrix, enabled flag, timeout) for the E2E jobs.
	generate-jobs:
	  name: Generate jobs for E2E tests
	  needs:
	    - buildx
	    - evaluate_options
	  # We try to avoid running the E2E Test Suite in general, to reduce load on
	  # GitHub resources.
	  # Currently, it's executed in the following cases:
	  # - When dispatched via chatops commands
	  # - On a push in main and release branches
	  # - On scheduled executions
	  if: |
	    (always() && !cancelled()) &&
	    needs.buildx.result == 'success'
	  runs-on: ubuntu-24.04
	  outputs:
	    image: ${{ needs.buildx.outputs.image }}
	    localMatrix: ${{ steps.generate-jobs.outputs.localMatrix }}
	    localEnabled: ${{ steps.generate-jobs.outputs.localEnabled }}
	    localTimeout: ${{ steps.generate-jobs.outputs.localE2ETimeout }}
	    eksMatrix: ${{ steps.generate-jobs.outputs.eksMatrix }}
	    eksEnabled: ${{ steps.generate-jobs.outputs.eksEnabled }}
	    eksTimeout: ${{ steps.generate-jobs.outputs.eksE2ETimeout }}
	    aksMatrix: ${{ steps.generate-jobs.outputs.aksMatrix }}
	    aksEnabled: ${{ steps.generate-jobs.outputs.aksEnabled }}
	    aksTimeout: ${{ steps.generate-jobs.outputs.aksE2ETimeout }}
	    gkeMatrix: ${{ steps.generate-jobs.outputs.gkeMatrix }}
	    gkeEnabled: ${{ steps.generate-jobs.outputs.gkeEnabled }}
	    gkeTimeout: ${{ steps.generate-jobs.outputs.gkeE2ETimeout }}
	    openshiftMatrix: ${{ steps.generate-jobs.outputs.openshiftMatrix }}
	    openshiftEnabled: ${{ steps.generate-jobs.outputs.openshiftEnabled }}
	    openshiftTimeout: ${{ steps.generate-jobs.outputs.openshiftE2ETimeout }}
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ needs.evaluate_options.outputs.git_ref }}

	    # Generates the jobs that will become different matrix branches,
	    # according to the event, or to the "depth" parameter if set manually
	    - id: generate-jobs
	      name: Generate Jobs
	      # Plain bash: overrides the workflow default shell for this step
	      shell: bash
	      env:
	        # Both values derive from user input; pass them through the
	        # environment so a quote in them cannot break out of the command.
	        DEPTH: ${{ needs.evaluate_options.outputs.depth }}
	        LIMIT: ${{ needs.evaluate_options.outputs.limit }}
	      run: |
	        python .github/e2e-matrix-generator.py \
	          -m "${DEPTH}" \
	          -l "${LIMIT}"

	# Runs the E2E suite on kind clusters created on the GitHub runner, one
	# matrix entry per combination produced by 'generate-jobs', then collects
	# reports and logs as artifacts.
	e2e-local:
	  name: Run E2E on local executors
	  if: |
	    (always() && !cancelled()) &&
	    needs.generate-jobs.outputs.localEnabled == 'true' &&
	    needs.generate-jobs.result == 'success'
	  needs:
	    - buildx
	    - generate-jobs
	    - evaluate_options
	  strategy:
	    fail-fast: false
	    matrix: ${{ fromJSON(needs.generate-jobs.outputs.localMatrix) }}
	  runs-on: ubuntu-24.04
	  env:
	    # TEST_DEPTH determines the maximum test level the suite should be running
	    TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
	    # FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
	    FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}

	    K8S_VERSION: "${{ matrix.k8s_version }}"
	    POSTGRES_VERSION: ${{ matrix.postgres_version }}
	    POSTGRES_KIND: ${{ matrix.postgres_kind }}
	    MATRIX: ${{ matrix.id }}
	    POSTGRES_IMG: "${{ matrix.postgres_img }}"
	    # The version of operator to upgrade FROM, in the rolling upgrade E2E test
	    E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
	    TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.localTimeout }}
	    BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}

	    DEBUG: "true"
	    BUILD_IMAGE: "false"
	    CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
	    E2E_DEFAULT_STORAGE_CLASS: standard
	    E2E_CSI_STORAGE_CLASS: csi-hostpath-sc
	    E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: csi-hostpath-snapclass
	    LOG_DIR: ${{ github.workspace }}/kind-logs/
	    DOCKER_REGISTRY_MIRROR: https://mirror.gcr.io
	    TEST_CLOUD_VENDOR: "local"
	  steps:
	    - name: Cleanup Disk
	      uses: jlumbroso/free-disk-space@main
	      with:
	        android: true
	        dotnet: true
	        haskell: true
	        tool-cache: true
	        large-packages: false
	        swap-storage: false

	    - name: Cleanup docker cache
	      run: |
	        echo "-------------Disk info before cleanup----------------"
	        df -h
	        echo "-----------------------------------------------------"
	        docker system prune -a -f
	        echo "-------------Disk info after cleanup----------------"
	        df -h
	        echo "-----------------------------------------------------"

	    - name: Checkout code
	      uses: actions/checkout@v4
	      with:
	        ref: ${{ needs.evaluate_options.outputs.git_ref }}

	    - name: Install Go
	      uses: actions/setup-go@v5
	      with:
	        go-version: ${{ env.GOLANG_VERSION }}
	        check-latest: true

	    ## In case hack/setup-cluster.sh need pull operand image from registry
	    - name: Login into docker registry
	      uses: docker/login-action@v3
	      with:
	        registry: ${{ env.REGISTRY }}
	        username: ${{ env.REGISTRY_USER }}
	        password: ${{ env.REGISTRY_PASSWORD }}

	    # 'Retry' preparing the E2E test ENV
	    - name: Prepare the environment
	      uses: nick-fields/retry@v3
	      with:
	        timeout_seconds: 300
	        max_attempts: 3
	        on_retry_command: |
	          # Clear-ups before retries
	          sudo rm -rf /usr/local/bin/kind /usr/local/bin/kubectl
	        command: |
	          sudo apt-get update
	          sudo apt-get install -y gettext-base
	          sudo hack/setup-cluster.sh prepare /usr/local/bin

	    - name: Prepare patch for customization
	      env:
	        ## the following variable all need be set if we use env_override_customized.yaml.template
	        ## this is customization for local kind
	        LEADER_ELECTION: "true"
	        LEADER_LEASE_DURATION: 15
	        LEADER_RENEW_DEADLINE: 10
	        LIVENESS_PROBE_THRESHOLD: 3
	        LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
	      run: |
	        # Fall back to 'info' when no log level was provided
	        LOG_LEVEL=${LOG_LEVEL:-info}
	        envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
	        cat config/manager/env_override.yaml

	    - name: Run Kind End-to-End tests
	      env:
	        ENABLE_APISERVER_AUDIT: true
	      run:
	        make e2e-test-kind

	    # Summarize the failed E2E test cases if there are any
	    - name: Report failed E2E tests
	      if: failure()
	      run: |
	        set +x
	        chmod +x .github/report-failed-test.sh
	        ./.github/report-failed-test.sh

	    # Create an individual artifact for each E2E test, which will be used to
	    # generate E2E test summary in the follow-up job 'summarize-e2e-tests'
	    - name: Create individual artifact for each E2E test
	      if: (always() && !cancelled())
	      env:
	        RUNNER: "local"
	        RUN_ID: ${{ github.run_id }}
	        REPOSITORY: ${{ github.repository }}
	        GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
	      run: |
	        set +x
	        python .github/generate-test-artifacts.py \
	          -o testartifacts-${{ env.MATRIX }} \
	          -f tests/e2e/out/report.json \
	          --environment=true
	        # The upgrade report is processed only when it was produced
	        if [ -f tests/e2e/out/upgrade_report.json ]; then
	          python .github/generate-test-artifacts.py \
	            -o testartifacts-${{ env.MATRIX }} \
	            -f tests/e2e/out/upgrade_report.json \
	            --environment=true
	        fi

	    - name: Archive test artifacts
	      if: (always() && !cancelled())
	      uses: actions/upload-artifact@v4
	      with:
	        name: testartifacts-${{ env.MATRIX }}
	        path: testartifacts-${{ env.MATRIX }}/
	        retention-days: 7

	    - name: Cleanup test artifacts
	      if: always()
	      run:
	        rm -rf testartifacts-${{ env.MATRIX }}/

	    # Delete report.json after the analysis. File should always exist.
	    # Delete upgrade_report.json. It may not exist depending on test level.
	    - name: Cleanup ginkgo JSON report
	      if: always()
	      run: |
	        if [ -f tests/e2e/out/upgrade_report.json ]; then
	          rm tests/e2e/out/upgrade_report.json
	        fi
	        if [ -f tests/e2e/out/report.json ]; then
	          rm tests/e2e/out/report.json
	        fi

	    # Archive logs for failed test cases if there are any
	    - name: Archive Kind logs
	      if: failure()
	      uses: actions/upload-artifact@v4
	      with:
	        name: kind-logs-${{ matrix.id }}
	        path: kind-logs/
	        retention-days: 7

	    - name: Archive e2e failure contexts
	      if: failure()
	      uses: actions/upload-artifact@v4
	      with:
	        name: test-failure-contexts-${{ matrix.id }}
	        path: |
	          tests/*/out/
	        retention-days: 7
	        if-no-files-found: ignore

	    - name: Archive e2e logs
	      if: failure()
	      uses: actions/upload-artifact@v4
	      with:
	        name: cluster-logs-${{ matrix.id }}
	        path: |
	          tests/e2e/cluster_logs/**
	        retention-days: 7
	        if-no-files-found: ignore

	# AKS Secrets required
	# secrets.AZURE_CREDENTIALS
	# secrets.AZURE_SUBSCRIPTION
	# secrets.AZURE_RESOURCEGROUP
	# secrets.AZURE_RESOURCENAME
	# secrets.AZURE_WORKSPACE_RESOURCE_ID

	# Creates the Azure storage account shared by all the AKS E2E matrix jobs
	# of this run and exposes its name as a job output.
	e2e-aks-setup:
	  name: Setup shared resources for Microsoft AKS E2Es
	  if: |
	    (always() && !cancelled()) &&
	    vars.AKS_ENABLED == 'true' &&
	    needs.generate-jobs.outputs.aksEnabled == 'true' &&
	    needs.generate-jobs.result == 'success'
	  needs:
	    - buildx
	    - generate-jobs
	    - evaluate_options
	  runs-on: ubuntu-24.04
	  outputs:
	    azure_storage_account: ${{ steps.setup.outputs.azure_storage_account }}
	  steps:
	    # NOTE(review): the pinned version was lost to e-mail obfuscation in the
	    # captured page ("azure/[email protected]"); v2.2.0 is assumed - confirm
	    # against the repository before merging.
	    - name: Azure Login
	      uses: azure/login@v2.2.0
	      with:
	        creds: ${{ secrets.AZURE_CREDENTIALS }}

	    - name: Create AKS shared resources
	      uses: nick-fields/retry@v3
	      id: setup
	      with:
	        timeout_minutes: 10
	        max_attempts: 3
	        command: |
	          az extension add --allow-preview true --name aks-preview
	          az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}

	          # Storage account name: run number followed by the E2E suffix
	          AZURE_STORAGE_ACCOUNT="${{ github.run_number }}${{ env.E2E_SUFFIX }}"
	          az storage account create \
	            --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
	            --name ${AZURE_STORAGE_ACCOUNT} \
	            --sku Standard_LRS -o none

	          # Output storage account name
	          echo "azure_storage_account=${AZURE_STORAGE_ACCOUNT}" >> $GITHUB_OUTPUT

	e2e-aks:
	name: Run E2E on Microsoft AKS
	if: \|
	(always() && !cancelled()) &&
	vars.AKS_ENABLED == 'true' &&
	needs.generate-jobs.outputs.aksEnabled == 'true' &&
	needs.generate-jobs.result == 'success' &&
	needs.e2e-aks-setup.result == 'success'
	needs:
	- buildx
	- generate-jobs
	- evaluate_options
	- e2e-aks-setup
	strategy:
	fail-fast: false
	max-parallel: 8
	matrix: ${{ fromJSON(needs.generate-jobs.outputs.aksMatrix) }}
	runs-on: ubuntu-24.04
	env:
	# TEST_DEPTH determines the maximum test level the suite should be running
	TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
	# FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
	FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}

	K8S_VERSION: "${{ matrix.k8s_version }}"
	POSTGRES_VERSION: ${{ matrix.postgres_version }}
	POSTGRES_KIND: ${{ matrix.postgres_kind }}
	MATRIX: ${{ matrix.id }}
	POSTGRES_IMG: "${{ matrix.postgres_img }}"
	# The version of operator to upgrade FROM, in the rolling upgrade E2E test
	E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
	TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.aksTimeout }}
	BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}

	AZURE_STORAGE_ACCOUNT: ${{ needs.e2e-aks-setup.outputs.azure_storage_account }}
	# AZURE_STORAGE_KEY: this one is gathered during a subsequent step

	DEBUG: "true"
	BUILD_IMAGE: "false"
	CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
	E2E_DEFAULT_STORAGE_CLASS: rook-ceph-block
	E2E_CSI_STORAGE_CLASS: rook-ceph-block
	E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: csi-rbdplugin-snapclass
	TEST_CLOUD_VENDOR: "aks"

	steps:
	-
	name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ needs.evaluate_options.outputs.git_ref }}
	-
	name: Install Go
	uses: actions/setup-go@v5
	with:
	go-version: ${{ env.GOLANG_VERSION }}
	check-latest: true
	-
	name: Prepare the environment
	uses: nick-fields/retry@v3
	with:
	timeout_seconds: 300
	max_attempts: 3
	command: \|
	sudo apt-get update
	sudo apt-get install -y gettext-base
	-
	name: Install ginkgo
	uses: nick-fields/retry@v3
	with:
	timeout_minutes: 1
	max_attempts: 3
	command: \|
	go install github.com/onsi/ginkgo/v2/ginkgo
	-
	## In case hack/setup-cluster.sh need pull operand image from registry
	name: Login into docker registry
	uses: docker/login-action@v3
	with:
	registry: ${{ env.REGISTRY }}
	username: ${{ env.REGISTRY_USER }}
	password: ${{ env.REGISTRY_PASSWORD }}
	-
	name: Azure Login
	uses: azure/[email protected]
	with:
	creds: ${{ secrets.AZURE_CREDENTIALS }}
	-
	name: Install kubectl
	uses: azure/setup-kubectl@v4
	with:
	version: v${{ env.K8S_VERSION }}
	-
	name: Create AKS cluster
	uses: nick-fields/retry@v3
	with:
	timeout_minutes: 10
	max_attempts: 3
	command: \|
	az extension add --allow-preview true --name aks-preview
	az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}

	# name of the AKS cluster
	AZURE_AKS="${{ secrets.AZURE_RESOURCENAME }}-${{ github.run_number }}-$( echo ${{ matrix.id }} \| tr -d '_.-' )"
	echo "AZURE_AKS=${AZURE_AKS}" >> $GITHUB_ENV

	# gather the storage account Key
	AZURE_STORAGE_KEY=$(az storage account keys list -g "${{ secrets.AZURE_RESOURCEGROUP }}" -n "${{ env.AZURE_STORAGE_ACCOUNT }}" --query "[0].value" -o tsv)
	echo "::add-mask::$AZURE_STORAGE_KEY"
	echo "AZURE_STORAGE_KEY=${AZURE_STORAGE_KEY}" >> $GITHUB_ENV

	# name of the cluster's blob container in the storage account
	AZURE_BLOB_CONTAINER="$( echo ${{ matrix.id }} \| tr -d '_.-' \| tr '[:upper:]' '[:lower:]' )"
	echo "AZURE_BLOB_CONTAINER=${AZURE_BLOB_CONTAINER}" >> $GITHUB_ENV

	# create and login to the AKS cluster
	az aks create --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
	--name ${AZURE_AKS} \
	--tier standard \
	--node-count 3 -k v${K8S_VERSION} --generate-ssh-keys --enable-addons monitoring \
	--workspace-resource-id ${{ secrets.AZURE_WORKSPACE_RESOURCE_ID }} \
	--aks-custom-headers EnableAzureDiskFileCSIDriver=true
	az aks get-credentials --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
	--name ${AZURE_AKS}

	# create diagnostic settings for monitoring kube-apiserver logs
	AKS_CLUSTER_RESOURCE_ID=$(az aks show --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --name ${AZURE_AKS} --query id -o tsv --only-show-errors)
	az monitor diagnostic-settings create \
	--resource-group ${{ secrets.AZURE_RESOURCEGROUP }} \
	--resource ${AKS_CLUSTER_RESOURCE_ID} \
	--name diagnostic-kube-apiserver-logs \
	--workspace ${{ secrets.AZURE_WORKSPACE_RESOURCE_ID }} \
	--logs '[ { "category": "kube-apiserver", "enabled": true } ]'
	-
	# Azure is slow in provisioning disks, and we can't wait two minutes
	# every time we create a pod, otherwise all the tests will time out.
	# We set up a few large disks now, we run Rook on top of them and we
	# use Rook to get the small PVs we use in the tests.
	# It can still take a while to deploy Rook.
	name: Set up Rook
	uses: nick-fields/retry@v3
	with:
	timeout_minutes: 27
	max_attempts: 1
	command: \|
	STORAGECLASSNAME=default
	# yq is needed below to patch the Rook cluster manifest
	go install github.com/mikefarah/yq/v4@v4
	ROOK_BASE_URL=https://raw.githubusercontent.com/rook/rook/${{ env.ROOK_VERSION }}/deploy/examples
	kubectl apply -f ${ROOK_BASE_URL}/crds.yaml
	kubectl apply -f ${ROOK_BASE_URL}/common.yaml
	kubectl apply -f ${ROOK_BASE_URL}/operator.yaml
	# Download the upstream cluster-on-pvc example, filter it through sed,
	# set the requested storage size and the storage class names, then apply it
	curl ${ROOK_BASE_URL}/cluster-on-pvc.yaml \| \
	sed '/^ #/d;/^ $/d' \| \
	yq e ".spec.storage.storageClassDeviceSets[].volumeClaimTemplates[].spec.resources.requests.storage = \"50Gi\" \|
	.spec.storage.storageClassDeviceSets[].volumeClaimTemplates[].spec.storageClassName = \"${STORAGECLASSNAME}\" \|
	.spec.mon.volumeClaimTemplate.spec.storageClassName = \"${STORAGECLASSNAME}\" " - \| \
	kubectl apply -f -
	# Wait until exactly three OSD deployments are listed (presumably the
	# number of OSDs requested by cluster-on-pvc.yaml - verify when bumping
	# ROOK_VERSION). Pause between polls so the API server is not hammered;
	# the overall wait is bounded by timeout_minutes above.
	while true; do
	output=$( kubectl get deploy -n rook-ceph -l app=rook-ceph-osd --no-headers -o name )
	if [[ $(wc -w <<< $output) == 3 ]]; then
	break
	fi
	sleep 5
	done
	echo "Waiting for Rook OSDs to be available"
	kubectl wait deploy -n rook-ceph --for condition=available --timeout 480s -l app=rook-ceph-osd
	kubectl apply -f ${ROOK_BASE_URL}/csi/rbd/storageclass.yaml
	kubectl apply -f ${ROOK_BASE_URL}/csi/rbd/snapshotclass.yaml
	# Associate the default volume snapshot class with the default storage class
	kubectl annotate storageclass ${{env.E2E_DEFAULT_STORAGE_CLASS}} storage.kubernetes.io/default-snapshot-class=${{env.E2E_DEFAULT_VOLUMESNAPSHOT_CLASS}} --overwrite
	-
	# Render config/manager/env_override.yaml from the customization template,
	# substituting the variables defined in this step's env
	name: Prepare patch for customization
	env:
	## the following variables all need to be set if we use env_override_customized.yaml.template
	## this is the customization for aks
	LEADER_ELECTION: "true"
	LEADER_LEASE_DURATION: 15
	LEADER_RENEW_DEADLINE: 10
	LIVENESS_PROBE_THRESHOLD: 3
	LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
	run: \|
	# Fall back to "info" when no log level was provided
	LOG_LEVEL=${LOG_LEVEL:-info}
	envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
	# Print the rendered file so that it shows up in the job log
	cat config/manager/env_override.yaml
	-
	name: Run E2E tests
	run: hack/e2e/run-e2e.sh
	-
	# Summarize the failed E2E test cases if there are any
	name: Report failed E2E tests
	if: failure()
	run: \|
	set +x
	chmod +x .github/report-failed-test.sh
	./.github/report-failed-test.sh
	-
	# Create an individual artifact for each E2E test, which will be used to
	# generate E2E test summary in the follow-up job 'summarize-e2e-tests'
	name: Create individual artifact for each E2E test
	if: (always() && !cancelled())
	env:
	RUNNER: "aks"
	RUN_ID: ${{ github.run_id }}
	REPOSITORY: ${{ github.repository }}
	GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
	run: \|
	set +x
	python .github/generate-test-artifacts.py \
	-o testartifacts-${{ env.MATRIX }} \
	-f tests/e2e/out/report.json \
	--environment=true
	if [ -f tests/e2e/out/upgrade_report.json ]; then
	python .github/generate-test-artifacts.py \
	-o testartifacts-${{ env.MATRIX }} \
	-f tests/e2e/out/upgrade_report.json \
	--environment=true
	fi
	-
	name: Archive test artifacts
	if: (always() && !cancelled())
	uses: actions/upload-artifact@v4
	with:
	name: testartifacts-${{ env.MATRIX }}
	path: testartifacts-${{ env.MATRIX }}/
	retention-days: 7
	-
	name: Cleanup test artifacts
	if: always()
	run:
	rm -rf testartifacts-${{ env.MATRIX }}/
	-
	name: Cleanup ginkgo JSON report
	# Delete report.json after the analysis. File should always exist.
	# Delete upgrade_report.json. It may not exist depending on test level.
	if: always()
	run: \|
	if [ -f tests/e2e/out/upgrade_report.json ]; then
	rm tests/e2e/out/upgrade_report.json
	fi
	if [ -f tests/e2e/out/report.json ]; then
	rm tests/e2e/out/report.json
	fi
	-
	name: Archive e2e failure contexts
	if: failure()
	uses: actions/upload-artifact@v4
	with:
	name: test-failure-contexts-${{ matrix.id }}
	path: \|
	tests/*/out/
	retention-days: 7
	if-no-files-found: ignore
	-
	name: Archive e2e logs
	if: failure()
	uses: actions/upload-artifact@v4
	with:
	name: cluster-logs-${{ matrix.id }}
	path: \|
	tests/e2e/cluster_logs/**
	retention-days: 7
	if-no-files-found: ignore
	-
	# Best-effort teardown of the AKS cluster used by this matrix entry and of
	# the data-collection rule named after it. Errors do not abort the script
	# (set +e); every deletion is attempted up to max_attempts times and a
	# workflow warning is raised when all the attempts fail, so that leaked
	# resources do not go unnoticed.
	name: Clean up
	if: always()
	run: \|
	set +e
	# presumably provides the "az monitor data-collection" commands used below
	az extension add --allow-preview true --name monitor-control-service
	az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}
	attempt=1
	max_attempts=3
	while [ "${attempt}" -le "${max_attempts}" ]; do
	echo "Deleting cluster. Attempt ${attempt} of ${max_attempts}"
	az aks delete --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} -y --name ${{ env.AZURE_AKS }}
	status=$?
	if [[ $status == 0 ]]; then
	echo "AKS cluster deleted"
	break
	fi
	echo "Failed deleting cluster ${{ env.AZURE_AKS }}, retrying"
	sleep 5
	attempt=$((attempt+1))
	done
	# attempt only exceeds max_attempts when every deletion attempt failed
	if [ "${attempt}" -gt "${max_attempts}" ]; then
	echo "::warning file=continuous-delivery.yml::Failed deleting AKS cluster ${{ env.AZURE_AKS }} after ${max_attempts} attempts"
	fi
	attempt=1
	AZURE_RESOURCEGROUP_LOCATION="$( az group show --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --query location -o tsv --only-show-errors )"
	DATA_COLL_RULE_NAME="MSCI-${AZURE_RESOURCEGROUP_LOCATION}-${{ env.AZURE_AKS }}"
	while [ "${attempt}" -le "${max_attempts}" ]; do
	echo "Deleting data-collection rule ${DATA_COLL_RULE_NAME}. Attempt ${attempt} of ${max_attempts}"
	az monitor data-collection rule show --name ${DATA_COLL_RULE_NAME} --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --query name
	# if not found, let it go
	status=$?
	if [[ $status != 0 ]]; then
	echo "AKS data-collection rule not found"
	break
	fi
	az monitor data-collection rule delete -y --name ${DATA_COLL_RULE_NAME} --resource-group ${{ secrets.AZURE_RESOURCEGROUP }}
	status=$?
	if [[ $status == 0 ]]; then
	echo "AKS data-collection rule deleted"
	break
	fi
	echo "Failed deleting data-collection rule ${DATA_COLL_RULE_NAME}, retrying"
	sleep 5
	attempt=$((attempt+1))
	done
	# attempt only exceeds max_attempts when every deletion attempt failed
	if [ "${attempt}" -gt "${max_attempts}" ]; then
	echo "::warning file=continuous-delivery.yml::Failed deleting data-collection rule ${DATA_COLL_RULE_NAME} after ${max_attempts} attempts"
	fi

	e2e-aks-teardown:
	# Deletes the Azure storage account whose name is published by the
	# e2e-aks-setup job. It depends on e2e-aks, so it starts only after the
	# AKS E2E jobs are over; thanks to always() it runs whatever their
	# outcome, provided that AKS is enabled and that both generate-jobs and
	# e2e-aks-setup succeeded.
	name: Teardown Microsoft AKS shared resources
	if: \|
	always() &&
	vars.AKS_ENABLED == 'true' &&
	needs.generate-jobs.outputs.aksEnabled == 'true' &&
	needs.generate-jobs.result == 'success' &&
	needs.e2e-aks-setup.result == 'success'
	needs:
	- buildx
	- generate-jobs
	- e2e-aks-setup
	- e2e-aks
	runs-on: ubuntu-24.04
	env:
	# Name of the storage account to delete, as reported by e2e-aks-setup
	AZURE_STORAGE_ACCOUNT: ${{ needs.e2e-aks-setup.outputs.azure_storage_account }}
	steps:
	-
	# NOTE(review): the action reference below looks garbled
	# ("[email protected]") - confirm the intended azure/login version
	name: Azure Login
	if: always()
	uses: azure/[email protected]
	with:
	creds: ${{ secrets.AZURE_CREDENTIALS }}
	-
	# The deletion is retried up to 3 times, with a 5 minute timeout per attempt
	name: Teardown AKS shared resources
	if: always()
	uses: nick-fields/retry@v3
	with:
	timeout_minutes: 5
	max_attempts: 3
	command: \|
	az account set --subscription ${{ secrets.AZURE_SUBSCRIPTION }}
	az storage account delete -y --resource-group ${{ secrets.AZURE_RESOURCEGROUP }} --name ${{ env.AZURE_STORAGE_ACCOUNT }}

	# EKS Secrets required
	# secrets.AWS_EKS_ADMIN_IAM_ROLES
	# secrets.AWS_ACCESS_KEY_ID
	# secrets.AWS_SECRET_ACCESS_KEY

	e2e-eks:
	name: Run E2E on Amazon EKS
	if: \|
	(always() && !cancelled()) &&
	vars.EKS_ENABLED == 'true' &&
	needs.generate-jobs.outputs.eksEnabled == 'true' &&
	needs.generate-jobs.result == 'success'
	needs:
	- buildx
	- generate-jobs
	- evaluate_options
	strategy:
	fail-fast: false
	max-parallel: 6
	matrix: ${{ fromJSON(needs.generate-jobs.outputs.eksMatrix) }}
	runs-on: ubuntu-24.04
	env:
	# TEST_DEPTH determines the maximum test level the suite should be running
	TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
	# FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
	FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}

	K8S_VERSION: "${{ matrix.k8s_version }}"
	POSTGRES_VERSION: ${{ matrix.postgres_version }}
	POSTGRES_KIND: ${{ matrix.postgres_kind }}
	MATRIX: ${{ matrix.id }}
	POSTGRES_IMG: "${{ matrix.postgres_img }}"
	# The version of operator to upgrade FROM, in the rolling upgrade E2E test
	E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
	TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.eksTimeout }}
	BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}

	DEBUG: "true"
	BUILD_IMAGE: "false"
	CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
	E2E_DEFAULT_STORAGE_CLASS: gp3
	E2E_CSI_STORAGE_CLASS: gp3
	E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: ebs-csi-snapclass

	AWS_REGION: eu-central-1
	AWS_EKS_ADMIN_IAM_ROLES: ${{ secrets.AWS_EKS_ADMIN_IAM_ROLES }}
	TEST_CLOUD_VENDOR: "eks"

	steps:
	-
	# Build the cluster name from E2E_SUFFIX, the run number and the matrix id
	# (stripped of '_', '.' and '-'), and export it to the following steps
	# through GITHUB_ENV
	name: Set cluster name
	run: \|
	echo "CLUSTER_NAME=${{ env.E2E_SUFFIX }}-test-${{ github.run_number }}-$( echo ${{ matrix.id }} \| tr -d '_.-' )" >> $GITHUB_ENV
	-
	name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ needs.evaluate_options.outputs.git_ref }}
	-
	name: Install Go
	uses: actions/setup-go@v5
	with:
	go-version: ${{ env.GOLANG_VERSION }}
	check-latest: true
	-
	## In case hack/setup-cluster.sh needs to pull the operand image from the registry
	name: Login into docker registry
	uses: docker/login-action@v3
	with:
	registry: ${{ env.REGISTRY }}
	username: ${{ env.REGISTRY_USER }}
	password: ${{ env.REGISTRY_PASSWORD }}
	-
	name: Prepare the environment
	uses: nick-fields/retry@v3
	with:
	timeout_seconds: 300
	max_attempts: 3
	command: \|
	sudo apt-get update
	sudo apt-get install -y gettext-base
	-
	name: Install ginkgo
	uses: nick-fields/retry@v3
	with:
	timeout_minutes: 1
	max_attempts: 3
	command: \|
	go install github.com/onsi/ginkgo/v2/ginkgo
	-
	name: Configure AWS credentials
	uses: aws-actions/configure-aws-credentials@v4
	with:
	aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
	aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	aws-region: ${{ env.AWS_REGION }}
	-
	# Download the latest eksctl release (the version is not pinned), unpack it
	# into $HOME/.local/bin and add that directory to the PATH of later steps
	name: Install eksctl
	uses: nick-fields/retry@v3
	with:
	timeout_minutes: 1
	max_attempts: 3
	command: \|
	mkdir -p "$HOME/.local/bin"
	curl -sL "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" \
	\| tar xz -C $HOME/.local/bin
	# GITHUB_PATH changes apply to the steps that follow this one
	echo "$HOME/.local/bin" >> $GITHUB_PATH
	-
	# Render the eksctl cluster definition from its template, expanding the
	# environment variables it references
	name: Configure EKS setup
	run: \|
	envsubst < hack/e2e/eks-cluster.yaml.template > hack/e2e/eks-cluster.yaml
	-
	# Create the EKS cluster and prepare it for the tests: admin identity
	# mappings, kubeconfig, volume snapshot CRDs and controller, snapshot
	# class and the gp3 storage class
	name: Setup EKS
	run: \|
	# Setting up EKS cluster
	echo "create cluster"
	eksctl create cluster -f hack/e2e/eks-cluster.yaml

	# Create an iamidentitymapping for each role listed (one per line)
	# in AWS_EKS_ADMIN_IAM_ROLES
	echo "$AWS_EKS_ADMIN_IAM_ROLES" \| while read role
	do
	# Masking variables to hide values
	echo "::add-mask::$role"
	eksctl create iamidentitymapping --cluster "${CLUSTER_NAME}" --region="${AWS_REGION}" --arn "${role}" --group system:masters --username admin
	done

	# Updating .kubeconfig to use the correct version of client.authentication.k8s.io API
	aws eks update-kubeconfig --name ${CLUSTER_NAME} --region ${AWS_REGION}

	# Installing the CRDs required to support volume snapshots
	SNAPSHOTTER_BASE_URL=https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/${{env.EXTERNAL_SNAPSHOTTER_VERSION}}
	kubectl apply -f ${SNAPSHOTTER_BASE_URL}/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml
	kubectl apply -f ${SNAPSHOTTER_BASE_URL}/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml
	kubectl apply -f ${SNAPSHOTTER_BASE_URL}/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml

	## Snapshot controller
	kubectl apply -f ${SNAPSHOTTER_BASE_URL}/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml
	kubectl apply -f ${SNAPSHOTTER_BASE_URL}/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml

	# Install volume snapshot class
	kubectl apply -f hack/e2e/volumesnapshotclass-ebs-csi.yaml
	kubectl get volumesnapshotclass

	# Stop using gp2 as the default storage class, create the gp3 storage
	# class and associate the default volume snapshot class with it
	kubectl annotate storageclass gp2 storageclass.kubernetes.io/is-default-class=false --overwrite
	kubectl apply -f hack/e2e/storage-class-gp3.yaml
	kubectl annotate storageclass ${{env.E2E_DEFAULT_STORAGE_CLASS}} storage.kubernetes.io/default-snapshot-class=${{env.E2E_DEFAULT_VOLUMESNAPSHOT_CLASS}} --overwrite
	kubectl get storageclass
	-
	# Install Velero on the cluster, backed by a dedicated S3 bucket.
	# The installation is attempted up to 3 times; before each new attempt
	# on_retry_command removes what a failed attempt may have left behind
	# (the bucket, the velero namespace and cluster role binding, the CRDs).
	name: Setup Velero
	uses: nick-fields/retry@v3
	env:
	VELERO_VERSION: "v1.15.0"
	VELERO_AWS_PLUGIN_VERSION: "v1.11.0"
	with:
	timeout_minutes: 10
	max_attempts: 3
	on_retry_command: \|
	# The main command exports VELERO_BUCKET_NAME through GITHUB_ENV, which
	# only takes effect in the steps that follow, so the name is computed
	# again here. Only POSIX sh constructs are used, in case this hook is
	# not executed by bash.
	VELERO_BUCKET_NAME="$( printf '%s' "${CLUSTER_NAME}-velero" \| tr '[:upper:]' '[:lower:]' )"

	# Clean up the bucket; a missing bucket is not an error. There is no
	# enclosing loop here, so the outcome is only reported and the script
	# always goes on to uninstall Velero.
	if output=$( aws s3api delete-bucket --bucket "${VELERO_BUCKET_NAME}" --region "${AWS_REGION}" 2>&1 ); then
	echo "S3 Bucket deleted"
	else
	case "${output}" in
	*NoSuchBucket*) echo "S3 Bucket doesn't exist, nothing to remove" ;;
	*) echo "Failed deleting S3 Bucket ${VELERO_BUCKET_NAME}: ${output}" ;;
	esac
	fi

	# Uninstall Velero
	kubectl delete namespace/velero clusterrolebinding/velero
	kubectl delete crds -l component=velero
	command: \|
	# Bucket names must be lowercase; the name is exported for later steps
	VELERO_BUCKET_NAME="${CLUSTER_NAME,,}-velero"
	echo "VELERO_BUCKET_NAME=${VELERO_BUCKET_NAME}" >> $GITHUB_ENV

	# Create S3 bucket
	aws s3api create-bucket \
	--bucket "${VELERO_BUCKET_NAME}" \
	--region "${AWS_REGION}" \
	--create-bucket-configuration LocationConstraint="${AWS_REGION}"

	# Download Velero, extract and place it in $PATH
	curl -sL "https://github.com/vmware-tanzu/velero/releases/download/${VELERO_VERSION}/velero-${VELERO_VERSION}-linux-amd64.tar.gz" \| tar xz
	mv velero-${VELERO_VERSION}-linux-amd64/velero $HOME/.local/bin

	# Set Velero-specific credentials.
	# The file is overwritten rather than appended to, so that a retry does
	# not leave duplicated [default] sections in it.
	echo -e "[default]\naws_access_key_id=${{ secrets.AWS_ACCESS_KEY_ID }}\naws_secret_access_key=${{ secrets.AWS_SECRET_ACCESS_KEY }}" > credentials-velero

	# Install Velero
	velero install \
	--provider aws \
	--plugins velero/velero-plugin-for-aws:${VELERO_AWS_PLUGIN_VERSION} \
	--bucket "${VELERO_BUCKET_NAME}" \
	--backup-location-config region="${AWS_REGION}" \
	--snapshot-location-config region="${AWS_REGION}" \
	--secret-file ./credentials-velero \
	--wait
	-
	# Render config/manager/env_override.yaml from the customization template,
	# substituting the variables defined in this step's env
	name: Prepare patch for customization
	env:
	## the following variables all need to be set if we use env_override_customized.yaml.template
	## this is the customization for eks
	LEADER_ELECTION: "true"
	LEADER_LEASE_DURATION: 15
	LEADER_RENEW_DEADLINE: 10
	LIVENESS_PROBE_THRESHOLD: 3
	LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
	run: \|
	# Fall back to "info" when no log level was provided
	LOG_LEVEL=${LOG_LEVEL:-info}
	envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
	# Print the rendered file so that it shows up in the job log
	cat config/manager/env_override.yaml
	-
	name: Run E2E tests
	run: hack/e2e/run-e2e.sh
	-
	# Summarize the failed E2E test cases if there are any
	name: Report failed E2E tests
	if: failure()
	run: \|
	set +x
	chmod +x .github/report-failed-test.sh
	./.github/report-failed-test.sh
	-
	# Create an individual artifact for each E2E test, which will be used to
	# generate E2E test summary in the follow-up job 'summarize-e2e-tests'
	name: Create individual artifact for each E2E test
	if: (always() && !cancelled())
	env:
	RUNNER: "eks"
	RUN_ID: ${{ github.run_id }}
	REPOSITORY: ${{ github.repository }}
	GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
	run: \|
	set +x
	python .github/generate-test-artifacts.py \
	-o testartifacts-${{ env.MATRIX }} \
	-f tests/e2e/out/report.json \
	--environment=true
	if [ -f tests/e2e/out/upgrade_report.json ]; then
	python .github/generate-test-artifacts.py \
	-o testartifacts-${{ env.MATRIX }} \
	-f tests/e2e/out/upgrade_report.json \
	--environment=true
	fi
	-
	name: Archive test artifacts
	if: (always() && !cancelled())
	uses: actions/upload-artifact@v4
	with:
	name: testartifacts-${{ env.MATRIX }}
	path: testartifacts-${{ env.MATRIX }}/
	retention-days: 7
	-
	name: Cleanup test artifacts
	if: always()
	run:
	rm -rf testartifacts-${{ env.MATRIX }}/
	-
	name: Cleanup ginkgo JSON report
	# Delete report.json after the analysis. File should always exist.
	# Delete upgrade_report.json. It may not exist depending on test level.
	if: always()
	run: \|
	if [ -f tests/e2e/out/upgrade_report.json ]; then
	rm tests/e2e/out/upgrade_report.json
	fi
	if [ -f tests/e2e/out/report.json ]; then
	rm tests/e2e/out/report.json
	fi
	-
	name: Archive e2e failure contexts
	if: failure()
	uses: actions/upload-artifact@v4
	with:
	name: test-failure-contexts-${{ matrix.id }}
	path: \|
	tests/*/out/
	retention-days: 7
	if-no-files-found: ignore
	-
	name: Archive e2e logs
	if: failure()
	uses: actions/upload-artifact@v4
	with:
	name: cluster-logs-${{ matrix.id }}
	path: \|
	tests/e2e/cluster_logs/**
	retention-days: 7
	if-no-files-found: ignore
	-
	# Best-effort teardown of everything created for this matrix entry:
	# workloads that could block the deletion, the Velero S3 bucket, the EKS
	# cluster (via eksctl first, then via direct AWS calls if its
	# CloudFormation stack is still there) and any leftover EBS volume.
	# Errors do not abort the script (set +e).
	name: Clean up
	if: always()
	run: \|
	set +e
	CLUSTER_NAME="${{ env.CLUSTER_NAME }}"
	REGION_NAME="${{ env.AWS_REGION }}"
	STACK_NAME="eksctl-${CLUSTER_NAME}-cluster"
	# An empty status means that the cluster stack cannot be described:
	# in that case there is nothing to do
	CLOUDFORMATION_STATUS_CURRENT=$(aws cloudformation describe-stacks --stack-name "${STACK_NAME}" \| jq -r '.Stacks[].StackStatus')
	if [[ -z "${CLOUDFORMATION_STATUS_CURRENT}" ]]; then
	echo "CloudFormation stack not found. Nothing to cleanup."
	exit 0
	fi

	# Attempt to remove any leftover PDB (and Cluster that would recreate it)
	# that could prevent the EKS cluster deletion
	kubectl delete cluster --all --all-namespaces --now --timeout=30s \|\| true
	kubectl delete pdb --all --all-namespaces --now --timeout=30s \|\| true
	kubectl delete pvc --all --all-namespaces --now --timeout=30s \|\| true
	# Remove any LoadBalancer service
	kubectl get service --all-namespaces -o json \| jq -r '.items[] \| select(.spec.type=="LoadBalancer") \| .metadata \| "kubectl delete service --now --timeout=30s -n " + .namespace + " " + .name' \| xargs -rI X bash -c X \|\| true

	# Record the node group stack names now, while the cluster can still be
	# queried: they are needed by the fallback cleanup further down
	NODEGROUP_STACK_NAMES=$(eksctl get nodegroup --cluster "${CLUSTER_NAME}" -o json \| jq -r '.[].StackName' \|\| true)

	attempt=1
	bucket_attempt=1
	max_attempts=3

	# Attempting three times to remove the Velero S3 bucket
	# (the name is derived from the cluster name when VELERO_BUCKET_NAME is not set)
	VELERO_BUCKET_NAME=${VELERO_BUCKET_NAME:-"${CLUSTER_NAME,,}-velero"}
	while [ "${bucket_attempt}" -le "${max_attempts}" ]; do
	echo "Deleting S3 Bucket. Attempt ${bucket_attempt} of ${max_attempts}"
	output=$( aws s3api delete-bucket --bucket "${VELERO_BUCKET_NAME}" --region "${AWS_REGION}" 2>&1 )
	status=$?
	if [[ $status == 0 ]]; then
	echo "S3 Bucket deleted"
	break
	fi
	if ( grep "NoSuchBucket" <<< "$output" ); then
	echo "S3 Bucket doesn't exist, nothing to remove"
	break
	fi
	echo "Failed deleting S3 Bucket ${VELERO_BUCKET_NAME}, retrying"
	sleep 5
	bucket_attempt=$((bucket_attempt+1))
	done

	# Attempting three times to cleanly remove the cluster via eksctl
	while [ "${attempt}" -le "${max_attempts}" ]; do
	echo "Deleting cluster. Attempt ${attempt} of ${max_attempts}"
	output=$( eksctl delete cluster -n "${CLUSTER_NAME}" -r "${REGION_NAME}" --wait --force 2>&1 )
	status=$?
	if [[ $status == 0 ]]; then
	echo "EKS cluster deleted"
	break
	fi
	if ( grep "ResourceNotFoundException: No cluster found for name: ${CLUSTER_NAME}" <<< "$output" ); then
	echo "EKS cluster doesn't exist, nothing to remove"
	break
	fi
	echo "Failed deleting cluster ${CLUSTER_NAME}, retrying"
	sleep 5
	attempt=$((attempt+1))
	done

	# Recheck if something got stuck, and use harder methods to clean up
	CLOUDFORMATION_STATUS_CURRENT=$(aws cloudformation describe-stacks --stack-name "${STACK_NAME}" \| jq -r '.Stacks[].StackStatus')
	if [ -n "${CLOUDFORMATION_STATUS_CURRENT}" ] ; then
	echo "::warning file=continuous-delivery.yml::eksctl failed deleting a cluster cleanly"
	# When the status of CloudFormation stack managed by eksctl reports an error, try to delete resources directly with AWS CLI
	pip install boto3
	# Destroy the VPCs whose Name tag contains the stack name
	for vpc_id in $(aws ec2 describe-vpcs \| jq -r '.Vpcs[] \| select(.Tags?[]? \| .Key == "Name" and (.Value \| contains("'"${STACK_NAME}"'"))).VpcId'); do
	python .github/vpc_destroy.py --vpc_id "${vpc_id}" --region "${REGION_NAME}" --services ec2
	done
	# Then we try to delete the cluster cleanly and the cloudformation
	if aws eks describe-cluster --name "${CLUSTER_NAME}" --region "${REGION_NAME}" ; then
	eksctl delete cluster -n "${CLUSTER_NAME}" -r "${REGION_NAME}" --wait --force
	fi
	# Delete the node group stacks recorded earlier, if they still exist
	if [ -n "${NODEGROUP_STACK_NAMES}" ] ; then
	for NODEGROUP_STACK_NAME in ${NODEGROUP_STACK_NAMES}; do
	if aws cloudformation describe-stacks --stack-name "${NODEGROUP_STACK_NAME}" --region "${REGION_NAME}" ; then
	aws cloudformation delete-stack --stack-name "${NODEGROUP_STACK_NAME}" --region "${REGION_NAME}"
	fi
	done
	fi
	# Finally delete the cluster stack itself, if it still exists
	if aws cloudformation describe-stacks --stack-name "${STACK_NAME}" --region "${REGION_NAME}" ; then
	aws cloudformation delete-stack --stack-name "${STACK_NAME}" --region "${REGION_NAME}"
	fi
	fi

	# Clear up leftover volumes, i.e. those tagged as owned by this cluster
	while read -r volume; do
	echo "Deleting $volume of cluster $CLUSTER_NAME ..."
	if ! aws ec2 delete-volume --region "${REGION_NAME}" --volume-id "$volume" ; then
	echo "::warning file=continuous-delivery.yml::Failed deleting $volume of cluster $CLUSTER_NAME"
	fi
	done < <(aws ec2 describe-volumes --region "${REGION_NAME}" --query 'Volumes[?not_null(Tags[?Key == `kubernetes.io/cluster/'"$CLUSTER_NAME"'` && Value == `owned`].Value)].VolumeId' \| jq -r '.[]' \|\| true)

	# GKE Secrets required
	# secrets.GCP_SERVICE_ACCOUNT
	# secrets.GCP_PROJECT_ID

	e2e-gke:
	name: Run E2E on Google GKE
	if: \|
	(always() && !cancelled()) &&
	vars.GKE_ENABLED == 'true' &&
	needs.generate-jobs.outputs.gkeEnabled == 'true' &&
	needs.generate-jobs.result == 'success'
	needs:
	- buildx
	- generate-jobs
	- evaluate_options
	strategy:
	fail-fast: false
	max-parallel: 6
	matrix: ${{ fromJSON(needs.generate-jobs.outputs.gkeMatrix) }}
	runs-on: ubuntu-24.04
	env:
	# TEST_DEPTH determines the maximum test level the suite should be running
	TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
	# FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
	FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}

	K8S_VERSION: "${{ matrix.k8s_version }}"
	POSTGRES_VERSION: ${{ matrix.postgres_version }}
	POSTGRES_KIND: ${{ matrix.postgres_kind }}
	MATRIX: ${{ matrix.id }}
	POSTGRES_IMG: "${{ matrix.postgres_img }}"
	# The version of operator to upgrade FROM, in the rolling upgrade E2E test
	E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
	TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.gkeTimeout }}
	BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}

	DEBUG: "true"
	BUILD_IMAGE: "false"
	CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
	E2E_DEFAULT_STORAGE_CLASS: standard-rwo
	E2E_CSI_STORAGE_CLASS: standard-rwo
	E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: pd-csi-snapclass

	REGION: europe-west3
	TEST_CLOUD_VENDOR: "gke"

	steps:
	-
	name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ needs.evaluate_options.outputs.git_ref }}
	-
	name: Install Go
	uses: actions/setup-go@v5
	with:
	go-version: ${{ env.GOLANG_VERSION }}
	check-latest: true
	-
	## In case hack/setup-cluster.sh needs to pull the operand image from the registry
	name: Login into docker registry
	uses: docker/login-action@v3
	with:
	registry: ${{ env.REGISTRY }}
	username: ${{ env.REGISTRY_USER }}
	password: ${{ env.REGISTRY_PASSWORD }}
	-
	name: Prepare the environment
	uses: nick-fields/retry@v3
	with:
	timeout_seconds: 300
	max_attempts: 3
	command: \|
	sudo apt-get update
	sudo apt-get install -y gettext-base
	-
	name: Install ginkgo
	uses: nick-fields/retry@v3
	with:
	timeout_seconds: 120
	max_attempts: 3
	command: \|
	go install github.com/onsi/ginkgo/v2/ginkgo
	-
	# Build the cluster name and export it to the following steps through GITHUB_ENV
	name: Set cluster name
	run: \|
	# GKE cluster name rules:
	# only lowercase alphanumerics and '-' allowed, must start with a letter and end with an alphanumeric,
	# and must be no longer than 40 characters
	# We shorten the matrix id by stripping '_', '.' and '-' and we lowercase it
	# NOTE(review): no explicit truncation to 40 characters is performed here
	SHORT_ID=$( echo ${{ matrix.id }} \| tr -d '_.-' \| tr '[:upper:]' '[:lower:]')
	echo "CLUSTER_NAME=${{ env.E2E_SUFFIX }}-test-${{ github.run_number }}-${SHORT_ID}" >> $GITHUB_ENV
	-
	name: Authenticate to Google Cloud
	id: 'auth'
	uses: google-github-actions/auth@v2
	with:
	credentials_json: '${{ secrets.GCP_SERVICE_ACCOUNT }}'
	-
	name: Set up Cloud SDK and kubectl
	uses: google-github-actions/setup-gcloud@v2
	with:
	project_id: ${{ secrets.GCP_PROJECT_ID }}
	install_components: 'kubectl,gke-gcloud-auth-plugin'
	-
	# Create the GKE cluster for this matrix entry, retrying on failure.
	# The step succeeds as soon as one creation attempt succeeds and fails
	# once all the attempts are exhausted.
	name: Create GKE cluster
	run: \|
	set +e
	# We may go over the amount of API requests allowed
	# by Google when creating all the clusters at the same time.
	# We give a few attempts at creating the cluster before giving up.
	# The following command will create a 3-node cluster, with each
	# node deployed in its own availability zone.
	max_attempts=5
	for attempt in $(seq 1 "${max_attempts}"); do
	if gcloud container clusters create ${{ env.CLUSTER_NAME }} \
	--num-nodes=1 \
	--cluster-version=${{ env.K8S_VERSION }} \
	--region=${{ env.REGION }} \
	--disk-size=20 \
	--machine-type=e2-standard-2 \
	--labels=cluster=${{ env.CLUSTER_NAME }}
	then
	exit 0
	fi
	# Waiting after the last attempt would only delay the failure
	if [ "${attempt}" -lt "${max_attempts}" ]; then
	echo "Couldn't create the cluster. Retrying in 100s."
	sleep 100
	fi
	done
	echo "Couldn't create the cluster. Failing."
	exit 1
	-
	# Fetch the credentials of the newly created cluster so that kubectl can reach it
	name: Get GKE kubeconfig credentials
	env:
	# presumably makes gcloud configure kubectl to use gke-gcloud-auth-plugin - TODO confirm
	USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
	run: \|
	gcloud container clusters get-credentials ${{ env.CLUSTER_NAME }} --region ${{ env.REGION }} --project ${{ secrets.GCP_PROJECT_ID }}
	-
	# Install the volume snapshot class and bind it to the storage class used by the tests
	name: Configure Storage
	run: \|
	# Install volume snapshot class
	kubectl apply -f hack/e2e/volumesnapshotclass-pd-csi.yaml
	# Associate the default volume snapshot class with the default storage class
	kubectl annotate storageclass ${{env.E2E_DEFAULT_STORAGE_CLASS}} storage.kubernetes.io/default-snapshot-class=${{env.E2E_DEFAULT_VOLUMESNAPSHOT_CLASS}} --overwrite
	kubectl get storageclass
	-
	# Render config/manager/env_override.yaml from the customization template,
	# substituting the variables defined in this step's env
	name: Prepare patch for customization
	env:
	## the following variables all need to be set if we use env_override_customized.yaml.template
	## this is the customization for gke
	LEADER_ELECTION: "false"
	LEADER_LEASE_DURATION: 240
	LEADER_RENEW_DEADLINE: 230
	LIVENESS_PROBE_THRESHOLD: 9
	LOG_LEVEL: ${{ needs.evaluate_options.outputs.log_level }}
	run: \|
	# Fall back to "info" when no log level was provided
	LOG_LEVEL=${LOG_LEVEL:-info}
	envsubst < hack/e2e/env_override_customized.yaml.template > config/manager/env_override.yaml
	# Print the rendered file so that it shows up in the job log
	cat config/manager/env_override.yaml
	-
	name: Run E2E tests
	run: hack/e2e/run-e2e.sh
	-
	name: Report failed E2E tests
	if: failure()
	run: \|
	set +x
	chmod +x .github/report-failed-test.sh
	./.github/report-failed-test.sh
	-
	# Create an individual artifact for each E2E test, which will be used to
	# generate E2E test summary in the follow-up job 'summarize-e2e-tests'
	name: Create individual artifact for each E2E test
	if: (always() && !cancelled())
	env:
	RUNNER: "gke"
	RUN_ID: ${{ github.run_id }}
	REPOSITORY: ${{ github.repository }}
	GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
	run: \|
	set +x
	python .github/generate-test-artifacts.py \
	-o testartifacts-${{ env.MATRIX }} \
	-f tests/e2e/out/report.json \
	--environment=true
	if [ -f tests/e2e/out/upgrade_report.json ]; then
	python .github/generate-test-artifacts.py \
	-o testartifacts-${{ env.MATRIX }} \
	-f tests/e2e/out/upgrade_report.json \
	--environment=true
	fi
	-
	name: Archive test artifacts
	if: (always() && !cancelled())
	uses: actions/upload-artifact@v4
	with:
	name: testartifacts-${{ env.MATRIX }}
	path: testartifacts-${{ env.MATRIX }}/
	retention-days: 7
	-
	name: Cleanup test artifacts
	if: always()
	run:
	rm -rf testartifacts-${{ env.MATRIX }}/
	-
	name: Cleanup ginkgo JSON report
	# Delete report.json after the analysis. File should always exist.
	# Delete upgrade_report.json. It may not exist depending on test level.
	if: always()
	run: \|
	if [ -f tests/e2e/out/upgrade_report.json ]; then
	rm tests/e2e/out/upgrade_report.json
	fi
	if [ -f tests/e2e/out/report.json ]; then
	rm tests/e2e/out/report.json
	fi
	-
	name: Archive e2e failure contexts
	if: failure()
	uses: actions/upload-artifact@v4
	with:
	name: test-failure-contexts-${{ matrix.id }}
	path: \|
	tests/*/out/
	retention-days: 7
	if-no-files-found: ignore
	-
	name: Archive e2e logs
	if: failure()
	uses: actions/upload-artifact@v4
	with:
	name: cluster-logs-${{ matrix.id }}
	path: \|
	tests/e2e/cluster_logs/**
	retention-days: 7
	if-no-files-found: ignore
	-
	# Best-effort teardown of the GKE cluster used by this matrix entry and of
	# the disks it leaves behind. Errors do not abort the script (set +e);
	# every deletion is attempted up to max_attempts times and a workflow
	# warning is raised when all the attempts fail, so that leaked resources
	# do not go unnoticed.
	name: Clean up
	if: always()
	run: \|
	set +e
	# Attempt to remove any leftover resource
	kubectl delete cluster --all --all-namespaces --now --timeout=30s \|\| true
	kubectl delete pdb --all --all-namespaces --now --timeout=30s \|\| true
	kubectl delete pvc --all --all-namespaces --now --timeout=30s \|\| true

	# Wait until all the PVs provisioned are actually reclaimed
	kubectl wait --for delete --all pv --timeout=60s \|\| true

	attempt=1
	max_attempts=3
	while [ "${attempt}" -le "${max_attempts}" ]; do
	gcloud container clusters delete ${{ env.CLUSTER_NAME }} --region=${{ env.REGION }} --quiet
	status=$?
	if [[ $status == 0 ]]; then
	echo "GKE cluster ${{ env.CLUSTER_NAME }} deleted from region ${{ env.REGION }}"
	break
	fi
	echo "Failed deleting cluster ${{ env.CLUSTER_NAME }} from region ${{ env.REGION }}, retrying"
	sleep 5
	attempt=$((attempt+1))
	done
	# attempt only exceeds max_attempts when every deletion attempt failed
	if [ "${attempt}" -gt "${max_attempts}" ]; then
	echo "::warning file=continuous-delivery.yml::Failed deleting GKE cluster ${{ env.CLUSTER_NAME }} from region ${{ env.REGION }}"
	fi

	# The node's disks are not automatically deleted when the cluster is removed.
	# We delete all the disks tagged with the name of the cluster that are not
	# owned by anyone.

	# The disks may take a while to be released: look them up a few times
	# until the 3 expected ones are found, then delete whatever was found
	attempt=1
	max_attempts=3
	while [ "${attempt}" -le "${max_attempts}" ]; do
	IDS=$(gcloud compute disks list --filter="labels.cluster=${{ env.CLUSTER_NAME }} AND region:${{ env.REGION }} AND -users:*" --format="value(id)")
	amount="$(echo $IDS \| awk '{print NF}')"
	if [[ "$amount" == 3 ]]; then
	echo -e "Found the 3 disks to be removed:\n$IDS"
	break
	fi
	echo "Expected 3 disks to delete but found $amount, waiting and retrying"
	sleep 20
	attempt=$((attempt+1))
	done

	for ID in ${IDS}
	do
	attempt=1
	max_attempts=3
	while [ "${attempt}" -le "${max_attempts}" ]; do
	gcloud compute disks delete --region "${{ env.REGION }}" --quiet "${ID}"
	status=$?
	if [[ $status == 0 ]]; then
	echo "compute disk ${ID} deleted"
	break
	fi
	echo "Failed deleting disk ${ID} from region ${{ env.REGION }}, retrying"
	sleep 5
	attempt=$((attempt+1))
	done
	# attempt only exceeds max_attempts when every deletion attempt failed
	if [ "${attempt}" -gt "${max_attempts}" ]; then
	echo "::warning file=continuous-delivery.yml::Failed deleting disk ${ID} from region ${{ env.REGION }}"
	fi
	done

	# OpenShift Secrets required
	# secrets.AWS_EKS_ADMIN_IAM_ROLES
	# secrets.AWS_ACCESS_KEY_ID
	# secrets.AWS_SECRET_ACCESS_KEY

	e2e-openshift:
	name: Run E2E on OpenShift
	if: \|
	always() && !cancelled() &&
	vars.OPENSHIFT_ENABLED == 'true' &&
	needs.generate-jobs.outputs.openshiftEnabled == 'true' &&
	needs.generate-jobs.result == 'success'
	needs:
	- buildx
	- generate-jobs
	- evaluate_options
	strategy:
	fail-fast: false
	max-parallel: 6
	matrix: ${{ fromJSON(needs.generate-jobs.outputs.openshiftMatrix) }}
	runs-on: ubuntu-24.04
	env:
	# TEST_DEPTH determines the maximum test level the suite should be running
	TEST_DEPTH: ${{ needs.evaluate_options.outputs.test_level }}
	# FEATURE_TYPE, when defined, determines the subset of E2E tests that will be executed, divided by feature type
	FEATURE_TYPE: ${{ needs.evaluate_options.outputs.feature_type }}

	K8S_VERSION: "${{ matrix.k8s_version }}"
	POSTGRES_VERSION: ${{ matrix.postgres_version }}
	POSTGRES_KIND: ${{ matrix.postgres_kind }}
	MATRIX: ${{ matrix.id }}
	POSTGRES_IMG: "${{ matrix.postgres_img }}"
	# The version of operator to upgrade FROM, in the rolling upgrade E2E test
	E2E_PRE_ROLLING_UPDATE_IMG: "${{ matrix.postgres_pre_img }}"
	TEST_TIMEOUTS: ${{ needs.generate-jobs.outputs.openshiftTimeout }}
	BRANCH_NAME: ${{ needs.buildx.outputs.branch_name }}

	DEBUG: "true"
	BUILD_IMAGE: "false"
	CONTROLLER_IMG: ${{ needs.generate-jobs.outputs.image }}
	E2E_DEFAULT_STORAGE_CLASS: gp3-csi
	E2E_CSI_STORAGE_CLASS: gp3-csi
	E2E_DEFAULT_VOLUMESNAPSHOT_CLASS: csi-aws-vsc

	TEST_CLOUD_VENDOR: "ocp"

	# AWS configuration
	AWS_BASE_DOMAIN: ${{ secrets.AWS_BASE_DOMAIN }}
	AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
	AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	AWS_REGION: eu-central-1
	AWS_EKS_ADMIN_IAM_ROLES: ${{ secrets.AWS_EKS_ADMIN_IAM_ROLES }}

	REDHAT_PULL: ${{ secrets.REDHAT_PULL }}
	SSH_PUBLIC_KEY: ${{ secrets.SSH_PUBLIC_KEY }}

	steps:
	-
	# Build the cluster name from E2E_SUFFIX, the run number and the OpenShift
	# version (stripped of its dots), and export it to the following steps
	# through GITHUB_ENV
	name: Set cluster name
	run: \|
	echo "CLUSTER_NAME=${{ env.E2E_SUFFIX }}-ocp-${{ github.run_number}}-$( echo ${{ matrix.k8s_version }} \| tr -d '.' )" >> $GITHUB_ENV
	-
	name: Checkout code
	uses: actions/checkout@v4
	with:
	ref: ${{ needs.evaluate_options.outputs.git_ref }}
	fetch-depth: 0
	-
	name: Install Go
	uses: actions/setup-go@v5
	with:
	go-version: ${{ env.GOLANG_VERSION }}
	check-latest: true
	-
	name: Set up QEMU
	uses: docker/setup-qemu-action@v3
	with:
	platforms: ${{ env.PLATFORMS }}
	-
	name: Set up Docker Buildx
	uses: docker/setup-buildx-action@v3
	-
	## In case hack/setup-cluster.sh needs to pull the operand image from the registry
	name: Login into docker registry
	uses: docker/login-action@v3
	with:
	registry: ${{ env.REGISTRY }}
	username: ${{ env.REGISTRY_USER }}
	password: ${{ env.REGISTRY_PASSWORD }}
	-
	# Run the olm-catalog make target, overriding the image references with
	# the ones produced by the buildx job
	name: Build and push the operator and catalog
	env:
	CONTROLLER_IMG: ${{ needs.buildx.outputs.controller_img_ubi8 }}
	BUNDLE_IMG: ${{ needs.buildx.outputs.bundle_img }}
	CATALOG_IMG: ${{ needs.buildx.outputs.catalog_img }}
	run: \|
	make olm-catalog
	-
	# Install openshift-install and oc, both at the version selected by the matrix
	name: Install OC Installer and client
	uses: redhat-actions/openshift-tools-installer@v1
	with:
	source: "mirror"
	openshift-install: ${{ matrix.k8s_version }}
	oc: ${{ matrix.k8s_version }}
	-
	# Render the installer configuration from its template and create the
	# cluster, using hack/ as the installer working directory
	name: Install OpenShift Cluster ${{ matrix.k8s_version }}
	run: \|
	envsubst < hack/install-config.yaml.template > hack/install-config.yaml
	openshift-install create cluster --dir hack/ --log-level warn
	-
	# Run the OpenShift flavour of the E2E suite.
	# The condition makes this step run even when a previous step failed,
	# unless the workflow was cancelled.
	name: Run E2E tests
	if: (always() && !cancelled())
	run: \|
	# Before running on OpenShift we make sure that the catalog is created
	# in the openshift-marketplace namespace
	sed -i -e 's/namespace: operators/namespace: openshift-marketplace/' cloudnative-pg-catalog.yaml
	# Show where the catalog file is and what it contains, for troubleshooting
	find -type f -name "cloudnative-pg-catalog.yaml"
	cat cloudnative-pg-catalog.yaml
	# The kubeconfig is read from the auth directory under hack/
	KUBECONFIG=$(pwd)/hack/auth/kubeconfig bash -x hack/e2e/run-e2e-ocp.sh

	-
	# Summarize the failed E2E test cases if there are any
	name: Report failed E2E tests
	if: failure()
	run: \|
	set +x
	chmod +x .github/report-failed-test.sh
	./.github/report-failed-test.sh
	# Create an individual artifact for each E2E test, which will be used to
	# generate the E2E test summary in the follow-up job 'summarize-e2e-tests'
	- name: Create individual artifact for each E2E test
	  if: (always() && !cancelled())
	  env:
	    RUNNER: "openshift"
	    RUN_ID: ${{ github.run_id }}
	    REPOSITORY: ${{ github.repository }}
	    GIT_REF: ${{ needs.evaluate_options.outputs.git_ref }}
	  run: |
	    set +x
	    python .github/generate-test-artifacts.py \
	      -o testartifacts-${{ env.MATRIX }} \
	      -f tests/e2e/out/report.json \
	      --environment=true
	    # The upgrade report is processed only when it has been generated
	    if [ -f tests/e2e/out/upgrade_report.json ]; then
	      python .github/generate-test-artifacts.py \
	        -o testartifacts-${{ env.MATRIX }} \
	        -f tests/e2e/out/upgrade_report.json \
	        --environment=true
	    fi
	# Upload the per-test artifact directory, named after this matrix entry
	- name: Archive test artifacts
	  if: (always() && !cancelled())
	  uses: actions/upload-artifact@v4
	  with:
	    name: testartifacts-${{ env.MATRIX }}
	    path: testartifacts-${{ env.MATRIX }}/
	    retention-days: 7
	# Always remove the local artifact directory, whatever the job outcome
	- name: Cleanup test artifacts
	  if: always()
	  run: rm -rf testartifacts-${{ env.MATRIX }}/
	# Delete report.json after the analysis. The file should always exist.
	# Delete upgrade_report.json too. It may not exist depending on the test level.
	- name: Cleanup ginkgo JSON report
	  if: always()
	  run: |
	    if [ -f tests/e2e/out/upgrade_report.json ]; then
	      rm tests/e2e/out/upgrade_report.json
	    fi
	    if [ -f tests/e2e/out/report.json ]; then
	      rm tests/e2e/out/report.json
	    fi
	# On failure, upload the `out` directory of every test suite under tests/.
	# A missing directory is not treated as an error.
	- name: Archive e2e failure contexts
	  if: failure()
	  uses: actions/upload-artifact@v4
	  with:
	    name: test-failure-contexts-${{ matrix.id }}
	    path: |
	      tests/*/out/
	    retention-days: 7
	    if-no-files-found: ignore
	# On failure, upload everything found under tests/e2e/cluster_logs.
	# A missing directory is not treated as an error.
	- name: Archive e2e logs
	  if: failure()
	  uses: actions/upload-artifact@v4
	  with:
	    name: cluster-logs-${{ matrix.id }}
	    path: |
	      tests/e2e/cluster_logs/**
	    retention-days: 7
	    if-no-files-found: ignore
	# Always tear the cluster down, using the installer state kept in hack/
	- name: Destroy OpenShift Cluster ${{ matrix.k8s_version }}
	  if: always()
	  run: |
	    openshift-install destroy cluster --dir hack/

	# Summarize E2E test results, display in the GitHub 'summary' view
	summarize-e2e-tests:
	  name: E2E test suite
	  needs:
	    - evaluate_options
	    - e2e-local
	    - e2e-eks
	    - e2e-aks
	    - e2e-gke
	    - e2e-openshift
	  # Run when the workflow was not cancelled and at least one of the E2E jobs
	  # actually ran to completion, i.e. ended with 'success' or 'failure'
	  if: |
	    (always() && !cancelled()) &&
	    ((
	      needs.e2e-local.result == 'success' ||
	      needs.e2e-local.result == 'failure'
	    ) ||
	    (
	      needs.e2e-eks.result == 'success' ||
	      needs.e2e-eks.result == 'failure'
	    ) ||
	    (
	      needs.e2e-aks.result == 'success' ||
	      needs.e2e-aks.result == 'failure'
	    ) ||
	    (
	      needs.e2e-gke.result == 'success' ||
	      needs.e2e-gke.result == 'failure'
	    ) ||
	    (
	      needs.e2e-openshift.result == 'success' ||
	      needs.e2e-openshift.result == 'failure'
	    ))
	  runs-on: ubuntu-24.04
	  steps:
	    - name: Create a directory for the artifacts
	      run: mkdir test-artifacts

	    - name: Download all artifacts to the directory
	      uses: actions/download-artifact@v4
	      with:
	        path: test-artifacts
	        pattern: testartifacts-*

	    - name: Flatten all artifacts onto directory
	      # The download-artifact action, since we did not give it a name,
	      # downloads all artifacts and creates a new folder for each.
	      # In this step we bring all the JSONs to a single folder.
	      # `|| true` keeps the step green when no JSON file matches the glob.
	      run: |
	        mkdir test-artifacts/data
	        mv test-artifacts/*/*.json test-artifacts/data || true

	    - name: Display the structure of the artifact folder
	      run: ls -R test-artifacts/data

	    - name: Compute the E2E test summary
	      id: generate-summary
	      # NOTE(review): action reference restored from an obfuscated placeholder;
	      # confirm the pinned version against the repository history.
	      uses: cloudnative-pg/ciclops@v1.3.1
	      with:
	        artifact_directory: test-artifacts/data

	    - name: If there is an overflow summary, archive it
	      if: steps.generate-summary.outputs.Overflow
	      uses: actions/upload-artifact@v4
	      with:
	        name: ${{ steps.generate-summary.outputs.Overflow }}
	        path: ${{ steps.generate-summary.outputs.Overflow }}
	        retention-days: 7

	    - name: Send the Ciclops view over Slack
	      # Send the Ciclops thermometer on every scheduled run,
	      # or when there are systematic failures in release branches.
	      # Only runs in repositories owned by REPOSITORY_OWNER.
	      uses: rtCamp/action-slack-notify@v2
	      if: |
	        github.repository_owner == env.REPOSITORY_OWNER &&
	        (
	          github.event_name == 'schedule' ||
	          (
	            steps.generate-summary.outputs.alerts &&
	            startsWith(needs.evaluate_options.outputs.git_ref, 'refs/heads/release-')
	          )
	        )
	      env:
	        # SLACK_COLOR is where we distinguish a run with/without alerts. It's where the
	        # action has hooks for conditionality in the message body (yeah, weird)
	        SLACK_COLOR: ${{ steps.generate-summary.outputs.alerts && 'failure' || 'success' }}
	        SLACK_ICON: https://avatars.githubusercontent.com/u/85171364?size=48
	        SLACK_USERNAME: ${{ env.SLACK_USERNAME }}
	        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
	        SLACK_TITLE: CICLOPS view for ${{ github.repository }}
	        SLACK_MESSAGE_ON_SUCCESS: |
	          ${{ steps.generate-summary.outputs.thermometer }}
	        SLACK_MESSAGE_ON_FAILURE: |
	          ${{ steps.generate-summary.outputs.thermometer }}
	          :warning: Systematic failures!
	          ${{ steps.generate-summary.outputs.alerts }}
	        # Slack link markup: <url|label>
	        SLACK_FOOTER: |
	          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|See full CI run>

	    - name: Delete the downloaded files
	      run: rm -rf test-artifacts

	# Adds the "ok to merge :ok_hand:" label to PRs whose workflow run succeeded
	# with adequate test and matrix coverage.
	# This label is a prerequisite for merging a PR.
	# See also 'require-labels.yml'.
	ok-to-merge:
	  name: Label the PR as "ok to merge :ok_hand:"
	  needs:
	    - evaluate_options
	    - e2e-local
	  # Only for runs triggered by an issue comment, when the local E2E job
	  # succeeded and the evaluated test level is '4'
	  if: |
	    always() &&
	    needs.e2e-local.result == 'success' &&
	    github.event_name == 'issue_comment' &&
	    needs.evaluate_options.outputs.test_level == '4'
	  runs-on: ubuntu-24.04
	  steps:
	    - name: Check preconditions
	      id: get_pr_number_and_labels
	      env:
	        GITHUB_TOKEN: ${{ secrets.REPO_GHA_PAT }}
	      # Store in OK_LABEL the label name if the PR already carries it, or an
	      # empty string otherwise; `|| :` keeps the step green when grep finds nothing
	      run: |
	        ok_label=$(gh pr view "${{ github.event.issue.number }}" --json labels -q ".labels.[].name" 2>/dev/null | grep "ok to merge :ok_hand:" || :)
	        echo "OK_LABEL=${ok_label}" >> $GITHUB_ENV

	    - name: Label the PR as "ok to merge :ok_hand:"
	      # Skip when the label is already present
	      if: env.OK_LABEL == ''
	      # NOTE(review): action reference restored from an obfuscated placeholder;
	      # confirm the pinned version against the repository history.
	      uses: actions-ecosystem/action-add-labels@v1.1.3
	      with:
	        github_token: ${{ secrets.REPO_GHA_PAT }}
	        number: ${{ github.event.issue.number }}
	        labels: "ok to merge :ok_hand:"

	# Remove the "ok to merge :ok_hand:" label if the E2E tests or previous steps failed
	unlabel-ok-to-merge:
	  name: Remove the "ok to merge :ok_hand:" label from the PR
	  needs:
	    - evaluate_options
	    - e2e-local
	  # Only for runs triggered by an issue comment, when the local E2E job failed
	  if: |
	    always() &&
	    needs.e2e-local.result == 'failure' &&
	    github.event_name == 'issue_comment'
	  runs-on: ubuntu-24.04
	  steps:
	    - name: Check preconditions
	      id: get_pr_number_and_labels
	      env:
	        GITHUB_TOKEN: ${{ secrets.REPO_GHA_PAT }}
	      # Store in OK_LABEL the label name if the PR already carries it, or an
	      # empty string otherwise; `|| :` keeps the step green when grep finds nothing
	      run: |
	        ok_label=$(gh pr view "${{ github.event.issue.number }}" --json labels -q ".labels.[].name" 2>/dev/null | grep "ok to merge :ok_hand:" || :)
	        echo "OK_LABEL=${ok_label}" >> $GITHUB_ENV

	    - name: Remove "ok to merge :ok_hand:" label from PR
	      # Nothing to remove when the label is not present
	      if: env.OK_LABEL != ''
	      # NOTE(review): action reference restored from an obfuscated placeholder;
	      # confirm the pinned version against the repository history.
	      uses: actions-ecosystem/action-remove-labels@v1.3.0
	      with:
	        github_token: ${{ secrets.REPO_GHA_PAT }}
	        number: ${{ github.event.issue.number }}
	        labels: "ok to merge :ok_hand:"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

continuous-delivery #4084

Workflow file

continuous-delivery #4084

Jobs

Run details

Workflow file for this run