Skip to content

Commit

Permalink
Merge branch 'main' into delete-on-allocate
Browse files Browse the repository at this point in the history
  • Loading branch information
igooch authored Dec 9, 2024
2 parents 9786b90 + dc6d4a0 commit 730ac2c
Show file tree
Hide file tree
Showing 23 changed files with 618 additions and 225 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ bin
/site/public
/test

# Allow upgrade test directory
!/test/upgrade

# Created by .ignore support plugin (hsz.mobi)
### Go template
# Binaries for programs and plugins
Expand Down
169 changes: 163 additions & 6 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,18 @@ steps:
# End to end tests
#

# wait for us to be the oldest ongoing build before we run e2es
- name: gcr.io/cloud-builders/gcloud
id: e2e-wait-to-become-leader
# Build and Push upgrade test
- name: make-docker
id: push-upgrade-test
dir: test/upgrade
env: ['REGISTRY=${_REGISTRY}']
args: [push]
waitFor:
- push-images

# Wait for us to be the oldest ongoing build before we run upgrade and e2e tests
- name: gcr.io/google.com/cloudsdktool/cloud-sdk
id: wait-to-become-leader
waitFor: [push-images]
script: |
#!/usr/bin/env bash
Expand All @@ -258,10 +267,157 @@ steps:
- BUILD_ID=$BUILD_ID
- TRIGGER_NAME=$TRIGGER_NAME

# Run the upgrade tests in parallel; fail this step if any of the tests fail
- name: gcr.io/google.com/cloudsdktool/cloud-sdk
id: submit-upgrade-test-cloud-build
dir: test/upgrade
entrypoint: bash
args:
- -c
- |
#!/usr/bin/env bash
# Deploys the upgrade-test-runner Job to every upgrade test cluster (each cloud
# product x Kubernetes version listed below), waits on all of the Jobs in the
# background, and exits non-zero if any Job fails or any wait errors out.
set -e
set -o pipefail
export KUBECONFIG="/root/.kube/config"
mkdir -p /go/src/agones.dev/ /root/.kube/
ln -s /workspace /go/src/agones.dev/agones
cd /go/src/agones.dev/agones/test/upgrade
pids=()
typeset -A waitPids # Associative array for mapping `kubectl wait job` pid -> `kubectl wait job` output log name
tmpdir=$(mktemp -d)
# A single handler performs all cleanup. Registering two separate traps for the
# same signals does not stack them: the later `trap` replaces the earlier one,
# which would leave the temporary directory behind.
cleanup() {
  # Kill any background `kubectl wait` pipelines that are still running.
  # `kill` with no pids fails, hence the `|| :`.
  echo Cleaning up any remaining running pids: $(jobs -p)
  kill $(jobs -p) 2> /dev/null || :
  rm -rf -- "$tmpdir"
}
trap cleanup EXIT SIGTERM
# Update image tags to include the current build version.
DevVersion="${_BASE_VERSION}-dev-$(git rev-parse --short=7 HEAD)"
export DevVersion
# Render the manifests into the temp dir so the checked-in files stay untouched.
sed "s/\${DevVersion}/${DevVersion}/" upgradeTest.yaml > "${tmpdir}"/upgradeTest.yaml
sed "s/\${DevVersion}/${DevVersion}/" versionMap.yaml > "${tmpdir}"/versionMap.yaml
cloudProducts=("generic" "gke-autopilot")
# Kubernetes version -> region of the test clusters running that version.
declare -A versionsAndRegions=( [1.31]=us-east1 [1.30]=us-central1 [1.29]=us-west1 )
for cloudProduct in "${cloudProducts[@]}"
do
  for version in "${!versionsAndRegions[@]}"
  do
    region=${versionsAndRegions[$version]}
    # Cluster names use dashes in place of the dots in the version, e.g. 1.31 -> 1-31.
    if [ "$cloudProduct" = generic ]
    then
      testCluster="standard-upgrade-test-cluster-${version//./-}"
    else
      testCluster="gke-autopilot-upgrade-test-cluster-${version//./-}"
    fi
    testClusterLocation="${region}"
    # Point kubectl (via KUBECONFIG) at this cluster for all commands below.
    gcloud container clusters get-credentials "$testCluster" --region="$testClusterLocation" --project="$PROJECT_ID"
    if [ "$cloudProduct" = gke-autopilot ] ; then
      # For autopilot clusters use evictable "balloon" pods to keep a buffer in node pool autoscaling.
      kubectl apply -f evictablePods.yaml
    fi
    # Clean up any existing job / namespace / apiservice from previous run
    echo Checking if resources from a previous build of upgrade-test-runner exist and need to be cleaned up on cluster "${testCluster}".
    if kubectl get jobs | grep upgrade-test-runner ; then
      echo Deleting job from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete job upgrade-test-runner
      kubectl wait --for=delete pod -l job-name=upgrade-test-runner --timeout=5m
    fi
    # Check if there are any dangling game servers.
    if kubectl get gs | grep ".*"; then
      # Remove any finalizers so that dangling game servers can be manually deleted.
      kubectl get gs -o=custom-columns=:.metadata.name --no-headers | xargs kubectl patch gs -p '{"metadata":{"finalizers":[]}}' --type=merge
      sleep 5
      echo Deleting game servers from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete gs -l app=sdk-client-test
    fi
    if kubectl get po -l app=sdk-client-test | grep ".*"; then
      echo Deleting pods from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete po -l app=sdk-client-test
      kubectl wait --for=delete pod -l app=sdk-client-test --timeout=5m
    fi
    # The v1.allocation.agones.dev apiservice does not get removed automatically and will prevent the namespace from terminating.
    if kubectl get apiservice | grep v1.allocation.agones.dev ; then
      echo Deleting v1.allocation.agones.dev from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete apiservice v1.allocation.agones.dev
    fi
    if kubectl get namespace | grep agones-system ; then
      echo Deleting agones-system namespace from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl delete namespace agones-system
      kubectl wait --for=delete ns agones-system --timeout=5m
    fi
    if kubectl get crds | grep agones ; then
      echo Deleting crds from previous run of upgrade-test-runner on cluster "${testCluster}".
      kubectl get crds -o=custom-columns=:.metadata.name | grep agones | xargs kubectl delete crd
    fi
    echo kubectl apply -f permissions.yaml on cluster "${testCluster}"
    kubectl apply -f permissions.yaml
    echo kubectl apply -f versionMap.yaml on cluster "${testCluster}"
    kubectl apply -f "${tmpdir}"/versionMap.yaml
    echo kubectl apply -f gameserverTemplate.yaml on cluster "${testCluster}"
    kubectl apply -f gameserverTemplate.yaml
    echo kubectl apply -f upgradeTest.yaml on cluster "${testCluster}"
    kubectl apply -f "${tmpdir}"/upgradeTest.yaml
    # We need to wait for the job pod to be created and ready before we can wait on the job itself.
    # TODO: Once all test clusters are at Kubernetes Version >= 1.31 use `kubectl wait --for=create` instead of sleep.
    # kubectl wait --for=create pod -l job-name=upgrade-test-runner --timeout=1m
    sleep 10s
    kubectl wait --for=condition=ready pod -l job-name=upgrade-test-runner --timeout=5m
    echo Wait for job upgrade-test-runner to complete or fail on cluster "${testCluster}"
    # Runs in the background so the loop can move on to the next cluster. The
    # wait returns once any job condition has status True; the condition type
    # it prints is captured in a per-cluster log that is inspected below.
    kubectl wait job/upgrade-test-runner --timeout=20m --for jsonpath='{.status.conditions[*].status}'=True -o jsonpath='{.status.conditions[*].type}' | tee "${tmpdir}"/"${testCluster}".log &
    waitPid=$!
    pids+=( "$waitPid" )
    waitPids[$waitPid]="${tmpdir}"/"${testCluster}".log
  done
done
for pid in "${pids[@]}"; do
  # This block executes when the process exits and pid status==0
  if wait "$pid"; then
    outputLog="${waitPids[$pid]}"
    # wait for output to finish writing to file
    until [ -s "$outputLog" ]; do sleep 1; done
    output=$(<"${outputLog}")
    echo "${outputLog}": "${output}"
    # "Complete" is successful job run.
    # Version 1.31 has "SuccessCriteriaMet" as the first completion status returned, or "FailureTarget" in case of failure.
    if [ "$output" == "Complete" ] || [ "$output" == "SuccessCriteriaMet" ] ; then
      continue
    else
      exit 1
    fi
  # This block executes when the process exits and pid status!=0
  else
    # In the else branch $? still holds the exit status of `wait`.
    status=$?
    outputLog="${waitPids[$pid]}"
    echo "One of the upgrade tests pid $pid from cluster log $outputLog exited with a non-zero status ${status}."
    exit $status
  fi
done
echo "End of Upgrade Tests"
waitFor:
- wait-to-become-leader
- push-upgrade-test

# Cancel all orphaned e2e test Cloud Builds; failing to cancel any of them fails this whole build
- name: gcr.io/cloud-builders/gcloud
id: cancel-orphan-e2e-tests
waitFor: [e2e-wait-to-become-leader]
waitFor: [wait-to-become-leader]
script: |
#!/usr/bin/env bash
until gcloud builds list --ongoing --filter "tags:'e2e-test'" --format="value(id)" | xargs --no-run-if-empty gcloud builds cancel
Expand Down Expand Up @@ -386,7 +542,7 @@ steps:
#
- name: gcr.io/cloud-builders/gcloud
id: cleanup-services
waitFor: [e2e-wait-to-become-leader]
waitFor: [wait-to-become-leader]
allowFailure: true
entrypoint: bash
args:
Expand All @@ -400,14 +556,15 @@ steps:
done
substitutions:
_BASE_VERSION: 1.46.0
_CACHE_BUCKET: agones-build-cache
_HTMLTEST_CACHE_KEY: htmltest-0.10.1
_CPP_SDK_BUILD_CACHE_KEY: cpp-sdk-build
_CPP_SDK_CONFORMANCE_CACHE_KEY: cpp-sdk-conformance
_RUST_SDK_BUILD_CACHE_KEY: rust-sdk-build
_REGISTRY: us-docker.pkg.dev/${PROJECT_ID}/ci
tags: [ci, 'commit-${COMMIT_SHA}']
timeout: 18000s # 5h: 3h (e2e-wait-to-become-leader) + 1.5h (e2e timeout) + 0.5h (everything else)
timeout: 18000s # 5h: 3h (wait-to-become-leader) + 1.5h (e2e timeout) + 0.5h (everything else)
queueTtl: 259200s # 72h
images:
- ${_REGISTRY}/agones-controller
Expand Down
2 changes: 1 addition & 1 deletion docs/governance/templates/release_issue.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ and copy it into a release issue. Fill in relevant values, found inside {}
- [ ] Run `make post-build-release` to build the artifacts in GCS(These files will be attached in the release notes) and to push the latest images in the release repository and push chart on agones-chart.
- [ ] Run `make shell` and run `gcloud config configurations activate <your development project>` to switch Agones
development tooling off of the `agones-images` project.
- [ ] Smoke Test: run `make install-release` to view helm releases, uninstall agones-system namesapce, fetch the latest version of Agones, verify the new version, installing agones-system namespace, and list all the pods of agones-system.
- [ ] Smoke Test: run `make install-release` to view helm releases, uninstall agones-system namespace, fetch the latest version of Agones, verify the new version, installing agones-system namespace, and list all the pods of agones-system.
- [ ] Attach all assets found in the cloud storage with {version} to the draft GitHub Release.
- [ ] Copy any review changes from the release blog post into the draft GitHub release.
- [ ] Publish the draft GitHub Release.
Expand Down
2 changes: 1 addition & 1 deletion examples/allocation-endpoint/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ helm upgrade my-release --install --namespace agones-system --create-namespace a
--set agones.allocator.service.http.enabled=false
```

After installing Agones, deploy [ESP](https://cloud.google.com/endpoints/docs/grpc/specify-esp-v2-startup-options) which is an envoy based proxy, deployed as a sidecar along side `agones-alloator` container. Run the following to patch the service deployement, change the service port to ESP and add annotation to `agones-allocator` service account to impersonate GCP service account.
After installing Agones, deploy [ESP](https://cloud.google.com/endpoints/docs/grpc/specify-esp-v2-startup-options), an Envoy-based proxy that runs as a sidecar alongside the `agones-allocator` container. Run the following to patch the service deployment, change the service port to ESP, and add an annotation to the `agones-allocator` service account so that it can impersonate a GCP service account.

Replace [GKE-PROJECT-ID] in `patch-agones-allocator.yaml` with your project ID before running the scripts.

Expand Down
2 changes: 1 addition & 1 deletion examples/allocator-client-csharp/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Program
static async Task Main(string[] args)
{
if (args.Length < 6) {
throw new Exception("Arguments are missing. Expecting: <private key> <public key> <server CA> <external IP> <namepace> <enable multi-cluster>");
throw new Exception("Arguments are missing. Expecting: <private key> <public key> <server CA> <external IP> <namespace> <enable multi-cluster>");
}

string clientKey = File.ReadAllText(args[0]);
Expand Down
4 changes: 2 additions & 2 deletions examples/simple-game-server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ There are some text commands you can send the server to affect its behavior:
| "LIST_CONTAINS" | Returns true if the given value is in the given List, false otherwise |
| "GET_LIST_LENGTH" | Returns the length (number of values) of the given List as a string |
| "GET_LIST_VALUES" | Return the values in the given List as a comma delineated string |
| "APPEND_LIST_VALUE" | Returns if the given value was successfuly added to the List (true) or not (false) |
| "DELETE_LIST_VALUE" | Rreturns if the given value was successfuly deleted from the List (true) or not (false) |
| "APPEND_LIST_VALUE" | Returns if the given value was successfully added to the List (true) or not (false) |
| "DELETE_LIST_VALUE" | Returns if the given value was successfully deleted from the List (true) or not (false) |

## Configuration

Expand Down
6 changes: 3 additions & 3 deletions examples/simple-game-server/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ func handlePlayerConnected(s *sdk.SDK, parts []string, _ ...context.CancelFunc)
return
}

// handleGetPlayers returns a comma delimeted list of connected players
// handleGetPlayers returns a comma delimited list of connected players
func handleGetPlayers(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (response string, addACK bool, responseError error) {
log.Print("Retrieving connected player list")
list, err := s.Alpha().GetConnectedPlayers()
Expand Down Expand Up @@ -535,7 +535,7 @@ func handleGetListValues(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (r
return
}

// handleAppendListValue returns if the given value was successfuly added to the List or not
// handleAppendListValue returns if the given value was successfully added to the List or not
func handleAppendListValue(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (response string, addACK bool, responseError error) {
if len(parts) < 3 {
response = "Invalid APPEND_LIST_VALUE, should have 2 arguments"
Expand All @@ -553,7 +553,7 @@ func handleAppendListValue(s *sdk.SDK, parts []string, _ ...context.CancelFunc)
return
}

// handleDeleteListValue returns if the given value was successfuly deleted from the List or not
// handleDeleteListValue returns if the given value was successfully deleted from the List or not
func handleDeleteListValue(s *sdk.SDK, parts []string, _ ...context.CancelFunc) (response string, addACK bool, responseError error) {
if len(parts) < 3 {
response = "Invalid DELETE_LIST_VALUE, should have 2 arguments"
Expand Down
4 changes: 2 additions & 2 deletions examples/simple-genai-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ type Message struct {
func handleGenAIRequest(prompt string, clientConn *connection, chatHistory []Message) (string, error) {
var jsonStr []byte
var err error
// If the endpoint is the NPC API, use the json request format specifc to that API
// If the endpoint is the NPC API, use the json request format specific to that API
if clientConn.npc {
npcRequest := NPCRequest{
Msg: prompt,
Expand Down Expand Up @@ -329,7 +329,7 @@ func autonomousChat(prompt string, conn1 *connection, conn2 *connection, numChat
autonomousChat(response, conn2, conn1, numChats, stopPhase, chatHistory)
}

// Manually interact via TCP with the GenAI endpont
// Manually interact via TCP with the GenAI endpoint
func tcpListener(port string, genAiConn *connection) {
log.Printf("Starting TCP server, listening on port %s", port)
ln, err := net.Listen("tcp", ":"+port)
Expand Down
3 changes: 3 additions & 0 deletions install/helm/agones/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ spec:
app: {{ template "agones.name" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
{{- if .Values.agones.controller.labels }}
{{- toYaml .Values.agones.controller.labels | nindent 8 }}
{{- end }}
spec:
{{- if .Values.agones.controller.topologySpreadConstraints }}
topologySpreadConstraints:
Expand Down
3 changes: 3 additions & 0 deletions install/helm/agones/templates/extensions-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ spec:
app: {{ template "agones.name" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
{{- if .Values.agones.extensions.labels }}
{{- toYaml .Values.agones.extensions.labels | nindent 8 }}
{{- end }}
spec:
{{- if .Values.agones.extensions.topologySpreadConstraints }}
topologySpreadConstraints:
Expand Down
2 changes: 2 additions & 0 deletions install/helm/agones/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ agones:
# cpu: 1
# memory: 256Mi
nodeSelector: {}
labels: {}
annotations: {}
tolerations:
- key: "agones.dev/agones-system"
Expand Down Expand Up @@ -105,6 +106,7 @@ agones:
# memory: 256Mi
nodeSelector: {}
annotations: {}
labels: {}
# Determines if the Agones extensions should operate in hostNetwork mode.
#
# This setting is necessary for certain managed Kubernetes clusters (e.g., AWS EKS) that use custom
Expand Down
Loading

0 comments on commit 730ac2c

Please sign in to comment.