Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@
.pytest_cache
.idea
**/err.txt
coverage.out
coverage.out

# Symlinks created by Makefile build-cli
k8s-tests/chainsaw/*/skyhook-cli
2 changes: 1 addition & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceRoot}/operator/cmd/main.go",
"program": "${workspaceRoot}/operator/cmd/manager/main.go",
"cwd": "${workspaceRoot}/operator",
"buildFlags": "--ldflags '-X github.com/NVIDIA/skyhook/operator/internal/version.GIT_SHA=foobars -X github.com/NVIDIA/skyhook/operator/internal/version.VERSION=v0.5.0'",
"env": {
Expand Down
25 changes: 17 additions & 8 deletions k8s-tests/chainsaw/cli/lifecycle/chainsaw-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ spec:
assert: 60s
exec: 30s
steps:
# Step 0: Reset state from previous runs
- name: reset-state
try:
- script:
timeout: 30s
content: |
../skyhook-cli reset cli-lifecycle-test --confirm 2>/dev/null || true
echo "✓ State reset complete"

# Step 1: Create a Skyhook
- name: setup-skyhook
try:
Expand All @@ -55,7 +64,7 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook pause cli-lifecycle-test --confirm
../skyhook-cli pause cli-lifecycle-test --confirm
- assert:
file: assert-paused.yaml

Expand All @@ -65,7 +74,7 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook resume cli-lifecycle-test
../skyhook-cli resume cli-lifecycle-test
- assert:
file: assert-resumed.yaml

Expand All @@ -75,7 +84,7 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook disable cli-lifecycle-test --confirm
../skyhook-cli disable cli-lifecycle-test --confirm
- assert:
file: assert-disabled.yaml

Expand All @@ -85,7 +94,7 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook enable cli-lifecycle-test
../skyhook-cli enable cli-lifecycle-test
- assert:
file: assert-enabled.yaml

Expand All @@ -95,22 +104,22 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook pause cli-lifecycle-test --confirm
../../../../operator/bin/skyhook disable cli-lifecycle-test --confirm
../skyhook-cli pause cli-lifecycle-test --confirm
../skyhook-cli disable cli-lifecycle-test --confirm
- assert:
file: assert-paused-and-disabled.yaml
- script:
timeout: 30s
content: |
# Resume should only remove pause, not disable
../../../../operator/bin/skyhook resume cli-lifecycle-test
../skyhook-cli resume cli-lifecycle-test
- assert:
file: assert-still-disabled.yaml
- script:
timeout: 30s
content: |
# Enable should remove disable
../../../../operator/bin/skyhook enable cli-lifecycle-test
../skyhook-cli enable cli-lifecycle-test

# Cleanup
- name: cleanup
Expand Down
28 changes: 18 additions & 10 deletions k8s-tests/chainsaw/cli/node/chainsaw-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ spec:
assert: 120s
exec: 90s
steps:
# Step 0: Reset state from previous runs
- name: reset-state
try:
- script:
timeout: 30s
content: |
../skyhook-cli reset cli-node-test --confirm 2>/dev/null || true
echo "✓ State reset complete"

# Step 1: Create a Skyhook
- name: setup-skyhook
try:
Expand All @@ -42,11 +51,11 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook node list --skyhook cli-node-test
../skyhook-cli node list --skyhook cli-node-test
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook node list --skyhook cli-node-test -o json
../skyhook-cli node list --skyhook cli-node-test -o json

# Step 3: Test node status
- name: test-node-status
Expand All @@ -55,17 +64,17 @@ spec:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')
../../../../operator/bin/skyhook node status "$NODE"
../skyhook-cli node status "$NODE"
- script:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')
../../../../operator/bin/skyhook node status "$NODE" --skyhook cli-node-test
../skyhook-cli node status "$NODE" --skyhook cli-node-test
- script:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')
../../../../operator/bin/skyhook node status "$NODE" -o json
../skyhook-cli node status "$NODE" -o json

# Step 4: Test node ignore/unignore
- name: test-node-ignore
Expand All @@ -74,7 +83,7 @@ spec:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')
../../../../operator/bin/skyhook node ignore "$NODE"
../skyhook-cli node ignore "$NODE"
- assert:
file: assert-node-ignored.yaml
- name: test-node-unignore
Expand All @@ -83,7 +92,7 @@ spec:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')
../../../../operator/bin/skyhook node unignore "$NODE"
../skyhook-cli node unignore "$NODE"
- assert:
file: assert-node-unignored.yaml

Expand All @@ -94,7 +103,7 @@ spec:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')
../../../../operator/bin/skyhook node reset "$NODE" --skyhook cli-node-test --confirm
../skyhook-cli node reset "$NODE" --skyhook cli-node-test --confirm
- assert:
file: assert-node-reset.yaml

Expand All @@ -103,9 +112,8 @@ spec:
try:
- script:
content: |
kubectl delete skyhook cli-node-test 2>/dev/null || true
../skyhook-cli reset cli-node-test --confirm 2>/dev/null || true
for NODE in $(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[*].metadata.name}'); do
kubectl annotate node "$NODE" skyhook.nvidia.com/nodeState_cli-node-test- 2>/dev/null || true
kubectl label node "$NODE" skyhook.nvidia.com/ignore- 2>/dev/null || true
done

24 changes: 15 additions & 9 deletions k8s-tests/chainsaw/cli/package/chainsaw-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,15 @@ spec:
assert: 120s
exec: 90s
steps:
# Step 0: Reset state from previous runs
- name: reset-state
try:
- script:
timeout: 30s
content: |
../skyhook-cli reset cli-package-test --confirm 2>/dev/null || true
echo "✓ State reset complete"

# Step 1: Create a Skyhook
- name: setup-skyhook
try:
Expand All @@ -41,11 +50,11 @@ spec:
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook package status hello-world --skyhook cli-package-test
../skyhook-cli package status hello-world --skyhook cli-package-test
- script:
timeout: 30s
content: |
../../../../operator/bin/skyhook package status hello-world --skyhook cli-package-test -o json
../skyhook-cli package status hello-world --skyhook cli-package-test -o json

# Step 3: Test package logs
- name: test-package-logs
Expand All @@ -56,7 +65,7 @@ spec:
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')

# Run logs command - it should succeed (may show waiting or actual logs)
../../../../operator/bin/skyhook package logs hello-world --skyhook cli-package-test --node "$NODE"
../skyhook-cli package logs hello-world --skyhook cli-package-test --node "$NODE"

# Step 4: Wait for package to complete, then test rerun
- name: test-package-rerun
Expand All @@ -70,14 +79,14 @@ spec:
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')

# Test dry-run first
../../../../operator/bin/skyhook package rerun hello-world --skyhook cli-package-test --node "$NODE" --dry-run
../skyhook-cli package rerun hello-world --skyhook cli-package-test --node "$NODE" --dry-run
- script:
timeout: 30s
content: |
NODE=$(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[0].metadata.name}')

# Run actual rerun with confirm flag and stage
../../../../operator/bin/skyhook package rerun hello-world --skyhook cli-package-test --node "$NODE" --stage apply --confirm
../skyhook-cli package rerun hello-world --skyhook cli-package-test --node "$NODE" --stage apply --confirm
- assert:
file: assert-package-rerun.yaml

Expand All @@ -86,8 +95,5 @@ spec:
try:
- script:
content: |
kubectl delete skyhook cli-package-test 2>/dev/null || true
for NODE in $(kubectl get nodes -l skyhook.nvidia.com/test-node=skyhooke2e -o jsonpath='{.items[*].metadata.name}'); do
kubectl annotate node "$NODE" skyhook.nvidia.com/nodeState_cli-package-test- 2>/dev/null || true
done
../skyhook-cli reset cli-package-test --confirm 2>/dev/null || true

32 changes: 32 additions & 0 deletions k8s-tests/chainsaw/cli/reset/assert-nodes-reset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v1
kind: Node
metadata:
labels:
skyhook.nvidia.com/test-node: skyhooke2e
## Skyhook is still running, so it sets the status back to disabled after we clear it.
## It is unclear whether it should do this, but the behavior seems reasonable to keep.
# (not):
# skyhook.nvidia.com/status_cli-reset-test: .*
annotations:
# Verify that the skyhook nodeState and cordon annotations are removed.
# (lookup(@ || {}, 'skyhook.nvidia.com/nodeState_cli-reset-test')): null
(contains(keys(@), "skyhook.nvidia.com/nodeState_cli-reset-test")): false
(contains(keys(@), "skyhook.nvidia.com/cordon_cli-reset-test")): false
skyhook.nvidia.com/status_cli-reset-test: disabled ## disabled after clearing
# skyhook.nvidia.com/version_cli-reset-test: .* ## also reset after clearing

Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/bin/bash -x

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
Expand All @@ -16,14 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
name: cli-reset-test
status:
status: complete

## Sets spec.pause on a Skyhook resource with a kubectl merge patch.
## Arguments:
##   $1 - name of the Skyhook resource to patch
##   $2 - desired pause value; must be the literal string "true" or "false"
skyhook="$1"
pause="$2"

## pause is spliced into the JSON patch as a bare token, so only the two
## JSON boolean literals are accepted
if [[ "$pause" != "true" && "$pause" != "false" ]]; then
  echo "pause must be true or false"
  exit 1
fi

## quote the resource name so it always reaches kubectl as exactly one argument
kubectl patch skyhook "${skyhook}" -p '{"spec":{"pause":'"${pause}"'}}' --type=merge
22 changes: 6 additions & 16 deletions k8s-tests/chainsaw/skyhook/rest_nodes.sh → ...aw/cli/reset/assert-skyhook-disabled.yaml
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#!/bin/bash -eox pipefail

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
Expand All @@ -16,18 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
name: cli-reset-test
annotations:
skyhook.nvidia.com/disable: "true"

## helper script
## clears labels and annotations with the prefix "skyhook.nvidia.com" from all nodes
## note: many tests rely on a label to target nodes, so you might need to put that label back
## example:
## ❯ kubectl label node/kind-worker skyhook.nvidia.com/test-node=skyhooke2e

## Matches one "key=value" entry whose key starts with skyhook.nvidia.com/ and
## captures only the key: [^=]* stops at the first "=", so a value that itself
## contains "=" can never leak into the captured key.
skyhook_key_re='^(skyhook\.nvidia\.com/[^=]*)='

## For every node, remove each skyhook.nvidia.com/* annotation first and then
## each skyhook.nvidia.com/* label, using kubectl's "<key>-" removal syntax.
while IFS= read -r node; do
  for kind in annotate label; do
    ## Read the --list output line by line (expected to be one key=value entry
    ## per line) so values containing spaces or glob characters are not split.
    while IFS= read -r entry; do
      ## An explicit if (rather than `[[ ]] && cmd`) keeps a non-matching
      ## final entry from leaving a non-zero exit status behind.
      if [[ "${entry}" =~ ${skyhook_key_re} ]]; then
        kubectl "${kind}" "${node}" "${BASH_REMATCH[1]}-"
      fi
    done < <(kubectl "${kind}" --list "${node}")
  done
done < <(kubectl get nodes -o name)
Loading
Loading