Skip to content

Commit b31c1da

Browse files
committed
chore(e2e): fixes for more stability
- Ignore failed to watch, fail to elect, fail to create vmip errors in virtualization-controller log. - Use Eventually to test IP in cilium agents. - Fix handling kubectl errors in SaveResourcesForTest. Signed-off-by: Ivan Mikheykin <[email protected]>
1 parent 6cc4d6d commit b31c1da

File tree

6 files changed

+44
-11
lines changed

6 files changed

+44
-11
lines changed

.github/workflows/dev_module_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ jobs:
625625
with:
626626
name: resources_from_failed_tests
627627
retention-days: 2
628-
path: /tmp/e2e_failed__*
628+
path: ${{ runner.temp }}/e2e_failed__*
629629
if-no-files-found: ignore
630630

631631
- name: Cleanup E2E resources on cancel

tests/e2e/default_config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,10 +50,16 @@ logFilter:
5050
- "does not have a pvc reference" # "err": "kvvm head-345e7b6a-testcases-image-hotplug/head-345e7b6a-vm-image-hotplug spec volume vi-head-345e7b6a-vi-alpine-http does not have a pvc reference"
5151
- "lastTransitionTime: Required value" # Err.
5252
- "virtualmachineipaddressleases.virtualization.deckhouse.io "
53+
- "Failed to watch" # error if virtualization-controller restarts during tests. "msg": "Failed to watch", "err": "Get \"http://127.0.0.1:23915/apis/virtualization.deckhouse.io/v1alpha2/virtualmachinerestores?allowWatchBookmarks=true\u0026resourceVersion=709816257\u0026timeoutSeconds=310\u0026watch=true\": context canceled"
54+
- "leader election lost"
5355
regexpLogFilter:
5456
- "failed to detach: .* not found" # "err" "failed to detach: virtualmachine.kubevirt.io \"head-497d17b-vm-automatic-with-hotplug\" not found",
5557
- "error patching .* not found" # "err" "error patching *** virtualimages.virtualization.deckhouse.io \"head-497d17b-vi-pvc-oref-vi-oref-vd\" not found",
5658
- "IP address .* is not among addresses assigned to 'default' network interface .*" # "msg": "IP address (10.66.10.61) is not among addresses assigned to 'default' network interface (10.66.10.60)"
59+
- "Failed to update lock optimistically:.*leases.*leader-election-helper.*" # error during virtualization-controller lifecycle: attempt to reacquire leader election. "msg": "Failed to update lock optimistically: Put \"http://127.0.0.1:23915/apis/coordination.k8s.io/v1/namespaces/d8-virtualization/leases/d8-virt-operator-leader-election-helper?timeout=5s\": context deadline exceeded (Client.Timeout exceeded while awaiting headers), falling back to slow path"
60+
- "Failed to update lock: .* leases.*leader-election-helper.*" # "msg": "ock: Operation cannot be fulfilled on leases.coordination.k8s.io \"d8-virt-operator-leader-election-helper\": the object has been modified; please apply your changes to the latest version and try again",
61+
- "failed to create VirtualMachineIPAddress .* the specified IP address .* has already been allocated and has not been released" # "err": "failed to create VirtualMachineIPAddress \"head-5d2c558-vm-restore-safe-tfv4w\": admission webhook \"vmip.virtualization-controller.validate.d8-virtualization\" denied the request: the VirtualMachineIPAddress cannot be created: the specified IP address 10.66.10.4 has already been allocated and has not been released"
62+
- "error retrieving resource lock .*leader-election-helper" # "msg": "error retrieving resource lock d8-virtualization/d8-virt-operator-leader-election-helper: context deadline exceeded",
5763

5864
cleanupResources:
5965
- clustervirtualimages.virtualization.deckhouse.io

tests/e2e/network/cilium_agents.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func CheckCilliumAgents(ctx context.Context, kubectl kc.Kubectl, vmName, vmNames
6262
}
6363

6464
if !found {
65-
return fmt.Errorf("failed: cilium agent %s for VM's node %s", pod.Name, nodeName)
65+
return fmt.Errorf("failed: not found cilium agent %s for VM's node %s", pod.Name, nodeName)
6666
}
6767
} else {
6868
// For pods on different nodes
@@ -72,7 +72,7 @@ func CheckCilliumAgents(ctx context.Context, kubectl kc.Kubectl, vmName, vmNames
7272
}
7373

7474
if !found {
75-
return fmt.Errorf("failed: cilium agent %s for node %s", pod.Name, pod.Spec.NodeName)
75+
return fmt.Errorf("failed: not found cilium agent %s for node %s", pod.Name, pod.Spec.NodeName)
7676
}
7777
}
7878
}

tests/e2e/util_test.go

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,10 @@ func IsContainerRestarted(podName, containerName, namespace string, startedAt me
734734
return false, fmt.Errorf("failed to compare the `startedAt` field before and after the tests ran: %s", podName)
735735
}
736736

737+
// SaveTestResources dumps some resources that may help in further diagnostics.
738+
//
739+
// NOTE: This method is called in AfterEach for failed specs only. Avoid using Expect,
740+
// as it fails without reporting. Better use GinkgoWriter to report errors at this point.
737741
func SaveTestResources(labels map[string]string, additional string) {
738742
replacer := strings.NewReplacer(
739743
" ", "_",
@@ -743,16 +747,35 @@ func SaveTestResources(labels map[string]string, additional string) {
743747
"(", "_",
744748
")", "_",
745749
"|", "_",
750+
"`", "",
751+
"'", "",
746752
)
747753
additional = replacer.Replace(strings.ToLower(additional))
748754

749-
str := fmt.Sprintf("/tmp/e2e_failed__%s__%s.yaml", labels["testcase"], additional)
755+
tmpDir := os.Getenv("RUNNER_TEMP")
756+
if tmpDir == "" {
757+
tmpDir = "/tmp"
758+
}
759+
resFileName := fmt.Sprintf("%s/e2e_failed__%s__%s.yaml", tmpDir, labels["testcase"], additional)
760+
errorFileName := fmt.Sprintf("%s/e2e_failed__%s__%s_error.txt", tmpDir, labels["testcase"], additional)
750761

751-
cmdr := kubectl.Get("virtualization,intvirt -A", kc.GetOptions{Output: "yaml", Labels: labels})
752-
Expect(cmdr.Error()).NotTo(HaveOccurred(), "cmd: %s\nstderr: %s", cmdr.GetCmd(), cmdr.StdErr())
762+
cmdr := kubectl.Get("virtualization,intvirt,po -A", kc.GetOptions{Output: "yaml", Labels: labels})
763+
if cmdr.Error() != nil {
764+
errReport := fmt.Sprintf("cmd: %s\nerror: %s\nstderr: %s\n", cmdr.GetCmd(), cmdr.Error(), cmdr.StdErr())
765+
GinkgoWriter.Printf("Get resources error:\n%s\n", errReport)
766+
err := os.WriteFile(errorFileName, []byte(errReport), 0o644)
767+
if err != nil {
768+
GinkgoWriter.Printf("Save error to file '%s' failed: %s\n", errorFileName, err)
769+
}
770+
}
753771

754-
err := os.WriteFile(str, cmdr.StdOutBytes(), 0o644)
755-
Expect(err).NotTo(HaveOccurred(), "cmd: %s\nstderr: %s", cmdr.GetCmd(), cmdr.StdErr())
772+
// Stdout may be present even if an error occurred.
773+
if len(cmdr.StdOutBytes()) > 0 {
774+
err := os.WriteFile(resFileName, cmdr.StdOutBytes(), 0o644)
775+
if err != nil {
776+
GinkgoWriter.Printf("Save resources to file '%s' failed: %s\n", errorFileName, err)
777+
}
778+
}
756779
}
757780

758781
type Watcher interface {

tests/e2e/vd_snapshots_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ var _ = Describe("VirtualDiskSnapshots", ginkgoutil.CommonE2ETestDecorators(), f
319319
return nil
320320
}
321321
if frozen {
322-
return fmt.Errorf("the filesystem of the virtual machine %s/%s is frozen", vm.Namespace, vm.Name)
322+
return fmt.Errorf("the filesystem of the virtual machine %s/%s is still frozen", vm.Namespace, vm.Name)
323323
}
324324
return nil
325325
}).WithTimeout(

tests/e2e/vm_connectivity_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,12 @@ func CheckCiliumAgents(kubectl kc.Kubectl, namespace string, vms ...string) {
308308
GinkgoHelper()
309309
for _, vm := range vms {
310310
By(fmt.Sprintf("Cilium agent should be OK's for VM: %s", vm))
311-
err := network.CheckCilliumAgents(context.Background(), kubectl, vm, namespace)
312-
Expect(err).NotTo(HaveOccurred())
311+
Eventually(func() error {
312+
return network.CheckCilliumAgents(context.Background(), kubectl, vm, namespace)
313+
}).
314+
WithTimeout(Timeout).
315+
WithPolling(Interval).
316+
Should(Succeed())
313317
}
314318
}
315319

0 commit comments

Comments
 (0)