Skip to content

Commit e48ee09

Browse files
committed
chore(e2e): fixes for more stability
- Ignore "Failed to watch", leader election, and VMIP creation errors in the virtualization-controller log. - Use Eventually to test IP in cilium agents. - Fix handling of kubectl errors in SaveTestResources. - Wait for snapshots readiness before checking for unfrozen filesystems. Signed-off-by: Ivan Mikheykin <[email protected]>
1 parent 55a047a commit e48ee09

File tree

6 files changed

+79
-12
lines changed

6 files changed

+79
-12
lines changed

.github/workflows/dev_module_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ jobs:
625625
with:
626626
name: resources_from_failed_tests
627627
retention-days: 2
628-
path: /tmp/e2e_failed__*
628+
path: ${{ runner.temp }}/e2e_failed__*
629629
if-no-files-found: ignore
630630

631631
- name: Cleanup E2E resources on cancel

tests/e2e/default_config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,18 @@ logFilter:
5151
- "lastTransitionTime: Required value" # Err.
5252
- "virtualmachineipaddressleases.virtualization.deckhouse.io "
5353
- "Forbidden: no new finalizers can be added if the object is being deleted, found new finalizers"
54+
- "Failed to watch" # error if virtualization-controller restarts during tests. "msg": "Failed to watch", "err": "Get \"http://127.0.0.1:23915/apis/virtualization.deckhouse.io/v1alpha2/virtualmachinerestores?allowWatchBookmarks=true\u0026resourceVersion=709816257\u0026timeoutSeconds=310\u0026watch=true\": context canceled"
55+
- "leader election lost"
5456
regexpLogFilter:
5557
- "failed to detach: .* not found" # "err" "failed to detach: virtualmachine.kubevirt.io \"head-497d17b-vm-automatic-with-hotplug\" not found",
5658
- "error patching .* not found" # "err" "error patching *** virtualimages.virtualization.deckhouse.io \"head-497d17b-vi-pvc-oref-vi-oref-vd\" not found",
5759
- "IP address .* is not among addresses assigned to 'default' network interface .*" # "msg": "IP address (10.66.10.61) is not among addresses assigned to 'default' network interface (10.66.10.60)"
5860
- "failed to get vmSnapshot: VirtualMachineSnapshot\\.virtualization\\.deckhouse.io .* not found" # "msg": "failed to get vmSnapshot: VirtualMachineSnapshot.virtualization.deckhouse.io \"main-to-pr14969-ynv-0-ef17ba-20250908-142437\" not found"
5961
- "failed to sync virtual disk data source objectref: start immediate: internalvirtualizationdatavolumes.cdi.internal.virtualization.deckhouse.io .* is forbidden: unable to create new content in namespace .* because it is being terminated" # "err": "failed to sync virtual disk data source objectref: start immediate: internalvirtualizationdatavolumes.cdi.internal.virtualization.deckhouse.io \"vd-head-b3d8865-vd-root-migration-bios-d77ea313-f469-463d-a71b-00c89ca542ab\" is forbidden: unable to create new content in namespace head-b3d8865-end-to-end-vm-migration because it is being terminated"
62+
- "Failed to update lock optimistically:.*leases.*leader-election-helper.*" # error during virtualization-controller lifecycle: attempt to reacquire leader election. "msg": "Failed to update lock optimistically: Put \"http://127.0.0.1:23915/apis/coordination.k8s.io/v1/namespaces/d8-virtualization/leases/d8-virt-operator-leader-election-helper?timeout=5s\": context deadline exceeded (Client.Timeout exceeded while awaiting headers), falling back to slow path"
63+
- "Failed to update lock: .* leases.*leader-election-helper.*" # "msg": "Failed to update lock: Operation cannot be fulfilled on leases.coordination.k8s.io \"d8-virt-operator-leader-election-helper\": the object has been modified; please apply your changes to the latest version and try again",
64+
- "failed to create VirtualMachineIPAddress .* the specified IP address .* has already been allocated and has not been released" # "err": "failed to create VirtualMachineIPAddress \"head-5d2c558-vm-restore-safe-tfv4w\": admission webhook \"vmip.virtualization-controller.validate.d8-virtualization\" denied the request: the VirtualMachineIPAddress cannot be created: the specified IP address 10.66.10.4 has already been allocated and has not been released"
65+
- "error retrieving resource lock .*leader-election-helper" # "msg": "error retrieving resource lock d8-virtualization/d8-virt-operator-leader-election-helper: context deadline exceeded",
6066

6167
cleanupResources:
6268
- clustervirtualimages.virtualization.deckhouse.io

tests/e2e/network/cilium_agents.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func CheckCilliumAgents(ctx context.Context, kubectl kc.Kubectl, vmName, vmNames
6262
}
6363

6464
if !found {
65-
return fmt.Errorf("failed: cilium agent %s for VM's node %s", pod.Name, nodeName)
65+
return fmt.Errorf("failed: not found cilium agent %s for VM's node %s", pod.Name, nodeName)
6666
}
6767
} else {
6868
// For pods on different nodes
@@ -72,7 +72,7 @@ func CheckCilliumAgents(ctx context.Context, kubectl kc.Kubectl, vmName, vmNames
7272
}
7373

7474
if !found {
75-
return fmt.Errorf("failed: cilium agent %s for node %s", pod.Name, pod.Spec.NodeName)
75+
return fmt.Errorf("failed: not found cilium agent %s for node %s", pod.Name, pod.Spec.NodeName)
7676
}
7777
}
7878
}

tests/e2e/util_test.go

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,10 @@ func IsContainerRestarted(podName, containerName, namespace string, startedAt me
734734
return false, fmt.Errorf("failed to compare the `startedAt` field before and after the tests ran: %s", podName)
735735
}
736736

737+
// SaveTestResources dumps some resources that may help with further diagnostics.
738+
//
739+
// NOTE: This method is called in AfterEach for failed specs only. Avoid using Expect,
740+
// as it fails without reporting. Better use GinkgoWriter to report errors at this point.
737741
func SaveTestResources(labels map[string]string, additional string) {
738742
replacer := strings.NewReplacer(
739743
" ", "_",
@@ -743,16 +747,35 @@ func SaveTestResources(labels map[string]string, additional string) {
743747
"(", "_",
744748
")", "_",
745749
"|", "_",
750+
"`", "",
751+
"'", "",
746752
)
747753
additional = replacer.Replace(strings.ToLower(additional))
748754

749-
str := fmt.Sprintf("/tmp/e2e_failed__%s__%s.yaml", labels["testcase"], additional)
755+
tmpDir := os.Getenv("RUNNER_TEMP")
756+
if tmpDir == "" {
757+
tmpDir = "/tmp"
758+
}
759+
resFileName := fmt.Sprintf("%s/e2e_failed__%s__%s.yaml", tmpDir, labels["testcase"], additional)
760+
errorFileName := fmt.Sprintf("%s/e2e_failed__%s__%s_error.txt", tmpDir, labels["testcase"], additional)
750761

751-
cmdr := kubectl.Get("virtualization,intvirt -A", kc.GetOptions{Output: "yaml", Labels: labels})
752-
Expect(cmdr.Error()).NotTo(HaveOccurred(), "cmd: %s\nstderr: %s", cmdr.GetCmd(), cmdr.StdErr())
762+
cmdr := kubectl.Get("virtualization,intvirt,po -A", kc.GetOptions{Output: "yaml", Labels: labels})
763+
if cmdr.Error() != nil {
764+
errReport := fmt.Sprintf("cmd: %s\nerror: %s\nstderr: %s\n", cmdr.GetCmd(), cmdr.Error(), cmdr.StdErr())
765+
GinkgoWriter.Printf("Get resources error:\n%s\n", errReport)
766+
err := os.WriteFile(errorFileName, []byte(errReport), 0o644)
767+
if err != nil {
768+
GinkgoWriter.Printf("Save error to file '%s' failed: %s\n", errorFileName, err)
769+
}
770+
}
753771

754-
err := os.WriteFile(str, cmdr.StdOutBytes(), 0o644)
755-
Expect(err).NotTo(HaveOccurred(), "cmd: %s\nstderr: %s", cmdr.GetCmd(), cmdr.StdErr())
772+
// Stdout may be present even if an error occurred.
773+
if len(cmdr.StdOutBytes()) > 0 {
774+
err := os.WriteFile(resFileName, cmdr.StdOutBytes(), 0o644)
775+
if err != nil {
776+
GinkgoWriter.Printf("Save resources to file '%s' failed: %s\n", errorFileName, err)
777+
}
778+
}
756779
}
757780

758781
type Watcher interface {

tests/e2e/vd_snapshots_test.go

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ var _ = Describe("VirtualDiskSnapshots", ginkgoutil.CommonE2ETestDecorators(), f
264264
}(i)
265265
}
266266
wg.Wait()
267-
Expect(errs).To(BeEmpty(), "concurrent snapshotting error")
267+
Expect(errs).To(BeEmpty(), "should not face concurrent snapshotting error")
268268

269269
Eventually(func() error {
270270
frozen, err := CheckFileSystemFrozen(vm.Name, ns)
@@ -282,6 +282,28 @@ var _ = Describe("VirtualDiskSnapshots", ginkgoutil.CommonE2ETestDecorators(), f
282282
}
283283
})
284284

285+
It("checks snapshots", func() {
286+
By("Snapshots should be `Ready`")
287+
labels := make(map[string]string)
288+
maps.Copy(labels, attachedVirtualDiskLabel)
289+
maps.Copy(labels, testCaseLabel)
290+
291+
Eventually(func() error {
292+
vdSnapshots := GetVirtualDiskSnapshots(ns, labels)
293+
for _, snapshot := range vdSnapshots.Items {
294+
if snapshot.Status.Phase == virtv2.VirtualDiskSnapshotPhaseReady || snapshot.DeletionTimestamp != nil {
295+
continue
296+
}
297+
return errors.New("still wait for all snapshots either in ready or in deletion state")
298+
}
299+
return nil
300+
}).WithTimeout(
301+
LongWaitDuration,
302+
).WithPolling(
303+
Interval,
304+
).Should(Succeed(), "all snapshots should be in ready state after creation")
305+
})
306+
285307
// TODO: It is a known issue that disk snapshots are not always created consistently. To prevent this error from causing noise during testing, we disabled this check. It will need to be re-enabled once the consistency issue is fixed.
286308
// It("checks snapshots of attached VDs", func() {
287309
// By(fmt.Sprintf("Snapshots should be in %s phase", PhaseReady))
@@ -319,7 +341,7 @@ var _ = Describe("VirtualDiskSnapshots", ginkgoutil.CommonE2ETestDecorators(), f
319341
return nil
320342
}
321343
if frozen {
322-
return fmt.Errorf("the filesystem of the virtual machine %s/%s is frozen", vm.Namespace, vm.Name)
344+
return fmt.Errorf("the filesystem of the virtual machine %s/%s is still frozen", vm.Namespace, vm.Name)
323345
}
324346
return nil
325347
}).WithTimeout(
@@ -384,6 +406,18 @@ func CreateVirtualDiskSnapshot(vdName, snapshotName, namespace string, requiredC
384406
return nil
385407
}
386408

409+
func GetVirtualDiskSnapshots(namespace string, labels map[string]string) virtv2.VirtualDiskSnapshotList {
410+
GinkgoHelper()
411+
vdSnapshots := virtv2.VirtualDiskSnapshotList{}
412+
err := GetObjects(kc.ResourceVDSnapshot, &vdSnapshots, kc.GetOptions{
413+
ExcludedLabels: []string{"hasNoConsumer"},
414+
Namespace: namespace,
415+
Labels: labels,
416+
})
417+
Expect(err).NotTo(HaveOccurred(), "cannot get `vdSnapshots`\nstderr: %s", err)
418+
return vdSnapshots
419+
}
420+
387421
func CheckFileSystemFrozen(vmName, vmNamespace string) (bool, error) {
388422
vmObj := virtv2.VirtualMachine{}
389423
err := GetObject(kc.ResourceVM, vmName, &vmObj, kc.GetOptions{Namespace: vmNamespace})

tests/e2e/vm_connectivity_test.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,12 @@ func CheckCiliumAgents(kubectl kc.Kubectl, namespace string, vms ...string) {
308308
GinkgoHelper()
309309
for _, vm := range vms {
310310
By(fmt.Sprintf("Cilium agent should be OK's for VM: %s", vm))
311-
err := network.CheckCilliumAgents(context.Background(), kubectl, vm, namespace)
312-
Expect(err).NotTo(HaveOccurred())
311+
Eventually(func() error {
312+
return network.CheckCilliumAgents(context.Background(), kubectl, vm, namespace)
313+
}).
314+
WithTimeout(Timeout).
315+
WithPolling(Interval).
316+
Should(Succeed())
313317
}
314318
}
315319

0 commit comments

Comments
 (0)