diff --git a/test/extended/two_node/tnf_degraded.go b/test/extended/two_node/tnf_degraded.go
new file mode 100644
index 000000000000..c3c2e3bd7eb5
--- /dev/null
+++ b/test/extended/two_node/tnf_degraded.go
@@ -0,0 +1,344 @@
+package two_node
+
+import (
+	"context"
+	"encoding/base64"
+	"fmt"
+	"time"
+
+	g "github.com/onsi/ginkgo/v2"
+	o "github.com/onsi/gomega"
+
+	machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
+	machineconfigclient "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
+	"github.com/openshift/origin/test/extended/two_node/utils"
+	"github.com/openshift/origin/test/extended/util/image"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	apierrs "k8s.io/apimachinery/pkg/api/errors"
+
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8sruntime "k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/utils/ptr"
+
+	exutil "github.com/openshift/origin/test/extended/util"
+)
+
+const (
+	pdbLabelKey       = "app"
+	pdbLabelValue     = "pdb-demo"
+	pdbDeploymentName = "pdb-demo-deployment"
+	pdbName           = "pdb-demo"
+	rebootTestMCName  = "99-master-tnf-degraded-reboot-block-test"
+	rebootTestMCFile  = "/etc/tnf-degraded-reboot-block-test"
+)
+
+var _ = g.Describe("[sig-apps][OCPFeatureGate:DualReplica][Suite:openshift/two-node] Two Node Fencing behavior in degraded mode", func() {
+	oc := exutil.NewCLI("tnf-degraded").AsAdmin()
+	ctx := context.Background()
+	kubeClient := oc.AdminKubeClient()
+
+	g.BeforeEach(func() {
+		utils.EnsureTNFDegradedOrSkip(oc)
+	})
+
+	g.It("should allow a single eviction and block the second when PDB minAvailable=1 [apigroup:policy]", func() {
+		ns := oc.Namespace()
+		labels := map[string]string{pdbLabelKey: pdbLabelValue}
+		selector := fmt.Sprintf("%s=%s", pdbLabelKey, pdbLabelValue)
+
+		// Deployment with 2 replicas selected by the PDB
+		deploy, err := createPauseDeployment(ctx, kubeClient, ns, pdbDeploymentName, 2, labels)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		err = exutil.WaitForDeploymentReadyWithTimeout(oc, deploy.Name, ns, -1, 3*time.Minute)
+		o.Expect(err).NotTo(o.HaveOccurred(), "deployment did not reach 2 available replicas")
+
+		// PDB with minAvailable=1
+		pdb, err := createPDBMinAvailable(ctx, kubeClient, ns, pdbName, labels, 1)
+		o.Expect(err).NotTo(o.HaveOccurred())
+
+		// Wait for disruptionsAllowed=1
+		err = waitForPDBDisruptionsAllowed(ctx, kubeClient, ns, pdb.Name, 1, 2*time.Minute)
+		o.Expect(err).NotTo(o.HaveOccurred(), "PDB did not report disruptionsAllowed=1")
+
+		pods, err := kubeClient.CoreV1().Pods(ns).List(ctx, metav1.ListOptions{
+			LabelSelector: selector,
+		})
+		o.Expect(err).NotTo(o.HaveOccurred())
+		o.Expect(len(pods.Items)).To(o.Equal(2), "expected exactly 2 pods before first eviction")
+
+		firstPod := &pods.Items[0]
+		secondPod := &pods.Items[1]
+
+		// Evicting the first pod should succeed; wait for the PDB to consume the allowed disruption
+		err = evictPod(ctx, kubeClient, firstPod)
+		o.Expect(err).NotTo(o.HaveOccurred(), "first eviction should succeed")
+
+		err = waitForPDBDisruptionsAllowed(ctx, kubeClient, ns, pdb.Name, 0, 2*time.Minute)
+		o.Expect(err).NotTo(o.HaveOccurred(), "PDB did not update disruptionsAllowed=0 after first eviction")
+
+		// Evicting the second original pod should be blocked with HTTP 429
+		err = evictPod(ctx, kubeClient, secondPod)
+		o.Expect(err).To(o.HaveOccurred(), "second eviction should be blocked by PDB")
+
+		statusErr, ok := err.(*apierrs.StatusError)
+		o.Expect(ok).To(o.BeTrue(), "expected StatusError on blocked eviction")
+		o.Expect(statusErr.Status().Code).To(o.Equal(int32(429)), "expected HTTP 429 Too Many Requests for second eviction")
+
+		// PDB disruptionsAllowed must still be 0
+		currentPDB, err := kubeClient.PolicyV1().PodDisruptionBudgets(ns).Get(ctx, pdb.Name, metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred())
+		o.Expect(currentPDB.Status.DisruptionsAllowed).To(o.Equal(int32(0)), "expected disruptionsAllowed=0 after second eviction attempt")
+	})
+
+	g.It("should block a reboot-required MachineConfig rollout on the remaining master [Serial] [apigroup:machineconfiguration.openshift.io]", func() {
+		ns := oc.Namespace()
+		mcoClient := machineconfigclient.NewForConfigOrDie(oc.AdminConfig())
+
+		masterNode, err := utils.GetReadyMasterNode(ctx, oc)
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to find a Ready master node")
+
+		originalBootID := masterNode.Status.NodeInfo.BootID
+		originalUnschedulable := masterNode.Spec.Unschedulable
+
+		// Capture the current master MachineConfigPool state so we can assert it never progresses
+		masterMCP, err := mcoClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, "master", metav1.GetOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to get master MachineConfigPool")
+
+		originalConfigName := masterMCP.Status.Configuration.Name
+
+		// Create a small reboot-required MachineConfig targeting the master pool
+		ignFileContents := fmt.Sprintf("reboot-block test namespace=%s", ns)
+
+		testMC := newMasterRebootRequiredMachineConfig(rebootTestMCName, rebootTestMCFile, ignFileContents)
+
+		g.By(fmt.Sprintf("creating reboot-required MachineConfig %q for master pool", rebootTestMCName))
+		_, err = mcoClient.MachineconfigurationV1().MachineConfigs().Create(ctx, testMC, metav1.CreateOptions{})
+		o.Expect(err).NotTo(o.HaveOccurred(), "failed to create test MachineConfig")
+
+		// Clean up the test MachineConfig even if the test fails
+		defer func() {
+			cleanupCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
+			defer cancel()
+
+			_ = mcoClient.MachineconfigurationV1().MachineConfigs().Delete(
+				cleanupCtx,
+				rebootTestMCName,
+				metav1.DeleteOptions{},
+			)
+		}()
+
+		g.By("observing the degraded window (node safety + MCP blockage)")
+
+		observationWindow := 3 * time.Minute
+
+		err = observeTNFDegradedWindow(
+			ctx,
+			kubeClient,
+			mcoClient,
+			masterNode.Name,
+			originalBootID,
+			originalUnschedulable,
+			originalConfigName,
+			observationWindow,
+		)
+
+		o.Expect(err).NotTo(o.HaveOccurred(), "reboot-block behavior was not enforced while the cluster was degraded")
+	})
+},
+)
+
+// HELPERS
+func createPauseDeployment(
+	ctx context.Context,
+	client kubernetes.Interface,
+	ns, name string,
+	replicas int32,
+	labels map[string]string,
+) (*appsv1.Deployment, error) {
+	deploy := &appsv1.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: ns,
+		},
+		Spec: appsv1.DeploymentSpec{
+			Replicas: &replicas,
+			Selector: &metav1.LabelSelector{
+				MatchLabels: labels,
+			},
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: labels,
+				},
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{
+						{
+							Name:    "busy-work",
+							Image:   image.ShellImage(),
+							Command: []string{
+								"/bin/bash",
+								"-c",
+								`while true; do echo "Busy working, cycling through the ones and zeros"; sleep 5; done`,
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	return client.AppsV1().Deployments(ns).Create(ctx, deploy, metav1.CreateOptions{})
+}
+
+func createPDBMinAvailable(
+	ctx context.Context,
+	client kubernetes.Interface,
+	ns, name string,
+	labels map[string]string,
+	minAvailable int,
+) (*policyv1.PodDisruptionBudget, error) {
+	pdb := &policyv1.PodDisruptionBudget{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: ns,
+		},
+		Spec: policyv1.PodDisruptionBudgetSpec{
+			MinAvailable: ptr.To(intstr.FromInt(minAvailable)),
+			Selector: &metav1.LabelSelector{
+				MatchLabels: labels,
+			},
+		},
+	}
+	return client.PolicyV1().PodDisruptionBudgets(ns).Create(ctx, pdb, metav1.CreateOptions{})
+}
+
+func waitForPDBDisruptionsAllowed(
+	ctx context.Context,
+	client kubernetes.Interface,
+	namespace, name string,
+	expected int32,
+	timeout time.Duration,
+) error {
+	interval := 2 * time.Second
+
+	return wait.PollUntilContextTimeout(ctx, interval, timeout, true, func(ctx context.Context) (bool, error) {
+		pdb, err := client.PolicyV1().PodDisruptionBudgets(namespace).Get(ctx, name, metav1.GetOptions{})
+		if err != nil {
+			return false, err
+		}
+		if pdb.Generation != pdb.Status.ObservedGeneration {
+			return false, nil
+		}
+		return pdb.Status.DisruptionsAllowed == expected, nil
+	})
+}
+
+func evictPod(
+	ctx context.Context,
+	client kubernetes.Interface,
+	pod *corev1.Pod,
+) error {
+	eviction := &policyv1.Eviction{
+		TypeMeta: metav1.TypeMeta{
+			APIVersion: "policy/v1",
+			Kind:       "Eviction",
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      pod.Name,
+			Namespace: pod.Namespace,
+		},
+	}
+	return client.CoreV1().Pods(pod.Namespace).EvictV1(ctx, eviction)
+}
+
+func newMasterRebootRequiredMachineConfig(name, path, contents string) *machineconfigv1.MachineConfig {
+	encoded := base64.StdEncoding.EncodeToString([]byte(contents))
+
+	ignJSON := fmt.Sprintf(`{
+  "ignition": { "version": "3.2.0" },
+  "storage": {
+    "files": [{
+      "path": "%s",
+      "mode": 420,
+      "overwrite": true,
+      "contents": {
+        "source": "data:text/plain;base64,%s"
+      }
+    }]
+  }
+}`, path, encoded)
+
+	return &machineconfigv1.MachineConfig{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: name,
+			Labels: map[string]string{
+				"machineconfiguration.openshift.io/role": "master",
+			},
+		},
+		Spec: machineconfigv1.MachineConfigSpec{
+			Config: k8sruntime.RawExtension{
+				Raw: []byte(ignJSON),
+			},
+		},
+	}
+}
+
+// We don't use PollUntilContextTimeout here because it treats reaching the timeout as an error;
+// instead we implement our own loop where only real reboot/drain/API/MCP errors fail the test.
+func observeTNFDegradedWindow(
+	ctx context.Context,
+	kubeClient kubernetes.Interface,
+	mcoClient machineconfigclient.Interface,
+	nodeName, originalBootID string,
+	originalUnschedulable bool,
+	originalConfigName string,
+	duration time.Duration,
+) error {
+	interval := 10 * time.Second
+	deadline := time.Now().Add(duration)
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("context cancelled during TNF degraded observation: %w", ctx.Err())
+		default:
+		}
+
+		if time.Now().After(deadline) {
+			return nil // SUCCESS: node safe + MCP blocked
+		}
+
+		// NODE SAFETY CHECKS
+		node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+		if err != nil {
+			return fmt.Errorf("failed to get node %q during observation: %w", nodeName, err)
+		}
+
+		if node.Status.NodeInfo.BootID != originalBootID {
+			return fmt.Errorf("node %q reboot detected (BootID changed)", nodeName)
+		}
+
+		if node.Spec.Unschedulable && !originalUnschedulable {
+			return fmt.Errorf("node %q became unschedulable (drain detected)", nodeName)
+		}
+
+		// MCP BLOCKAGE CHECK
+		mcp, err := mcoClient.MachineconfigurationV1().
+			MachineConfigPools().
+			Get(context.Background(), "master", metav1.GetOptions{})
+		if err != nil {
+			return fmt.Errorf("failed to get master MCP during observation: %w", err)
+		}
+
+		cfg := mcp.Status.Configuration.Name
+		if cfg != "" && cfg != originalConfigName {
+			return fmt.Errorf("master MCP progressed to configuration %q (expected %q while degraded)", cfg, originalConfigName)
+		}
+		time.Sleep(interval)
+	}
+}
diff --git a/test/extended/two_node/utils/common.go b/test/extended/two_node/utils/common.go
index 533c54db85bb..c2b617e3ac61 100644
--- a/test/extended/two_node/utils/common.go
+++ b/test/extended/two_node/utils/common.go
@@ -8,12 +8,15 @@ import (
 	"strings"
 	"time"
 
+	g "github.com/onsi/ginkgo/v2"
+	o "github.com/onsi/gomega"
 	v1 "github.com/openshift/api/config/v1"
 	exutil "github.com/openshift/origin/test/extended/util"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/yaml"
+	"k8s.io/client-go/kubernetes"
 	"k8s.io/klog/v2"
 	nodehelper "k8s.io/kubernetes/test/e2e/framework/node"
 	e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
@@ -248,3 +251,84 @@ func MonitorClusterOperators(oc *exutil.CLI, timeout time.Duration, pollInterval
 		time.Sleep(pollInterval)
 	}
 }
+
+// EnsureTNFDegradedOrSkip skips the test if the cluster is not in TNF degraded mode
+// (DualReplica topology with two control-plane nodes, exactly one of which is Ready).
+func EnsureTNFDegradedOrSkip(oc *exutil.CLI) {
+	SkipIfNotTopology(oc, v1.DualReplicaTopologyMode)
+
+	ctx := context.Background()
+	kubeClient := oc.AdminKubeClient()
+
+	masters, err := ListControlPlaneNodes(ctx, kubeClient)
+	o.Expect(err).NotTo(o.HaveOccurred(), "failed to list control-plane nodes")
+
+	if len(masters) != 2 {
+		g.Skip(fmt.Sprintf(
+			"TNF degraded tests expect exactly 2 control-plane nodes, found %d",
+			len(masters),
+		))
+	}
+
+	readyCount := CountReadyNodes(masters)
+	if readyCount != 1 {
+		g.Skip(fmt.Sprintf(
+			"cluster is not in TNF degraded mode (expected exactly 1 Ready master, got %d)",
+			readyCount,
+		))
+	}
+}
+
+// ListControlPlaneNodes returns nodes labeled as master, falling back to the control-plane label.
+func ListControlPlaneNodes(ctx context.Context, client kubernetes.Interface) ([]corev1.Node, error) {
+	nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{
+		LabelSelector: "node-role.kubernetes.io/master",
+	})
+	if err != nil {
+		return nil, err
+	}
+	if len(nodes.Items) > 0 {
+		return nodes.Items, nil
+	}
+
+	nodes, err = client.CoreV1().Nodes().List(ctx, metav1.ListOptions{
+		LabelSelector: "node-role.kubernetes.io/control-plane",
+	})
+	if err != nil {
+		return nil, err
+	}
+	return nodes.Items, nil
+}
+
+// CountReadyNodes returns the number of nodes in Ready state.
+func CountReadyNodes(nodes []corev1.Node) int {
+	ready := 0
+	for _, n := range nodes {
+		for _, cond := range n.Status.Conditions {
+			if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
+				ready++
+				break
+			}
+		}
+	}
+	return ready
+}
+
+// GetReadyMasterNode returns the first Ready control-plane node.
+func GetReadyMasterNode(
+	ctx context.Context,
+	oc *exutil.CLI,
+) (*corev1.Node, error) {
+	nodes, err := ListControlPlaneNodes(ctx, oc.AdminKubeClient())
+	if err != nil {
+		return nil, err
+	}
+	for i := range nodes {
+		node := &nodes[i]
+		if IsNodeReady(oc, node.Name) {
+			return node, nil
+		}
+	}
+
+	return nil, fmt.Errorf("no Ready master node found")
+}