@@ -173,7 +173,6 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
173173 g .It ("should recover from etcd process crash" , func () {
174174 // Note: This test kills the etcd process/container on one node to simulate
175175 // a process crash, testing Pacemaker's ability to detect and restart etcd
176- survivedNode := peerNode
177176 g .GinkgoT ().Printf ("Randomly selected %s (%s) for etcd process crash and %s (%s) to survive\n " ,
178177 targetNode .Name , targetNode .Status .Addresses [0 ].Address , peerNode .Name , peerNode .Status .Addresses [0 ].Address )
179178
@@ -183,25 +182,13 @@ var _ = g.Describe("[sig-etcd][apigroup:config.openshift.io][OCPFeatureGate:Dual
183182 "bash" , "-c" , "podman kill etcd 2>/dev/null || pkill -9 etcd 2>/dev/null || systemctl stop etcd 2>/dev/null || true" )
184183 o .Expect (err ).To (o .BeNil (), "Expected to kill etcd process without command errors" )
185184
186- g .By ("Waiting for Pacemaker to detect etcd failure and begin recovery " )
187- // Give Pacemaker time to detect the failure and start recovery
188- time .Sleep (30 * time .Second )
185+ g .By ("Waiting for cluster to recover - both nodes become started voting members " )
186+ // Wait for Pacemaker to detect failure, restart etcd, and complete full recovery
187+ time .Sleep (5 * time .Minute )
189188
190- g .By (fmt .Sprintf ("Ensuring %s becomes leader and %s rejoins as learner" , peerNode .Name , targetNode .Name ))
191- validateEtcdRecoveryState (etcdClientFactory ,
192- & survivedNode , true , false , // survivedNode expected started == true, learner == false
193- & targetNode , false , true , // targetNode expected started == false, learner == true
194- memberIsLeaderTimeout , pollInterval )
195-
196- g .By (fmt .Sprintf ("Ensuring %s rejoins as learner" , targetNode .Name ))
189+ g .By ("Ensuring both nodes are started and voting members after recovery" )
197190 validateEtcdRecoveryState (etcdClientFactory ,
198- & survivedNode , true , false , // survivedNode expected started == true, learner == false
199- & targetNode , true , true , // targetNode expected started == true, learner == true
200- memberRejoinedLearnerTimeout , pollInterval )
201-
202- g .By (fmt .Sprintf ("Ensuring %s is promoted back to voting member" , targetNode .Name ))
203- validateEtcdRecoveryState (etcdClientFactory ,
204- & survivedNode , true , false , // survivedNode expected started == true, learner == false
191+ & peerNode , true , false , // peerNode expected started == true, learner == false
205192 & targetNode , true , false , // targetNode expected started == true, learner == false
206193 memberPromotedVotingTimeout , pollInterval )
207194
0 commit comments