Skip to content

Commit b0adf57

Browse files
authored
[active-standby] Fix extra toggle observed in config reload (sonic-net#216)
Approach What is the motivation for this PR? Fix the issue that an extra mux toggle is observed when the standby ToR does a config reload. Two reasons: 1. extra toggle due to link prober init to unknown when the mux is wait The state transitions are: (unknown, standby, down) -- activate state machine --> (standby, standby, down) --> link prober unknown --> (unknown, standby, down) --> enter mux wait and probe mux --> (unknown, wait, down) --> port up --> (unknown, wait, up) --> mux probe standby --> (unknown, standby, up) --> toggle to active 2. extra toggle due to the port up event arriving within the link prober unknown detection period Signed-off-by: Longxiang Lyu [email protected] Work item tracking Microsoft ADO (number only): 24794476 How did you do it? when the port up and init the link prober state, init the link prober state to wait when the mux state is wait reset the link prober state when port up to refresh the detection. How did you verify/test it? UT
1 parent aa902a3 commit b0adf57

6 files changed

+165
-45
lines changed

src/link_manager/LinkManagerStateMachineActiveStandby.cpp

+16-10
Original file line numberDiff line numberDiff line change
@@ -237,13 +237,19 @@ void ActiveStandbyStateMachine::setLabel(Label label) {
237237
};
238238

239239
//
240-
// ---> enterLinkProberState(CompositeState &nextState, link_prober::LinkProberState::Label label);
240+
// ---> enterLinkProberState(CompositeState &nextState, link_prober::LinkProberState::Label label, bool forceReset);
241241
//
242242
// force LinkProberState to switch state
243243
//
244-
void ActiveStandbyStateMachine::enterLinkProberState(CompositeState &nextState, link_prober::LinkProberState::Label label)
244+
void ActiveStandbyStateMachine::enterLinkProberState(CompositeState &nextState, link_prober::LinkProberState::Label label, bool forceReset)
245245
{
246246
mLinkProberStateMachinePtr->enterState(label);
247+
248+
if (forceReset && ps(nextState) == label) {
249+
// only need to reset the link prober state if the state remains the same
250+
mLinkProberStateMachinePtr->resetCurrentState();
251+
}
252+
247253
ps(nextState) = label;
248254

249255
// link prober entering wait indicating switchover is initiated, but a switchover can be skipped if mode == manual.
@@ -550,7 +556,7 @@ void ActiveStandbyStateMachine::handleStateChange(LinkStateEvent &event, link_st
550556
// start fresh when the link transition from Down to UP state
551557
// and so the link prober will initially match the MUX state
552558
// There is a problem with this approach as it will hide link flaps that result in lost heart-beats.
553-
initLinkProberState(nextState);
559+
initLinkProberState(nextState, true);
554560
// enterMuxWaitState(nextState);
555561
} else if (ls(mCompositeState) == link_state::LinkState::Up &&
556562
ls(nextState) == link_state::LinkState::Down &&
@@ -1032,27 +1038,27 @@ void ActiveStandbyStateMachine::handleMuxWaitTimeout(boost::system::error_code e
10321038
}
10331039

10341040
//
1035-
// ---> initLinkProberState(CompositeState &compositeState);
1041+
// ---> initLinkProberState(CompositeState &compositeState, bool forceReset);
10361042
//
10371043
// initialize LinkProberState when configuring the composite state machine
10381044
//
1039-
void ActiveStandbyStateMachine::initLinkProberState(CompositeState &compositeState)
1045+
void ActiveStandbyStateMachine::initLinkProberState(CompositeState &compositeState, bool forceReset)
10401046
{
10411047
switch (ms(compositeState)) {
10421048
case mux_state::MuxState::Label::Active:
1043-
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Active);
1049+
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Active, forceReset);
10441050
break;
10451051
case mux_state::MuxState::Label::Standby:
1046-
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Standby);
1052+
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Standby, forceReset);
10471053
break;
10481054
case mux_state::MuxState::Label::Unknown:
1049-
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Unknown);
1055+
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Unknown, forceReset);
10501056
break;
10511057
case mux_state::MuxState::Label::Error:
1052-
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Wait);
1058+
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Wait, forceReset);
10531059
break;
10541060
case mux_state::MuxState::Label::Wait:
1055-
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Unknown);
1061+
enterLinkProberState(compositeState, link_prober::LinkProberState::Label::Wait, forceReset);
10561062
break;
10571063
default:
10581064
break;

src/link_manager/LinkManagerStateMachineActiveStandby.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,11 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
143143
* entry will be changed to align with state label provided
144144
*@param label (in) state to switch to
145145
*
146+
*@param forceReset (in) force reset the aligned link prober state
147+
*
146148
*@return none
147149
*/
148-
inline void enterLinkProberState(CompositeState &nextState, link_prober::LinkProberState::Label label);
150+
inline void enterLinkProberState(CompositeState &nextState, link_prober::LinkProberState::Label label, bool forceReset=false);
149151

150152
/**
151153
*@method enterMuxState
@@ -460,9 +462,11 @@ class ActiveStandbyStateMachine: public LinkManagerStateMachineBase,
460462
*@param compositeState (in, out) reference to composite state, the state linkProber
461463
* entry will be changed to align with MuxState
462464
*
465+
*@param forceReset (in) force reset the aligned link prober state
466+
*
463467
*@return none
464468
*/
465-
void initLinkProberState(CompositeState &compositeState);
469+
void initLinkProberState(CompositeState &compositeState, bool forceReset=false);
466470

467471
/**
468472
*@method LinkProberStandbyMuxActiveLinkUpTransitionFunction

src/link_prober/LinkProberStateMachineBase.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,17 @@ LinkProberStateMachineBase::LinkProberStateMachineBase(
5454
{
5555
}
5656

57+
//
58+
// ---> LinkProberStateMachineBase::resetCurrentState();
59+
//
60+
// reset current link prober state
61+
//
62+
void LinkProberStateMachineBase::resetCurrentState()
63+
{
64+
LinkProberState *currentLinkProberState = dynamic_cast<LinkProberState *> (getCurrentState());
65+
currentLinkProberState->resetState();
66+
}
67+
5768
//
5869
// ---> LinkProberStateMachineBase::postLinkProberStateEvent(E &e);
5970
//

src/link_prober/LinkProberStateMachineBase.h

+8
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,14 @@ class LinkProberStateMachineBase : public common::StateMachine
399399
*/
400400
PeerWaitState* getPeerWaitState() {return &mPeerWaitState;};
401401

402+
public:
403+
/**
404+
*@method resetCurrentState
405+
*
406+
*@brief reset current link prober state
407+
*/
408+
void resetCurrentState();
409+
402410
public:
403411
/**
404412
*@method getIcmpSelfEvent

test/LinkManagerStateMachineTest.cpp

+123-32
Original file line numberDiff line numberDiff line change
@@ -68,35 +68,42 @@ void LinkManagerStateMachineTest::runIoService(uint32_t count)
6868
}
6969
}
7070

71-
void LinkManagerStateMachineTest::postLinkProberEvent(link_prober::LinkProberState::Label label, uint32_t count)
71+
void LinkManagerStateMachineTest::postLinkProberEvent(link_prober::LinkProberState::Label label, uint32_t count, uint32_t detect_multiplier)
7272
{
7373
switch (label) {
74-
case link_prober::LinkProberState::Active:
75-
for (uint8_t i = 0; i < mMuxConfig.getPositiveStateChangeRetryCount(); i++) {
76-
mFakeMuxPort.mFakeLinkProber->postLinkProberEvent<link_prober::IcmpSelfEvent&>(
77-
link_prober::LinkProberStateMachineBase::getIcmpSelfEvent()
78-
);
79-
runIoService(count);
74+
case link_prober::LinkProberState::Active: {
75+
detect_multiplier = (detect_multiplier == 0 ? mMuxConfig.getPositiveStateChangeRetryCount() : detect_multiplier);
76+
for (uint8_t i = 0; i < detect_multiplier; i++) {
77+
mFakeMuxPort.mFakeLinkProber->postLinkProberEvent<link_prober::IcmpSelfEvent&>(
78+
link_prober::LinkProberStateMachineBase::getIcmpSelfEvent()
79+
);
80+
runIoService(count);
81+
}
82+
break;
83+
}
84+
case link_prober::LinkProberState::Standby: {
85+
detect_multiplier = (detect_multiplier == 0 ? mMuxConfig.getPositiveStateChangeRetryCount() : detect_multiplier);
86+
for (uint8_t i = 0; i < detect_multiplier; i++) {
87+
mFakeMuxPort.mFakeLinkProber->postLinkProberEvent<link_prober::IcmpPeerEvent&>(
88+
link_prober::LinkProberStateMachineBase::getIcmpPeerEvent()
89+
);
90+
runIoService(count);
91+
}
92+
break;
8093
}
81-
break;
82-
case link_prober::LinkProberState::Standby:
83-
for (uint8_t i = 0; i < mMuxConfig.getPositiveStateChangeRetryCount(); i++) {
84-
mFakeMuxPort.mFakeLinkProber->postLinkProberEvent<link_prober::IcmpPeerEvent&>(
85-
link_prober::LinkProberStateMachineBase::getIcmpPeerEvent()
86-
);
87-
runIoService(count);
94+
case link_prober::LinkProberState::Unknown: {
95+
detect_multiplier = (detect_multiplier == 0 ? mMuxConfig.getNegativeStateChangeRetryCount() : detect_multiplier);
96+
for (uint8_t i = 0; i < detect_multiplier; i++) {
97+
mFakeMuxPort.mFakeLinkProber->postLinkProberEvent<link_prober::IcmpUnknownEvent&>(
98+
link_prober::LinkProberStateMachineBase::getIcmpUnknownEvent()
99+
);
100+
runIoService(count);
101+
}
102+
break;
88103
}
89-
break;
90-
case link_prober::LinkProberState::Unknown:
91-
for (uint8_t i = 0; i < mMuxConfig.getNegativeStateChangeRetryCount(); i++) {
92-
mFakeMuxPort.mFakeLinkProber->postLinkProberEvent<link_prober::IcmpUnknownEvent&>(
93-
link_prober::LinkProberStateMachineBase::getIcmpUnknownEvent()
94-
);
95-
runIoService(count);
104+
default: {
105+
break;
96106
}
97-
break;
98-
default:
99-
break;
100107
}
101108
}
102109

@@ -196,7 +203,7 @@ void LinkManagerStateMachineTest::setMuxActive()
196203
VALIDATE_STATE(Unknown, Wait, Down);
197204

198205
postLinkEvent(link_state::LinkState::Up);
199-
VALIDATE_STATE(Unknown, Wait, Up);
206+
VALIDATE_STATE(Wait, Wait, Up);
200207

201208
// change state to active
202209
postLinkProberEvent(link_prober::LinkProberState::Active);
@@ -220,7 +227,7 @@ void LinkManagerStateMachineTest::setMuxStandby()
220227
VALIDATE_STATE(Unknown, Wait, Down);
221228

222229
postLinkEvent(link_state::LinkState::Up);
223-
VALIDATE_STATE(Unknown, Wait, Up);
230+
VALIDATE_STATE(Wait, Wait, Up);
224231

225232
// change state to active
226233
postLinkProberEvent(link_prober::LinkProberState::Standby);
@@ -543,12 +550,14 @@ TEST_F(LinkManagerStateMachineTest, MuxActiveLinkDown)
543550
{
544551
setMuxActive();
545552

553+
int postLinkProberMetricsInvokeCountBefore = mDbInterfacePtr->mPostLinkProberMetricsInvokeCount;
554+
546555
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
547556
handleLinkState("down", 3);
548557

549558
VALIDATE_STATE(Active, Wait, Down);
550559
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 1);
551-
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, 1);
560+
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, postLinkProberMetricsInvokeCountBefore);
552561
EXPECT_EQ(mDbInterfacePtr->mLastPostedSwitchCause, link_manager::ActiveStandbyStateMachine::SwitchCause::LinkDown);
553562

554563
// swss notification
@@ -1228,14 +1237,19 @@ TEST_F(LinkManagerStateMachineTest, MuxActivePeerLinkStateUp)
12281237
TEST_F(LinkManagerStateMachineTest, PostPckLossMetricsEvent)
12291238
{
12301239
setMuxStandby();
1231-
1232-
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, 1); // post link_prober_standby_start
1240+
1241+
int postLinkProberMetricsInvokeCountBefore = mDbInterfacePtr->mPostLinkProberMetricsInvokeCount;
1242+
1243+
// post link_prober_standby_start
1244+
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, postLinkProberMetricsInvokeCountBefore);
12331245
postLinkProberEvent(link_prober::LinkProberState::Unknown, 3);
12341246

1235-
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, 3); // post link_prober_unknown_start, link_prober_wait_start
1247+
// post link_prober_unknown_start, link_prober_wait_start
1248+
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, postLinkProberMetricsInvokeCountBefore + 2);
12361249
postLinkProberEvent(link_prober::LinkProberState::Active, 3);
1237-
1238-
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, 4); // post link_prober_unknown_start, post link_prober_active_start
1250+
1251+
// post link_prober_unknown_start, post link_prober_active_start
1252+
EXPECT_EQ(mDbInterfacePtr->mPostLinkProberMetricsInvokeCount, postLinkProberMetricsInvokeCountBefore + 3);
12391253
}
12401254

12411255
TEST_F(LinkManagerStateMachineTest, PostPckLossUpdateAndResetEvent)
@@ -1345,4 +1359,81 @@ TEST_F(LinkManagerStateMachineTest, MuxStandbyLinkProberUnknownDefaultRouteNA)
13451359
VALIDATE_STATE(Wait, Wait, Up);
13461360
}
13471361

1362+
TEST_F(LinkManagerStateMachineTest, MuxStandbyLinkProberStandbyLinkDownMuxWaitLinkUp)
1363+
{
1364+
setMuxStandby();
1365+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1366+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1367+
VALIDATE_STATE(Standby, Standby, Up);
1368+
1369+
handleLinkState("down", 3);
1370+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1371+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1372+
VALIDATE_STATE(Standby, Standby, Down);
1373+
1374+
postLinkProberEvent(link_prober::LinkProberState::Unknown, 2);
1375+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1376+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1377+
VALIDATE_STATE(Unknown, Standby, Down);
1378+
1379+
runIoService(1);
1380+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 1);
1381+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1382+
VALIDATE_STATE(Unknown, Wait, Down);
1383+
1384+
handleLinkState("up");
1385+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 1);
1386+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1387+
VALIDATE_STATE(Wait, Wait, Up);
1388+
1389+
handleProbeMuxState("standby", 3);
1390+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 1);
1391+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1392+
VALIDATE_STATE(Wait, Standby, Up);
1393+
1394+
runIoService(1);
1395+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 2);
1396+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1397+
VALIDATE_STATE(Wait, Wait, Up);
1398+
1399+
postLinkProberEvent(link_prober::LinkProberState::Standby);
1400+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 2);
1401+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1402+
VALIDATE_STATE(Standby, Wait, Up);
1403+
1404+
handleProbeMuxState("standby", 3);
1405+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 2);
1406+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1407+
VALIDATE_STATE(Standby, Standby, Up);
1408+
}
1409+
1410+
TEST_F(LinkManagerStateMachineTest, MuxStandbyLinkProberStandbyLinkDownLinkUpResetLinkProberState)
1411+
{
1412+
setMuxStandby();
1413+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1414+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1415+
VALIDATE_STATE(Standby, Standby, Up);
1416+
1417+
handleLinkState("down", 3);
1418+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1419+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1420+
VALIDATE_STATE(Standby, Standby, Down);
1421+
1422+
postLinkProberEvent(link_prober::LinkProberState::Unknown, 2, 2);
1423+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1424+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1425+
VALIDATE_STATE(Standby, Standby, Down);
1426+
1427+
handleLinkState("up");
1428+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1429+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1430+
VALIDATE_STATE(Standby, Standby, Up);
1431+
1432+
postLinkProberEvent(link_prober::LinkProberState::Unknown, 2, 1);
1433+
EXPECT_EQ(mDbInterfacePtr->mProbeMuxStateInvokeCount, 0);
1434+
EXPECT_EQ(mDbInterfacePtr->mSetMuxStateInvokeCount, 0);
1435+
VALIDATE_STATE(Standby, Standby, Up);
1436+
}
1437+
1438+
13481439
} /* namespace test */

test/LinkManagerStateMachineTest.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class LinkManagerStateMachineTest: public ::testing::Test
3939
virtual ~LinkManagerStateMachineTest() = default;
4040

4141
void runIoService(uint32_t count = 0);
42-
void postLinkProberEvent(link_prober::LinkProberState::Label label, uint32_t count = 0);
42+
void postLinkProberEvent(link_prober::LinkProberState::Label label, uint32_t count = 0, uint32_t detect_multiplier = 0);
4343
void postMuxEvent(mux_state::MuxState::Label label, uint32_t count = 0);
4444
void postLinkEvent(link_state::LinkState::Label label, uint32_t count = 0);
4545
void postSuspendTimerExpiredEvent(uint32_t count = 0);

0 commit comments

Comments
 (0)