From 954e9cb8f39554c2770c030c796c39feb667c194 Mon Sep 17 00:00:00 2001 From: Wataru Ishida Date: Thu, 3 Oct 2024 02:19:26 +0000 Subject: [PATCH] fix(xcvrd): fix PORT_DEL handling due to xcvr plug-out Signed-off-by: Wataru Ishida --- sonic-xcvrd/tests/test_xcvrd.py | 14 ++++++++++- sonic-xcvrd/xcvrd/xcvrd.py | 44 ++++++++++++++++++++++++++++----- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 49fe9a37d..55cb46f54 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -303,7 +303,7 @@ def test_CmisManagerTask_get_xcvr_api_exception(self, mock_platform_chassis, moc task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.on_port_update_event(port_change_event) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY # Case 3: get_cmis_application_desired() raises an exception mock_xcvr_api.is_flat_memory = MagicMock(return_value=False) @@ -1662,6 +1662,18 @@ def test_CmisManagerTask_handle_port_change_event(self): task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 + # STATE_DB DEL event doesn't remove port from port_dict + # this happens when transceiver is plugged-out or DPB is used + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_DEL, {}, db_name='STATE_DB') + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + + # CONFIG_DB DEL event removes port from port_dict + # this happens when DPB is used + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_DEL, {}, db_name='CONFIG_DB', table_name='PORT') + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 0 + @patch('xcvrd.xcvrd.XcvrTableHelper') def test_CmisManagerTask_get_configured_freq(self, mock_table_helper): port_mapping = PortMapping() diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index bec23e84f..6e235779f 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -897,12 +897,44 @@ def on_port_update_event(self, port_change_event): self.force_cmis_reinit(lport, 0) - # PORT_DEL event for the same lport happens 3 times because - # we are subscribing to CONFIG_DB, STATE_DB|TRANSCEIVER_INFO, and STATE_DB|PORT_TABLE. - # To only handle the first one, check if the lport exists in the port_dict. - elif lport in self.port_dict: - self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_REMOVED) - self.port_dict.pop(lport) + elif port_change_event.event_type == port_change_event.PORT_DEL: + # In handling the DEL event, the following two scenarios must be considered: + # 1. PORT_DEL event due to transceiver plug-out + # 2. PORT_DEL event due to Dynamic Port Breakout (DPB) + # + # Scenario 1 is simple, as only a STATE_DB|TRANSCEIVER_INFO PORT_DEL event occurs, + # so we just need to set SW_CMIS_STATE to CMIS_STATE_REMOVED. + # + # Scenario 2 is a bit more complex. First, for the port(s) before DPB, a CONFIG_DB|PORT PORT_DEL + # and a STATE_DB|PORT_TABLE PORT_DEL event occur. Next, for the port(s) after DPB, + # a CONFIG_DB|PORT PORT_SET and a STATE_DB|PORT_TABLE PORT_SET event occur. + # After that (after a short delay), a STATE_DB|TRANSCEIVER_INFO PORT_DEL event + # occurs for the port(s) before DPB, and finally, a STATE_DB|TRANSCEIVER_INFO + # PORT_SET event occurs for the port(s) after DPB. + # + # Below is the event sequence when configuring Ethernet0 from "2x200G" to "1x400G" + # (based on actual logs). + # + # 1. SET Ethernet0 CONFIG_DB|PORT + # 2. DEL Ethernet2 CONFIG_DB|PORT + # 3. DEL Ethernet0 CONFIG_DB|PORT + # 4. DEL Ethernet0 STATE_DB|PORT_TABLE + # 5. DEL Ethernet2 STATE_DB|PORT_TABLE + # 6. SET Ethernet0 CONFIG_DB|PORT + # 7. SET Ethernet0 STATE_DB|PORT_TABLE + # 8. SET Ethernet0 STATE_DB|PORT_TABLE + # 9. DEL Ethernet2 STATE_DB|TRANSCEIVER_INFO + # 10. DEL Ethernet0 STATE_DB|TRANSCEIVER_INFO + # 11. SET Ethernet0 STATE_DB|TRANSCEIVER_INFO + # + # To handle both scenarios, if the lport exists in port_dict for any DEL EVENT, + # set SW_CMIS_STATE to REMOVED. Additionally, for DEL EVENTS from CONFIG_DB due to DPB, + # remove the lport from port_dict. + if lport in self.port_dict: + self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_REMOVED) + + if port_change_event.db_name == 'CONFIG_DB' and port_change_event.table_name == 'PORT': + self.port_dict.pop(lport) def get_cmis_dp_init_duration_secs(self, api): return api.get_datapath_init_duration()/1000