From 5dbf2ccddd1291b80770d5592e30a0a35efe265e Mon Sep 17 00:00:00 2001 From: chiourung_huang Date: Wed, 30 Aug 2023 05:46:50 +0000 Subject: [PATCH 1/4] Syslog for transceiver high/low temperature alarm Add syslog for high/low temperature alarm/warning(compare the temperature with threshold) Signed-off-by: chiourung_huang --- sonic-xcvrd/tests/test_xcvrd.py | 42 +++++++++++++++++++++++ sonic-xcvrd/xcvrd/xcvrd.py | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 568b19dd2..1a8a3c3eb 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -1892,6 +1892,7 @@ def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_stat task.task_stopping_event.wait = MagicMock(side_effect=[False, True]) task.get_dom_polling_from_config_db = MagicMock(return_value='enabled') task.is_port_in_cmis_terminal_state = MagicMock(return_value=False) + task.check_transceiver_temperature = MagicMock() mock_detect_error.return_value = True task.task_worker() assert task.port_mapping.logical_port_list.count('Ethernet0') @@ -1911,6 +1912,47 @@ def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_stat assert mock_update_status_hw.call_count == 1 assert mock_post_pm_info.call_count == 1 + @patch('xcvrd.xcvrd_utilities.port_mapping.PortMapping.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) + @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=True)) + @pytest.mark.parametrize("dom_info_cache, dom_th_info, expected", [ + ({0: {'temperature': '75'}}, + (('temphighalarm', '80'), + ('templowalarm', '0'), + ('temphighwarning', '70'), + ('templowwarning', '0')), + 3), #TEMP_HIGH_WARNING = 3 + ({0: {'temperature': '85'}}, + (('temphighalarm', '80'), + ('templowalarm', '0'), + ('temphighwarning', '70'), + ('templowwarning', '10')), + 1), #TEMP_HIGH_ALARM = 1 + ({0: {'temperature': '5'}}, 
(('temphighalarm', '80'), + ('templowalarm', '0'), + ('temphighwarning', '70'), + ('templowwarning', '10')), + 4), #TEMP_LOW_WARNING = 4 + ]) + def test_check_transceiver_temperature(self, dom_info_cache, dom_th_info, expected): + class MockTable: + data = {} + def set(self, key, fvs): + self.data[key] = fvs + + def get(self, key): + return self.data.get(key) + + port_mapping = PortMapping() + stop_event = threading.Event() + task = DomInfoUpdateTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + logical_port_name = 'Ethernet0' + temperature_status = {} + dom_th_tbl = MockTable() + dom_th_tbl.get = MagicMock(return_value=(True, dom_th_info)) + task.check_transceiver_temperature(logical_port_name, dom_th_tbl, dom_info_cache, temperature_status) + assert temperature_status[0] == expected + @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=False)) @patch('xcvrd.xcvrd.XcvrTableHelper') @patch('xcvrd.xcvrd.delete_port_from_status_table_hw') diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index aaecb27ed..baf8416dd 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -1683,6 +1683,7 @@ def task_worker(self): transceiver_status_cache = {} pm_info_cache = {} sel, asic_context = port_event_helper.subscribe_port_config_change(self.namespaces) + temperature_status = {} # Start loop to update dom info in DB periodically while not self.task_stopping_event.wait(DOM_INFO_UPDATE_PERIOD_SECS): @@ -1735,6 +1736,8 @@ def task_worker(self): helper_logger.log_warning("Got exception {} while processing pm info for port {}, ignored".format(repr(e), logical_port_name)) continue + self.check_transceiver_temperature(logical_port_name, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index), dom_info_cache, temperature_status) + helper_logger.log_info("Stop DOM monitoring loop") def run(self): @@ -1758,6 +1761,64 @@ def join(self): if self.exc: raise self.exc + def check_transceiver_temperature(self, logical_port_name, th_table, 
dom_info_cache, temperature_status): + TEMP_NORMAL = 0 + TEMP_HIGH_ALARM = 1 + TEMP_LOW_ALARM = 2 + TEMP_HIGH_WARNING = 3 + TEMP_LOW_WARNING = 4 + + TEMP_ERROR_TO_DESCRIPTION_DICT = { + TEMP_NORMAL: "normal", + TEMP_HIGH_ALARM: "temperature high alarm", + TEMP_LOW_ALARM: "temperature low alarm", + TEMP_HIGH_WARNING: "temperature high warning", + TEMP_LOW_WARNING: "temperature low warning" + } + + for physical_port, physical_port_name in get_physical_port_name_dict(logical_port_name, self.port_mapping).items(): + ori_temp_status = temperature_status.get(physical_port) + if ori_temp_status is None: + ori_temp_status = TEMP_NORMAL + temperature_status[physical_port] = ori_temp_status + new_temp_status = TEMP_NORMAL + + dom_info_dict = dom_info_cache.get(physical_port) + presence, threshold = th_table.get(physical_port_name) + if presence: + dom_th_info_dict = dict(threshold) + else: + dom_th_info_dict = None + if dom_info_dict is not None and dom_th_info_dict is not None: + temperature = dom_info_dict.get("temperature") + temphighalarm = dom_th_info_dict.get("temphighalarm") + templowalarm = dom_th_info_dict.get("templowalarm") + temphighwarning = dom_th_info_dict.get("temphighwarning") + templowwarning = dom_th_info_dict.get("templowwarning") + if temperature != 'N/A' and temphighalarm != 'N/A' and templowalarm != 'N/A' and \ + temphighwarning != 'N/A' and templowwarning != 'N/A': + if float(temperature) > float(temphighalarm): + new_temp_status = TEMP_HIGH_ALARM + elif float(temperature) > float(temphighwarning): + new_temp_status = TEMP_HIGH_WARNING + elif float(temperature) < float(templowalarm): + new_temp_status = TEMP_LOW_ALARM + elif float(temperature) < float(templowwarning): + new_temp_status = TEMP_LOW_WARNING + else: + new_temp_status = TEMP_NORMAL + + if ori_temp_status != new_temp_status: + temperature_status[physical_port] = new_temp_status + helper_logger.log_notice("{}: temperature status change from {} to {}".format( + physical_port_name, + 
TEMP_ERROR_TO_DESCRIPTION_DICT[ori_temp_status], + TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status])) + elif new_temp_status > 0: + helper_logger.log_notice("{}: {}".format(physical_port_name, TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status])) + else: + temperature_status[physical_port] = TEMP_NORMAL + def on_port_config_change(self, port_change_event): if port_change_event.event_type == port_event_helper.PortChangeEvent.PORT_REMOVE: self.on_remove_logical_port(port_change_event) From efce5dfc8d82965c1eb02cf40bd15b2521a17402 Mon Sep 17 00:00:00 2001 From: chiourung_huang Date: Thu, 11 Apr 2024 09:18:28 +0000 Subject: [PATCH 2/4] fix test error --- sonic-xcvrd/tests/test_xcvrd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 1a8a3c3eb..e2f8abca3 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -1912,7 +1912,7 @@ def test_DomInfoUpdateTask_task_worker(self, mock_post_pm_info, mock_update_stat assert mock_update_status_hw.call_count == 1 assert mock_post_pm_info.call_count == 1 - @patch('xcvrd.xcvrd_utilities.port_mapping.PortMapping.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) + @patch('xcvrd.xcvrd_utilities.port_event_helper.PortMapping.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=True)) @pytest.mark.parametrize("dom_info_cache, dom_th_info, expected", [ ({0: {'temperature': '75'}}, From 3723ea29fb197dca511345a27141699e8d51af19 Mon Sep 17 00:00:00 2001 From: chiourung_huang Date: Mon, 22 Apr 2024 01:11:10 +0000 Subject: [PATCH 3/4] Fix test error --- sonic-xcvrd/tests/test_xcvrd.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index e2f8abca3..c585f9af9 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ 
-1945,7 +1945,8 @@ def get(self, key): port_mapping = PortMapping() stop_event = threading.Event() - task = DomInfoUpdateTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + mock_cmis_manager = MagicMock() + task = DomInfoUpdateTask(DEFAULT_NAMESPACE, port_mapping, stop_event, mock_cmis_manager) logical_port_name = 'Ethernet0' temperature_status = {} dom_th_tbl = MockTable() From 96e6e19bd7821ae387ac2de113eb61bc50be69ce Mon Sep 17 00:00:00 2001 From: chiourung_huang Date: Mon, 1 Jul 2024 06:16:33 +0000 Subject: [PATCH 4/4] Code change after review --- sonic-xcvrd/xcvrd/xcvrd.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index baf8416dd..a9e196ff2 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -1769,7 +1769,7 @@ def check_transceiver_temperature(self, logical_port_name, th_table, dom_info_ca TEMP_LOW_WARNING = 4 TEMP_ERROR_TO_DESCRIPTION_DICT = { - TEMP_NORMAL: "normal", + TEMP_NORMAL: "temperature normal", TEMP_HIGH_ALARM: "temperature high alarm", TEMP_LOW_ALARM: "temperature low alarm", TEMP_HIGH_WARNING: "temperature high warning", @@ -1777,10 +1777,10 @@ def check_transceiver_temperature(self, logical_port_name, th_table, dom_info_ca } for physical_port, physical_port_name in get_physical_port_name_dict(logical_port_name, self.port_mapping).items(): - ori_temp_status = temperature_status.get(physical_port) - if ori_temp_status is None: - ori_temp_status = TEMP_NORMAL - temperature_status[physical_port] = ori_temp_status + orig_temp_status = temperature_status.get(physical_port) + if orig_temp_status is None: + orig_temp_status = TEMP_NORMAL + temperature_status[physical_port] = orig_temp_status new_temp_status = TEMP_NORMAL dom_info_dict = dom_info_cache.get(physical_port) @@ -1808,11 +1808,11 @@ def check_transceiver_temperature(self, logical_port_name, th_table, dom_info_ca else: new_temp_status = TEMP_NORMAL - if ori_temp_status != 
new_temp_status: + if orig_temp_status != new_temp_status: temperature_status[physical_port] = new_temp_status - helper_logger.log_notice("{}: temperature status change from {} to {}".format( + helper_logger.log_notice("{}: temperature status changed from {} to {}".format( physical_port_name, - TEMP_ERROR_TO_DESCRIPTION_DICT[ori_temp_status], + TEMP_ERROR_TO_DESCRIPTION_DICT[orig_temp_status], TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status])) elif new_temp_status > 0: helper_logger.log_notice("{}: {}".format(physical_port_name, TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status]))