From 150679be1538145efaae9d0f5c8c1349298d348c Mon Sep 17 00:00:00 2001 From: Umang Francis Date: Tue, 21 Jan 2025 11:48:13 +0530 Subject: [PATCH 1/4] adding the dmesg_oops notifier with relevant update to mixin_module --- lisa/mixin_modules.py | 1 + lisa/notifiers/dmesg_oops.py | 141 +++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 lisa/notifiers/dmesg_oops.py diff --git a/lisa/mixin_modules.py b/lisa/mixin_modules.py index 86eac50b2d..e9710806aa 100644 --- a/lisa/mixin_modules.py +++ b/lisa/mixin_modules.py @@ -11,6 +11,7 @@ import lisa.combinators.git_bisect_combinator # noqa: F401 import lisa.combinators.grid_combinator # noqa: F401 import lisa.notifiers.console # noqa: F401 +import lisa.notifiers.dmesg_oops # noqa: F401 import lisa.notifiers.env_stats # noqa: F401 import lisa.notifiers.file # noqa: F401 import lisa.notifiers.html # noqa: F401 diff --git a/lisa/notifiers/dmesg_oops.py b/lisa/notifiers/dmesg_oops.py new file mode 100644 index 0000000000..8c44474f02 --- /dev/null +++ b/lisa/notifiers/dmesg_oops.py @@ -0,0 +1,141 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import logging +import re +import subprocess +from dataclasses import dataclass +from typing import Any, List, Pattern, Type, cast +import smtplib +import os +from pathlib import Path +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from dataclasses_json import dataclass_json + +from lisa import messages, notifier, schema +from lisa.util import constants +from lisa.tools import Dmesg +from lisa.messages import ( + MessageBase, + TestResultMessage, + TestRunMessage, + TestRunStatus, + TestStatus, +) +from lisa.util import ( + check_panic, + get_matched_str, + KernelPanicException +) + +oops_regex_patterns: List[Pattern[str]] = [ + re.compile(r"Oops: [0-9]+ \[\#.*\]"), # Basic Oops Detection + re.compile(r"BUG: unable to handle kernel NULL pointer dereference at (0x)?[0-9a-fA-F]+"), # Null Pointer Dereference + re.compile(r"BUG: unable to handle kernel paging request at (0x)?[0-9a-fA-F]+"), # Invalid Memory Access + re.compile(r"RIP: [0-9a-fA-F]+:([a-zA-Z0-9_]+)\+[0-9a-fA-Fx]+/[0-9a-fA-Fx]+"), # RIP in Trace + re.compile(r"Call Trace:\s*(.*)"), # Kernel Call Trace + re.compile(r"general protection fault: [0-9]+ \[#.*\]"), # General Fault Errors + re.compile(r"Kernel panic - not syncing: (.*)"), # Kernel Panic Information + re.compile(r"Process: ([a-zA-Z0-9_]+)\s*\(pid:\s*\d+\)"), # Process Details + re.compile(r"Stack:\s*(.*)"), # Stack Dump + re.compile(r"Code:\s*(.*)"), # Code Dump +] + +@dataclass_json +@dataclass +class DmsgOopsSchema(schema.Notifier): + log_level: str = logging.getLevelName(logging.DEBUG) + output_file: str = "dmesg_errors.json" + + +class DmsgOops(notifier.Notifier): + """ + A sample notifier to check for Panics/OOPs Errors in the DMesg Logs. + """ + + @classmethod + def type_name(cls) -> str: + return "dmsg_oops_notifier" + + @classmethod + def type_schema(cls) -> Type[schema.TypedSchema]: + return DmsgOopsSchema + + def save_results(self) -> None: + file_path = Path(self.runbook.output_file) + if not file_path.is_absolute(): + file_path = constants.RUN_LOCAL_LOG_PATH / file_path + self._log.info(f"Writing output to file {file_path}") + with open(file_path, "w") as f: + f.write(str(self.dmesg_errors)) # type: ignore + + def check_kernel_oops(self, dmesg_logs: str, context_lines: int = 4) -> list[str]: + oops_list = [] + lines = dmesg_logs.splitlines() + for i, line in enumerate(lines): + for pattern in oops_regex_patterns: + if pattern.search(line): + start = max(i - context_lines, 0) + end = min(i + context_lines + 1, len(lines)) + context = lines[start:end] + oops_list.append("\n".join(context)) + break + return oops_list + + def dmesg_error_check(self, test_name: str, dmesg_logs: str) -> None: + try: + check_panic(dmesg_logs, "Result", self._log) + except KernelPanicException as e: + self._log.error( + f"Kernel Panic found in the dmesg logs. {e}" + ) + self.dmesg_errors['panics'].setdefault(test_name, []).append(e) + oops_list = self.check_kernel_oops(dmesg_logs) + self.dmesg_errors['oops'].setdefault(test_name, []).append(oops_list) + self._log.info(f"DMesg logs check completed") + + def process_serial_logs(self, test_name: str, file_path: str, pattern_start: str, pattern_end: str) -> None: + with open(file_path, 'r') as file: + buffer = file.read() + while True: + start_index = buffer.find(pattern_start) + end_index = buffer.find(pattern_end, start_index + len(pattern_start)) + if start_index == -1 or end_index == -1: + break + data_segment = buffer[start_index + len(pattern_start):end_index] + self.dmesg_error_check(test_name, data_segment) + buffer = buffer[end_index + len(pattern_end):] + + def process_test_result_message(self, message: TestResultMessage) -> None: + if message.log_file and message.status in [ + TestStatus.PASSED, + TestStatus.FAILED, + TestStatus.SKIPPED, + TestStatus.ATTEMPTED, + ]: + local_file_path = constants.RUN_LOCAL_LOG_PATH / message.log_file + local_absolute_file_path = local_file_path.absolute() + try: + self.process_serial_logs(message.name, local_absolute_file_path, "cmd: ['sudo', 'dmesg']", "execution time:") + except Exception as e: + self._log.error( + f"Error while Processing Serial Console Logs : {e}" + ) + + self.save_results() + + def _received_message(self, message: messages.MessageBase) -> None: + if isinstance(message, TestResultMessage): + self.process_test_result_message(message=message) + + def _subscribed_message_type(self) -> List[Type[messages.MessageBase]]: + return [TestResultMessage] + + def _initialize(self, *args: Any, **kwargs: Any) -> None: + runbook = cast(DmsgOopsSchema, self.runbook) + self._log_level = runbook.log_level + self.dmesg_errors = {"panics": {}, "oops": {}} + + def __init__(self, runbook: DmsgOopsSchema) -> None: + notifier.Notifier.__init__(self, runbook) From fcf02955def02015f2b366442f3833bb98b168ec Mon Sep 17 00:00:00 2001 From: Umang Francis Date: Mon, 27 Jan 2025 10:42:54 +0530 Subject: [PATCH 2/4] Cleaning up the Code and ensuring Flake8 and Black compliance --- lisa/notifiers/dmesg_oops.py | 69 +++++++++++++++--------------------- 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/lisa/notifiers/dmesg_oops.py b/lisa/notifiers/dmesg_oops.py index 8c44474f02..19c9650b22 100644 --- a/lisa/notifiers/dmesg_oops.py +++ b/lisa/notifiers/dmesg_oops.py @@ -3,37 +3,27 @@ import logging import re -import subprocess from dataclasses import dataclass -from typing import Any, List, Pattern, Type, cast -import smtplib -import os from pathlib import Path -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText +from typing import Any, List, Pattern, Type, cast + from dataclasses_json import dataclass_json from lisa import messages, notifier, schema +from lisa.messages import TestResultMessage, TestStatus from lisa.util import constants -from lisa.tools import Dmesg -from lisa.messages import ( - MessageBase, - TestResultMessage, - TestRunMessage, - TestRunStatus, - TestStatus, -) -from lisa.util import ( - check_panic, - get_matched_str, - KernelPanicException -) oops_regex_patterns: List[Pattern[str]] = [ re.compile(r"Oops: [0-9]+ \[\#.*\]"), # Basic Oops Detection - re.compile(r"BUG: unable to handle kernel NULL pointer dereference at (0x)?[0-9a-fA-F]+"), # Null Pointer Dereference - re.compile(r"BUG: unable to handle kernel paging request at (0x)?[0-9a-fA-F]+"), # Invalid Memory Access - re.compile(r"RIP: [0-9a-fA-F]+:([a-zA-Z0-9_]+)\+[0-9a-fA-Fx]+/[0-9a-fA-Fx]+"), # RIP in Trace + re.compile( + r"BUG: unable to handle kernel NULL pointer dereference at (0x)?[0-9a-fA-F]+" + ), # Null Pointer Dereference + re.compile( + r"BUG: unable to handle kernel paging request at (0x)?[0-9a-fA-F]+" + ), # Invalid Memory Access + re.compile( + r"RIP: [0-9a-fA-F]+:([a-zA-Z0-9_]+)\+[0-9a-fA-Fx]+/[0-9a-fA-Fx]+" + ), # RIP in Trace re.compile(r"Call Trace:\s*(.*)"), # Kernel Call Trace re.compile(r"general protection fault: [0-9]+ \[#.*\]"), # General Fault Errors re.compile(r"Kernel panic - not syncing: (.*)"), # Kernel Panic Information @@ -42,6 +32,7 @@ re.compile(r"Code:\s*(.*)"), # Code Dump ] + @dataclass_json @dataclass class DmsgOopsSchema(schema.Notifier): @@ -84,28 +75,23 @@ def check_kernel_oops(self, dmesg_logs: str, context_lines: int = 4) -> list[str return oops_list def dmesg_error_check(self, test_name: str, dmesg_logs: str) -> None: - try: - check_panic(dmesg_logs, "Result", self._log) - except KernelPanicException as e: - self._log.error( - f"Kernel Panic found in the dmesg logs. {e}" - ) - self.dmesg_errors['panics'].setdefault(test_name, []).append(e) oops_list = self.check_kernel_oops(dmesg_logs) - self.dmesg_errors['oops'].setdefault(test_name, []).append(oops_list) - self._log.info(f"DMesg logs check completed") + self.dmesg_errors["oops"].setdefault(test_name, []).append(oops_list) + self._log.info("DMesg logs check completed") - def process_serial_logs(self, test_name: str, file_path: str, pattern_start: str, pattern_end: str) -> None: - with open(file_path, 'r') as file: + def process_serial_logs( + self, test_name: str, file_path: str, pattern_start: str, pattern_end: str + ) -> None: + with open(file_path, "r") as file: buffer = file.read() while True: start_index = buffer.find(pattern_start) end_index = buffer.find(pattern_end, start_index + len(pattern_start)) if start_index == -1 or end_index == -1: break - data_segment = buffer[start_index + len(pattern_start):end_index] + data_segment = buffer[start_index + len(pattern_start) : end_index] self.dmesg_error_check(test_name, data_segment) - buffer = buffer[end_index + len(pattern_end):] + buffer = buffer[end_index + len(pattern_end) :] def process_test_result_message(self, message: TestResultMessage) -> None: if message.log_file and message.status in [ @@ -117,11 +103,14 @@ def process_test_result_message(self, message: TestResultMessage) -> None: local_file_path = constants.RUN_LOCAL_LOG_PATH / message.log_file local_absolute_file_path = local_file_path.absolute() try: - self.process_serial_logs(message.name, local_absolute_file_path, "cmd: ['sudo', 'dmesg']", "execution time:") - except Exception as e: - self._log.error( - f"Error while Processing Serial Console Logs : {e}" + self.process_serial_logs( + message.name, + local_absolute_file_path, + "cmd: ['sudo', 'dmesg']", + "execution time:", ) + except Exception as e: + self._log.error(f"Error while Processing Serial Console Logs : {e}") self.save_results() @@ -135,7 +124,7 @@ def _subscribed_message_type(self) -> List[Type[messages.MessageBase]]: def _initialize(self, *args: Any, **kwargs: Any) -> None: runbook = cast(DmsgOopsSchema, self.runbook) self._log_level = runbook.log_level - self.dmesg_errors = {"panics": {}, "oops": {}} + self.dmesg_errors = {"oops": {}} def __init__(self, runbook: DmsgOopsSchema) -> None: notifier.Notifier.__init__(self, runbook) From 3c8932ca1cef49fcbbdcca627fcc7667852242fa Mon Sep 17 00:00:00 2001 From: Umang Francis Date: Mon, 27 Jan 2025 11:45:12 +0530 Subject: [PATCH 3/4] Addressing the mypy build errors --- lisa/notifiers/dmesg_oops.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lisa/notifiers/dmesg_oops.py b/lisa/notifiers/dmesg_oops.py index 19c9650b22..735d100f15 100644 --- a/lisa/notifiers/dmesg_oops.py +++ b/lisa/notifiers/dmesg_oops.py @@ -5,7 +5,7 @@ import re from dataclasses import dataclass from pathlib import Path -from typing import Any, List, Pattern, Type, cast +from typing import Any, Dict, List, Pattern, Type, cast from dataclasses_json import dataclass_json @@ -44,7 +44,7 @@ class DmsgOops(notifier.Notifier): """ A sample notifier to check for Panics/OOPs Errors in the DMesg Logs. """ - + dmesg_errors: Dict[str, Dict[str, List[str]]] @classmethod def type_name(cls) -> str: return "dmsg_oops_notifier" @@ -59,9 +59,9 @@ def save_results(self) -> None: file_path = constants.RUN_LOCAL_LOG_PATH / file_path self._log.info(f"Writing output to file {file_path}") with open(file_path, "w") as f: - f.write(str(self.dmesg_errors)) # type: ignore + f.write(str(self.dmesg_errors)) - def check_kernel_oops(self, dmesg_logs: str, context_lines: int = 4) -> list[str]: + def check_kernel_oops(self, dmesg_logs: str, context_lines: int = 4) -> List[str]: oops_list = [] lines = dmesg_logs.splitlines() for i, line in enumerate(lines): @@ -80,7 +80,7 @@ def dmesg_error_check(self, test_name: str, dmesg_logs: str) -> None: self._log.info("DMesg logs check completed") def process_serial_logs( - self, test_name: str, file_path: str, pattern_start: str, pattern_end: str + self, test_name: str, file_path: Path, pattern_start: str, pattern_end: str ) -> None: with open(file_path, "r") as file: buffer = file.read() From 49566b182ad088dccbb5591f6f0ea02350280f62 Mon Sep 17 00:00:00 2001 From: Umang Francis Date: Mon, 27 Jan 2025 12:17:29 +0530 Subject: [PATCH 4/4] syntax changes to fix mypy flake8 errors --- lisa/notifiers/dmesg_oops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lisa/notifiers/dmesg_oops.py b/lisa/notifiers/dmesg_oops.py index 735d100f15..885d2ead8f 100644 --- a/lisa/notifiers/dmesg_oops.py +++ b/lisa/notifiers/dmesg_oops.py @@ -44,7 +44,9 @@ class DmsgOops(notifier.Notifier): """ A sample notifier to check for Panics/OOPs Errors in the DMesg Logs. """ - dmesg_errors: Dict[str, Dict[str, List[str]]] + + dmesg_errors: Dict[str, Dict[str, List[List[str]]]] + @classmethod def type_name(cls) -> str: return "dmsg_oops_notifier"