diff --git a/lisa/features/nvme.py b/lisa/features/nvme.py index dae3ecc979..f68888c590 100644 --- a/lisa/features/nvme.py +++ b/lisa/features/nvme.py @@ -16,6 +16,7 @@ from lisa.tools import Ls, Lspci, Nvmecli from lisa.tools.lspci import PciDevice from lisa.util import field_metadata, get_matched_str +from lisa.util.constants import DEVICE_TYPE_NVME class Nvme(Feature): @@ -42,6 +43,9 @@ class Nvme(Feature): # /dev/nvme0n1p15 -> /dev/nvme0n1 NVME_NAMESPACE_PATTERN = re.compile(r"/dev/nvme[0-9]+n[0-9]+", re.M) + # /dev/nvme0n1p15 -> /dev/nvme0 + NVME_DEVICE_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M) + _pci_device_name = "Non-Volatile memory controller" _ls_devices: str = "" @@ -63,6 +67,11 @@ def get_devices(self) -> List[str]: matched_result = self._device_pattern.match(row) if matched_result: devices_list.append(matched_result.group("device_name")) + node_disk = self._node.features[Disk] + if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME: + os_disk_nvme_device = self.get_os_disk_nvme_device() + # Removing OS disk/device from the list. + devices_list.remove(os_disk_nvme_device) return devices_list def get_namespaces(self) -> List[str]: @@ -78,7 +87,13 @@ def get_namespaces(self) -> List[str]: return namespaces def get_namespaces_from_cli(self) -> List[str]: - return self._node.tools[Nvmecli].get_namespaces() + namespaces_list = self._node.tools[Nvmecli].get_namespaces() + node_disk = self._node.features[Disk] + if node_disk.get_os_disk_controller_type() == schema.DiskControllerType.NVME: + os_disk_nvme_namespace = self.get_os_disk_nvme_namespace() + # Removing OS disk/device from the list. 
+ namespaces_list.remove(os_disk_nvme_namespace) + return namespaces_list def get_os_disk_nvme_namespace(self) -> str: node_disk = self._node.features[Disk] @@ -93,10 +108,22 @@ def get_os_disk_nvme_namespace(self) -> str: ) return os_partition_namespace + def get_os_disk_nvme_device(self) -> str: + os_disk_nvme_namespace = self.get_os_disk_nvme_namespace() + # Sample os_boot_partition when disk controller type is NVMe: + # name: /dev/nvme0n1p15, disk: nvme, mount_point: /boot/efi, type: vfat + if os_disk_nvme_namespace: + os_disk_nvme_device = get_matched_str( + os_disk_nvme_namespace, + self.NVME_DEVICE_PATTERN, + ) + return os_disk_nvme_device + def get_devices_from_lspci(self) -> List[PciDevice]: devices_from_lspci = [] lspci_tool = self._node.tools[Lspci] device_list = lspci_tool.get_devices() + device_list = lspci_tool.get_devices_by_type(DEVICE_TYPE_NVME, use_pci_ids=True) devices_from_lspci = [ x for x in device_list if self._pci_device_name == x.device_class ] diff --git a/lisa/tools/lspci.py b/lisa/tools/lspci.py index ccab4d9d8b..341977d124 100644 --- a/lisa/tools/lspci.py +++ b/lisa/tools/lspci.py @@ -8,12 +8,7 @@ from lisa.executable import Tool from lisa.operating_system import Posix from lisa.tools import Echo -from lisa.util import ( - LisaException, - constants, - find_patterns_in_lines, - get_matched_str, -) +from lisa.util import LisaException, constants, find_patterns_in_lines, get_matched_str # Example output of lspci command - # lspci -m @@ -129,13 +124,20 @@ def parse(self, raw_str: str, pci_ids: Dict[str, Any]) -> None: matched_pci_device_info = PATTERN_PCI_DEVICE.match(raw_str) if matched_pci_device_info: self.slot = matched_pci_device_info.group("slot") + assert self.slot, f"Can not find slot info for: {raw_str}" self.device_class = matched_pci_device_info.group("device_class") + assert self.device_class, f"Can not find device class for: {raw_str}" self.vendor = matched_pci_device_info.group("vendor") + assert self.vendor, f"Can not find 
vendor info for: {raw_str}" self.device_info = matched_pci_device_info.group("device") + assert self.device_info, f"Can not find device info for: {raw_str}" if pci_ids: self.device_id = pci_ids[self.slot]["device_id"] + assert self.device_id, f"cannot find device id from {raw_str}" self.vendor_id = pci_ids[self.slot]["vendor_id"] + assert self.vendor_id, f"cannot find vendor id from {raw_str}" self.controller_id = pci_ids[self.slot]["controller_id"] + assert self.controller_id, f"cannot findcontroller_id from {raw_str}" else: raise LisaException("cannot find any matched pci devices") @@ -163,13 +165,25 @@ def _install(self) -> bool: return self._check_exists() def get_device_names_by_type( - self, device_type: str, force_run: bool = False + self, device_type: str, force_run: bool = False, use_pci_ids: bool = False ) -> List[str]: if device_type.upper() not in DEVICE_TYPE_DICT.keys(): raise LisaException(f"pci_type '{device_type}' is not recognized.") class_names = DEVICE_TYPE_DICT[device_type.upper()] devices_list = self.get_devices(force_run) - devices_slots = [x.slot for x in devices_list if x.device_class in class_names] + devices_slots = [] + if use_pci_ids: + for device in devices_list: + if ( + device.controller_id in CONTROLLER_ID_DICT[device_type.upper()] + and device.vendor_id in VENDOR_ID_DICT[device_type.upper()] + and device.device_id in DEVICE_ID_DICT[device_type.upper()] + ): + devices_slots.append(device.slot) + else: + devices_slots = [ + x.slot for x in devices_list if x.device_class in class_names + ] return devices_slots def get_devices_by_type( @@ -196,6 +210,7 @@ def get_devices_by_type( ] return device_type_list + @retry(KeyError, tries=10, delay=20) def get_devices(self, force_run: bool = False) -> List[PciDevice]: if (not self._pci_devices) or force_run: self._pci_devices = [] @@ -318,7 +333,7 @@ class LspciBSD(Lspci): _disabled_devices: Set[str] = set() def get_device_names_by_type( - self, device_type: str, force_run: bool = False + self, 
device_type: str, force_run: bool = False, use_pci_ids: bool = False ) -> List[str]: output = self.node.execute("pciconf -l", sudo=True).stdout if device_type.upper() not in self._DEVICE_DRIVER_MAPPING.keys(): diff --git a/microsoft/testsuites/nvme/nvme.py b/microsoft/testsuites/nvme/nvme.py index debc18b2f0..edb875f0f5 100644 --- a/microsoft/testsuites/nvme/nvme.py +++ b/microsoft/testsuites/nvme/nvme.py @@ -11,26 +11,33 @@ TestCaseMetadata, TestSuite, TestSuiteMetadata, - constants, simple_requirement, ) from lisa.features import Nvme, NvmeSettings, Sriov from lisa.sut_orchestrator.azure.platform_ import AzurePlatform -from lisa.tools import Cat, Echo, Fdisk, Lscpu, Lspci, Mount, Nvmecli +from lisa.tools import Cat, Df, Echo, Fdisk, Lscpu, Lspci, Mkfs, Mount, Nvmecli from lisa.tools.fdisk import FileSystem +from lisa.util.constants import DEVICE_TYPE_NVME, DEVICE_TYPE_SRIOV def _format_mount_disk( node: Node, namespace: str, file_system: FileSystem, + use_partitions: bool = True, ) -> None: mount_point = namespace.rpartition("/")[-1] fdisk = node.tools[Fdisk] mount = node.tools[Mount] mount.umount(namespace, mount_point) - fdisk.make_partition(namespace, file_system) - mount.mount(f"{namespace}p1", mount_point) + fdisk.delete_partitions(namespace) + if use_partitions: + fdisk.make_partition(namespace, file_system) + mount.mount(f"{namespace}p1", mount_point) + else: + format_disk = node.tools[Mkfs] + format_disk.mkfs(f"{namespace}", file_system) + mount.mount(f"{namespace}", mount_point) @TestSuiteMetadata( @@ -97,75 +104,28 @@ def verify_nvme_max_disk(self, environment: Environment, node: Node) -> None: ), ) def verify_nvme_function(self, node: Node) -> None: - nvme = node.features[Nvme] - nvme_namespaces = nvme.get_raw_nvme_disks() - nvme_cli = node.tools[Nvmecli] - cat = node.tools[Cat] - mount = node.tools[Mount] - for namespace in nvme_namespaces: - # 1. Get the number of errors from nvme-cli before operations. 
- error_count_before_operations = nvme_cli.get_error_count(namespace) - - # 2. Create a partition, filesystem and mount it. - _format_mount_disk(node, namespace, FileSystem.ext4) - - # 3. Create a txt file on the partition, content is 'TestContent'. - mount_point = namespace.rpartition("/")[-1] - cmd_result = node.execute( - f"echo TestContent > {mount_point}/testfile.txt", shell=True, sudo=True - ) - cmd_result.assert_exit_code( - message=f"{mount_point}/testfile.txt may not exist." - ) + self._verify_nvme_function(node) - # 4. Create a file 'data' on the partition, get the md5sum value. - cmd_result = node.execute( - f"dd if=/dev/zero of={mount_point}/data bs=10M count=100", - shell=True, - sudo=True, - ) - cmd_result.assert_exit_code( - message=f"{mount_point}/data is not created successfully, " - "please check the disk space." - ) - initial_md5 = node.execute( - f"md5sum {mount_point}/data", shell=True, sudo=True - ) - initial_md5.assert_exit_code( - message=f"{mount_point}/data not exist or md5sum command enounter" - " unexpected error." - ) - - # 5. Umount and remount the partition. - mount.umount(namespace, mount_point, erase=False) - mount.mount(f"{namespace}p1", mount_point) - - # 6. Get the txt file content, compare the value. - file_content = cat.run(f"{mount_point}/testfile.txt", shell=True, sudo=True) - assert_that( - file_content.stdout, - f"content of {mount_point}/testfile.txt should keep consistent " - "after umount and re-mount.", - ).is_equal_to("TestContent") - - # 6. Get md5sum value of file 'data', compare with initial value. - final_md5 = node.execute( - f"md5sum {mount_point}/data", shell=True, sudo=True - ) - assert_that( - initial_md5.stdout, - f"md5sum of {mount_point}/data should keep consistent " - "after umount and re-mount.", - ).is_equal_to(final_md5.stdout) - - # 7. Compare the number of errors from nvme-cli after operations. 
- error_count_after_operations = nvme_cli.get_error_count(namespace) - assert_that( - error_count_before_operations, - "error-log should not increase after operations.", - ).is_equal_to(error_count_after_operations) - - mount.umount(disk_name=namespace, point=mount_point) + @TestCaseMetadata( + description=""" + The test case is same as `verify_nvme_function`, except it uses + unpartitioned disks. + This test case will do following things for each NVMe device. + 1. Get the number of errors from nvme-cli before operations. + 2. Create filesystem and mount it. + 3. Create a txt file on the partition, content is 'TestContent'. + 4. Create a file 'data' on the partition, get the md5sum value. + 5. Umount and remount the partition. + 6. Get the txt file content, compare the value. + 7. Compare the number of errors from nvme-cli after operations. + """, + priority=2, + requirement=simple_requirement( + supported_features=[Nvme], + ), + ) + def verify_nvme_function_unpartitioned(self, node: Node) -> None: + self._verify_nvme_function(node, use_partitions=False) @TestCaseMetadata( description=""" @@ -187,6 +147,7 @@ def verify_nvme_fstrim(self, node: Node) -> None: nvme = node.features[Nvme] nvme_namespaces = nvme.get_raw_nvme_disks() mount = node.tools[Mount] + df = node.tools[Df] for namespace in nvme_namespaces: mount_point = namespace.rpartition("/")[-1] @@ -202,12 +163,16 @@ def verify_nvme_fstrim(self, node: Node) -> None: message=f"{mount_point} not exist or fstrim command enounter " "unexpected error." ) - - # 3. Create a 300 gb file 'data' using dd command in the partition. + # 80% of free space is used to create a file. + free_space_gb = int(df.get_filesystem_available_space(mount_point) * 0.8) + # limit the free space to 300GB to avoid long time operation. + free_space_gb = min(free_space_gb, 300) + # 3. Create a file 'data' using dd command in the partition. 
cmd_result = node.execute( - f"dd if=/dev/zero of={mount_point}/data bs=1G count=300", + f"dd if=/dev/zero of={mount_point}/data bs=1G count={free_space_gb}", shell=True, sudo=True, + timeout=1200, ) cmd_result.assert_exit_code( message=f"{mount_point}/data is not created successfully, " @@ -350,7 +315,9 @@ def verify_nvme_manage_ns(self, node: Node) -> None: description=""" This test case will 1. Disable NVME devices. - 2. Enable NVME device. + 2. Enable PCI devices. + 3. Get NVMe devices slots. + 4. Check NVMe devices are back after rescan. """, priority=2, requirement=simple_requirement( @@ -360,9 +327,20 @@ def verify_nvme_manage_ns(self, node: Node) -> None: def verify_nvme_rescind(self, node: Node) -> None: lspci = node.tools[Lspci] # 1. Disable NVME devices. - lspci.disable_devices_by_type(device_type=constants.DEVICE_TYPE_NVME) - # 2. Enable NVME device. + before_pci_count = lspci.disable_devices_by_type( + device_type=DEVICE_TYPE_NVME, use_pci_ids=True + ) + # 2. Enable PCI devices. lspci.enable_devices() + # 3. Get PCI devices slots. + after_devices_slots = lspci.get_device_names_by_type( + DEVICE_TYPE_NVME, True, True + ) + # 4. Check PCI devices are back after rescan. + assert_that( + after_devices_slots, + "After rescan, the disabled NVMe PCI devices should be back.", + ).is_length(before_pci_count) @TestCaseMetadata( description=""" @@ -381,18 +359,20 @@ def verify_nvme_rescind(self, node: Node) -> None: ) def verify_nvme_sriov_rescind(self, node: Node) -> None: lspci = node.tools[Lspci] - device_types = [constants.DEVICE_TYPE_NVME, constants.DEVICE_TYPE_SRIOV] + device_types = [DEVICE_TYPE_NVME, DEVICE_TYPE_SRIOV] for device_type in device_types: # 1. Disable PCI devices. - before_pci_count = lspci.disable_devices_by_type(device_type) + before_pci_count = lspci.disable_devices_by_type(device_type, True) # 2. Enable PCI devices. lspci.enable_devices() # 3. Get PCI devices slots. 
- after_devices_slots = lspci.get_device_names_by_type(device_type, True) + after_devices_slots = lspci.get_device_names_by_type( + device_type, True, True + ) # 4. Check PCI devices are back after rescan. assert_that( after_devices_slots, - "After rescan, the disabled PCI devices should be back.", + f"After rescan, the disabled {device_type} PCI devices should be back.", ).is_length(before_pci_count) def _verify_nvme_disk(self, environment: Environment, node: Node) -> None: @@ -431,3 +411,78 @@ def _verify_nvme_disk(self, environment: Environment, node: Node) -> None: assert_that(nvme_namespace).described_as( "nvme devices count should be equal to [vCPU/8]." ).is_length(expected_count) + + def _verify_nvme_function(self, node: Node, use_partitions: bool = True) -> None: + # Verify the basic function of all NVMe disks + nvme = node.features[Nvme] + nvme_namespaces = nvme.get_raw_nvme_disks() + nvme_cli = node.tools[Nvmecli] + cat = node.tools[Cat] + mount = node.tools[Mount] + for namespace in nvme_namespaces: + # 1. Get the number of errors from nvme-cli before operations. + error_count_before_operations = nvme_cli.get_error_count(namespace) + + # 2. Create a partition, filesystem and mount it. + _format_mount_disk(node, namespace, FileSystem.ext4, use_partitions) + + # 3. Create a txt file on the partition, content is 'TestContent'. + mount_point = namespace.rpartition("/")[-1] + cmd_result = node.execute( + f"echo TestContent > {mount_point}/testfile.txt", shell=True, sudo=True + ) + cmd_result.assert_exit_code( + message=f"{mount_point}/testfile.txt may not exist." + ) + + # 4. Create a file 'data' on the partition, get the md5sum value. + cmd_result = node.execute( + f"dd if=/dev/zero of={mount_point}/data bs=10M count=100", + shell=True, + sudo=True, + ) + cmd_result.assert_exit_code( + message=f"{mount_point}/data is not created successfully, " + "please check the disk space." 
+ ) + initial_md5 = node.execute( + f"md5sum {mount_point}/data", shell=True, sudo=True + ) + initial_md5.assert_exit_code( + message=f"{mount_point}/data not exist or md5sum command encountered" + " unexpected error." + ) + + # 5. Umount and remount the partition. + mount.umount(namespace, mount_point, erase=False) + if use_partitions: + mount.mount(f"{namespace}p1", mount_point) + else: + mount.mount(f"{namespace}", mount_point) + + # 6. Get the txt file content, compare the value. + file_content = cat.run(f"{mount_point}/testfile.txt", shell=True, sudo=True) + assert_that( + file_content.stdout, + f"content of {mount_point}/testfile.txt should keep consistent " + "after umount and re-mount.", + ).is_equal_to("TestContent") + + # 7. Get md5sum value of file 'data', compare with initial value. + final_md5 = node.execute( + f"md5sum {mount_point}/data", shell=True, sudo=True + ) + assert_that( + initial_md5.stdout, + f"md5sum of {mount_point}/data should keep consistent " + "after umount and re-mount.", + ).is_equal_to(final_md5.stdout) + + # 8. Compare the number of errors from nvme-cli after operations. + error_count_after_operations = nvme_cli.get_error_count(namespace) + assert_that( + error_count_before_operations, + "error-log should not increase after operations.", + ).is_equal_to(error_count_after_operations) + + mount.umount(disk_name=namespace, point=mount_point)