From ac31d9baa2add62fd993ca30130fb95d275ace8f Mon Sep 17 00:00:00 2001 From: SrikanthMyakam Date: Mon, 11 Nov 2024 20:37:36 +0530 Subject: [PATCH 01/13] lspci tool - Separate InfiniBand devices from SRIOV devices --- lisa/tools/lspci.py | 11 ++++++++++- lisa/util/constants.py | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/lisa/tools/lspci.py b/lisa/tools/lspci.py index 316414b88b..2c3798a1ad 100644 --- a/lisa/tools/lspci.py +++ b/lisa/tools/lspci.py @@ -76,9 +76,13 @@ "1016", # Mellanox Technologies MT27710 Family [ConnectX-4 Lx Virtual Function] "1018", # Mellanox Technologies MT27800 Family [ConnectX-5 Virtual Function] "101a", # Mellanox Technologies MT28800 Family [ConnectX-5 Ex Virtual Function] - "101e", # Mellanox Technologies [ConnectX Family mlx5Gen Virtual Function] "00ba", # Microsft Azure Network Adapter VF (MANA VF) ], + constants.DEVICE_TYPE_INFINIBAND: [ + "1018", # Mellanox Technologies MT27800 Family [ConnectX-5 Virtual Function] + "101a", # Mellanox Technologies MT28800 Family [ConnectX-5 Ex Virtual Function] + "101e", # Mellanox Technologies [ConnectX Family mlx5Gen Virtual Function] + ], constants.DEVICE_TYPE_NVME: [ "b111" # Microsoft Corporation Device, Local NVMe discs ], @@ -99,6 +103,9 @@ VENDOR_ID_MICROSOFT, VENDOR_ID_MELLANOX, ], + constants.DEVICE_TYPE_INFINIBAND: [ + VENDOR_ID_MELLANOX, + ], constants.DEVICE_TYPE_NVME: [VENDOR_ID_MICROSOFT], constants.DEVICE_TYPE_GPU: [VENDOR_ID_NVIDIA], constants.DEVICE_TYPE_AMD_GPU: [VENDOR_ID_AMD], @@ -107,6 +114,8 @@ CONTROLLER_ID_DICT: Dict[str, List[str]] = { constants.DEVICE_TYPE_SRIOV: [ "0200", # Ethernet controller + ], + constants.DEVICE_TYPE_INFINIBAND: [ "0207", # Infiniband controller ], constants.DEVICE_TYPE_NVME: [ diff --git a/lisa/util/constants.py b/lisa/util/constants.py index 771c5317eb..84caca9c26 100644 --- a/lisa/util/constants.py +++ b/lisa/util/constants.py @@ -157,6 +157,7 @@ DATADISK_CACHING_TYPE_READYWRITE = "ReadWrite" DEVICE_TYPE_SRIOV = "SRIOV" +DEVICE_TYPE_INFINIBAND = "INFINIBAND" DEVICE_TYPE_NVME = "NVME" DEVICE_TYPE_GPU = "GPU" DEVICE_TYPE_AMD_GPU = "AMD_GPU" From 82edadc3c9f7fff6e4bf695a933621de040cec55 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Wed, 13 Nov 2024 23:22:25 +0800 Subject: [PATCH 02/13] wget: remove log before throwing exception --- lisa/base_tools/wget.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lisa/base_tools/wget.py b/lisa/base_tools/wget.py index 160c5d0133..6225d6d930 100644 --- a/lisa/base_tools/wget.py +++ b/lisa/base_tools/wget.py @@ -78,6 +78,7 @@ def get( if matched_result: download_file_path = matched_result.group("path") else: + self.node.tools[Rm].remove_file(log_file, sudo=sudo) raise LisaException( f"cannot find file path in stdout of '{command}', it may be caused " " due to failed download or pattern mismatch." From 14cbb48279b623b47fdd7a9815f0dd3fec82310b Mon Sep 17 00:00:00 2001 From: Kameron Carr Date: Wed, 13 Nov 2024 17:51:24 -0800 Subject: [PATCH 03/13] Availability: Skip bad configurations (#3502) * Availability: Skip bad configurations Mismatches in Availability Set requirements and capabilities may be due to test case requirements, so we can skip the test instead of failing. * Availability: Break up on_before_deployment Break up on_before_deployment to reduce the complexity of the function. * Availability: change error message Make it more clear that supported availability types can also be affected by test case requirements. 
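In essence, the change swaps hard assertions for a skipped-test signal.
A minimal sketch of the pattern (the helper name is hypothetical; the
SkippedException import mirrors the one used elsewhere in the tree, and
the settings/params fields are the ones touched in the diff below):

    from lisa.util import SkippedException

    def check_availability_type(params, settings) -> None:
        # The availability types this SKU/location/test combination
        # can actually support.
        supported = [t.value for t in settings.availability_type.items]
        if params.availability_type not in supported:
            # An assert here used to fail the run outright; raising
            # SkippedException marks the case as skipped instead,
            # since the mismatch may stem from test case requirements
            # rather than a product bug.
            raise SkippedException(
                f"Availability Type '{params.availability_type}' is "
                f"not supported. Please select one of {supported}."
            )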
---
 lisa/sut_orchestrator/azure/features.py | 133 ++++++++++++++----------
 1 file changed, 79 insertions(+), 54 deletions(-)

diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py
index db8646edef..a84af2c41d 100644
--- a/lisa/sut_orchestrator/azure/features.py
+++ b/lisa/sut_orchestrator/azure/features.py
@@ -2430,6 +2430,75 @@ def create_setting(

         return availability_settings

+    @classmethod
+    def _resolve_configuration(
+        cls,
+        environment: Environment,
+        settings: AvailabilitySettings,
+        params: AvailabilityArmParameter,
+    ) -> None:
+        """
+        Resolve Availability configuration based on the current environment
+        1. Remove unsupported availability types when using ultra disk
+        2. Automatically resolve the availability type based on priority
+        3. Select and validate an availability zone if applicable
+        """
+        assert isinstance(settings.availability_type, search_space.SetSpace)
+
+        # Ultra Disk does not support Availability Sets
+        assert environment.capability.nodes
+        assert environment.capability.nodes[0].disk
+        is_ultra_disk = (
+            environment.capability.nodes[0].disk.data_disk_type
+            == schema.DiskType.UltraSSDLRS
+        )
+        if is_ultra_disk:
+            settings.availability_type.discard(AvailabilityType.AvailabilitySet)
+            # If a region supports Ultra Disk in availability zones,
+            # then availability zones must be used
+            if AvailabilityType.AvailabilityZone in settings.availability_type:
+                settings.availability_type.discard(AvailabilityType.NoRedundancy)
+
+        # Set ARM parameters based on min capability
+        if params.availability_type == AvailabilityType.Default:
+            params.availability_type = settings._resolve_availability_type_by_priority(
+                params
+            ).value
+        if (
+            params.availability_zones
+            and params.availability_type == AvailabilityType.AvailabilityZone
+        ):
+            params.availability_zones = [
+                zone
+                for zone in params.availability_zones
+                if zone in settings.availability_zones
+            ]
+            if not params.availability_zones:
+                raise SkippedException(
+                    "Invalid zones provided. "
+                    "This SKU in this location supports zones: "
+                    f"{settings.availability_zones}. "
+                )
+        elif settings.availability_zones:
+            params.availability_zones = [settings.availability_zones.items[0]]
+
+        assert params.availability_type in [type.value for type in AvailabilityType], (
+            "Not a valid Availability Type: " f"{params.availability_type}"
+        )
+
+        if not (
+            AvailabilityType(params.availability_type) in settings.availability_type
+        ):
+            raise SkippedException(
+                f"Availability Type "
+                f"'{params.availability_type}' "
+                "is not supported in the current configuration. "
+                "Please select one of "
+                f"{[type.value for type in settings.availability_type.items]}. "
+                "The supported availability types are affected by disk type, "
+                "location, and test case requirements."
+ ) + @classmethod def on_before_deployment(cls, *args: Any, **kwargs: Any) -> None: environment = cast(Environment, kwargs.get("environment")) @@ -2447,57 +2516,7 @@ def on_before_deployment(cls, *args: Any, **kwargs: Any) -> None: is_maximize_capability = False if not is_maximize_capability: - assert isinstance(settings.availability_type, search_space.SetSpace) - - # Ultra Disk does not support Availability Sets - assert environment.capability.nodes - assert environment.capability.nodes[0].disk - is_ultra_disk = ( - environment.capability.nodes[0].disk.data_disk_type - == schema.DiskType.UltraSSDLRS - ) - if is_ultra_disk: - settings.availability_type.discard(AvailabilityType.AvailabilitySet) - # If a region supports Ultra Disk in availability zones, - # then availability zones must be used - if AvailabilityType.AvailabilityZone in settings.availability_type: - settings.availability_type.discard(AvailabilityType.NoRedundancy) - - # Set ARM parameters based on min capability - if params.availability_type == AvailabilityType.Default: - params.availability_type = ( - settings._resolve_availability_type_by_priority(params).value - ) - if ( - params.availability_zones - and params.availability_type == AvailabilityType.AvailabilityZone - ): - params.availability_zones = [ - zone - for zone in params.availability_zones - if zone in settings.availability_zones - ] - assert params.availability_zones, ( - "Invalid zones provided. " - "This SKU in this location supports zones: " - f"{settings.availability_zones}. " - ) - elif settings.availability_zones: - params.availability_zones = [settings.availability_zones.items[0]] - - assert params.availability_type in [ - type.value for type in AvailabilityType - ], ("Not a valid Availability Type: " f"{params.availability_type}") - - assert ( - AvailabilityType(params.availability_type) in settings.availability_type - ), ( - f"Availability Type " - f"'{params.availability_type}' " - "is not supported in the current configuration. Please select one of " - f"{[type.value for type in settings.availability_type.items]}. " - "Or consider changing the disk type or location." - ) + cls._resolve_configuration(environment, settings, params) # If the availability_type is still set to Default, then # resolve the default without considering capabilities @@ -2517,9 +2536,15 @@ def on_before_deployment(cls, *args: Any, **kwargs: Any) -> None: if "platformUpdateDomainCount" not in params.availability_set_properties: params.availability_set_properties["platformUpdateDomainCount"] = 1 elif params.availability_type == AvailabilityType.AvailabilityZone: - assert ( - params.availability_zones - ), "Availability Zone is selected, but no zone was provided." + if not params.availability_zones: + raise SkippedException( + "Availability Zone is selected, but no zone was provided. " + "Please consider one of the following\n" + "1. Providing availability_zones in the runbook\n" + "2. Selecting a different availability_type in the runbook\n" + "3. Setting maximize_capability to false " + "so the zone can be selected automatically." + ) params.availability_zones = [params.availability_zones[0]] params.availability_set_tags.clear() params.availability_set_properties.clear() From fb8e10834cecd8ee9c7f3db5b2392d59904bc9ca Mon Sep 17 00:00:00 2001 From: r-dailey <108893075+r-dailey@users.noreply.github.com> Date: Wed, 13 Nov 2024 20:09:28 -0800 Subject: [PATCH 04/13] Get disk lun info on FreeBSD (#3512) * Get disk lun info on FreeBSD * Changed the regex filter to accept more situations. 
In Testing * Removed the CD part of the filter --- lisa/sut_orchestrator/azure/features.py | 46 ++++++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py index a84af2c41d..5a95a1e040 100644 --- a/lisa/sut_orchestrator/azure/features.py +++ b/lisa/sut_orchestrator/azure/features.py @@ -1638,6 +1638,20 @@ class Disk(AzureFeatureMixin, features.Disk): # /dev/nvme0n1p15 -> /dev/nvme0 NVME_CONTROLLER_PATTERN = re.compile(r"/dev/nvme[0-9]+", re.M) + # at scbus0 target 0 lun 0 (pass0,da0) + # at scbus0 target 0 lun 1 (pass1,da1) + # at scbus0 target 0 lun 2 (pass2,cd0) + # at scbus1 target 0 lun 0 (da6,pass7) + # at scbus1 target 0 lun 1 (da9,pass10) + # at scbus1 target 0 lun 2 (da7,pass8) + # at scbus1 target 0 lun 3 (da8,pass9) + # at scbus1 target 0 lun 4 (da5,pass6) + # at scbus1 target 0 lun 6 (da4,pass5) + # at scbus1 target 0 lun 7 (da3,pass4) + LUN_PATTERN_BSD = re.compile( + r"at\s+scbus\d+\s+target\s+\d+\s+lun\s+(\d+)\s+\(.*(da\d+)", re.M + ) + @classmethod def settings_type(cls) -> Type[schema.FeatureSettings]: return AzureDiskOptionSettings @@ -1707,18 +1721,32 @@ def _get_scsi_data_disks(self) -> List[str]: def get_luns(self) -> Dict[str, int]: # disk_controller_type == SCSI # get azure scsi attached disks - azure_scsi_disks = self._get_scsi_data_disks() device_luns = {} - lun_number_pattern = re.compile(r"[0-9]+$", re.M) - for disk in azure_scsi_disks: - # /dev/disk/azure/scsi1/lun20 -> 20 - device_lun = int(get_matched_str(disk, lun_number_pattern)) - # readlink -f /dev/disk/azure/scsi1/lun0 - # /dev/sdc + if isinstance(self._node.os, BSD): cmd_result = self._node.execute( - f"readlink -f {disk}", shell=True, sudo=True + "camcontrol devlist", + shell=True, + sudo=True, ) - device_luns.update({cmd_result.stdout: device_lun}) + for line in cmd_result.stdout.splitlines(): + match = self.LUN_PATTERN_BSD.search(line) + if match: + lun_number = int(match.group(1)) + device_name = match.group(2) + device_luns.update({device_name: lun_number}) + else: + azure_scsi_disks = self._get_scsi_data_disks() + device_luns = {} + lun_number_pattern = re.compile(r"[0-9]+$", re.M) + for disk in azure_scsi_disks: + # /dev/disk/azure/scsi1/lun20 -> 20 + device_lun = int(get_matched_str(disk, lun_number_pattern)) + # readlink -f /dev/disk/azure/scsi1/lun0 + # /dev/sdc + cmd_result = self._node.execute( + f"readlink -f {disk}", shell=True, sudo=True + ) + device_luns.update({cmd_result.stdout: device_lun}) return device_luns def get_raw_data_disks(self) -> List[str]: From 5cc2f2f5acde12abfd5b0e7d8ca91bcd0c31c6f0 Mon Sep 17 00:00:00 2001 From: vipunj <169135823+vipunj@users.noreply.github.com> Date: Thu, 14 Nov 2024 18:52:31 +0530 Subject: [PATCH 05/13] Fixed VM Snapshot Extension test failure - Expected <10> to be less than <10>, but was not (#3503) * removed redundant count increment * Updated count * Updated code --- microsoft/testsuites/vm_extensions/vmsnapshot_extension.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/microsoft/testsuites/vm_extensions/vmsnapshot_extension.py b/microsoft/testsuites/vm_extensions/vmsnapshot_extension.py index b7e5c6785e..7de6d664ee 100644 --- a/microsoft/testsuites/vm_extensions/vmsnapshot_extension.py +++ b/microsoft/testsuites/vm_extensions/vmsnapshot_extension.py @@ -235,7 +235,7 @@ def _verify_vmsnapshot_extension( rpc_status = response.provisioning_state assert_that(rpc_status, "RPC creation failed").is_equal_to("Succeeded") 
count = 0 - for count in range(10): + for _ in range(10): try: # create a restore point for the VM restore_point = "rp_" + datetime.now().strftime("%Y-%m-%d-%H-%M-%S") @@ -265,7 +265,7 @@ def _verify_vmsnapshot_extension( else: raise e time.sleep(1) - count = count + 1 + count += 1 assert_that(count, "Restore point creation failed.").is_less_than(10) def _find_extension_dir(self, node: Node) -> str: From 46e3a78ab75a52439b082c4b720bebe812107158 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Thu, 14 Nov 2024 12:06:02 +0800 Subject: [PATCH 06/13] vm resize: filter vm size with proper disk properties --- lisa/sut_orchestrator/azure/features.py | 244 +++++++++++++++++------- 1 file changed, 174 insertions(+), 70 deletions(-) diff --git a/lisa/sut_orchestrator/azure/features.py b/lisa/sut_orchestrator/azure/features.py index 5a95a1e040..74b1caa50d 100644 --- a/lisa/sut_orchestrator/azure/features.py +++ b/lisa/sut_orchestrator/azure/features.py @@ -2040,6 +2040,143 @@ def resize( self._node.capability = cast(schema.Capability, new_capability) return new_capability, origin_vm_size, new_vm_size_info.vm_size + def _compare_disk_property( + self, + candidate_size: AzureCapability, + current_vm_size: AzureCapability, + property_name: str, + ) -> bool: + assert candidate_size.capability + assert current_vm_size.capability + assert candidate_size.capability.disk + assert current_vm_size.capability.disk + candidate_value = getattr(candidate_size.capability.disk, property_name, None) + current_value = getattr(current_vm_size.capability.disk, property_name, None) + if candidate_value is None or current_value is None: + return False + # If both values are iterable (list or set), check if there's any match + if isinstance(candidate_value, (list, set)): + return any(dc_type in candidate_value for dc_type in current_value) + # Otherwise, do a simple direct comparison + if isinstance(candidate_value, AzureDiskOptionSettings) and isinstance( + current_value, AzureDiskOptionSettings + ): + return candidate_value == current_value + return False + + def _compare_architecture( + self, + candidate_size: AzureCapability, + current_vm_size: AzureCapability, + ) -> bool: + assert candidate_size.capability + assert current_vm_size.capability + assert current_vm_size.capability.features + assert candidate_size.capability.features + current_arch = next( + ( + feature + for feature in current_vm_size.capability.features + if feature.type == ArchitectureSettings.type + ), + None, + ) + candidate_arch = next( + ( + feature + for feature in candidate_size.capability.features + if feature.type == ArchitectureSettings.type + ), + None, + ) + if isinstance(current_arch, ArchitectureSettings) and isinstance( + candidate_arch, ArchitectureSettings + ): + return current_arch.arch == candidate_arch.arch + return False + + def _compare_size_generation( + self, + candidate_size: AzureCapability, + current_vm_size: AzureCapability, + ) -> bool: + assert candidate_size.capability + assert current_vm_size.capability + assert current_vm_size.capability.features + assert candidate_size.capability.features + current_gen = next( + ( + feature + for feature in current_vm_size.capability.features + if feature.type == VhdGenerationSettings.type + ), + None, + ) + candidate_gen = next( + ( + feature + for feature in candidate_size.capability.features + if feature.type == VhdGenerationSettings.type + ), + None, + ) + + if isinstance(current_gen, VhdGenerationSettings) and isinstance( + candidate_gen, VhdGenerationSettings + ): + result = 
search_space.check_setspace(current_gen.gen, candidate_gen.gen) + return result.result + return False + + def _compare_network_interface( + self, candidate_size: AzureCapability, current_vm_size: AzureCapability + ) -> bool: + assert candidate_size.capability + assert current_vm_size.capability + assert current_vm_size.capability.network_interface + assert candidate_size.capability.network_interface + current_network_interface = current_vm_size.capability.network_interface + assert_that(current_network_interface).described_as( + "current_network_interface is not of type NetworkInterfaceOptionSettings." + ).is_instance_of(schema.NetworkInterfaceOptionSettings) + current_data_path = current_network_interface.data_path + candidate_network_interface = candidate_size.capability.network_interface + assert_that(candidate_network_interface).described_as( + "candidate_network_interface is not of type NetworkInterfaceOptionSettings." + ).is_instance_of(schema.NetworkInterfaceOptionSettings) + candidate_data_path = candidate_network_interface.data_path + + # If current VM has accelerated networking enabled + # check that the candidate also has it enabled + if schema.NetworkDataPath.Sriov in current_data_path: # type: ignore + return schema.NetworkDataPath.Sriov in candidate_data_path # type: ignore + return True + + def _compare_core_count( + self, + candidate_size: AzureCapability, + current_vm_size: AzureCapability, + resize_action: ResizeAction, + ) -> bool: + assert candidate_size.capability + assert current_vm_size.capability + assert current_vm_size.capability.core_count + assert candidate_size.capability.core_count + + candidate_core_count = candidate_size.capability.core_count + current_core_count = current_vm_size.capability.core_count + if ( + resize_action == ResizeAction.IncreaseCoreCount + and candidate_core_count < current_core_count # type: ignore + ): + return False + if ( + resize_action == ResizeAction.DecreaseCoreCount + and candidate_core_count > current_core_count # type: ignore + ): + return False + return True + def _select_vm_size( self, resize_action: ResizeAction = ResizeAction.IncreaseCoreCount ) -> Tuple[str, "AzureCapability"]: @@ -2088,91 +2225,58 @@ def _select_vm_size( avail_eligible_intersect.append(new_vm_size) - current_network_interface = current_vm_size.capability.network_interface - assert_that(current_network_interface).described_as( - "current_network_interface is not of type NetworkInterfaceOptionSettings." - ).is_instance_of(schema.NetworkInterfaceOptionSettings) - current_data_path = current_network_interface.data_path # type: ignore current_core_count = current_vm_size.capability.core_count assert_that(current_core_count).described_as( "Didn't return an integer to represent the current VM size core count." 
).is_instance_of(int) assert current_vm_size.capability.features - current_arch = [ - feature - for feature in current_vm_size.capability.features - if feature.type == ArchitectureSettings.type - ] - current_gen = [ - feature - for feature in current_vm_size.capability.features - if feature.type == VhdGenerationSettings.type - ] + # Loop removes candidate vm sizes if they can't be resized to or if the # change in cores resulting from the resize is undesired for candidate_size in avail_eligible_intersect[:]: - assert candidate_size.capability.features - candidate_arch = [ - feature - for feature in candidate_size.capability.features - if feature.type == ArchitectureSettings.type - ] - # Removing vm size from candidate list if the candidate architecture is - # different with current vm size - if isinstance(current_arch[0], ArchitectureSettings) and isinstance( - candidate_arch[0], ArchitectureSettings - ): - if candidate_arch[0].arch != current_arch[0].arch: - avail_eligible_intersect.remove(candidate_size) - continue + if not self._compare_architecture(candidate_size, current_vm_size): + avail_eligible_intersect.remove(candidate_size) + continue - candidate_gen = [ - feature - for feature in candidate_size.capability.features - if feature.type == VhdGenerationSettings.type + # List of disk properties to compare + disk_properties_to_compare = [ + "disk_controller_type", + "os_disk_type", + "data_disk_type", ] - if isinstance(current_gen[0], VhdGenerationSettings) and isinstance( - candidate_gen[0], VhdGenerationSettings - ): - result = search_space.check_setspace( - current_gen[0].gen, candidate_gen[0].gen - ) - # Removing vm size from candidate list if the candidate vhd gen type is - # different with current vm size gen type - if not result.result: - avail_eligible_intersect.remove(candidate_size) - continue - candidate_network_interface = candidate_size.capability.network_interface - assert_that(candidate_network_interface).described_as( - "candidate_network_interface is not of type " - "NetworkInterfaceOptionSettings." - ).is_instance_of(schema.NetworkInterfaceOptionSettings) - candidate_data_path = candidate_network_interface.data_path # type: ignore - # Can't resize from an accelerated networking enabled size to a size where - # accelerated networking isn't enabled - if ( - schema.NetworkDataPath.Sriov in current_data_path # type: ignore - and schema.NetworkDataPath.Sriov not in candidate_data_path # type: ignore # noqa: E501 - ): - # Removing sizes without accelerated networking capabilities - # if original size has it enabled + # Flag to track whether the candidate passed all disk property checks + candidate_passed_all_checks = True + for prop in disk_properties_to_compare: + # compare the current property between the candidate size + # and the current VM size + if not self._compare_disk_property( + candidate_size, current_vm_size, prop + ): + # If the comparison fails (returns False) + # mark the candidate as failing all checks + candidate_passed_all_checks = False + break + # If the candidate failed any of the checks (disk properties did not match) + if not candidate_passed_all_checks: + # Remove the candidate size from the list of available eligible sizes avail_eligible_intersect.remove(candidate_size) + # Continue to the next candidate size in the loop + # without checking further continue - candidate_core_count = candidate_size.capability.core_count - assert_that(candidate_core_count).described_as( - "Didn't return an integer to represent the " - "candidate VM size core count." 
- ).is_instance_of(int) - # Removing vm size from candidate list if the change in core count - # doesn't align with the ResizeAction passed into this function - if ( - resize_action == ResizeAction.IncreaseCoreCount - and candidate_core_count < current_core_count # type: ignore - or resize_action == ResizeAction.DecreaseCoreCount - and candidate_core_count > current_core_count # type: ignore + if not self._compare_size_generation(candidate_size, current_vm_size): + avail_eligible_intersect.remove(candidate_size) + continue + + if not self._compare_network_interface(candidate_size, current_vm_size): + avail_eligible_intersect.remove(candidate_size) + continue + + if not self._compare_core_count( + candidate_size, current_vm_size, resize_action ): avail_eligible_intersect.remove(candidate_size) + continue if not avail_eligible_intersect: raise LisaException( From 9e0646428f2cd09dd3d9546be7093d7f60d722f3 Mon Sep 17 00:00:00 2001 From: Aditya Nagesh Date: Thu, 7 Nov 2024 23:17:25 +0530 Subject: [PATCH 07/13] Hibernation Testcase: Add NoRedundancy Feature Requirement Hibernation test cases requires VM Redundancy type to be NoRedundancy. Add that to test requirement --- lisa/features/availability.py | 5 +++++ microsoft/testsuites/power/power.py | 19 ++++++++++--------- microsoft/testsuites/power/stress.py | 3 ++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/lisa/features/availability.py b/lisa/features/availability.py index 5421624121..024f689f4f 100644 --- a/lisa/features/availability.py +++ b/lisa/features/availability.py @@ -138,3 +138,8 @@ def enabled(self) -> bool: AvailabilitySettings, availability_type=search_space.SetSpace(True, [AvailabilityType.AvailabilityZone]), ) + +AvailabilityTypeNoRedundancy = partial( + AvailabilitySettings, + availability_type=search_space.SetSpace(True, [AvailabilityType.NoRedundancy]), +) diff --git a/microsoft/testsuites/power/power.py b/microsoft/testsuites/power/power.py index d29a0aa537..d3cdcef898 100644 --- a/microsoft/testsuites/power/power.py +++ b/microsoft/testsuites/power/power.py @@ -15,6 +15,7 @@ TestSuiteMetadata, ) from lisa.features import Disk, HibernationEnabled, Sriov, Synthetic +from lisa.features.availability import AvailabilityTypeNoRedundancy from lisa.node import Node from lisa.operating_system import BSD, Windows from lisa.testsuite import simple_requirement @@ -61,7 +62,7 @@ def before_case(self, log: Logger, **kwargs: Any) -> None: priority=3, requirement=simple_requirement( network_interface=Synthetic(), - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_synthetic_network(self, node: Node, log: Logger) -> None: @@ -76,7 +77,7 @@ def verify_hibernation_synthetic_network(self, node: Node, log: Logger) -> None: priority=3, requirement=simple_requirement( network_interface=Sriov(), - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_sriov_network(self, node: Node, log: Logger) -> None: @@ -94,7 +95,7 @@ def verify_hibernation_sriov_network(self, node: Node, log: Logger) -> None: """, priority=3, requirement=simple_requirement( - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_time_sync(self, node: Node, log: Logger) -> None: @@ -136,7 +137,7 @@ def verify_hibernation_time_sync(self, node: Node, log: Logger) -> None: 
priority=3, requirement=simple_requirement( min_count=2, - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_with_network_workload( @@ -159,7 +160,7 @@ def verify_hibernation_with_network_workload( """, priority=3, requirement=simple_requirement( - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_with_storage_workload(self, node: Node, log: Logger) -> None: @@ -179,7 +180,7 @@ def verify_hibernation_with_storage_workload(self, node: Node, log: Logger) -> N """, priority=3, requirement=simple_requirement( - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_with_memory_workload(self, node: Node, log: Logger) -> None: @@ -207,7 +208,7 @@ def verify_hibernation_with_memory_workload(self, node: Node, log: Logger) -> No requirement=simple_requirement( min_nic_count=8, network_interface=Synthetic(), - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_synthetic_network_max_nics( @@ -225,7 +226,7 @@ def verify_hibernation_synthetic_network_max_nics( requirement=simple_requirement( min_nic_count=8, network_interface=Sriov(), - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def verify_hibernation_sriov_network_max_nics( @@ -242,7 +243,7 @@ def verify_hibernation_sriov_network_max_nics( priority=3, requirement=simple_requirement( min_nic_count=8, - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], min_data_disk_count=32, ), ) diff --git a/microsoft/testsuites/power/stress.py b/microsoft/testsuites/power/stress.py index 03ad354951..065f52e500 100644 --- a/microsoft/testsuites/power/stress.py +++ b/microsoft/testsuites/power/stress.py @@ -12,6 +12,7 @@ TestSuiteMetadata, ) from lisa.features import HibernationEnabled, Sriov +from lisa.features.availability import AvailabilityTypeNoRedundancy from lisa.node import Node from lisa.operating_system import BSD, Windows from lisa.testsuite import simple_requirement @@ -46,7 +47,7 @@ def before_case(self, log: Logger, **kwargs: Any) -> None: timeout=720000, requirement=simple_requirement( network_interface=Sriov(), - supported_features=[HibernationEnabled()], + supported_features=[HibernationEnabled(), AvailabilityTypeNoRedundancy()], ), ) def stress_hibernation(self, environment: Environment, log: Logger) -> None: From 817d3686988161c7e66b900b5901136b91c7211a Mon Sep 17 00:00:00 2001 From: bhagyapathak Date: Mon, 18 Nov 2024 12:15:33 +0000 Subject: [PATCH 08/13] Fix verify_azuremonitoragent_linux for Azure Linux 3.0 --- microsoft/testsuites/vm_extensions/AzureMonitorAgentLinux.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/microsoft/testsuites/vm_extensions/AzureMonitorAgentLinux.py b/microsoft/testsuites/vm_extensions/AzureMonitorAgentLinux.py index 61ba5fe39b..86a0f95e15 100644 --- a/microsoft/testsuites/vm_extensions/AzureMonitorAgentLinux.py +++ b/microsoft/testsuites/vm_extensions/AzureMonitorAgentLinux.py @@ -96,7 +96,7 @@ def _is_supported_linux_distro(self, node: Node) -> bool: Ubuntu: [16, 18, 20], Suse: [12, 15], SLES: [12, 15], - CBLMariner: [2], + CBLMariner: [2, 3], } 
supported_major_versions_arm64 = { @@ -105,7 +105,7 @@ def _is_supported_linux_distro(self, node: Node) -> bool: Debian: [11], Ubuntu: [18, 20], SLES: [15], - CBLMariner: [2], + CBLMariner: [2, 3], } for distro in supported_major_versions_x86_64: From 4ab37dacb993105006a023e1b8b9c5fe33e2b1c2 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Wed, 20 Nov 2024 13:54:14 +0800 Subject: [PATCH 09/13] Check and uninstall extensions before installing it --- .../runtime_extensions/custom_script.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/microsoft/testsuites/vm_extensions/runtime_extensions/custom_script.py b/microsoft/testsuites/vm_extensions/runtime_extensions/custom_script.py index 9cb52117e6..f00d4045e4 100644 --- a/microsoft/testsuites/vm_extensions/runtime_extensions/custom_script.py +++ b/microsoft/testsuites/vm_extensions/runtime_extensions/custom_script.py @@ -21,6 +21,7 @@ from lisa.sut_orchestrator import AZURE from lisa.sut_orchestrator.azure.features import AzureExtension from lisa.sut_orchestrator.azure.tools import Waagent +from lisa.util import LisaException from microsoft.testsuites.vm_extensions.runtime_extensions.common import ( check_waagent_version_supported, execute_command, @@ -38,10 +39,22 @@ def _create_and_verify_extension_run( assert_exception: Any = None, ) -> None: extension = node.features[AzureExtension] + extension_name = "CustomScript" + try: + # Delete VM Extension if already present + extension.delete(extension_name) + except HttpResponseError as identifier: + if any(s in str(identifier) for s in ["was not found"]): + node.log.info(f"{extension_name} is not installed") + else: + raise LisaException( + f"unexpected exception happened {identifier} during delete" + f" extension {extension_name}" + ) from identifier def enable_extension() -> Any: result = extension.create_or_update( - name="CustomScript", + name=extension_name, publisher="Microsoft.Azure.Extensions", type_="CustomScript", type_handler_version="2.1", From 5a3b9a407f7ba6b9405ae5537de492f5382fa0ae Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Wed, 20 Nov 2024 10:44:45 +0800 Subject: [PATCH 10/13] Fix value error for guest_vm_type --- lisa/sut_orchestrator/libvirt/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lisa/sut_orchestrator/libvirt/context.py b/lisa/sut_orchestrator/libvirt/context.py index 77893a824d..e9fbc5aab4 100644 --- a/lisa/sut_orchestrator/libvirt/context.py +++ b/lisa/sut_orchestrator/libvirt/context.py @@ -55,7 +55,7 @@ class NodeContext: vm_name: str = "" kernel_source_path: str = "" kernel_path: str = "" - guest_vm_type: GuestVmType = GuestVmType.Standard + guest_vm_type: GuestVmType = field(default_factory=lambda: GuestVmType.Standard) cloud_init_file_path: str = "" ignition_file_path: str = "" os_disk_source_file_path: Optional[str] = None From a1348d6d14dd8ac4ece1a911046f4410b147c818 Mon Sep 17 00:00:00 2001 From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com> Date: Fri, 22 Nov 2024 10:33:48 +0530 Subject: [PATCH 11/13] dom0_kernel_installer: Add support for mariner-3.0 (#3522) dom0 image based on mariner 3.0 has different parameter that need to be changed while installing kernel under different file. Add support for it under kernel installer transformer. 
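Sketched out, the installer now keys the boot config file and the initrd
naming scheme off the Mariner major version (function name hypothetical;
the paths and naming patterns are the ones used in the diff below):

    def select_boot_params(mariner_version: int, kernel: str) -> tuple:
        if mariner_version == 2:
            # Dom0 on Mariner 2.0 keeps a dedicated MSHV grub config
            # and names the initrd "initrd.img-<kernel>".
            return ("/boot/mariner-mshv.cfg", f"initrd.img-{kernel}")
        # Mariner 3.0 images boot from the standard grub2 config and
        # use "initramfs-<kernel>.img" naming instead.
        return ("/boot/grub2/grub.cfg", f"initramfs-{kernel}.img")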
Signed-off-by: Smit Gardhariya --- lisa/transformers/dom0_kernel_installer.py | 46 +++++++++++++++++----- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/lisa/transformers/dom0_kernel_installer.py b/lisa/transformers/dom0_kernel_installer.py index 19a63c0b48..011d1f368d 100644 --- a/lisa/transformers/dom0_kernel_installer.py +++ b/lisa/transformers/dom0_kernel_installer.py @@ -9,6 +9,7 @@ from lisa import schema from lisa.node import Node +from lisa.operating_system import CBLMariner from lisa.tools import Cp, Echo, Ln, Ls, Sed, Tar, Uname from lisa.util import field_metadata @@ -80,6 +81,8 @@ def install(self) -> str: uname = node.tools[Uname] current_kernel = uname.get_linux_information().kernel_version_raw + mariner_version = int(node.os.information.version.major) + # Kernel absolute path: /home/user/vmlinuz-5.15.57.1+ # Naming convention : vmlinuz- new_kernel = os.path.basename(kernel_image_path).split("-")[1].strip() @@ -124,10 +127,19 @@ def install(self) -> str: node.get_pure_path(f"/boot/initrd.img-{new_kernel}"), ) else: + # Mariner 3.0 initrd + target = f"/boot/initramfs-{current_kernel}.img" + link = f"/boot/initramfs-{new_kernel}.img" + + if isinstance(node.os, CBLMariner) and mariner_version == 2: + # Mariner 2.0 initrd + target = f"/boot/initrd.img-{current_kernel}" + link = f"/boot/initrd.img-{new_kernel}" + ln = node.tools[Ln] ln.create_link( - target=f"/boot/initrd.img-{current_kernel}", - link=f"/boot/initrd.img-{new_kernel}", + target=target, + link=link, ) if kernel_config_path: @@ -148,6 +160,7 @@ def install(self) -> str: node, current_kernel, new_kernel, + mariner_version, ) return new_kernel @@ -185,10 +198,12 @@ def install(self) -> str: uname = node.tools[Uname] current_kernel = uname.get_linux_information().kernel_version_raw + mariner_version = int(node.os.information.version.major) _update_mariner_config( node, current_kernel, new_kernel, + mariner_version, ) return new_kernel @@ -211,22 +226,35 @@ def _update_mariner_config( node: Node, current_kernel: str, new_kernel: str, + mariner_version: int, ) -> None: - mariner_config: str = "/boot/mariner-mshv.cfg" sed = node.tools[Sed] - # Modify the /boot/mariner-mshv.cfg to point new kernel binary + # Param for Dom0 3.0 kernel installation + mariner_config = "/boot/grub2/grub.cfg" + vmlinuz_regexp = f"vmlinuz-{current_kernel}" + vmlinuz_replacement = f"vmlinuz-{new_kernel}" + initrd_regexp = f"initramfs-{current_kernel}.img" + initrd_replacement = f"initramfs-{new_kernel}.img" + + if isinstance(node.os, CBLMariner) and mariner_version == 2: + # Change param for Dom0 2.0 kernel installation + mariner_config = "/boot/mariner-mshv.cfg" + initrd_regexp = f"mariner_initrd_mshv=initrd.img-{current_kernel}" + initrd_replacement = f"mariner_initrd_mshv=initrd.img-{new_kernel}" + + # Modify file to point new kernel binary sed.substitute( - regexp=f"mariner_linux_mshv=vmlinuz-{current_kernel}", - replacement=f"mariner_linux_mshv=vmlinuz-{new_kernel}", + regexp=vmlinuz_regexp, + replacement=vmlinuz_replacement, file=mariner_config, sudo=True, ) - # Modify the /boot/mariner-mshv.cfg to point new initrd binary + # Modify file to point new initrd binary sed.substitute( - regexp=f"mariner_initrd_mshv=initrd.img-{current_kernel}", - replacement=f"mariner_initrd_mshv=initrd.img-{new_kernel}", + regexp=initrd_regexp, + replacement=initrd_replacement, file=mariner_config, sudo=True, ) From e1f5e89ccdf84f40707b23cdff4cfe473f655605 Mon Sep 17 00:00:00 2001 From: Sudipta Pandit Date: Fri, 22 Nov 2024 16:50:36 +0530 Subject: 
[PATCH 12/13] xfstests: fix umask for mariner (#3526) * xfstests: fix umask for mariner * fix types --- microsoft/testsuites/xfstests/xfstesting.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/microsoft/testsuites/xfstests/xfstesting.py b/microsoft/testsuites/xfstests/xfstesting.py index cb40aea2cd..97f2472fd1 100644 --- a/microsoft/testsuites/xfstests/xfstesting.py +++ b/microsoft/testsuites/xfstests/xfstesting.py @@ -18,7 +18,7 @@ simple_requirement, ) from lisa.features import Disk, Nvme -from lisa.operating_system import BSD, Oracle, Redhat, Windows +from lisa.operating_system import BSD, CBLMariner, Oracle, Redhat, Windows from lisa.sut_orchestrator import AZURE from lisa.sut_orchestrator.azure.features import AzureFileShare from lisa.sut_orchestrator.azure.platform_ import AzurePlatform @@ -27,8 +27,8 @@ from lisa.util import BadEnvironmentStateException, generate_random_chars from microsoft.testsuites.xfstests.xfstests import Xfstests -_scratch_folder = "/root/scratch" -_test_folder = "/root/test" +_scratch_folder = "/mnt/scratch" +_test_folder = "/mnt/test" def _prepare_data_disk( @@ -512,6 +512,15 @@ def _execute_xfstests( assert environment, "fail to get environment from testresult" node = cast(RemoteNode, environment.nodes[0]) + + # Fix Mariner umask for xfstests + if isinstance(node.os, CBLMariner): + echo = node.tools[Echo] + profile_path = node.get_pure_path("/etc/profile") + echo.write_to_file("umask 0022\n", profile_path, sudo=True, append=True) + # Close the current session to apply the umask change on the next login + node.close() + # TODO: will include generic/641 once the kernel contains below fix. # exclude this case generic/641 temporarily # it will trigger oops on RHEL8.3/8.4, VM will reboot From a534b8b82ff32c35c3ba67f4c89a0ac67c8c915a Mon Sep 17 00:00:00 2001 From: Sudipta Pandit Date: Fri, 22 Nov 2024 20:22:33 +0530 Subject: [PATCH 13/13] xfstests: add new generic xfstests for ext4 filesystem (#3527) * add generic xfstests for ext4 filesystem * fix formatting --- microsoft/testsuites/xfstests/xfstesting.py | 71 +++++++++++++++++++++ microsoft/testsuites/xfstests/xfstests.py | 6 +- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/microsoft/testsuites/xfstests/xfstesting.py b/microsoft/testsuites/xfstests/xfstesting.py index 97f2472fd1..297b3ce003 100644 --- a/microsoft/testsuites/xfstests/xfstesting.py +++ b/microsoft/testsuites/xfstests/xfstesting.py @@ -178,6 +178,44 @@ def verify_generic_standard_datadisk( excluded_tests=self.excluded_tests, ) + @TestCaseMetadata( + description=""" + This test case will run generic xfstests testing against + standard data disk with ext4 type system. 
+ """, + requirement=simple_requirement( + disk=schema.DiskOptionSettings( + data_disk_type=schema.DiskType.StandardHDDLRS, + os_disk_type=schema.DiskType.StandardHDDLRS, + data_disk_iops=500, + data_disk_count=search_space.IntRange(min=1), + ), + unsupported_os=[BSD, Windows], + ), + timeout=TIME_OUT, + use_new_environment=True, + priority=3, + ) + def verify_generic_ext4_standard_datadisk( + self, log_path: Path, result: TestResult + ) -> None: + environment = result.environment + assert environment, "fail to get environment from testresult" + node = cast(RemoteNode, environment.nodes[0]) + xfstests = self._install_xfstests(node) + disk = node.features[Disk] + data_disks = disk.get_raw_data_disks() + self._execute_xfstests( + log_path, + xfstests, + result, + data_disks[0], + f"{data_disks[0]}1", + f"{data_disks[0]}2", + file_system=FileSystem.ext4, + excluded_tests=self.excluded_tests, + ) + @TestCaseMetadata( description=""" This test case will run xfs xfstests testing against @@ -320,6 +358,38 @@ def verify_generic_nvme_datadisk(self, log_path: Path, result: TestResult) -> No excluded_tests=self.excluded_tests, ) + @TestCaseMetadata( + description=""" + This test case will run generic xfstests testing against + nvme data disk with ext4 type system. + """, + timeout=TIME_OUT, + priority=3, + use_new_environment=True, + requirement=simple_requirement( + supported_features=[Nvme], unsupported_os=[BSD, Windows] + ), + ) + def verify_generic_ext4_nvme_datadisk( + self, log_path: Path, result: TestResult + ) -> None: + environment = result.environment + assert environment, "fail to get environment from testresult" + node = cast(RemoteNode, environment.nodes[0]) + xfstests = self._install_xfstests(node) + nvme_disk = node.features[Nvme] + nvme_data_disks = nvme_disk.get_raw_data_disks() + self._execute_xfstests( + log_path, + xfstests, + result, + nvme_data_disks[0], + f"{nvme_data_disks[0]}p1", + f"{nvme_data_disks[0]}p2", + file_system=FileSystem.ext4, + excluded_tests=self.excluded_tests, + ) + @TestCaseMetadata( description=""" This test case will run xfs xfstests testing against @@ -547,6 +617,7 @@ def _execute_xfstests( test_dev, _test_folder, test_type, + file_system.name, mount_opts, ) xfstests.set_excluded_tests(excluded_tests) diff --git a/microsoft/testsuites/xfstests/xfstests.py b/microsoft/testsuites/xfstests/xfstests.py index 97d91d2d5b..5fa396e787 100644 --- a/microsoft/testsuites/xfstests/xfstests.py +++ b/microsoft/testsuites/xfstests/xfstests.py @@ -304,14 +304,14 @@ def set_local_config( test_dev: str, test_folder: str, test_type: str, + fs_type: str, mount_opts: str = "", ) -> None: xfstests_path = self.get_xfstests_path() config_path = xfstests_path.joinpath("local.config") if self.node.shell.exists(config_path): self.node.shell.remove(config_path) - if "generic" == test_type: - test_type = "xfs" + echo = self.node.tools[Echo] if mount_opts: content = "\n".join( @@ -326,7 +326,7 @@ def set_local_config( content = "\n".join( [ f"[{test_type}]", - f"FSTYP={test_type}", + f"FSTYP={fs_type}", ] ) echo.write_to_file(content, config_path, append=True)
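For context on the last hunk: set_local_config() previously rewrote a
"generic" test_type to "xfs", so a generic run always declared FSTYP=xfs.
With fs_type split out, the section name and the filesystem are now
independent, which is what lets the new generic ext4 cases above work. A
hypothetical call (the leading scratch_dev/scratch_folder parameters are
not visible in the hunk above and are assumed):

    xfstests.set_local_config(
        scratch_dev="/dev/sdc2",
        scratch_folder="/mnt/scratch",
        test_dev="/dev/sdc1",
        test_folder="/mnt/test",
        test_type="generic",
        fs_type="ext4",
    )
    # Generated local.config (no mount_opts case):
    #   [generic]
    #   FSTYP=ext4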