Skip to content

Commit

Permalink
OFED driver for ConnectX-3 devices is not supported on 5.6+ kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
LiliDeng committed Oct 13, 2023
1 parent 8bd6376 commit 04f818b
Showing 1 changed file with 18 additions and 8 deletions.
26 changes: 18 additions & 8 deletions lisa/features/infiniband.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def _get_ib_device_names(self) -> List[str]:
).is_not_empty()
return result.stdout.split()

def _get_mofed_version(self) -> str:
def _get_ofed_version(self) -> str:
node = self._node
default = "5.4-3.0.3.0"
if self._is_legacy_device():
Expand Down Expand Up @@ -336,25 +336,35 @@ def install_ofed(self) -> None:
os_version = node.os.information.release.split(".")
# Dependencies
kernel = node.tools[Uname].get_linux_information().kernel_version_raw
kernel_version = node.tools[Uname].get_linux_information().kernel_version
self._install_dependencies()

# Install OFED
mofed_version = self._get_mofed_version()
ofed_version = self._get_ofed_version()
if isinstance(node.os, Oracle):
os_class = "ol"
elif isinstance(node.os, Redhat):
os_class = "rhel"
else:
os_class = node.os.name.lower()

mofed_folder = (
f"MLNX_OFED_LINUX-{mofed_version}-{os_class}"
# See https://forums.developer.nvidia.com/t/connectx-3-on-ubuntu-20-04/206201/8 # noqa: E501
# for why ConnectX-3 is not supported on kernels >= 5.6.
if self._is_legacy_device() and kernel_version >= "5.6.0":
raise UnsupportedKernelException(
node.os,
"OFED driver for ConnectX-3 devices is not supported on "
"kernel versions >= 5.6",
)

ofed_folder = (
f"MLNX_OFED_LINUX-{ofed_version}-{os_class}"
f"{os_version[0]}."
f"{os_version[1]}-x86_64"
)
tarball_name = f"{mofed_folder}.tgz"
tarball_name = f"{ofed_folder}.tgz"
mlnx_ofed_download_url = (
f"https://content.mellanox.com/ofed/MLNX_OFED-{mofed_version}"
f"https://content.mellanox.com/ofed/MLNX_OFED-{ofed_version}"
f"/{tarball_name}"
)

Expand Down Expand Up @@ -402,12 +412,12 @@ def install_ofed(self) -> None:
extra_params += " --skip-unsupported-devices-check"

node.execute(
f"{self.resource_disk_path}/{mofed_folder}/mlnxofedinstall "
f"{self.resource_disk_path}/{ofed_folder}/mlnxofedinstall "
f"--add-kernel-support {extra_params} "
f"--tmpdir {self.resource_disk_path}/tmp",
expected_exit_code=0,
expected_exit_code_failure_message="SetupRDMA: failed to install "
"MOFED Drivers",
"OFED Drivers",
sudo=True,
timeout=1200,
)
Expand Down

0 comments on commit 04f818b

Please sign in to comment.