Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding GPU feature and some dependent tools #1318

Merged
merged 2 commits into from
May 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions lisa/tools/wget.py → lisa/base_tools/wget.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,33 @@
import pathlib
import re
from typing import cast
from typing import TYPE_CHECKING

from lisa.executable import Tool
from lisa.operating_system import Posix
from lisa.util import LisaException

if TYPE_CHECKING:
from lisa.operating_system import Posix


class Wget(Tool):
__pattern_path = re.compile(
r"([\w\W]*?)(-|File) (‘|')(?P<path>.+?)(’|') (saved|already there)"
)

# regex to validate url
# source -
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
__url_pattern = re.compile(
r"^(?:http|ftp)s?://" # http:// or https://
r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)"
r"+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|" # ...domain
r"localhost|" # localhost...
r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip
r"(?::\d+)?" # optional port
r"(?:/?|[/?]\S+)$",
re.IGNORECASE,
)

@property
def command(self) -> str:
return "wget"
Expand All @@ -21,13 +37,20 @@ def can_install(self) -> bool:
return True

def install(self) -> bool:
posix_os: Posix = cast(Posix, self.node.os)
posix_os: Posix = self.node.os # type: ignore
posix_os.install_packages([self])
return self._check_exists()

def get(
self, url: str, file_path: str = "", filename: str = "", overwrite: bool = True
self,
url: str,
file_path: str = "",
filename: str = "",
overwrite: bool = True,
executable: bool = False,
) -> str:
if re.match(self.__url_pattern, url) is None:
raise LisaException(f"Invalid URL '{url}'")
# create folder when it doesn't exist
self.node.execute(f"mkdir -p {file_path}", shell=True)
# combine download file path
Expand All @@ -52,4 +75,7 @@ def get(
actual_file_path = self.node.execute(f"ls {download_file_path}", shell=True)
if actual_file_path.exit_code != 0:
raise LisaException(f"File {actual_file_path} doesn't exist.")
if executable:
self.node.execute(f"chmod +x {actual_file_path}")

return actual_file_path.stdout
3 changes: 2 additions & 1 deletion lisa/features/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .gpu import Gpu
from .serial_console import SerialConsole
from .startstop import StartStop

__all__ = ["SerialConsole", "StartStop"]
__all__ = ["Gpu", "SerialConsole", "StartStop"]
148 changes: 148 additions & 0 deletions lisa/features/gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import re
from enum import Enum
from typing import Any

from lisa.base_tools.wget import Wget
from lisa.feature import Feature
from lisa.operating_system import Redhat, Ubuntu
from lisa.tools import Uname
from lisa.util import LisaException, SkippedException
from lisa.util.logger import get_logger

FEATURE_NAME_GPU = "Gpu"

class ComputeSDK(Enum):
    # Kinds of GPU driver SDK that can be installed on a node.
    # Values are numbered from 1 in declaration order.

    # GRID Driver
    GRID = 1
    # CUDA Driver
    CUDA = 2

# Link to the latest GRID driver
# The DIR link is
# https://download.microsoft.com/download/9/5/c/95c667ff-ab95-4c56-89e0-e13e9a76782d/NVIDIA-Linux-x86_64-460.32.03-grid-azure.run
DEFAULT_GRID_DRIVER_URL = "https://go.microsoft.com/fwlink/?linkid=874272"


class Gpu(Feature):
    """Feature that installs GPU driver SDKs (GRID or CUDA) on a node.

    ``install_compute_sdk`` is the entry point: it installs the
    distro-specific build dependencies and then the requested driver.
    Only nodes whose OS is ``Redhat`` or ``Ubuntu`` are handled; any other
    OS results in a ``LisaException``.
    """

    def __init__(self, node: Any, platform: Any) -> None:
        super().__init__(node, platform)
        # Feature-scoped logger derived from the node's logger.
        self._log = get_logger("feature", self.name(), self._node.log)

    @classmethod
    def name(cls) -> str:
        """Return the feature name (``FEATURE_NAME_GPU``)."""
        return FEATURE_NAME_GPU

    def _is_supported(self) -> bool:
        """Tell whether GPU is supported on the node.

        This base implementation always raises ``NotImplementedError``;
        presumably platform-specific subclasses override it (verify against
        the platform implementations).
        """
        raise NotImplementedError()

    def _install_grid_driver(self, driver_url: str) -> None:
        """Download the NVIDIA GRID driver installer and run it.

        Args:
            driver_url: URL of the GRID driver ``.run`` installer.

        Raises:
            LisaException: if the installer exits with a non-zero code.
        """
        self._log.debug("Starting GRID driver installation")

        # Download the installer into the node's working path and mark it
        # executable so it can be invoked directly.
        wget_tool = self._node.tools[Wget]
        grid_file_path = wget_tool.get(
            driver_url,
            str(self._node.working_path),
            "NVIDIA-Linux-x86_64-grid.run",
            executable=True,
        )

        result = self._node.execute(
            f"{grid_file_path} --no-nouveau-check --silent --no-cc-version-check"
        )
        if result.exit_code != 0:
            raise LisaException(
                "Failed to install the GRID driver! "
                f"exit-code: {result.exit_code} stderr: {result.stderr}"
            )

        self._log.debug("Successfully installed the GRID driver")

    def _install_cuda_driver(self, version: str) -> None:
        """Install the CUDA repository package matching the node's distro.

        Args:
            version: CUDA repo package version, e.g. ``"10.1.105-1"``.

        Raises:
            LisaException: if the node's OS is neither Redhat nor Ubuntu.
        """
        self._log.debug("Starting CUDA driver installation")
        os_version = self._node.os.os_version

        if isinstance(self._node.os, Redhat):
            # Only the major release number is part of the RHEL repo path.
            release = os_version.release.split(".")[0]
            cuda_repo_pkg = f"cuda-repo-rhel{release}-{version}.x86_64.rpm"
            cuda_repo = (
                "http://developer.download.nvidia.com/"
                f"compute/cuda/repos/rhel{release}/x86_64/{cuda_repo_pkg}"
            )
        elif isinstance(self._node.os, Ubuntu):
            # Strip every non-digit from the release, e.g. "18.04" -> "1804".
            release = re.sub("[^0-9]+", "", os_version.release)
            cuda_repo_pkg = f"cuda-repo-ubuntu{release}_{version}_amd64.deb"
            cuda_repo = (
                "http://developer.download.nvidia.com/compute/"
                f"cuda/repos/ubuntu{release}/x86_64/{cuda_repo_pkg}"
            )
        else:
            # The leading space keeps the distro name from running into the
            # rest of the sentence when the literals are concatenated.
            raise LisaException(
                f"Distro {self._node.os.__class__.__name__}"
                " not supported to install CUDA driver."
            )

        # download and install the cuda driver package from the repo
        self._node.os._install_package_from_url(cuda_repo, signed=False)

    def _install_gpu_dep(self) -> None:
        """Install the distro packages the GPU drivers depend on.

        Raises:
            LisaException: if the node's OS is neither Redhat nor Ubuntu.
        """
        uname_tool = self._node.tools[Uname]
        uname_ver = uname_tool.get_linux_information().uname_version

        # install dependency libraries for distros
        if isinstance(self._node.os, Redhat):
            # install the kernel-devel and kernel-header packages
            package_name = f"kernel-devel-{uname_ver} kernel-headers-{uname_ver}"
            self._node.os.install_packages(package_name)
            # mesa-libEGL install/update is required to avoid a conflict
            # between libraries - bugzilla.redhat 1584740
            package_name = "mesa-libGL mesa-libEGL libglvnd-devel"
            self._node.os.install_packages(package_name)
            # install dkms
            package_name = "dkms"
            self._node.os.install_packages(package_name, signed=False)
        elif isinstance(self._node.os, Ubuntu):
            package_name = (
                f"build-essential libelf-dev linux-tools-{uname_ver}"
                f" linux-cloud-tools-{uname_ver} python libglvnd-dev ubuntu-desktop"
            )
            self._node.os.install_packages(package_name)
        else:
            raise LisaException(
                f"Distro {self._node.os.__class__.__name__}"
                " is not supported for GPU."
            )

    def check_support(self) -> None:
        """Raise ``SkippedException`` when ``_is_supported()`` returns False."""
        # TODO: more supportability can be defined here
        if not self._is_supported():
            raise SkippedException(f"GPU is not supported with distro {self._node.os}")

    def install_compute_sdk(
        self, driver: ComputeSDK = ComputeSDK.CUDA, version: str = ""
    ) -> None:
        """Install GPU dependencies and then the requested driver SDK.

        Args:
            driver: which SDK to install; defaults to ``ComputeSDK.CUDA``.
            version: for CUDA, the repo package version (defaults to
                ``"10.1.105-1"``); for GRID, the value is used as the driver
                download URL (defaults to ``DEFAULT_GRID_DRIVER_URL``).

        Raises:
            LisaException: if ``driver`` is not a known ``ComputeSDK`` member,
                or if a dependency/driver installation step fails.
        """
        # install GPU dependencies before installing driver
        self._install_gpu_dep()

        # install the driver
        if driver == ComputeSDK.GRID:
            # For GRID the "version" argument carries the download URL.
            self._install_grid_driver(version or DEFAULT_GRID_DRIVER_URL)
        elif driver == ComputeSDK.CUDA:
            self._install_cuda_driver(version or "10.1.105-1")
        else:
            # Report the value that was actually passed in, not the enum class.
            raise LisaException(
                f"{driver} is invalid. "
                "No valid driver SDK name provided to install."
            )
3 changes: 2 additions & 1 deletion lisa/features/startstop.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
from typing import Any

from lisa.feature import Feature
from lisa.util.logger import get_logger

FEATURE_NAME_STARTSTOP = "StartStop"


class StartStop(Feature):
def __init__(self, node: Any, platform: Any) -> None:
super().__init__(node, platform)
self._log = self._node.log
self._log = get_logger("feature", self.name(), self._node.log)

@classmethod
def name(cls) -> str:
Expand Down
Loading