Skip to content

Commit

Permalink
Adding GPU feature and some dependent tools
Browse files Browse the repository at this point in the history
Added separate install_packages_from_url()
Moved wget class under base_tools/
Some cleanups and fixes.
  • Loading branch information
sharsonia committed May 25, 2021
1 parent 96703c2 commit ea78d5a
Show file tree
Hide file tree
Showing 9 changed files with 537 additions and 38 deletions.
34 changes: 30 additions & 4 deletions lisa/tools/wget.py → lisa/base_tools/wget.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,33 @@
import pathlib
import re
from typing import cast
from typing import TYPE_CHECKING

from lisa.executable import Tool
from lisa.operating_system import Posix
from lisa.util import LisaException

if TYPE_CHECKING:
from lisa.operating_system import Posix


class Wget(Tool):
__pattern_path = re.compile(
r"([\w\W]*?)(-|File) (‘|')(?P<path>.+?)(’|') (saved|already there)"
)

# regex to validate url
# source -
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
__url_pattern = re.compile(
r"^(?:http|ftp)s?://" # http:// or https://
r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)"
r"+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|" # ...domain
r"localhost|" # localhost...
r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip
r"(?::\d+)?" # optional port
r"(?:/?|[/?]\S+)$",
re.IGNORECASE,
)

@property
def command(self) -> str:
return "wget"
Expand All @@ -21,13 +37,20 @@ def can_install(self) -> bool:
return True

def install(self) -> bool:
posix_os: Posix = cast(Posix, self.node.os)
posix_os: Posix = self.node.os # type: ignore
posix_os.install_packages([self])
return self._check_exists()

def get(
self, url: str, file_path: str = "", filename: str = "", overwrite: bool = True
self,
url: str,
file_path: str = "",
filename: str = "",
overwrite: bool = True,
executable: bool = False,
) -> str:
if re.match(self.__url_pattern, url) is None:
raise LisaException(f"Invalid URL '{url}'")
# create folder when it doesn't exist
self.node.execute(f"mkdir -p {file_path}", shell=True)
# combine download file path
Expand All @@ -52,4 +75,7 @@ def get(
actual_file_path = self.node.execute(f"ls {download_file_path}", shell=True)
if actual_file_path.exit_code != 0:
raise LisaException(f"File {actual_file_path} doesn't exist.")
if executable:
self.node.execute(f"chmod +x {actual_file_path}")

return actual_file_path.stdout
3 changes: 2 additions & 1 deletion lisa/features/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from .gpu import Gpu
from .serial_console import SerialConsole
from .startstop import StartStop

__all__ = ["SerialConsole", "StartStop"]
__all__ = ["Gpu", "SerialConsole", "StartStop"]
148 changes: 148 additions & 0 deletions lisa/features/gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import re
from enum import Enum
from typing import Any

from lisa.base_tools.wget import Wget
from lisa.feature import Feature
from lisa.operating_system import Redhat, Ubuntu
from lisa.tools import Uname
from lisa.util import LisaException, SkippedException
from lisa.util.logger import get_logger

FEATURE_NAME_GPU = "Gpu"

class ComputeSDK(Enum):
    """Kinds of NVIDIA driver/SDK that can be requested for installation."""

    # GRID Driver
    GRID = 1
    # CUDA Driver
    CUDA = 2

# Link to the latest GRID driver.
# The direct download link is
# https://download.microsoft.com/download/9/5/c/95c667ff-ab95-4c56-89e0-e13e9a76782d/NVIDIA-Linux-x86_64-460.32.03-grid-azure.run
DEFAULT_GRID_DRIVER_URL = "https://go.microsoft.com/fwlink/?linkid=874272"


class Gpu(Feature):
    """Feature that installs NVIDIA GPU drivers (GRID or CUDA) on a node.

    ``_is_supported`` raises ``NotImplementedError`` here; presumably it is
    overridden by platform-specific subclasses (confirm against the platform
    implementations).
    """

    def __init__(self, node: Any, platform: Any) -> None:
        super().__init__(node, platform)
        self._log = get_logger("gpu", self.name(), self._node.log)

    @classmethod
    def name(cls) -> str:
        """Return the name identifying this feature."""
        return FEATURE_NAME_GPU

    def _is_supported(self) -> bool:
        """Report whether the node supports GPU; not implemented here."""
        raise NotImplementedError()

    def _install_grid_driver(self, driver_url: str) -> None:
        """Download the NVIDIA GRID driver installer and run it.

        Args:
            driver_url: URL of the GRID driver ``.run`` installer.

        Raises:
            LisaException: if the installer exits with a non-zero code.
        """
        self._log.debug("Starting GRID driver installation")
        # download the installer into the node's working path and mark it
        # executable so it can be run directly
        wget_tool = self._node.tools[Wget]
        grid_file_path = wget_tool.get(
            driver_url,
            str(self._node.working_path),
            "NVIDIA-Linux-x86_64-grid.run",
            executable=True,
        )
        result = self._node.execute(
            f"{grid_file_path} --no-nouveau-check --silent --no-cc-version-check"
        )
        if result.exit_code != 0:
            raise LisaException(
                "Failed to install the GRID driver! "
                f"exit-code: {result.exit_code} stderr: {result.stderr}"
            )

        self._log.debug("Successfully installed the GRID driver")

    def _install_cuda_driver(self, version: str) -> None:
        """Install the CUDA repository package matching the node's distro.

        Args:
            version: CUDA repo package version, e.g. ``10.1.105-1``.

        Raises:
            LisaException: if the distro is neither Redhat nor Ubuntu.
        """
        self._log.debug("Starting CUDA driver installation")
        os_version = self._node.os.os_version

        # NOTE(review): the repo package is fetched over plain HTTP and
        # installed with signed=False - consider HTTPS and/or signature
        # verification.
        if isinstance(self._node.os, Redhat):
            # only the major release number is part of the repo path
            release = os_version.release.split(".")[0]
            cuda_repo_pkg = f"cuda-repo-rhel{release}-{version}.x86_64.rpm"
            cuda_repo = (
                "http://developer.download.nvidia.com/"
                f"compute/cuda/repos/rhel{release}/x86_64/{cuda_repo_pkg}"
            )
        elif isinstance(self._node.os, Ubuntu):
            # strip every non-digit, e.g. "18.04" -> "1804"
            release = re.sub(r"[^0-9]+", "", os_version.release)
            cuda_repo_pkg = f"cuda-repo-ubuntu{release}_{version}_amd64.deb"
            cuda_repo = (
                "http://developer.download.nvidia.com/compute/"
                f"cuda/repos/ubuntu{release}/x86_64/{cuda_repo_pkg}"
            )
        else:
            raise LisaException(
                f"Distro {self._node.os.__class__.__name__}"
                " not supported to install CUDA driver."
            )

        # download and install the cuda driver package from the repo
        self._node.os._install_package_from_url(cuda_repo, signed=False)

    def _install_gpu_dep(self) -> None:
        """Install the distro packages the GPU drivers depend on.

        Raises:
            LisaException: if the distro is neither Redhat nor Ubuntu.
        """
        uname_tool = self._node.tools[Uname]
        uname_ver = uname_tool.get_linux_information().uname_version

        # install dependency libraries for distros
        if isinstance(self._node.os, Redhat):
            # install the kernel-devel and kernel-header packages
            package_name = f"kernel-devel-{uname_ver} kernel-headers-{uname_ver}"
            self._node.os.install_packages(package_name)
            # mesa-libEGL install/update is required to avoid a conflict
            # between libraries - bugzilla.redhat 1584740
            package_name = "mesa-libGL mesa-libEGL libglvnd-devel"
            self._node.os.install_packages(package_name)
            # install dkms
            package_name = "dkms"
            self._node.os.install_packages(package_name, signed=False)
        elif isinstance(self._node.os, Ubuntu):
            package_name = (
                f"build-essential libelf-dev linux-tools-{uname_ver}"
                f" linux-cloud-tools-{uname_ver} python libglvnd-dev ubuntu-desktop"
            )
            self._node.os.install_packages(package_name)
        else:
            raise LisaException(
                f"Distro {self._node.os.__class__.__name__}"
                " is not supported for GPU."
            )

    def check_support(self) -> None:
        """Raise ``SkippedException`` when ``_is_supported()`` returns False."""
        # TODO: more supportability can be defined here
        if not self._is_supported():
            raise SkippedException(f"GPU is not supported with distro {self._node.os}")

    def install_compute_sdk(
        self, driver: ComputeSDK = ComputeSDK.CUDA, version: str = ""
    ) -> None:
        """Install GPU dependencies, then the requested driver.

        Args:
            driver: which driver to install, GRID or CUDA.
            version: for CUDA, the repo package version; for GRID, the URL
                of the driver installer. When empty, a default is used
                (``DEFAULT_GRID_DRIVER_URL`` for GRID, ``10.1.105-1`` for
                CUDA).

        Raises:
            LisaException: if ``driver`` is not a known ``ComputeSDK`` member,
                the distro is unsupported, or the GRID installer fails.
        """
        # install GPU dependencies before installing driver
        self._install_gpu_dep()

        # install the driver
        if driver == ComputeSDK.GRID:
            if not version:
                version = DEFAULT_GRID_DRIVER_URL
            self._install_grid_driver(version)
        elif driver == ComputeSDK.CUDA:
            if not version:
                version = "10.1.105-1"
            self._install_cuda_driver(version)
        else:
            # report the offending argument, not the enum class itself
            raise LisaException(
                f"{driver} is invalid. "
                "No valid driver SDK name provided to install."
            )
Loading

0 comments on commit ea78d5a

Please sign in to comment.