From cc549d97b772185d70bbeabc63cb88e102851ee0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joonhyung=20Lee/=EC=9D=B4=EC=A4=80=ED=98=95?=
 <33523965+veritas9872@users.noreply.github.com>
Date: Mon, 10 Apr 2023 18:12:48 +0900
Subject: [PATCH] Tests and versions. (#116)

* Create a conftest.py file to allow the number of iterations in the inference
test to be configurable. This allows the tests to be used as inference speed
benchmarks, as originally intended. The allow_tf32 flags are now logged as
well.

* Add `tmux` to the `simple` service's default environment.

* Make the GPU used by the tests configurable as well.

* Add an explanation of how to run the configurable test for inference speed
comparison.

* Fix typo.

* Update the installed Docker Compose version.

* Update pytest minimum version to 7.3.0, which fixes the walrus operator bug.

* Remove unnecessary import from conftest.py.

* Reformat project.

* Update documentation to mention that runtime speeds will probably be similar
for `conda` installs and source builds. I have confirmed that the `conda`
installs use `cuDNN` and probably `magma` properly. The speeds were identical
on the hardware I tested.

* Update all Docker BuildKit frontend versions to simply `1`, which will use
the latest BuildKit syntax until the next major release.
---
 Dockerfile                    |  2 +-
 Makefile                      |  2 +-
 README.md                     | 19 ++++++++++---------
 dockerfiles/hub.Dockerfile    |  2 +-
 dockerfiles/ngc.Dockerfile    |  2 +-
 dockerfiles/simple.Dockerfile |  2 +-
 pyproject.toml                |  4 ++--
 reqs/simple-environment.yaml  |  3 ++-
 tests/README.md               |  5 +++++
 tests/conftest.py             |  3 +++
 tests/test_run.py             | 20 +++++++++++++++++---
 11 files changed, 44 insertions(+), 20 deletions(-)
 create mode 100644 tests/conftest.py

diff --git a/Dockerfile b/Dockerfile
index 705c498..301b821 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-# syntax = docker/dockerfile:1.4
+# syntax = docker/dockerfile:1
 # The top line is used by BuildKit. _**DO NOT ERASE IT**_.
 
 # Use `export BUILDKIT_PROGRESS=plain` in the host terminal to see full build logs.
diff --git a/Makefile b/Makefile
index 6026073..c15efe6 100644
--- a/Makefile
+++ b/Makefile
@@ -120,7 +120,7 @@ ls: # List all services.
 
 # Utility for installing Docker Compose on Linux (but not WSL) systems.
 # Visit https://docs.docker.com/compose/install for the full documentation.
-COMPOSE_VERSION = v2.15.1
+COMPOSE_VERSION = v2.17.2
 COMPOSE_OS_ARCH = linux-x86_64
 COMPOSE_URL = https://github.com/docker/compose/releases/download/${COMPOSE_VERSION}/docker-compose-${COMPOSE_OS_ARCH}
 COMPOSE_PATH = ${HOME}/.docker/cli-plugins
diff --git a/README.md b/README.md
index 027d915..d7a76f4 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ If this is your first time using this project, follow these steps:
    for the latest installation information.
    Note that Docker Compose V2 is available for WSL users with Docker Desktop by default.
 
-4. Run `make env SERVICE=(train|devel|ngc|hub|simple)` on the terminal 
+4. Run `make env SERVICE=(train|devel|ngc|hub|simple)` on the terminal
    at project root to create a basic `.env` file.
    The `.env` file provides environment variables for `docker-compose.yaml`,
    allowing different users and machines to set their own variables as required.
@@ -67,17 +67,17 @@ If this is your first time using this project, follow these steps:
    Add configurations that should not be shared via source control there.
    For example, volume-mount pairs specific to each host machine.
 
-### Explanation of services 
+### Explanation of services
 
 Different Docker Compose services are organized to serve different needs.
 
 - `train`, the default service, should be used when compiled dependencies are
-  necessary or when PyTorch needs to be compiled from source due to 
+  necessary or when PyTorch needs to be compiled from source due to
   Compute Capability issues, etc.
-- `devel` is designed for PyTorch CUDA/C++ developers who need to recompile 
+- `devel` is designed for PyTorch CUDA/C++ developers who need to recompile
   frequently and have many complex dependencies.
 - `ngc` is derived from the official NVIDIA PyTorch HPC images with the option
-  to install additional packages. It is recommended for users who wish to base 
+  to install additional packages. It is recommended for users who wish to base
   their projects on the NGC images provided by NVIDIA.
   Note that the NGC images change greatly between different releases
   and that configurations for one release may not work for another one.
@@ -91,7 +91,8 @@ Different Docker Compose services are organized to serve different needs.
   `pip` packages can also be installed via `conda`. Also, the base image can
   be configured to use images other than the Official Linux Docker images by
   specifying the `BASE_IMAGE` argument directly in the `.env` file.
-  PyTorch runtime performance may be superior in official NVIDIA CUDA images.
+  PyTorch runtime performance may be superior in official NVIDIA CUDA images
+  under certain circumstances. Use the tests to benchmark runtime speeds.
 
 **The `simple` service is recommended for users without compiled dependencies.**
 The `Makefile` has been configured to take values specified in the `.env` file
@@ -250,7 +251,7 @@ Please read the Makefile to see the exact commands.
   To fix this issue, create a new directory on the host to mount the containers' `.vscode-server` directories.
   For example, one can set a volume pair as `${HOME}/.vscode-project1:/home/${USR}/.vscode-server` for project1.
   Do not forget to create `${HOME}/.vscode-project1` on the host first. Otherwise, the directory will be owned by `root`,
-  which will cause VSCode to stall indefinately.
+  which will cause VSCode to stall indefinitely.
 
 - If any networking issues arise, check `docker network ls` and check for conflicts.
   Most networking and SSH problems can be solved by running `docker network prune`.
@@ -261,7 +262,7 @@ The main components of the project are as follows. The other files are utilities
 1. Dockerfile
 2. docker-compose.yaml
 3. docker-compose.override.yaml
-4. reqs/\*requirements.txt
+4. reqs/(`*requirements.txt`|`*environment.yaml`)
 5. .env
 
 When the user inputs `make up` or another `make` command,
@@ -497,7 +498,7 @@ For other VSCode problems, try deleting `~/.vscode-server` on the host.
    [not fail-safe](https://stackoverflow.com/a/8573310/9289275).
 
 6. `torch.cuda.is_available()` will return a `... UserWarning:
-   CUDA initialization:...` error or the image will simply not start if
+CUDA initialization:...` error or the image will simply not start if
    the CUDA driver on the host is incompatible with the CUDA version on
    the Docker image. Either upgrade the host CUDA driver or downgrade
    the CUDA version of the image. Check the
diff --git a/dockerfiles/hub.Dockerfile b/dockerfiles/hub.Dockerfile
index a473ee1..d0b7b29 100644
--- a/dockerfiles/hub.Dockerfile
+++ b/dockerfiles/hub.Dockerfile
@@ -1,4 +1,4 @@
-# syntax = docker/dockerfile:1.4
+# syntax = docker/dockerfile:1
 # The top line is used by BuildKit. _**DO NOT ERASE IT**_.
 ARG PYTORCH_VERSION
 ARG CUDA_SHORT_VERSION
diff --git a/dockerfiles/ngc.Dockerfile b/dockerfiles/ngc.Dockerfile
index 8a52de8..ead4abb 100644
--- a/dockerfiles/ngc.Dockerfile
+++ b/dockerfiles/ngc.Dockerfile
@@ -1,4 +1,4 @@
-# syntax = docker/dockerfile:1.4
+# syntax = docker/dockerfile:1
 # The top line is used by BuildKit. _**DO NOT ERASE IT**_.
 
 ARG INTERACTIVE_MODE
diff --git a/dockerfiles/simple.Dockerfile b/dockerfiles/simple.Dockerfile
index 6da531b..4f82ad6 100644
--- a/dockerfiles/simple.Dockerfile
+++ b/dockerfiles/simple.Dockerfile
@@ -1,4 +1,4 @@
-# syntax = docker/dockerfile:1.4
+# syntax = docker/dockerfile:1
 # The top line is used by BuildKit. _**DO NOT ERASE IT**_.
 
 # This Dockerfile exists to provide a method of installing all packages from
diff --git a/pyproject.toml b/pyproject.toml
index 796cbf6..592fc8d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -13,7 +13,7 @@ target-version = ['py38', 'py39', 'py310']
 include = '\.pyi?$'
 
 [tool.pytest.ini_options]
-minversion = "7.0" # Update to 7.2.3 as soon as it becomes available.
+minversion = "7.3.0"
 addopts = """\
     --capture=tee-sys \
     --doctest-modules \
@@ -85,7 +85,7 @@ max-doc-length = 80
 [tool.ruff.per-file-ignores]
 # Ignore `E402` (import violations) in all `__init__.py` files.
 "__init__.py" = ["E402"]
-"*test*.py" = ["D"] # ignore all docstring lints in tests
+"*test*.py" = ["D"] # Ignore all docstring lints in tests.
 
 [tool.ruff.mccabe]
 # Unlike Flake8, default to a complexity level of 10.
diff --git a/reqs/simple-environment.yaml b/reqs/simple-environment.yaml
index 060c53c..19689bf 100644
--- a/reqs/simple-environment.yaml
+++ b/reqs/simple-environment.yaml
@@ -10,6 +10,7 @@ dependencies: # Use conda packages if possible.
   - pytorch::pytorch-cuda==11.8
   - jemalloc
   - intel::mkl
-  - intel::numpy # Use Numpy built with the Intel compiler for best performance with MKL.
+  - intel::numpy  # Use Numpy built with the Intel compiler for best performance with MKL.
   - pytest
+  - tmux==3.2a
   - tqdm
diff --git a/tests/README.md b/tests/README.md
index 327e2f4..2edb533 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -5,3 +5,8 @@
 PyTest is the recommended testing platform.
 Simple unit tests should preferably be written as doctests,
 with more advanced tests being placed in this directory.
+
+To use the `test_run.py` file as an inference speed benchmark, which was its
+original purpose, use the following command to run 1024 iterations on GPU 0:
+
+`python -m pytest tests/test_run.py::test_inference_run --gpu 0 --num_steps 1024`
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..32cab5c
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,3 @@
+def pytest_addoption(parser):
+    parser.addoption("--num_steps", type=int, action="store", default=64)
+    parser.addoption("--gpu", type=int, action="store", default=0)
diff --git a/tests/test_run.py b/tests/test_run.py
index 5848f94..c601af6 100644
--- a/tests/test_run.py
+++ b/tests/test_run.py
@@ -44,10 +44,16 @@ def enable_cudnn_benchmarking():
     torch.backends.cudnn.benchmark = True
 
 
+@pytest.fixture(scope="session", autouse=True)
+def allow_tf32():
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+
+
 @pytest.fixture(scope="session")
-def device(gpu: int = 0) -> torch.device:
+def device(pytestconfig) -> torch.device:
     if torch.cuda.is_available():
-        device = torch.device(f"cuda:{int(gpu)}")
+        device = torch.device(f"cuda:{pytestconfig.getoption('gpu')}")
     else:
         device = torch.device("cpu")
         msg = "No GPUs found for this container. Please check run configurations."
@@ -77,13 +83,18 @@ class Config(NamedTuple):
 ]
 
 
+@pytest.fixture(scope="session")
+def num_steps(pytestconfig):
+    return pytestconfig.getoption("num_steps")
+
+
 @pytest.mark.parametrize(["name", "network_func", "input_shapes"], _configs)
 def test_inference_run(
     name: str,
     network_func: Callable[[], nn.Module],
     input_shapes: Sequence[Sequence[int]],
     device: torch.device,
-    num_steps: int = 64,
+    num_steps,
     enable_amp: bool = False,
     enable_scripting: bool = False,
 ):
@@ -153,6 +164,9 @@ def get_cuda_info(device): # Using as a fixture to get device info.
     logger.info(f"PyTorch Architecture List: {al}")
     logger.info(f"GPU Device Name: {dp.name}")
     logger.info(f"GPU Compute Capability: {dp.major}.{dp.minor}")
+    # No way to check if the GPU has TF32 hardware, only whether it is allowed.
+    logger.info(f"MatMul TF32 Allowed: {torch.backends.cuda.matmul.allow_tf32}")
+    logger.info(f"cuDNN TF32 Allowed: {torch.backends.cudnn.allow_tf32}")
 
     # Python3.7+ required for `subprocess` to work as intended.
     if int(platform.python_version_tuple()[1]) > 6:
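
As a reference for reviewers, the option-plumbing introduced above can be tried
outside this repository with a minimal, self-contained sketch. The file names
and the toy test below are hypothetical illustrations of `pytest_addoption`
combined with the built-in `pytestconfig` fixture; the CPU fallback mirrors the
`device` fixture in tests/test_run.py. They are not part of the applied patch.

    # conftest.py (hypothetical standalone example)
    def pytest_addoption(parser):
        # Register custom options. Pytest exposes them via `config.getoption`.
        parser.addoption("--num_steps", type=int, action="store", default=64)
        parser.addoption("--gpu", type=int, action="store", default=0)

    # test_example.py (hypothetical standalone example)
    import pytest
    import torch

    @pytest.fixture(scope="session")
    def device(pytestconfig) -> torch.device:
        # Fall back to the CPU when no GPU is visible, as in tests/test_run.py.
        if torch.cuda.is_available():
            return torch.device(f"cuda:{pytestconfig.getoption('gpu')}")
        return torch.device("cpu")

    def test_matmul_steps(pytestconfig, device):
        # Run `--num_steps` small matrix multiplications on the chosen device.
        num_steps = pytestconfig.getoption("num_steps")
        x = torch.ones(8, 8, device=device)
        for _ in range(num_steps):
            x = x @ x / 8  # The division keeps values bounded across steps.
        assert torch.isfinite(x).all()

For example, `python -m pytest test_example.py --gpu 0 --num_steps 1024` would
run the toy test for 1024 iterations on GPU 0, matching the usage of the real
benchmark documented in tests/README.md.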