feat: add testnet cleanup on interrupted test run

- Introduced the `_cleanup_testnet` function to handle testnet cleanup.
- Modified the trap to call the `_interrupted` function on SIGINT.
- Added the `pytest_keyboard_interrupt` hook to create a status file.
- Updated `setup_venv.sh` to create the venv only if it doesn't exist.
- Enhanced `testenv_setup_teardown` to handle interrupted test runs.
IntersectMBO · Oct 1, 2024 · 0b10dee · 0b10dee
1 parent 2cd21ea
commit 0b10dee
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 6 deletions.
diff --git a/.github/regression.sh b/.github/regression.sh
@@ -1,5 +1,5 @@
 #! /usr/bin/env -S nix develop --accept-flake-config .#base -c bash
-# shellcheck shell=bash
+# shellcheck shell=bash disable=SC2317
 
 set -xeuo pipefail
 
@@ -139,8 +139,39 @@ _cleanup() {
   stop_postgres || true
 }
 
+_cleanup_testnet() {
+  [ -z "${BOOTSTRAP_DIR:-""}" ] && return
+
+  _PYTEST_CURRENT="$(find "$WORKDIR" -type l -name pytest-current)"
+  [ -z "$_PYTEST_CURRENT" ] && return
+  _PYTEST_CURRENT="$(readlink -m "$_PYTEST_CURRENT")"
+  export _PYTEST_CURRENT
+
+  echo "::endgroup::" # end group for the interrupted group
+  echo "::group::Testnet cleanup"
+
+  # shellcheck disable=SC2016
+  nix develop --accept-flake-config .#venv --command bash -c '
+    . .github/setup_venv.sh
+    export PATH="${PWD}/.bin":"$WORKDIR/cardano-cli/cardano-cli-build/bin":"$PATH"
+    export CARDANO_NODE_SOCKET_PATH="$CARDANO_NODE_SOCKET_PATH_CI"
+    cleanup_dir="${_PYTEST_CURRENT}/../cleanup-${_PYTEST_CURRENT##*/}-script"
+    mkdir "$cleanup_dir"
+    cd "$cleanup_dir"
+    testnet-cleanup -a "$_PYTEST_CURRENT"
+  '
+
+  echo "::endgroup::"
+}
+
 # cleanup on Ctrl+C
-trap 'set +e; _cleanup; exit 130' SIGINT
+_interrupted() {
+  # Do testnet cleanup only on interrupted testrun. When not interrupted,
+  # cleanup is done as part of a testrun.
+  _cleanup_testnet
+  _cleanup
+}
+trap 'set +e; _interrupted; exit 130' SIGINT
 
 echo "::group::Nix env setup"
 printf "start: %(%H:%M:%S)T\n" -1
@@ -156,7 +187,7 @@ nix develop --accept-flake-config .#venv --command bash -c '
   echo "::endgroup::"  # end group for "Nix env setup"
 
   echo "::group::Python venv setup"
-  . .github/setup_venv.sh
+  . .github/setup_venv.sh clean
   echo "::endgroup::"  # end group for "Python venv setup"
 
   echo "::group::Pytest run"

diff --git a/.github/setup_venv.sh b/.github/setup_venv.sh
@@ -6,7 +6,10 @@ if [ "${1:-""}" = "clean" ]; then
   rm -rf "$VENV_DIR"
 fi
 
-python3 -m venv "$VENV_DIR"
+if [ ! -e "$VENV_DIR" ]; then
+  python3 -m venv "$VENV_DIR"
+fi
+
 # shellcheck disable=SC1090,SC1091
 . "$VENV_DIR/bin/activate"
 

diff --git a/cardano_node_tests/tests/conftest.py b/cardano_node_tests/tests/conftest.py
@@ -30,6 +30,7 @@
 from cardano_node_tests.utils.versions import VERSIONS
 
 LOGGER = logging.getLogger(__name__)
+INTERRUPTED_NAME = ".session_interrupted"
 
 # make sure there's enough time to stop all cluster instances at the end of session
 workermanage.NodeManager.EXIT_TIMEOUT = 30
@@ -183,6 +184,13 @@ def _skip_disabled(item: tp.Any) -> None:
         _skip_disabled(item)
 
 
+@pytest.hookimpl(tryfirst=True)
+def pytest_keyboard_interrupt() -> None:
+    """Create a status file indicating that the test run was interrupted."""
+    session_basetemp = temptools.get_basetemp()
+    (session_basetemp / INTERRUPTED_NAME).touch()
+
+
 @pytest.fixture(scope="session")
 def init_pytest_temp_dirs(tmp_path_factory: TempPathFactory) -> None:
     """Init `PytestTempDirs`."""
@@ -262,26 +270,35 @@ def testenv_setup_teardown(
 ) -> tp.Generator[None, None, None]:
     """Setup and teardown test environment."""
     pytest_root_tmp = temptools.get_pytest_root_tmp()
+    session_basetemp = temptools.get_basetemp()
     running_session_glob = ".running_session"
 
     with locking.FileLockIfXdist(f"{pytest_root_tmp}/{cluster_management.CLUSTER_LOCK}"):
         # Save environment info for Allure
         if not list(pytest_root_tmp.glob(f"{running_session_glob}_*")):
             _save_env_for_allure(request.config)
 
+        # Remove dangling files from previous interrupted test run
+        (session_basetemp / INTERRUPTED_NAME).unlink(missing_ok=True)
+
+        # Create file indicating that testing session on this worker is running
         (pytest_root_tmp / f"{running_session_glob}_{worker_id}").touch()
 
     yield
 
     with locking.FileLockIfXdist(f"{pytest_root_tmp}/{cluster_management.CLUSTER_LOCK}"):
+        # Remove file indicating that testing session on this worker is running
+        (pytest_root_tmp / f"{running_session_glob}_{worker_id}").unlink()
+
         # Save CLI coverage to dir specified by `--cli-coverage-dir`
         cluster_manager_obj = cluster_management.ClusterManager(
             worker_id=worker_id, pytest_config=request.config
         )
         cluster_manager_obj.save_worker_cli_coverage()
 
-        # Remove file indicating that testing session on this worker is running
-        (pytest_root_tmp / f"{running_session_glob}_{worker_id}").unlink()
+        # Don't do any cleanup on keyboard interrupt
+        if (session_basetemp / INTERRUPTED_NAME).exists():
+            return None
 
         # Perform cleanup if this is the last running pytest worker
         if not list(pytest_root_tmp.glob(f"{running_session_glob}_*")):