Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ env:

jobs:
unit-tests:

runs-on: ubuntu-latest
timeout-minutes: 20
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
Expand Down Expand Up @@ -46,6 +47,7 @@ jobs:

slurm:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v4
- name: Build the Slurm Docker image
Expand All @@ -59,6 +61,7 @@ jobs:

flux:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v4
- name: Pull the Flux Docker image
Expand All @@ -72,6 +75,7 @@ jobs:

pbs:
runs-on: ubuntu-latest
timeout-minutes: 20
steps:
- uses: actions/checkout@v4
- name: Pull the PBS Docker image
Expand All @@ -85,6 +89,8 @@ jobs:

lint:
runs-on: ubuntu-latest
timeout-minutes: 5

strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
Expand Down Expand Up @@ -130,6 +136,8 @@ jobs:

security:
runs-on: ubuntu-latest
timeout-minutes: 5

strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13"]
Expand Down
5 changes: 4 additions & 1 deletion src/_canary/testcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from .paramset import ParameterSet
from .status import Status
from .util import filesystem as fs
from .util import kill_tree
from .util import logging
from .util._json import safeload
from .util._json import safesave
Expand Down Expand Up @@ -1756,7 +1757,7 @@ def cancel(sig, frame):
self.tee_run_output(proc)
get_process_metrics(proc, metrics=metrics)
if timeout > 0 and time.monotonic() - start_marker > timeout:
os.kill(proc.pid, signal.SIGINT)
proc.send_signal(signal.SIGINT)
raise TimeoutError
time.sleep(sleep_interval)
except MissingSourceError as e:
Expand Down Expand Up @@ -1795,6 +1796,8 @@ def cancel(sig, frame):
if metrics is not None:
self.add_measurement(**metrics)
logger.debug(f"{self}: finished with status {self.status}")
logger.debug(f"{self}: cleaning up resources (pid: {proc.pid})")
kill_tree(proc)
self.log_to_stdout(
f"Finished running {self.display_name} "
f"in {self.duration} s. with exit code {self.returncode}"
Expand Down
22 changes: 22 additions & 0 deletions src/_canary/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@

import psutil

from . import logging

logger = logging.get_logger(__name__)


def cpu_count(logical: bool | None = None) -> int:
from .. import config # lazy import to avoid circular deps
Expand All @@ -15,3 +19,21 @@ def cpu_count(logical: bool | None = None) -> int:
if count is None:
raise RuntimeError("Unable to determine the number of CPUs")
return count


def kill_tree(proc: psutil.Process | None) -> None:
    """Kill the process tree rooted at ``proc``.

    All descendants of ``proc`` (found recursively) are killed first, then
    ``proc`` itself.  Processes that have already exited are skipped, so the
    function can be called as best-effort cleanup on a process that has
    finished on its own.

    Args:
        proc: Root of the process tree to kill.  ``None`` is a no-op.
    """
    if proc is None:
        return

    logger.debug(f"Killing process tree (root={proc.pid})")
    try:
        descendants = proc.children(recursive=True)
    except psutil.NoSuchProcess:
        # Enumerating children can fail when the root is already gone; treat
        # that as "nothing left to kill" instead of letting cleanup raise.
        descendants = []

    for child in descendants:
        try:
            child.kill()
            logger.debug(f"--> killed child process ({child.pid}, root={proc.pid})")
        except psutil.NoSuchProcess:
            # The child exited between enumeration and kill().
            pass

    try:
        proc.kill()
    except psutil.NoSuchProcess as e:
        logger.debug(f"--> root process already finished ({e.pid})")
Loading