From 6a97fd06f5b8b34ed1d7d8be1eb9ee244352440a Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Thu, 9 Feb 2023 17:28:22 +0100 Subject: [PATCH 1/2] Save slurm logs in a dedicated directory with a readable filename And set a readable job name. --- damnit/backend/extract_data.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/damnit/backend/extract_data.py b/damnit/backend/extract_data.py index dab3eef6..14273544 100644 --- a/damnit/backend/extract_data.py +++ b/damnit/backend/extract_data.py @@ -191,11 +191,19 @@ def extract_and_ingest(self, proposal, run, cluster=False, ctx = self.ctx_whole.filter(run_data=run_data, name_matches=match, cluster=cluster) ctx_slurm = self.ctx_whole.filter(run_data=run_data, name_matches=match, cluster=True) if set(ctx_slurm.vars) > set(ctx.vars): + slurm_logs_dir = Path.cwd() / "slurm_logs" + slurm_logs_dir.mkdir(exist_ok=True) + slurm_logs_dir.chmod(0o777) + python_cmd = [sys.executable, '-m', 'damnit.backend.extract_data', '--cluster-job', str(proposal), str(run), run_data.value] res = subprocess.run([ 'sbatch', '--parsable', *self.slurm_options(), + '-o', str(slurm_logs_dir / f"r{run}-p{proposal}-%j.out"), + # Note: we put the run number first so that it's visible in + # squeue's default 11-character column for the JobName. + '--job-name', f"r{run}-p{proposal}-damnit", '--wrap', shlex.join(python_cmd) ], stdout=subprocess.PIPE, text=True) job_id = res.stdout.partition(';')[0] From c082f3e50718d9fba2e2643c2fce9223859c78f1 Mon Sep 17 00:00:00 2001 From: JamesWrigley Date: Sat, 11 Feb 2023 22:06:07 +0100 Subject: [PATCH 2/2] Make the slurm timeout configurable And increase the default timeout to two hours. --- damnit/backend/extract_data.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/damnit/backend/extract_data.py b/damnit/backend/extract_data.py index 14273544..f74ea305 100644 --- a/damnit/backend/extract_data.py +++ b/damnit/backend/extract_data.py @@ -152,10 +152,15 @@ def proposal(self): return self._proposal def slurm_options(self): + opts = ["--time", self.db.metameta.get("slurm_time", "02:00:00")] + if reservation := self.db.metameta.get('slurm_reservation', ''): - return ['--reservation', reservation] - partition = self.db.metameta.get('slurm_partition', '') or default_slurm_partition() - return ['--partition', partition] + opts.extend(['--reservation', reservation]) + else: + partition = self.db.metameta.get('slurm_partition', '') or default_slurm_partition() + opts.extend(['--partition', partition]) + + return opts def extract_and_ingest(self, proposal, run, cluster=False, run_data=RunData.ALL, match=()):