From 1b752fb77e7cbfd367a75da23f16acc33f62c9f5 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Wed, 19 Feb 2025 16:01:22 -0800 Subject: [PATCH 1/6] update housekeeping/perilog scripts for transition Problem: the scripts flux-run-{epilog,prolog,housekeeping} and their templated systemd unit files are no longer needed. The job manager will now start the site-provided scripts directly with the IMP in a transient systemd unit in the Flux systemd instance. Remove the systemd unit files. Instead of removing the run scripts and forcing sites to update the IMP config to run the site scripts immediately, retain the run scripts for now and modify them to exec the site scripts (making them a no-op). We can remove the run scripts later. --- configure.ac | 3 --- etc/Makefile.am | 5 +---- etc/flux-epilog@.service.in | 9 --------- etc/flux-housekeeping@.service.in | 18 ------------------ etc/flux-prolog@.service.in | 9 --------- src/cmd/flux-run-epilog.in | 22 ++++------------------ src/cmd/flux-run-housekeeping.in | 22 ++++------------------ src/cmd/flux-run-prolog.in | 22 ++++------------------ 8 files changed, 13 insertions(+), 97 deletions(-) delete mode 100644 etc/flux-epilog@.service.in delete mode 100644 etc/flux-housekeeping@.service.in delete mode 100644 etc/flux-prolog@.service.in diff --git a/configure.ac b/configure.ac index deac11f89894..b94ca82d45f7 100644 --- a/configure.ac +++ b/configure.ac @@ -651,10 +651,7 @@ AC_CONFIG_FILES( \ etc/flux-hostlist.pc \ etc/flux-taskmap.pc \ etc/flux.service \ - etc/flux-housekeeping@.service \ src/cmd/flux-run-housekeeping \ - etc/flux-prolog@.service \ - etc/flux-epilog@.service \ src/cmd/flux-run-prolog \ src/cmd/flux-run-epilog \ doc/Makefile \ diff --git a/etc/Makefile.am b/etc/Makefile.am index 546a94814228..6addab1b0375 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -1,9 +1,6 @@ #if HAVE_SYSTEMD systemdsystemunit_DATA = \ - flux.service \ - flux-housekeeping@.service \ - flux-prolog@.service \ - flux-epilog@.service + flux.service #endif tmpfilesdir = $(prefix)/lib/tmpfiles.d diff --git a/etc/flux-epilog@.service.in b/etc/flux-epilog@.service.in deleted file mode 100644 index 15c229dd84d8..000000000000 --- a/etc/flux-epilog@.service.in +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Epilog for Flux job %I -CollectMode=inactive-or-failed - -[Service] -Type=oneshot -EnvironmentFile=-@X_RUNSTATEDIR@/flux-epilog@%I.env -ExecStart=@X_SYSCONFDIR@/flux/system/epilog -ExecStopPost=-rm -f @X_RUNSTATEDIR@/flux-epilog@%I.env diff --git a/etc/flux-housekeeping@.service.in b/etc/flux-housekeeping@.service.in deleted file mode 100644 index cd6012351faf..000000000000 --- a/etc/flux-housekeeping@.service.in +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=Housekeeping for Flux job %I -CollectMode=inactive-or-failed - -[Service] -Type=oneshot -EnvironmentFile=-@X_RUNSTATEDIR@/flux-housekeeping@%I.env -ExecStart=@X_SYSCONFDIR@/flux/system/housekeeping -ExecStopPost=-rm -f @X_RUNSTATEDIR@/flux-housekeeping@%I.env -ExecStopPost=-sh -c '\ - if test "$SERVICE_RESULT" != "success"; then \ - message="housekeeping@%I ${SERVICE_RESULT:-failure}"; \ - if test "${EXIT_CODE}${EXIT_STATUS}"; then \ - message="$message: $EXIT_CODE $EXIT_STATUS"; \ - fi; \ - flux resource drain $(flux getattr rank) $message; \ - fi \ -' diff --git a/etc/flux-prolog@.service.in b/etc/flux-prolog@.service.in deleted file mode 100644 index 14d6a1a92ca3..000000000000 --- a/etc/flux-prolog@.service.in +++ /dev/null @@ -1,9 +0,0 @@ -[Unit] -Description=Prolog for Flux job %I -CollectMode=inactive-or-failed - -[Service] -Type=oneshot -EnvironmentFile=-@X_RUNSTATEDIR@/flux-prolog@%I.env -ExecStart=@X_SYSCONFDIR@/flux/system/prolog -ExecStopPost=-rm -f @X_RUNSTATEDIR@/flux-prolog@%I.env diff --git a/src/cmd/flux-run-epilog.in b/src/cmd/flux-run-epilog.in index f996b7f0a576..222fa8b804c3 100755 --- a/src/cmd/flux-run-epilog.in +++ b/src/cmd/flux-run-epilog.in @@ -1,21 +1,7 @@ #!/bin/sh -if test $FLUX_JOB_ID; then - FLUX_JOB_ID=$(flux job id --to=f58plain $FLUX_JOB_ID) -fi -unitname=flux-epilog@${FLUX_JOB_ID:-unknown} +# This script exists for transition purposes and will be removed. +# Please reconfigure the IMP [run.epilog] table to use +# path = "@X_SYSCONFDIR@/flux/system/epilog". -terminate() { - systemctl stop $unitname - exit 1 -} - -trap terminate INT TERM - -umask 022 -printenv >@X_RUNSTATEDIR@/${unitname}.env - -# Run systemctl start in background and `wait` for it so that the trap -# will run immediately when signal is received: -systemctl start $unitname --quiet & -wait $! +exec @X_SYSCONFDIR@/flux/system/epilog diff --git a/src/cmd/flux-run-housekeeping.in b/src/cmd/flux-run-housekeeping.in index 6d4bea11cb0a..71f67b97671f 100755 --- a/src/cmd/flux-run-housekeeping.in +++ b/src/cmd/flux-run-housekeeping.in @@ -1,21 +1,7 @@ #!/bin/sh -if test $FLUX_JOB_ID; then - FLUX_JOB_ID=$(flux job id --to=f58plain $FLUX_JOB_ID) -fi -unitname=flux-housekeeping@${FLUX_JOB_ID:-unknown} +# This script exists for transition purposes and will be removed. +# Please reconfigure the IMP [run.housekeeping] table to use +# path = "@X_SYSCONFDIR@/flux/system/housekeeping". -terminate() { - systemctl stop $unitname - exit 1 -} - -trap terminate INT TERM - -umask 022 -printenv >@X_RUNSTATEDIR@/${unitname}.env - -# Run systemctl start in background and `wait` for it so that the trap -# will run immediately when signal is received: -systemctl start $unitname --quiet & -wait $! +exec @X_SYSCONFDIR@/flux/system/housekeeping diff --git a/src/cmd/flux-run-prolog.in b/src/cmd/flux-run-prolog.in index 944001659958..131039c1784e 100755 --- a/src/cmd/flux-run-prolog.in +++ b/src/cmd/flux-run-prolog.in @@ -1,21 +1,7 @@ #!/bin/sh -if test $FLUX_JOB_ID; then - FLUX_JOB_ID=$(flux job id --to=f58plain $FLUX_JOB_ID) -fi -unitname=flux-prolog@${FLUX_JOB_ID:-unknown} +# This script exists for transition purposes and will be removed. +# Please reconfigure the IMP [run.prolog] table to use +# path = "@X_SYSCONFDIR@/flux/system/prolog". -terminate() { - systemctl stop $unitname - exit 1 -} - -trap terminate INT TERM - -umask 022 -printenv >@X_RUNSTATEDIR@/${unitname}.env - -# Run systemctl start in background and `wait` for it so that the trap -# will run immediately when signal is received: -systemctl start $unitname --quiet & -wait $! +exec @X_SYSCONFDIR@/flux/system/prolog From 4c90ee8fc94fbea469cfb5946e4caf1d84e81888 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Tue, 18 Feb 2025 13:40:09 -0800 Subject: [PATCH 2/6] job-manager: fix comment typo Problem: a housekeeping comment references 'flux imp exec' when flux 'flux imp run' was intended. Fix the comment. --- src/modules/job-manager/housekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/job-manager/housekeeping.c b/src/modules/job-manager/housekeeping.c index 6ea9573f356a..82b376fc66af 100644 --- a/src/modules/job-manager/housekeeping.c +++ b/src/modules/job-manager/housekeeping.c @@ -800,7 +800,7 @@ static int housekeeping_parse_config (const flux_conf_t *conf, return errprintf (error, "error creating housekeeping command"); } - // if no command line was defined, assume "imp exec housekeeping" + // if no command line was defined, assume "imp run housekeeping" else { if (!imp_path) { return errprintf (error, From 37f13b250b3dbd53d8831c2e888716478a221034 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 21 Feb 2025 06:02:40 -0800 Subject: [PATCH 3/6] housekeeping: refactor config parsing Problem: when housekeeping (re-)parses its configuration, it recreates the subprocess command template within the config parsing function, which will become unwieldy given planned changes. Without altering functionality, move the command template creation to a separate function. Also, drop the unused hk->imp_path member. --- src/modules/job-manager/housekeeping.c | 58 +++++++++++++------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/modules/job-manager/housekeeping.c b/src/modules/job-manager/housekeeping.c index 82b376fc66af..af3f2d099d4d 100644 --- a/src/modules/job-manager/housekeeping.c +++ b/src/modules/job-manager/housekeeping.c @@ -125,7 +125,6 @@ struct housekeeping { struct job_manager *ctx; flux_cmd_t *cmd; // NULL if not configured double release_after; - char *imp_path; zlistx_t *allocations; flux_msg_handler_t **handlers; }; @@ -742,6 +741,25 @@ static flux_cmd_t *create_cmd (json_t *cmdline) return cmd; } +/* Create the housekeeping command template based on parsed configuration. + * If no 'cmdline' was configured, assume "imp run housekeeping". + */ +static flux_cmd_t *create_cmd_template (json_t *cmdline, + const char *imp_path) +{ + flux_cmd_t *cmd; + json_t *o = NULL; + + if (!cmdline) { + if (!(o = json_pack ("[sss]", imp_path, "run", "housekeeping"))) + return NULL; + cmdline = o; + } + cmd = create_cmd (cmdline); + json_decref (o); + return cmd; +} + static int housekeeping_parse_config (const flux_conf_t *conf, flux_error_t *error, void *arg) @@ -755,7 +773,6 @@ static int housekeeping_parse_config (const flux_conf_t *conf, double release_after = default_release_after; flux_cmd_t *cmd = NULL; const char *imp_path = NULL; - char *imp_path_cpy = NULL; int use_systemd_unit = 0; if (flux_conf_unpack (conf, @@ -785,9 +802,15 @@ static int housekeeping_parse_config (const flux_conf_t *conf, " - ignoring"); } - // let job-exec handle exec errors + // let job-exec handle exec parse errors (void)flux_conf_unpack (conf, NULL, "{s?{s?s}}", "exec", "imp", &imp_path); + if (!cmdline && !imp_path) { + return errprintf (error, + "job-manager.housekeeping implies IMP" + " but exec.imp is undefined"); + } + if (release_after_fsd) { if (fsd_parse_duration (release_after_fsd, &release_after) < 0) return errprintf (error, @@ -795,40 +818,18 @@ static int housekeeping_parse_config (const flux_conf_t *conf, " FSD parse error"); } - if (cmdline) { - if (!(cmd = create_cmd (cmdline))) - return errprintf (error, "error creating housekeeping command"); - } + if (!(cmd = create_cmd_template (cmdline, imp_path))) + return errprintf (error, "could not create command template"); - // if no command line was defined, assume "imp run housekeeping" - else { - if (!imp_path) { - return errprintf (error, - "job-manager.housekeeping implies IMP" - " but exec.imp is undefined"); - } - json_t *o; - if ((o = json_pack ("[sss]", imp_path, "run", "housekeeping"))) - cmd = create_cmd (o); - json_decref (o); - if (!cmd) - return errprintf (error, "error creating housekeeping command"); - if (!(imp_path_cpy = strdup (imp_path))) { - flux_cmd_destroy (cmd); - return errprintf (error, "error duplicating IMP path"); - } - } done: flux_cmd_destroy (hk->cmd); hk->cmd = cmd; - free (hk->imp_path); - hk->imp_path = imp_path_cpy; hk->release_after = release_after; flux_log (hk->ctx->h, LOG_DEBUG, "housekeeping is %sconfigured%s", hk->cmd ? "" : "not ", - hk->imp_path ? " with IMP" : ""); + (imp_path && !cmdline) ? " with IMP" : ""); return 1; // allow dynamic changes } @@ -850,7 +851,6 @@ void housekeeping_ctx_destroy (struct housekeeping *hk) flux_cmd_destroy (hk->cmd); zlistx_destroy (&hk->allocations); flux_msg_handler_delvec (hk->handlers); - free (hk->imp_path); free (hk); errno = saved_errno; } From 620558f4831bcb2e49ab5c1ce627821ab68298b1 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 21 Feb 2025 06:15:12 -0800 Subject: [PATCH 4/6] housekeeping: let the exec service be configurable Problem: we'd like to start housekeeping in the flux systemd instance but it currently hard codes the "rexec" subprocess exec service. If exec.service is set in configuration, use that for housekeeping too. If using the IMP with sdexec, set command options for proxy signaling. Change the bulk exec name from "housekeeping" to "flux-housekeeping". This becomes the unit name under sdexec, which must begin with "flux-" to get the required IMP signal forwarding behavior --- src/modules/job-manager/housekeeping.c | 67 +++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 8 deletions(-) diff --git a/src/modules/job-manager/housekeeping.c b/src/modules/job-manager/housekeeping.c index af3f2d099d4d..17923a98f1a8 100644 --- a/src/modules/job-manager/housekeeping.c +++ b/src/modules/job-manager/housekeeping.c @@ -90,6 +90,7 @@ #include "src/common/libhostlist/hostlist.h" #include "src/common/libutil/fsd.h" #include "src/common/libutil/errprintf.h" +#include "src/common/libutil/errno_safe.h" #include "src/common/libjob/idf58.h" #include "src/common/libsubprocess/bulk-exec.h" #include "src/common/libsubprocess/command.h" @@ -107,6 +108,8 @@ extern char **environ; // -1 = never, 0 = immediate, >0 = time in seconds static const double default_release_after = -1; +static const char *default_exec_service = "rexec"; + struct allocation { flux_jobid_t id; struct rlist *rl; // R, diminished each time a subset is released @@ -119,12 +122,15 @@ struct allocation { double t_start; struct bulk_exec *bulk_exec; void *list_handle; + int kill_signal; }; struct housekeeping { struct job_manager *ctx; flux_cmd_t *cmd; // NULL if not configured + char *exec_service; double release_after; + int kill_signal; zlistx_t *allocations; flux_msg_handler_t **handlers; }; @@ -180,6 +186,8 @@ static struct allocation *allocation_create (struct housekeeping *hk, return NULL; a->hk = hk; a->id = id; + a->kill_signal = hk->kill_signal; + a->t_start = flux_reactor_now (flux_get_reactor (hk->ctx->h)); if (!(a->rl = rlist_from_json (R, NULL)) || !(a->pending = rlist_ranks (a->rl)) @@ -190,9 +198,9 @@ static struct allocation *allocation_create (struct housekeeping *hk, allocation_timeout, a)) || !(a->bulk_exec = bulk_exec_create (&bulk_ops, - "rexec", + hk->exec_service, id, - "housekeeping", + "flux-housekeeping", a)) || update_cmd_env (hk->cmd, id, userid) < 0 || bulk_exec_push_cmd (a->bulk_exec, a->pending, hk->cmd, 0) < 0) { @@ -697,7 +705,10 @@ static void housekeeping_kill_cb (flux_t *h, while (a) { if (a->id == jobid || jobid == FLUX_JOBID_ANY) { if (a->bulk_exec) { - f = bulk_exec_kill (a->bulk_exec, ids, signum); + int sig = signum; + if (sig == SIGKILL) + sig = a->kill_signal; + f = bulk_exec_kill (a->bulk_exec, ids, sig); if (flux_future_then (f, -1, kill_continuation, hk) < 0) flux_future_destroy (f); } @@ -743,21 +754,39 @@ static flux_cmd_t *create_cmd (json_t *cmdline) /* Create the housekeeping command template based on parsed configuration. * If no 'cmdline' was configured, assume "imp run housekeeping". + * In addition, if the IMP will run as the main process in a transient + * systemd unit, set options for correct signal forwarding semantics. + * 'kill_signal' is assigned (only) if a SIGKILL proxy signal is needed. */ static flux_cmd_t *create_cmd_template (json_t *cmdline, - const char *imp_path) + const char *exec_service, + const char *imp_path, + int *kill_signal) { - flux_cmd_t *cmd; + flux_cmd_t *cmd = NULL; json_t *o = NULL; + bool use_imp = false; if (!cmdline) { if (!(o = json_pack ("[sss]", imp_path, "run", "housekeeping"))) return NULL; cmdline = o; + use_imp = true; + } + if (!(cmd = create_cmd (cmdline))) + goto error; + if (use_imp && streq (exec_service, "sdexec")) { + if (flux_cmd_setopt (cmd, "SDEXEC_PROP_KillMode", "process") < 0 + || flux_cmd_setopt (cmd, "SDEXEC_PROP_SendSIGKILL", "off") < 0) + goto error; + *kill_signal = SIGUSR1; } - cmd = create_cmd (cmdline); json_decref (o); return cmd; +error: + flux_cmd_destroy (cmd); + ERRNO_SAFE_WRAP (json_decref, o); + return NULL; } static int housekeeping_parse_config (const flux_conf_t *conf, @@ -774,6 +803,9 @@ static int housekeeping_parse_config (const flux_conf_t *conf, flux_cmd_t *cmd = NULL; const char *imp_path = NULL; int use_systemd_unit = 0; + const char *exec_service = default_exec_service; + char *exec_service_cpy = NULL; + int kill_signal = SIGKILL; if (flux_conf_unpack (conf, &e, @@ -803,7 +835,12 @@ static int housekeeping_parse_config (const flux_conf_t *conf, } // let job-exec handle exec parse errors - (void)flux_conf_unpack (conf, NULL, "{s?{s?s}}", "exec", "imp", &imp_path); + (void)flux_conf_unpack (conf, + NULL, + "{s?{s?s s?s}}", + "exec", + "imp", &imp_path, + "service", &exec_service); if (!cmdline && !imp_path) { return errprintf (error, @@ -818,13 +855,23 @@ static int housekeeping_parse_config (const flux_conf_t *conf, " FSD parse error"); } - if (!(cmd = create_cmd_template (cmdline, imp_path))) + if (!(exec_service_cpy = strdup (exec_service))) + return errprintf (error, "error duplicating exec service"); + if (!(cmd = create_cmd_template (cmdline, + exec_service, + imp_path, + &kill_signal))) { + ERRNO_SAFE_WRAP (free, exec_service_cpy); return errprintf (error, "could not create command template"); + } done: flux_cmd_destroy (hk->cmd); hk->cmd = cmd; + free (hk->exec_service); + hk->exec_service = exec_service_cpy; hk->release_after = release_after; + hk->kill_signal = kill_signal; flux_log (hk->ctx->h, LOG_DEBUG, "housekeeping is %sconfigured%s", @@ -851,6 +898,7 @@ void housekeeping_ctx_destroy (struct housekeeping *hk) flux_cmd_destroy (hk->cmd); zlistx_destroy (&hk->allocations); flux_msg_handler_delvec (hk->handlers); + free (hk->exec_service); free (hk); errno = saved_errno; } @@ -865,10 +913,13 @@ struct housekeeping *housekeeping_ctx_create (struct job_manager *ctx) return NULL; hk->ctx = ctx; hk->release_after = default_release_after; + if (!(hk->exec_service = strdup (default_exec_service))) + goto error; if (!(hk->allocations = zlistx_new ())) { errno = ENOMEM; goto error; } + hk->kill_signal = SIGKILL; zlistx_set_destructor (hk->allocations, allocation_destructor); if (conf_register_callback (ctx->conf, &error, From fb9606da18cd0b9d88b064ecb7d587295deb7995 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 23 Feb 2025 07:34:21 -0800 Subject: [PATCH 5/6] perilog: drop unused struct member Problem: perilog_procdesc.uses_imp is unused Drop unused struct member. --- src/modules/job-manager/plugins/perilog.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/modules/job-manager/plugins/perilog.c b/src/modules/job-manager/plugins/perilog.c index 291b725d807a..e21777602835 100644 --- a/src/modules/job-manager/plugins/perilog.c +++ b/src/modules/job-manager/plugins/perilog.c @@ -75,7 +75,6 @@ extern char **environ; */ struct perilog_procdesc { flux_cmd_t *cmd; - bool uses_imp; bool prolog; bool per_rank; bool cancel_on_exception; @@ -184,7 +183,6 @@ static struct perilog_procdesc *perilog_procdesc_create (json_t *o, double kill_timeout = -1.; flux_cmd_t *cmd = NULL; json_t *command = NULL; - bool uses_imp = false; json_error_t error; const char *name = prolog ? "prolog" : "epilog"; @@ -230,7 +228,6 @@ static struct perilog_procdesc *perilog_procdesc_create (json_t *o, errprintf (errp, "error creating %s command", name); return NULL; } - uses_imp = true; } if (!(pd = calloc (1, sizeof (*pd)))) { errprintf (errp, "Out of memory"); @@ -257,7 +254,6 @@ static struct perilog_procdesc *perilog_procdesc_create (json_t *o, pd->kill_timeout = kill_timeout > 0. ? kill_timeout : default_kill_timeout; pd->per_rank = per_rank; pd->prolog = prolog; - pd->uses_imp = uses_imp; /* If cancel_on_exception unset, default to prolog=true, epilog=false * Otherwise, use set value: From 51fa4c938d37c74910ba8260da393f77c01c7b8d Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 23 Feb 2025 07:41:39 -0800 Subject: [PATCH 6/6] perilog: let the exec service be configurable Problem: we'd like to start prolog/epilog in the flux systemd instance but it currently hard codes the "rexec" subprocess exec service. If exec.service is set in configuration, use that for perilog too. If using the IMP with sdexec, set command options for proxy signaling. Change the bulk exec name from prolog/epilog to flux-prolog/flux-epilog". This becomes the unit name under sdexec, which must begin with "flux-" to get the required IMP signal forwarding behavior Although perilog never sends SIGKILL after it was disabled in #6330, set a kill_signal to SIGUSR1 or SIGKILL in case we want to revert that change in the future. --- src/modules/job-manager/plugins/perilog.c | 42 +++++++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/src/modules/job-manager/plugins/perilog.c b/src/modules/job-manager/plugins/perilog.c index e21777602835..07b0bc0f1317 100644 --- a/src/modules/job-manager/plugins/perilog.c +++ b/src/modules/job-manager/plugins/perilog.c @@ -80,6 +80,7 @@ struct perilog_procdesc { bool cancel_on_exception; double timeout; double kill_timeout; + int kill_signal; }; /* Global prolog/epilog configuration @@ -88,6 +89,7 @@ static struct perilog_conf { bool initialized; char *imp_path; + char *exec_service; struct perilog_procdesc *prolog; struct perilog_procdesc *epilog; @@ -125,6 +127,8 @@ struct perilog_proc { */ static double default_kill_timeout = 60.; +static const char *default_exec_service = "rexec"; + static struct perilog_proc *procdesc_run (flux_t *h, flux_plugin_t *p, struct perilog_procdesc *pd, @@ -181,6 +185,7 @@ static struct perilog_procdesc *perilog_procdesc_create (json_t *o, int cancel_on_exception = -1; const char *timeout; double kill_timeout = -1.; + bool use_imp = false; flux_cmd_t *cmd = NULL; json_t *command = NULL; json_error_t error; @@ -228,6 +233,7 @@ static struct perilog_procdesc *perilog_procdesc_create (json_t *o, errprintf (errp, "error creating %s command", name); return NULL; } + use_imp = true; } if (!(pd = calloc (1, sizeof (*pd)))) { errprintf (errp, "Out of memory"); @@ -250,6 +256,17 @@ static struct perilog_procdesc *perilog_procdesc_create (json_t *o, goto error; } + /* If the IMP will run as the main process in a transient + * systemd unit, set options for correct signal forwarding semantics. + */ + if (use_imp && streq (perilog_config.exec_service, "sdexec")) { + if (flux_cmd_setopt (cmd, "SDEXEC_PROP_KillMode", "process") < 0 + || flux_cmd_setopt (cmd, "SDEXEC_PROP_SendSIGKILL", "off") < 0) { + errprintf (errp, "error setting sdexec options"); + goto error; + } + pd->kill_signal = SIGUSR1; + } pd->cmd = cmd; pd->kill_timeout = kill_timeout > 0. ? kill_timeout : default_kill_timeout; pd->per_rank = per_rank; @@ -815,10 +832,16 @@ static struct perilog_proc *procdesc_run (flux_t *h, goto error; } + /* If using sdexec, bulk-exec sets the unit name (and hence the cgroup + * name) to "NAME-RANK-JOBID.service", where NAME is the bulk_name below. + * The IMP checks the cgroup for a "flux-" prefix to change its signal + * forwarding and straggler handling behavior, so use that prefix here. + */ + char *bulk_name = proc->prolog ? "flux-prolog" : "flux-epilog"; if (!(bulk_exec = bulk_exec_create (&ops, - "rexec", + perilog_config.exec_service, id, - perilog_proc_name (proc), + bulk_name, NULL)) || bulk_exec_push_cmd (bulk_exec, ranks, pd->cmd, 0) < 0) { flux_log_error (h, @@ -1224,6 +1247,7 @@ static void free_config (struct perilog_conf *conf) perilog_procdesc_destroy (conf->epilog); zhashx_destroy (&conf->processes); zlistx_destroy (&conf->log_ignore); + free (conf->exec_service); } /* Initialize a perilog_config object @@ -1241,6 +1265,10 @@ static int conf_init (flux_plugin_t *p, struct perilog_conf *conf) } zhashx_set_destructor (conf->processes, perilog_proc_destructor); + if (!(conf->exec_service = strdup (default_exec_service))) { + flux_log_error (h, "perilog: failed to duplicate exec service name"); + goto error; + } /* Watch for broker transition to CLEANUP. */ @@ -1440,6 +1468,7 @@ static int conf_update_cb (flux_plugin_t *p, flux_error_t error; json_error_t jerror; const char *imp_path = NULL; + const char *exec_service = default_exec_service; json_t *prolog_config = NULL; json_t *epilog_config = NULL; struct perilog_procdesc *prolog = NULL; @@ -1468,9 +1497,10 @@ static int conf_update_cb (flux_plugin_t *p, if (json_unpack_ex (conf, &jerror, 0, - "{s?{s?s} s?{s?o s?o s?{s?o}}}", + "{s?{s?s s?s} s?{s?o s?o s?{s?o}}}", "exec", "imp", &imp_path, + "service", &exec_service, "job-manager", "prolog", &prolog_config, "epilog", &epilog_config, @@ -1482,6 +1512,12 @@ static int conf_update_cb (flux_plugin_t *p, goto error; } + free (perilog_config.exec_service); + if (!(perilog_config.exec_service = strdup (exec_service))) { + errprintf (&error, "failed to duplicate exec service name"); + goto error; + } + /* Capture IMP path before first call to perilog_procdesc_create() */ free (perilog_config.imp_path);