Skip to content

Commit

Permalink
fix: Fix cmd execution mode detection
Browse files Browse the repository at this point in the history
* We drop all privileges by the time we reach execution mode detection, so we need to check whether the process has the capabilities required for capability mode. Use sudo as the last option.

* It seems that executing as the slurm user is not guaranteed to return the jobs of all users, so we always execute the command as the root user.

Signed-off-by: Mahendra Paipuri <[email protected]>
  • Loading branch information
mahendrapaipuri committed Oct 3, 2024
1 parent aaad86b commit d1c4d0a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 39 deletions.
55 changes: 18 additions & 37 deletions pkg/api/resource/slurm/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,46 +67,28 @@ func preflightsCLI(slurm *slurmScheduler) error {
// sacct path
sacctPath := filepath.Join(slurm.cluster.CLI.Path, "sacct")

// If current user is slurm or root pass checks
if currentUser, err := user.Current(); err == nil && (currentUser.Username == "slurm" || currentUser.Uid == "0") {
// If current user root pass checks
if currentUser, err := user.Current(); err == nil && currentUser.Uid == "0" {
slurm.cmdExecMode = capabilityMode
level.Info(slurm.logger).
Log("msg", "Current user have enough privileges to execute SLURM commands", "user", currentUser.Username)

return nil
}

// First try to run as slurm user in a subprocess. If current process have capabilities
// it will be a success
slurmUser, err := user.Lookup("slurm")
if err != nil {
level.Debug(slurm.logger).
Log("msg", "User slurm not found. Next attempt to execute SLURM commands with sudo", "err", err)

goto sudomode
}

slurmUserUID, err = strconv.Atoi(slurmUser.Uid)
if err != nil {
level.Debug(slurm.logger).
Log("msg", "Failed to convert SLURM user uid to int. Next attempt to execute SLURM commands with sudo", "uid", slurmUserUID, "err", err)

goto sudomode
}

slurmUserGID, err = strconv.Atoi(slurmUser.Gid)
if err != nil {
level.Debug(slurm.logger).
Log("msg", "Failed to convert SLURM user gid to int. Next attempt to execute SLURM commands with sudo", "gid", slurmUserGID, "err", err)

goto sudomode
goto secu_context
}

if _, err := internal_osexec.ExecuteAs(sacctPath, []string{"--help"}, slurmUserUID, slurmUserGID, nil); err == nil {
slurm.cmdExecMode = "cap"
// Check if current process has necessary caps
if currentCaps := cap.GetProc().String(); strings.Contains(currentCaps, "cap_setuid") && strings.Contains(currentCaps, "cap_setgid") {
slurm.cmdExecMode = capabilityMode
level.Info(slurm.logger).Log("msg", "Linux capabilities will be used to execute SLURM commands as slurm user")
}

secu_context:
// If using capability mode, setup security context
if slurm.cmdExecMode == capabilityMode {
var caps []cap.Value

var err error

for _, name := range []string{"cap_setuid", "cap_setgid"} {
value, err := cap.FromName(name)
if err != nil {
Expand Down Expand Up @@ -135,10 +117,9 @@ func preflightsCLI(slurm *slurmScheduler) error {
return nil
}

sudomode:
// Last attempt to run sacct with sudo
if _, err := internal_osexec.ExecuteWithTimeout("sudo", []string{sacctPath, "--help"}, 5, nil); err == nil {
slurm.cmdExecMode = "sudo"
slurm.cmdExecMode = sudoMode
level.Info(slurm.logger).Log("msg", "sudo will be used to execute SLURM commands")

return nil
Expand Down Expand Up @@ -550,8 +531,8 @@ func (s *slurmScheduler) runSacctCmd(ctx context.Context, startTime string, endT
Cmd: cmd,
Environ: env,
Logger: s.logger,
UID: slurmUserUID,
GID: slurmUserGID,
UID: 0,
GID: 0,
}

return executeInSecurityContext(securityCtx, dataPtr)
Expand Down Expand Up @@ -599,8 +580,8 @@ func (s *slurmScheduler) runSacctMgrCmd(ctx context.Context) ([]byte, error) {
Cmd: cmd,
Environ: env,
Logger: s.logger,
UID: slurmUserUID,
GID: slurmUserGID,
UID: 0,
GID: 0,
}

return executeInSecurityContext(securityCtx, dataPtr)
Expand Down
6 changes: 4 additions & 2 deletions pkg/api/resource/slurm/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ type slurmScheduler struct {
const slurmBatchScheduler = "slurm"

var (
slurmUserUID int
slurmUserGID int
slurmTimeFormat = base.DatetimeLayout + "-0700"
jobLock = sync.RWMutex{}
assocLock = sync.RWMutex{}
Expand Down Expand Up @@ -150,6 +148,8 @@ func (s *slurmScheduler) fetchFromSacct(ctx context.Context, start time.Time, en
// Execute sacct command between start and end times
sacctOutput, err := s.runSacctCmd(ctx, startTime, endTime)
if err != nil {
level.Error(s.logger).Log("msg", "Failed to run sacct command", "cluster_id", s.cluster.ID, "err", err)

return []models.Unit{}, err
}

Expand All @@ -172,6 +172,8 @@ func (s *slurmScheduler) fetchFromSacctMgr(
// Execute sacctmgr command
sacctMgrOutput, err := s.runSacctMgrCmd(ctx)
if err != nil {
level.Error(s.logger).Log("msg", "Failed to run sacctmgr command", "cluster_id", s.cluster.ID, "err", err)

return nil, nil, err
}

Expand Down

0 comments on commit d1c4d0a

Please sign in to comment.