diff --git a/config_linux.go b/config_linux.go
index 9bc58a3..3d29d93 100644
--- a/config_linux.go
+++ b/config_linux.go
@@ -90,8 +90,8 @@ type Resources struct {
 	// Cgroup's SCHED_IDLE value.
 	CPUIdle *int64 `json:"cpu_idle,omitempty"`
 
-	// Process limit; set <= `0' to disable limit.
-	PidsLimit int64 `json:"pids_limit,omitempty"`
+	// Process limit; set < `0' to disable limit. `nil` means "keep current limit".
+	PidsLimit *int64 `json:"pids_limit,omitempty"`
 
 	// Specifies per cgroup weight, range is from 10 to 1000.
 	BlkioWeight uint16 `json:"blkio_weight,omitempty"`
diff --git a/devices/systemd_test.go b/devices/systemd_test.go
index 8319fbd..2cbb5ca 100644
--- a/devices/systemd_test.go
+++ b/devices/systemd_test.go
@@ -13,6 +13,8 @@ import (
 	"github.com/opencontainers/cgroups/systemd"
 )
 
+func toPtr[T any](v T) *T { return &v }
+
 // TestPodSkipDevicesUpdate checks that updating a pod having SkipDevices: true
 // does not result in spurious "permission denied" errors in a container
 // running under the pod. The test is somewhat similar in nature to the
@@ -32,7 +34,7 @@ func TestPodSkipDevicesUpdate(t *testing.T) {
 		Parent: "system.slice",
 		Name:   podName,
 		Resources: &cgroups.Resources{
-			PidsLimit:   42,
+			PidsLimit:   toPtr[int64](42),
 			Memory:      32 * 1024 * 1024,
 			SkipDevices: true,
 		},
@@ -96,7 +98,7 @@ func TestPodSkipDevicesUpdate(t *testing.T) {
 
 	// Now update the pod a few times.
 	for range 42 {
-		podConfig.Resources.PidsLimit++
+		(*podConfig.Resources.PidsLimit)++
 		podConfig.Resources.Memory += 1024 * 1024
 		if err := pm.Set(podConfig.Resources); err != nil {
 			t.Fatal(err)
diff --git a/fs/pids.go b/fs/pids.go
index 9319761..36bd339 100644
--- a/fs/pids.go
+++ b/fs/pids.go
@@ -19,19 +19,24 @@ func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error {
 }
 
 func (s *PidsGroup) Set(path string, r *cgroups.Resources) error {
-	if r.PidsLimit != 0 {
-		// "max" is the fallback value.
-		limit := "max"
-
-		if r.PidsLimit > 0 {
-			limit = strconv.FormatInt(r.PidsLimit, 10)
-		}
-
-		if err := cgroups.WriteFile(path, "pids.max", limit); err != nil {
-			return err
-		}
+	if r.PidsLimit == nil {
+		return nil
 	}
+	// "max" is the fallback value.
+	val := "max"
+	if limit := *r.PidsLimit; limit > 0 {
+		val = strconv.FormatInt(limit, 10)
+	} else if limit == 0 {
+		// systemd doesn't support setting pids.max to "0", so when setting
+		// TasksMax we need to remap it to "1". We do the same thing here to
+		// avoid flip-flop behaviour between the fs and systemd drivers. In
+		// practice, the pids cgroup behaviour is basically identical.
+		val = "1"
+	}
+	if err := cgroups.WriteFile(path, "pids.max", val); err != nil {
+		return err
+	}
 	return nil
 }
diff --git a/fs/pids_test.go b/fs/pids_test.go
index a33db7a..d1fa5a1 100644
--- a/fs/pids_test.go
+++ b/fs/pids_test.go
@@ -13,6 +13,8 @@ const (
 	maxLimited = 1024
 )
 
+func toPtr[T any](v T) *T { return &v }
+
 func TestPidsSetMax(t *testing.T) {
 	path := tempDir(t, "pids")
 
@@ -21,7 +23,7 @@ func TestPidsSetMax(t *testing.T) {
 	})
 
 	r := &cgroups.Resources{
-		PidsLimit: maxLimited,
+		PidsLimit: toPtr[int64](maxLimited),
 	}
 	pids := &PidsGroup{}
 	if err := pids.Set(path, r); err != nil {
@@ -37,6 +39,55 @@ func TestPidsSetMax(t *testing.T) {
 	}
 }
 
+func TestPidsSetZero(t *testing.T) {
+	path := tempDir(t, "pids")
+
+	writeFileContents(t, path, map[string]string{
+		"pids.max": "max",
+	})
+
+	r := &cgroups.Resources{
+		PidsLimit: toPtr[int64](0),
+	}
+	pids := &PidsGroup{}
+	if err := pids.Set(path, r); err != nil {
+		t.Fatal(err)
+	}
+
+	value, err := fscommon.GetCgroupParamUint(path, "pids.max")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// See comment in (*PidsGroup).Set for why we set to 1 here.
+	if value != 1 {
+		t.Fatalf("Expected 1, got %d for setting pids.max = 0", value)
+	}
+}
+
+func TestPidsUnset(t *testing.T) {
+	path := tempDir(t, "pids")
+
+	writeFileContents(t, path, map[string]string{
+		"pids.max": "12345",
+	})
+
+	r := &cgroups.Resources{
+		PidsLimit: nil,
+	}
+	pids := &PidsGroup{}
+	if err := pids.Set(path, r); err != nil {
+		t.Fatal(err)
+	}
+
+	value, err := fscommon.GetCgroupParamUint(path, "pids.max")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if value != 12345 {
+		t.Fatalf("Expected 12345, got %d for not setting pids.max", value)
+	}
+}
+
 func TestPidsSetUnlimited(t *testing.T) {
 	path := tempDir(t, "pids")
 
@@ -45,7 +96,7 @@ func TestPidsSetUnlimited(t *testing.T) {
 	})
 
 	r := &cgroups.Resources{
-		PidsLimit: maxUnlimited,
+		PidsLimit: toPtr[int64](maxUnlimited),
 	}
 	pids := &PidsGroup{}
 	if err := pids.Set(path, r); err != nil {
diff --git a/fs2/pids.go b/fs2/pids.go
index 9b82b90..f932259 100644
--- a/fs2/pids.go
+++ b/fs2/pids.go
@@ -4,6 +4,7 @@ import (
 	"errors"
 	"math"
 	"os"
+	"strconv"
 	"strings"
 
 	"golang.org/x/sys/unix"
@@ -13,19 +14,26 @@ import (
 )
 
 func isPidsSet(r *cgroups.Resources) bool {
-	return r.PidsLimit != 0
+	return r.PidsLimit != nil
 }
 
 func setPids(dirPath string, r *cgroups.Resources) error {
 	if !isPidsSet(r) {
 		return nil
 	}
-	if val := numToStr(r.PidsLimit); val != "" {
-		if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
-			return err
-		}
+	val := "max"
+	if limit := *r.PidsLimit; limit > 0 {
+		val = strconv.FormatInt(limit, 10)
+	} else if limit == 0 {
+		// systemd doesn't support setting pids.max to "0", so when setting
+		// TasksMax we need to remap it to "1". We do the same thing here to
+		// avoid flip-flop behaviour between the fs and systemd drivers. In
+		// practice, the pids cgroup behaviour is basically identical.
+		val = "1"
+	}
+	if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil {
+		return err
 	}
-
 	return nil
 }
diff --git a/systemd/systemd_test.go b/systemd/systemd_test.go
index f770ab1..60a6c1f 100644
--- a/systemd/systemd_test.go
+++ b/systemd/systemd_test.go
@@ -236,3 +236,40 @@ func TestAddCPUQuota(t *testing.T) {
 		})
 	}
 }
+
+func TestTasksMax(t *testing.T) {
+	if !IsRunningSystemd() {
+		t.Skip("Test requires systemd.")
+	}
+	if os.Geteuid() != 0 {
+		t.Skip("Test requires root.")
+	}
+
+	podConfig := &cgroups.Cgroup{
+		Parent:    "system.slice",
+		Name:      "system-runc_test_tasksmax.slice",
+		Resources: &cgroups.Resources{},
+	}
+	// Create "pods" cgroup (a systemd slice to hold containers).
+	pm := newManager(t, podConfig)
+	if err := pm.Apply(-1); err != nil {
+		t.Fatal(err)
+	}
+
+	res := &cgroups.Resources{PidsLimit: nil}
+	if err := pm.Set(res); err != nil {
+		t.Fatalf("failed to set PidsLimit=nil: %v", err)
+	}
+
+	for _, limit := range []int64{100, 0, 42, -1, 100, -99} {
+		res.PidsLimit = &limit
+		if err := pm.Set(res); err != nil {
+			t.Fatalf("failed to set PidsLimit=%d: %v", limit, err)
+		}
+	}
+
+	res.PidsLimit = nil
+	if err := pm.Set(res); err != nil {
+		t.Fatalf("failed to set PidsLimit=nil: %v", err)
+	}
+}
diff --git a/systemd/v1.go b/systemd/v1.go
index 5500a53..96e69bb 100644
--- a/systemd/v1.go
+++ b/systemd/v1.go
@@ -2,6 +2,7 @@ package systemd
 
 import (
 	"errors"
+	"math"
 	"os"
 	"path/filepath"
 	"strings"
@@ -97,9 +98,17 @@ func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]syst
 			newProp("BlockIOWeight", uint64(r.BlkioWeight)))
 	}
 
-	if r.PidsLimit > 0 || r.PidsLimit == -1 {
+	if r.PidsLimit != nil {
+		var tasksMax uint64
+		if limit := *r.PidsLimit; limit < 0 {
+			tasksMax = math.MaxUint64 // "infinity"
+		} else if limit == 0 {
+			tasksMax = 1 // systemd does not accept "0" for TasksMax
+		} else {
+			tasksMax = uint64(limit)
+		}
 		properties = append(properties,
-			newProp("TasksMax", uint64(r.PidsLimit)))
+			newProp("TasksMax", tasksMax))
 	}
 
 	err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
diff --git a/systemd/v2.go b/systemd/v2.go
index c2f2e87..f76c93e 100644
--- a/systemd/v2.go
+++ b/systemd/v2.go
@@ -176,6 +176,9 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props
 					return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err)
 				}
 			}
+			if num == 0 {
+				num = 1 // systemd does not accept "0" for TasksMax
+			}
 			props = append(props,
 				newProp("TasksMax", num))
 
@@ -256,9 +259,17 @@ func genV2ResourcesProperties(dirPath string, r *cgroups.Resources, cm *dbusConn
 
 	addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod)
 
-	if r.PidsLimit > 0 || r.PidsLimit == -1 {
+	if r.PidsLimit != nil {
+		var tasksMax uint64
+		if limit := *r.PidsLimit; limit < 0 {
+			tasksMax = math.MaxUint64 // "infinity"
+		} else if limit == 0 {
+			tasksMax = 1 // systemd does not accept "0" for TasksMax
+		} else {
+			tasksMax = uint64(limit)
+		}
 		properties = append(properties,
-			newProp("TasksMax", uint64(r.PidsLimit)))
+			newProp("TasksMax", tasksMax))
 	}
 
 	err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems)
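
For reference, a minimal caller-side sketch (not part of the diff above) of how the now pointer-typed Resources.PidsLimit is meant to be driven. The main function and the toPtr helper here are illustrative only; toPtr mirrors the helper added to the tests, and the comments restate the semantics implemented by the fs, fs2 and systemd driver changes above.

package main

import (
	"fmt"

	"github.com/opencontainers/cgroups"
)

// toPtr mirrors the test helper added in this change: it returns a pointer to any value.
func toPtr[T any](v T) *T { return &v }

func main() {
	// The four cases the drivers now distinguish for Resources.PidsLimit.
	cases := []*cgroups.Resources{
		{PidsLimit: nil},               // nil: leave pids.max / TasksMax unchanged
		{PidsLimit: toPtr[int64](100)}, // > 0: limit to 100 tasks
		{PidsLimit: toPtr[int64](0)},   // == 0: written as "1" (systemd does not accept TasksMax=0)
		{PidsLimit: toPtr[int64](-1)},  // < 0: "max" on the fs/fs2 drivers, "infinity" on systemd
	}
	for _, r := range cases {
		if r.PidsLimit == nil {
			fmt.Println("PidsLimit: unset (keep current limit)")
			continue
		}
		fmt.Println("PidsLimit:", *r.PidsLimit)
	}
}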