From ae52e0c183cabb5fb4b1200e9543d76d4c0e209d Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Wed, 22 Oct 2025 23:49:37 +1100 Subject: [PATCH 1/2] config: switch PidsLimit to *int64 Previously we would treat a value of "0" as meaning "no-op", which lead to quite a bit of confusion. The runtime-spec has been updated to make the "pids.limit" value a pointer, and explicitly state that: * Values >= 0 should be treated as normal values; and * < 0 (usually -1) indicates "max". In practice this means we should switch PidsLimit to an *int64. Luckily, this is actually backwards-compatible with our previous JSON -- an old value of "0" would be omitted from the output, which will now be parsed as "nil". The handling by our cgroup code would be identical but the latter now correctly reflects the guidance by the runtime-spec. Signed-off-by: Aleksa Sarai --- config_linux.go | 4 +-- devices/systemd_test.go | 6 +++-- fs/pids.go | 27 +++++++++++--------- fs/pids_test.go | 55 +++++++++++++++++++++++++++++++++++++++-- fs2/pids.go | 20 ++++++++++----- systemd/v1.go | 13 ++++++++-- systemd/v2.go | 15 +++++++++-- 7 files changed, 113 insertions(+), 27 deletions(-) diff --git a/config_linux.go b/config_linux.go index 9bc58a3..3d29d93 100644 --- a/config_linux.go +++ b/config_linux.go @@ -90,8 +90,8 @@ type Resources struct { // Cgroup's SCHED_IDLE value. CPUIdle *int64 `json:"cpu_idle,omitempty"` - // Process limit; set <= `0' to disable limit. - PidsLimit int64 `json:"pids_limit,omitempty"` + // Process limit; set < `0' to disable limit. `nil` means "keep current limit". + PidsLimit *int64 `json:"pids_limit,omitempty"` // Specifies per cgroup weight, range is from 10 to 1000. BlkioWeight uint16 `json:"blkio_weight,omitempty"` diff --git a/devices/systemd_test.go b/devices/systemd_test.go index 8319fbd..2cbb5ca 100644 --- a/devices/systemd_test.go +++ b/devices/systemd_test.go @@ -13,6 +13,8 @@ import ( "github.com/opencontainers/cgroups/systemd" ) +func toPtr[T any](v T) *T { return &v } + // TestPodSkipDevicesUpdate checks that updating a pod having SkipDevices: true // does not result in spurious "permission denied" errors in a container // running under the pod. The test is somewhat similar in nature to the @@ -32,7 +34,7 @@ func TestPodSkipDevicesUpdate(t *testing.T) { Parent: "system.slice", Name: podName, Resources: &cgroups.Resources{ - PidsLimit: 42, + PidsLimit: toPtr[int64](42), Memory: 32 * 1024 * 1024, SkipDevices: true, }, @@ -96,7 +98,7 @@ func TestPodSkipDevicesUpdate(t *testing.T) { // Now update the pod a few times. for range 42 { - podConfig.Resources.PidsLimit++ + (*podConfig.Resources.PidsLimit)++ podConfig.Resources.Memory += 1024 * 1024 if err := pm.Set(podConfig.Resources); err != nil { t.Fatal(err) diff --git a/fs/pids.go b/fs/pids.go index 9319761..36bd339 100644 --- a/fs/pids.go +++ b/fs/pids.go @@ -19,19 +19,24 @@ func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { } func (s *PidsGroup) Set(path string, r *cgroups.Resources) error { - if r.PidsLimit != 0 { - // "max" is the fallback value. - limit := "max" - - if r.PidsLimit > 0 { - limit = strconv.FormatInt(r.PidsLimit, 10) - } - - if err := cgroups.WriteFile(path, "pids.max", limit); err != nil { - return err - } + if r.PidsLimit == nil { + return nil } + // "max" is the fallback value. + val := "max" + if limit := *r.PidsLimit; limit > 0 { + val = strconv.FormatInt(limit, 10) + } else if limit == 0 { + // systemd doesn't support setting pids.max to "0", so when setting + // TasksMax we need to remap it to "1". We do the same thing here to + // avoid flip-flop behaviour between the fs and systemd drivers. In + // practice, the pids cgroup behaviour is basically identical. + val = "1" + } + if err := cgroups.WriteFile(path, "pids.max", val); err != nil { + return err + } return nil } diff --git a/fs/pids_test.go b/fs/pids_test.go index a33db7a..d1fa5a1 100644 --- a/fs/pids_test.go +++ b/fs/pids_test.go @@ -13,6 +13,8 @@ const ( maxLimited = 1024 ) +func toPtr[T any](v T) *T { return &v } + func TestPidsSetMax(t *testing.T) { path := tempDir(t, "pids") @@ -21,7 +23,7 @@ func TestPidsSetMax(t *testing.T) { }) r := &cgroups.Resources{ - PidsLimit: maxLimited, + PidsLimit: toPtr[int64](maxLimited), } pids := &PidsGroup{} if err := pids.Set(path, r); err != nil { @@ -37,6 +39,55 @@ func TestPidsSetMax(t *testing.T) { } } +func TestPidsSetZero(t *testing.T) { + path := tempDir(t, "pids") + + writeFileContents(t, path, map[string]string{ + "pids.max": "max", + }) + + r := &cgroups.Resources{ + PidsLimit: toPtr[int64](0), + } + pids := &PidsGroup{} + if err := pids.Set(path, r); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(path, "pids.max") + if err != nil { + t.Fatal(err) + } + // See comment in (*PidsGroup).Set for why we set to 1 here. + if value != 1 { + t.Fatalf("Expected 1, got %d for setting pids.max = 0", value) + } +} + +func TestPidsUnset(t *testing.T) { + path := tempDir(t, "pids") + + writeFileContents(t, path, map[string]string{ + "pids.max": "12345", + }) + + r := &cgroups.Resources{ + PidsLimit: nil, + } + pids := &PidsGroup{} + if err := pids.Set(path, r); err != nil { + t.Fatal(err) + } + + value, err := fscommon.GetCgroupParamUint(path, "pids.max") + if err != nil { + t.Fatal(err) + } + if value != 12345 { + t.Fatalf("Expected 12345, got %d for not setting pids.max", value) + } +} + func TestPidsSetUnlimited(t *testing.T) { path := tempDir(t, "pids") @@ -45,7 +96,7 @@ func TestPidsSetUnlimited(t *testing.T) { }) r := &cgroups.Resources{ - PidsLimit: maxUnlimited, + PidsLimit: toPtr[int64](maxUnlimited), } pids := &PidsGroup{} if err := pids.Set(path, r); err != nil { diff --git a/fs2/pids.go b/fs2/pids.go index 9b82b90..f932259 100644 --- a/fs2/pids.go +++ b/fs2/pids.go @@ -4,6 +4,7 @@ import ( "errors" "math" "os" + "strconv" "strings" "golang.org/x/sys/unix" @@ -13,19 +14,26 @@ import ( ) func isPidsSet(r *cgroups.Resources) bool { - return r.PidsLimit != 0 + return r.PidsLimit != nil } func setPids(dirPath string, r *cgroups.Resources) error { if !isPidsSet(r) { return nil } - if val := numToStr(r.PidsLimit); val != "" { - if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil { - return err - } + val := "max" + if limit := *r.PidsLimit; limit > 0 { + val = strconv.FormatInt(limit, 10) + } else if limit == 0 { + // systemd doesn't support setting pids.max to "0", so when setting + // TasksMax we need to remap it to "1". We do the same thing here to + // avoid flip-flop behaviour between the fs and systemd drivers. In + // practice, the pids cgroup behaviour is basically identical. + val = "1" + } + if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil { + return err } - return nil } diff --git a/systemd/v1.go b/systemd/v1.go index 5500a53..96e69bb 100644 --- a/systemd/v1.go +++ b/systemd/v1.go @@ -2,6 +2,7 @@ package systemd import ( "errors" + "math" "os" "path/filepath" "strings" @@ -97,9 +98,17 @@ func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]syst newProp("BlockIOWeight", uint64(r.BlkioWeight))) } - if r.PidsLimit > 0 || r.PidsLimit == -1 { + if r.PidsLimit != nil { + var tasksMax uint64 + if limit := *r.PidsLimit; limit < 0 { + tasksMax = math.MaxUint64 // "infinity" + } else if limit == 0 { + tasksMax = 1 // systemd does not accept "0" for TasksMax + } else { + tasksMax = uint64(limit) + } properties = append(properties, - newProp("TasksMax", uint64(r.PidsLimit))) + newProp("TasksMax", tasksMax)) } err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) diff --git a/systemd/v2.go b/systemd/v2.go index c2f2e87..f76c93e 100644 --- a/systemd/v2.go +++ b/systemd/v2.go @@ -176,6 +176,9 @@ func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) } } + if num == 0 { + num = 1 // systemd does not accept "0" for TasksMax + } props = append(props, newProp("TasksMax", num)) @@ -256,9 +259,17 @@ func genV2ResourcesProperties(dirPath string, r *cgroups.Resources, cm *dbusConn addCPUQuota(cm, &properties, &r.CpuQuota, r.CpuPeriod) - if r.PidsLimit > 0 || r.PidsLimit == -1 { + if r.PidsLimit != nil { + var tasksMax uint64 + if limit := *r.PidsLimit; limit < 0 { + tasksMax = math.MaxUint64 // "infinity" + } else if limit == 0 { + tasksMax = 1 // systemd does not accept "0" for TasksMax + } else { + tasksMax = uint64(limit) + } properties = append(properties, - newProp("TasksMax", uint64(r.PidsLimit))) + newProp("TasksMax", tasksMax)) } err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) From 7c34f099ee0b36a9840be7b19f925f8830c11df0 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 24 Oct 2025 15:43:54 +1100 Subject: [PATCH 2/2] systemd: add TasksMax test Co-developed-by: Kir Kolyshkin Signed-off-by: Kir Kolyshkin Signed-off-by: Aleksa Sarai --- systemd/systemd_test.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/systemd/systemd_test.go b/systemd/systemd_test.go index f770ab1..60a6c1f 100644 --- a/systemd/systemd_test.go +++ b/systemd/systemd_test.go @@ -236,3 +236,40 @@ func TestAddCPUQuota(t *testing.T) { }) } } + +func TestTasksMax(t *testing.T) { + if !IsRunningSystemd() { + t.Skip("Test requires systemd.") + } + if os.Geteuid() != 0 { + t.Skip("Test requires root.") + } + + podConfig := &cgroups.Cgroup{ + Parent: "system.slice", + Name: "system-runc_test_tasksmax.slice", + Resources: &cgroups.Resources{}, + } + // Create "pods" cgroup (a systemd slice to hold containers). + pm := newManager(t, podConfig) + if err := pm.Apply(-1); err != nil { + t.Fatal(err) + } + + res := &cgroups.Resources{PidsLimit: nil} + if err := pm.Set(res); err != nil { + t.Fatalf("failed to set PidsLimit=nil: %v", err) + } + + for _, limit := range []int64{100, 0, 42, -1, 100, -99} { + res.PidsLimit = &limit + if err := pm.Set(res); err != nil { + t.Fatalf("failed to set PidsLimit=%d: %v", limit, err) + } + } + + res.PidsLimit = nil + if err := pm.Set(res); err != nil { + t.Fatalf("failed to set PidsLimit=nil: %v", err) + } +}