Skip to content

Commit 3965514

Browse files
committed
feat(advisor): support numa reclaim reserve
1 parent 17ba71b commit 3965514

File tree

7 files changed

+89
-22
lines changed

7 files changed

+89
-22
lines changed

cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ import (
2929
)
3030

3131
// ReclaimedResourceOptions holds the command-line options for reclaimed
// resources. AddFlags registers them as flags and ApplyTo copies them into
// the reclaimed-resource configuration.
type ReclaimedResourceOptions struct {
32-
EnableReclaim bool
33-
DisableReclaimSharePools []string
34-
ReservedResourceForReport general.ResourceList
35-
MinReclaimedResourceForReport general.ResourceList
36-
MinIgnoredReclaimedResourceForReport general.ResourceList
37-
ReservedResourceForAllocate general.ResourceList
38-
ReservedResourceForReclaimedCores general.ResourceList
39-
32+
EnableReclaim bool
33+
DisableReclaimSharePools []string
34+
ReservedResourceForReport general.ResourceList
35+
MinReclaimedResourceForReport general.ResourceList
36+
MinIgnoredReclaimedResourceForReport general.ResourceList
37+
ReservedResourceForAllocate general.ResourceList
38+
ReservedResourceForReclaimedCores general.ResourceList
39+
// NumaReclaimedResourceRatioForAllocate is the per-resource, NUMA-level
// reserved ratio for reclaimed_cores pods, set through the
// numa-reserved-resource-ratio-for-reclaimed-cores flag. It defaults to 0
// for both CPU and memory, and ApplyTo copies it into
// NumaMinReclaimedResourceRatioForAllocate.
NumaReclaimedResourceRatioForAllocate general.ResourceList
4040
*cpuheadroom.CPUHeadroomOptions
4141
*memoryheadroom.MemoryHeadroomOptions
4242
}
@@ -64,6 +64,10 @@ func NewReclaimedResourceOptions() *ReclaimedResourceOptions {
6464
v1.ResourceCPU: resource.MustParse("4"),
6565
v1.ResourceMemory: resource.MustParse("0"),
6666
},
67+
NumaReclaimedResourceRatioForAllocate: map[v1.ResourceName]resource.Quantity{
68+
v1.ResourceCPU: resource.MustParse("0"),
69+
v1.ResourceMemory: resource.MustParse("0"),
70+
},
6771
CPUHeadroomOptions: cpuheadroom.NewCPUHeadroomOptions(),
6872
MemoryHeadroomOptions: memoryheadroom.NewMemoryHeadroomOptions(),
6973
}
@@ -87,6 +91,8 @@ func (o *ReclaimedResourceOptions) AddFlags(fss *cliflag.NamedFlagSets) {
8791
"reserved reclaimed resource actually not allocate to reclaimed resource")
8892
fs.Var(&o.ReservedResourceForReclaimedCores, "reserved-resource-for-reclaimed-cores",
8993
"reserved resources for reclaimed_cores pods")
94+
fs.Var(&o.NumaReclaimedResourceRatioForAllocate, "numa-reserved-resource-ratio-for-reclaimed-cores",
95+
"NUMA level reserved resources ratio for reclaimed_cores pods")
9096

9197
o.CPUHeadroomOptions.AddFlags(fss)
9298
o.MemoryHeadroomOptions.AddFlags(fss)
@@ -102,6 +108,7 @@ func (o *ReclaimedResourceOptions) ApplyTo(c *reclaimedresource.ReclaimedResourc
102108
c.MinIgnoredReclaimedResourceForReport = v1.ResourceList(o.MinIgnoredReclaimedResourceForReport)
103109
c.ReservedResourceForAllocate = v1.ResourceList(o.ReservedResourceForAllocate)
104110
c.MinReclaimedResourceForAllocate = v1.ResourceList(o.ReservedResourceForReclaimedCores)
111+
c.NumaMinReclaimedResourceRatioForAllocate = v1.ResourceList(o.NumaReclaimedResourceRatioForAllocate)
105112

106113
errList = append(errList, o.CPUHeadroomOptions.ApplyTo(c.CPUHeadroomConfiguration))
107114
errList = append(errList, o.MemoryHeadroomOptions.ApplyTo(c.MemoryHeadroomConfiguration))

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ require (
174174
)
175175

176176
replace (
177+
github.com/kubewharf/katalyst-api => github.com/lihonghao314/katalyst-api v0.0.0-20250905072506-4372915c2ee6
177178
k8s.io/api => k8s.io/api v0.24.6
178179
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
179180
k8s.io/apimachinery => k8s.io/apimachinery v0.24.6

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,8 +573,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
573573
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
574574
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
575575
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
576-
github.com/kubewharf/katalyst-api v0.5.7-0.20250904111935-47f5110ad28b h1:ChgLRtZfRyR9Dkt52SArKrclrHkQj/uubl8PVvQhoEI=
577-
github.com/kubewharf/katalyst-api v0.5.7-0.20250904111935-47f5110ad28b/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
578576
github.com/kubewharf/kubelet v1.24.6-kubewharf.9 h1:jOTYZt7h/J7I8xQMKMUcJjKf5UFBv37jHWvNp5VRFGc=
579577
github.com/kubewharf/kubelet v1.24.6-kubewharf.9/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
580578
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
@@ -583,6 +581,8 @@ github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wn
583581
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
584582
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM=
585583
github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4=
584+
github.com/lihonghao314/katalyst-api v0.0.0-20250905072506-4372915c2ee6 h1:4SjwKutz/vaHavMOnPfpQYfTEx9iXmn2QLK1N4It8Zo=
585+
github.com/lihonghao314/katalyst-api v0.0.0-20250905072506-4372915c2ee6/go.mod h1:Y2IeIorxQamF2a3oa0+URztl5QCSty6Jj3zD83R8J9k=
586586
github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
587587
github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
588588
github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA=

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ package cpu
1818

1919
import (
2020
"fmt"
21+
"math"
2122

2223
v1 "k8s.io/api/core/v1"
24+
"k8s.io/apimachinery/pkg/api/resource"
2325
"k8s.io/apimachinery/pkg/util/sets"
2426
"k8s.io/klog/v2"
2527
"k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"
@@ -31,8 +33,10 @@ import (
3133
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler"
3234
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/region"
3335
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types"
36+
"github.com/kubewharf/katalyst-core/pkg/consts"
3437
"github.com/kubewharf/katalyst-core/pkg/util/general"
3538
"github.com/kubewharf/katalyst-core/pkg/util/machine"
39+
"github.com/kubewharf/katalyst-core/pkg/util/strategygroup"
3640
)
3741

3842
func RegisterCPUAdvisorHealthCheck() {
@@ -186,6 +190,20 @@ func (cra *cpuResourceAdvisor) updateNumasAvailableResource() {
186190
}
187191

188192
func (cra *cpuResourceAdvisor) updateReservedForReclaim() {
193+
numaReservedRatio := cra.conf.GetDynamicConfiguration().NumaMinReclaimedResourceRatioForAllocate[v1.ResourceCPU]
194+
if numaReservedRatio.Value() != 0 {
195+
cra.updateReservedForReclaimByNuma(numaReservedRatio)
196+
return
197+
}
198+
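// Temporary logic: when no NUMA ratio is configured, fall back to a fixed 0.05 per-NUMA ratio if the numa_reclaim_reserve strategy is enabled for this node.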
199+
numaReclaimReserve, err := strategygroup.IsStrategyEnabledForNode(consts.StrategyNameNumaReclaimReserve, true, cra.conf)
200+
if err == nil && numaReclaimReserve {
201+
numaReservedRatio = resource.MustParse("0.05")
202+
general.Infof("numaReclaimReserve enabled")
203+
cra.updateReservedForReclaimByNuma(numaReservedRatio)
204+
return
205+
}
206+
189207
coreNumReservedForReclaim := cra.conf.GetDynamicConfiguration().MinReclaimedResourceForAllocate[v1.ResourceCPU]
190208
if coreNumReservedForReclaim.Value() > int64(cra.metaServer.NumCPUs) {
191209
coreNumReservedForReclaim.Set(int64(cra.metaServer.NumCPUs))
@@ -196,6 +214,18 @@ func (cra *cpuResourceAdvisor) updateReservedForReclaim() {
196214
coreNumReservedForReclaim.Set(int64(cra.metaServer.NumNUMANodes))
197215
}
198216
cra.reservedForReclaim = machine.GetCoreNumReservedForReclaim(int(coreNumReservedForReclaim.Value()), cra.metaServer.NumNUMANodes)
217+
general.Infof("reservedForReclaim: %v, coreNumReservedForReclaim %v", cra.reservedForReclaim, coreNumReservedForReclaim.Value())
218+
}
219+
220+
func (cra *cpuResourceAdvisor) updateReservedForReclaimByNuma(numaReservedRatio resource.Quantity) {
221+
reservedForReclaim := make(map[int]int)
222+
for id := 0; id < cra.metaServer.NumNUMANodes; id++ {
223+
size := cra.metaServer.NUMAToCPUs.CPUSizeInNUMAs(id)
224+
reserved := math.Round(numaReservedRatio.AsApproximateFloat64() * float64(size))
225+
reservedForReclaim[id] = int(math.Max(1, reserved))
226+
}
227+
cra.reservedForReclaim = reservedForReclaim
228+
general.Infof("reservedForReclaim: %v, numaReservedRatio %v", reservedForReclaim, numaReservedRatio.AsApproximateFloat64())
199229
}
200230

201231
func (cra *cpuResourceAdvisor) getNumasReservedForAllocate(numas machine.CPUSet) float64 {

pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper_test.go

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,11 @@ import (
3434
func Test_cpuResourceAdvisor_updateReservedForReclaim(t *testing.T) {
3535
t.Parallel()
3636
type fields struct {
37-
numaNum int
38-
socketNum int
39-
numCPUs int
40-
minReclaimedResourceForAllocate v1.ResourceList
37+
numaNum int
38+
socketNum int
39+
numCPUs int
40+
minReclaimedResourceForAllocate v1.ResourceList
41+
numaMinReclaimedResourceRatioForAllocate v1.ResourceList
4142
}
4243
tests := []struct {
4344
name string
@@ -95,6 +96,24 @@ func Test_cpuResourceAdvisor_updateReservedForReclaim(t *testing.T) {
9596
7: 1,
9697
},
9798
},
99+
{
100+
name: "reserved with numa size ratio",
101+
fields: fields{
102+
numaNum: 2,
103+
socketNum: 1,
104+
numCPUs: 64,
105+
minReclaimedResourceForAllocate: v1.ResourceList{
106+
v1.ResourceCPU: resource.MustParse("4"),
107+
},
108+
numaMinReclaimedResourceRatioForAllocate: v1.ResourceList{
109+
v1.ResourceCPU: resource.MustParse("0.05"),
110+
},
111+
},
112+
wantReservedForReclaim: map[int]int{
113+
0: 2,
114+
1: 2,
115+
},
116+
},
98117
}
99118
for _, tt := range tests {
100119
tt := tt
@@ -110,8 +129,9 @@ func Test_cpuResourceAdvisor_updateReservedForReclaim(t *testing.T) {
110129
defer func() { _ = os.RemoveAll(sfDir) }()
111130

112131
conf := generateTestConfiguration(t, ckDir, sfDir)
113-
132+
conf.GetDynamicConfiguration().EnableStrategyGroup = true
114133
conf.GetDynamicConfiguration().MinReclaimedResourceForAllocate = tt.fields.minReclaimedResourceForAllocate
134+
conf.GetDynamicConfiguration().NumaMinReclaimedResourceRatioForAllocate = tt.fields.numaMinReclaimedResourceRatioForAllocate
115135

116136
cpuTopology, err := machine.GenerateDummyCPUTopology(tt.fields.numCPUs, tt.fields.socketNum, tt.fields.numaNum)
117137
assert.NoError(t, err)

pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,14 @@ import (
2525
)
2626

2727
type ReclaimedResourceConfiguration struct {
28-
EnableReclaim bool
29-
DisableReclaimSharePools []string
30-
ReservedResourceForReport v1.ResourceList
31-
MinReclaimedResourceForReport v1.ResourceList
32-
MinIgnoredReclaimedResourceForReport v1.ResourceList
33-
ReservedResourceForAllocate v1.ResourceList
34-
MinReclaimedResourceForAllocate v1.ResourceList
28+
EnableReclaim bool
29+
DisableReclaimSharePools []string
30+
ReservedResourceForReport v1.ResourceList
31+
MinReclaimedResourceForReport v1.ResourceList
32+
MinIgnoredReclaimedResourceForReport v1.ResourceList
33+
ReservedResourceForAllocate v1.ResourceList
34+
MinReclaimedResourceForAllocate v1.ResourceList
35+
NumaMinReclaimedResourceRatioForAllocate v1.ResourceList
3536

3637
*cpuheadroom.CPUHeadroomConfiguration
3738
*memoryheadroom.MemoryHeadroomConfiguration
@@ -84,6 +85,12 @@ func (c *ReclaimedResourceConfiguration) ApplyConfiguration(conf *crd.DynamicCon
8485
c.MinReclaimedResourceForAllocate[resourceName] = value
8586
}
8687
}
88+
89+
if config.NumaMinReclaimedResourceRatioForAllocate != nil {
90+
for resourceName, value := range *config.NumaMinReclaimedResourceRatioForAllocate {
91+
c.NumaMinReclaimedResourceRatioForAllocate[resourceName] = value
92+
}
93+
}
8794
}
8895

8996
c.CPUHeadroomConfiguration.ApplyConfiguration(conf)

pkg/consts/strategy_names.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ const (
4949
// StrategyNameMetricThreshold is the name of metric threshold,
5050
// it offers metric threshold from trombe
5151
StrategyNameMetricThreshold = "metric_threshold"
52+
53+
StrategyNameNumaReclaimReserve = "numa_reclaim_reserve"
5254
)
5355

5456
const (

0 commit comments

Comments
 (0)