
Commit 749b14d

koord-descheduler: LowNodeLoad check if evicted pod can cause new node over utilized (koordinator-sh#2142)
Signed-off-by: songtao98 <[email protected]>
1 parent cb89d6d commit 749b14d

File tree

5 files changed (+171 -4 lines changed):

pkg/descheduler/evictions/evictions.go
pkg/descheduler/framework/plugins/loadaware/low_node_load.go
pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go
pkg/descheduler/framework/plugins/loadaware/utilization_util.go
pkg/descheduler/framework/plugins/loadaware/utilization_util_test.go
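
In short, this change makes the load-aware node-fit check also project destination utilization: a candidate node only counts as a fit if adding the evicted pod's measured usage keeps every resource under that node's high-utilization threshold (see podFitsAnyNodeWithThreshold in utilization_util.go below). The following is a minimal, standalone sketch of that projection; the helper name wouldOverUtilize and the plain maps used here are illustrative stand-ins for the plugin's internal NodeUsage/NodeThresholds types, not part of the commit.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

// wouldOverUtilize is a hypothetical stand-in for the threshold check added in
// this commit: it reports whether adding the pod's measured usage to the node's
// current usage would push any resource above its high threshold.
func wouldOverUtilize(nodeUsage, podUsage, highThreshold map[corev1.ResourceName]*resource.Quantity) bool {
	for resourceName, threshold := range highThreshold {
		used, ok := nodeUsage[resourceName]
		if !ok {
			continue
		}
		projected := used.DeepCopy() // work on a copy so this sketch does not mutate the usage snapshot
		if podQuantity, ok := podUsage[resourceName]; ok {
			projected.Add(*podQuantity)
		}
		if projected.Cmp(*threshold) > 0 {
			return true
		}
	}
	return false
}

func main() {
	nodeUsage := map[corev1.ResourceName]*resource.Quantity{
		corev1.ResourceCPU: resource.NewMilliQuantity(1000, resource.DecimalSI), // 1000m currently in use
	}
	podUsage := map[corev1.ResourceName]*resource.Quantity{
		corev1.ResourceCPU: resource.NewMilliQuantity(1500, resource.DecimalSI), // candidate pod uses 1500m
	}
	highThreshold := map[corev1.ResourceName]*resource.Quantity{
		corev1.ResourceCPU: resource.NewMilliQuantity(2000, resource.DecimalSI), // node high threshold 2000m
	}
	// 1000m + 1500m = 2500m > 2000m, so this node is not a valid destination.
	fmt.Println(wouldOverUtilize(nodeUsage, podUsage, highThreshold)) // prints true
}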

pkg/descheduler/evictions/evictions.go

Lines changed: 1 addition & 0 deletions

@@ -302,6 +302,7 @@ func NewEvictorFilter(
 			return nil
 		})
 	}
+	// todo: to align with the k8s descheduling framework, nodeFit should be moved into PreEvictionFilter in the future
 	if options.nodeFit {
 		ev.constraints = append(ev.constraints, func(pod *corev1.Pod) error {
 			nodes, err := nodeGetter()

pkg/descheduler/framework/plugins/loadaware/low_node_load.go

Lines changed: 2 additions & 0 deletions

@@ -245,6 +245,8 @@ func (pl *LowNodeLoad) processOneNodePool(ctx context.Context, nodePool *desched
 		abnormalProdNodes,
 		prodLowNodes,
 		bothLowNodes,
+		nodeUsages,
+		nodeThresholds,
 		pl.args.DryRun,
 		pl.args.NodeFit,
 		nodePool.ResourceWeights,

pkg/descheduler/framework/plugins/loadaware/low_node_load_test.go

Lines changed: 1 addition & 1 deletion

@@ -1297,7 +1297,7 @@ func TestLowNodeLoad(t *testing.T) {
 				test.BuildTestPod("p10", 400, 0, n3NodeName, test.SetRSOwnerRef),
 				test.BuildTestPod("p11", 400, 0, n3NodeName, test.SetRSOwnerRef),
 			},
-			expectedPodsEvicted: 4,
+			expectedPodsEvicted: 3,
 			evictedPods:         []string{},
 		},
 	}

pkg/descheduler/framework/plugins/loadaware/utilization_util.go

Lines changed: 56 additions & 3 deletions

@@ -26,6 +26,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	utilerrors "k8s.io/apimachinery/pkg/util/errors"
 	"k8s.io/klog/v2"

 	"github.com/koordinator-sh/koordinator/apis/extension"

@@ -309,6 +310,8 @@ func evictPodsFromSourceNodes(
 	nodePoolName string,
 	sourceNodes, destinationNodes,
 	prodSourceNodes, prodDestinationNodes, bothDestinationNodes []NodeInfo,
+	nodeUsages map[string]*NodeUsage,
+	nodeThresholds map[string]NodeThresholds,
 	dryRun bool,
 	nodeFit bool,
 	resourceWeights map[corev1.ResourceName]int64,

@@ -346,7 +349,8 @@ func evictPodsFromSourceNodes(
 	klog.V(4).InfoS("Total node usage capacity to be moved", nodeKeysAndValues...)

 	targetNodes = append(targetNodes, bothTotalNodes...)
-	balancePods(ctx, nodePoolName, sourceNodes, targetNodes, nodeTotalAvailableUsages, dryRun, nodeFit, false, resourceWeights, podEvictor,
+	balancePods(ctx, nodePoolName, sourceNodes, targetNodes, nodeUsages, nodeThresholds,
+		nodeTotalAvailableUsages, dryRun, nodeFit, false, resourceWeights, podEvictor,
 		podFilter, nodeIndexer, continueEviction, evictionReasonGenerator)

 	// bothLowNode will be used by nodeHigh and prodHigh nodes, needs sub resources used by pods on nodeHigh.

@@ -384,7 +388,8 @@ func evictPodsFromSourceNodes(
 		prodKeysAndValues = append(prodKeysAndValues, string(resourceName), quantity.String())
 	}
 	klog.V(4).InfoS("Total prod usage capacity to be moved", prodKeysAndValues...)
-	balancePods(ctx, nodePoolName, prodSourceNodes, prodTargetNodes, prodTotalAvailableUsages, dryRun, nodeFit, true, resourceWeights, podEvictor,
+	balancePods(ctx, nodePoolName, prodSourceNodes, prodTargetNodes, nodeUsages, nodeThresholds,
+		prodTotalAvailableUsages, dryRun, nodeFit, true, resourceWeights, podEvictor,
 		podFilter, nodeIndexer, continueEviction, evictionReasonGenerator)
 }

@@ -409,6 +414,8 @@ func balancePods(ctx context.Context,
 	nodePoolName string,
 	sourceNodes []NodeInfo,
 	targetNodes []*corev1.Node,
+	nodeUsages map[string]*NodeUsage,
+	nodeThresholds map[string]NodeThresholds,
 	totalAvailableUsages map[corev1.ResourceName]*resource.Quantity,
 	dryRun bool,
 	nodeFit, prod bool,

@@ -431,7 +438,9 @@ func balancePods(ctx context.Context,
 			if !nodeFit {
 				return true
 			}
-			return nodeutil.PodFitsAnyNode(nodeIndexer, pod, targetNodes)
+			podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}
+			podMetric := srcNode.podMetrics[podNamespacedName]
+			return podFitsAnyNodeWithThreshold(nodeIndexer, pod, targetNodes, nodeUsages, nodeThresholds, prod, podMetric)
 		}),
 	)
 	klog.V(4).InfoS("Evicting pods from node",

@@ -701,3 +710,47 @@ func sortPodsOnOneOverloadedNode(srcNode NodeInfo, removablePods []*corev1.Pod,
 		weights,
 	)
 }
+
+// podFitsAnyNodeWithThreshold checks if the given pod will fit any of the given nodes. It also checks if the node
+// utilization will exceed the threshold after this pod was scheduled on it.
+func podFitsAnyNodeWithThreshold(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *corev1.Pod, nodes []*corev1.Node,
+	nodeUsages map[string]*NodeUsage, nodeThresholds map[string]NodeThresholds, prod bool, podMetric *slov1alpha1.ResourceMap) bool {
+	for _, node := range nodes {
+		errors := nodeutil.NodeFit(nodeIndexer, pod, node)
+		if len(errors) == 0 {
+			// check if node utilization exceeds threshold if pod scheduled
+			nodeUsage, usageOk := nodeUsages[node.Name]
+			nodeThreshold, thresholdOk := nodeThresholds[node.Name]
+			if usageOk && thresholdOk {
+				var usage, thresholds map[corev1.ResourceName]*resource.Quantity
+				if prod {
+					usage = nodeUsage.prodUsage
+					thresholds = nodeThreshold.prodHighResourceThreshold
+				} else {
+					usage = nodeUsage.usage
+					thresholds = nodeThreshold.highResourceThreshold
+				}
+				exceeded := false
+				for resourceName, threshold := range thresholds {
+					if used := usage[resourceName]; used != nil {
+						used.Add(podMetric.ResourceList[resourceName])
+						if used.Cmp(*threshold) > 0 {
+							exceeded = true
+							break
+						}
+					}
+
+				}
+				if exceeded {
+					klog.V(4).InfoS("Pod may cause node over-utilized", "pod", klog.KObj(pod), "node", klog.KObj(node))
+					continue
+				}
+			}
+			klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
+			return true
+		} else {
+			klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node), "errors", utilerrors.NewAggregate(errors))
+		}
+	}
+	return false
+}

pkg/descheduler/framework/plugins/loadaware/utilization_util_test.go

Lines changed: 111 additions & 0 deletions

@@ -17,16 +17,21 @@ limitations under the License.
 package loadaware

 import (
+	"context"
 	"math"
 	"testing"

 	"github.com/stretchr/testify/assert"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/informers"
+	"k8s.io/client-go/kubernetes/fake"

 	slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
+	"github.com/koordinator-sh/koordinator/pkg/descheduler/test"
 )

 var (

@@ -229,3 +234,109 @@ func TestSortPodsOnOneOverloadedNode(t *testing.T) {
 	sortPodsOnOneOverloadedNode(nodeInfo, removablePods, resourceWeights, false)
 	assert.Equal(t, expectedResult, removablePods)
 }
+
+func TestPodFitsAnyNodeWithThreshold(t *testing.T) {
+	tests := []struct {
+		name           string
+		pod            *corev1.Pod
+		nodes          []*corev1.Node
+		nodeUsages     map[string]*NodeUsage
+		nodeThresholds map[string]NodeThresholds
+		prod           bool
+		podMetric      *slov1alpha1.ResourceMap
+		want           bool
+	}{
+		{
+			name: "Nodes matches the Pod via affinity, but exceeds threshold",
+			pod: &corev1.Pod{
+				ObjectMeta: metav1.ObjectMeta{
+					Namespace: "default",
+					Name:      "test-pod-1",
+				},
+				Spec: corev1.PodSpec{
+					Affinity: &corev1.Affinity{
+						NodeAffinity: &corev1.NodeAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
+								NodeSelectorTerms: []corev1.NodeSelectorTerm{
+									{
+										MatchExpressions: []corev1.NodeSelectorRequirement{
+											{
+												Key:      "test-node-type",
+												Operator: corev1.NodeSelectorOpIn,
+												Values: []string{
+													"test-node-type-A",
+												},
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			nodes: []*corev1.Node{
+				{
+					ObjectMeta: metav1.ObjectMeta{
+						Name: "test-node-1",
+						Labels: map[string]string{
+							"test-node-type": "test-node-type-A",
+						},
+					},
+				},
+			},
+			nodeUsages: map[string]*NodeUsage{
+				"test-node-1": {
+					usage: map[corev1.ResourceName]*resource.Quantity{
+						corev1.ResourceCPU:    resource.NewMilliQuantity(1000, resource.DecimalSI),
+						corev1.ResourceMemory: resource.NewQuantity(2000000000, resource.BinarySI),
+					},
+				},
+			},
+			nodeThresholds: map[string]NodeThresholds{
+				"test-node-1": {
+					highResourceThreshold: map[corev1.ResourceName]*resource.Quantity{
+						corev1.ResourceCPU:    resource.NewMilliQuantity(2000, resource.DecimalSI),
+						corev1.ResourceMemory: resource.NewMilliQuantity(3000000000, resource.DecimalSI),
+					},
+				},
+			},
+			podMetric: &slov1alpha1.ResourceMap{
+				ResourceList: corev1.ResourceList{
+					corev1.ResourceCPU:    *resource.NewMilliQuantity(1500, resource.DecimalSI),
+					corev1.ResourceMemory: *resource.NewMilliQuantity(1500000000, resource.DecimalSI),
+				},
+			},
+			want: false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctx, cancel := context.WithCancel(context.Background())
+			defer cancel()
+
+			var objs []runtime.Object
+			for _, node := range tt.nodes {
+				objs = append(objs, node)
+			}
+			objs = append(objs, tt.pod)
+
+			fakeClient := fake.NewSimpleClientset(objs...)
+
+			sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
+			podInformer := sharedInformerFactory.Core().V1().Pods()
+
+			getPodsAssignedToNode, err := test.BuildGetPodsAssignedToNodeFunc(podInformer)
+			if err != nil {
+				t.Errorf("Build get pods assigned to node function error: %v", err)
+			}

+			sharedInformerFactory.Start(ctx.Done())
+			sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+			if got := podFitsAnyNodeWithThreshold(getPodsAssignedToNode, tt.pod, tt.nodes, tt.nodeUsages, tt.nodeThresholds, false, tt.podMetric); got != tt.want {
+				t.Errorf("PodFitsAnyNode() = %v, want %v", got, tt.want)
+			}
+		})
+	}
+}
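
The single case above expects want: false. As a quick sanity check, the CPU side of the comparison the helper performs works out as follows; this is a standalone sketch of the arithmetic, not code from the commit.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Projected CPU usage for test-node-1: 1000m (nodeUsages) + 1500m (podMetric) = 2500m,
	// which exceeds the 2000m highResourceThreshold, so the only candidate node is rejected
	// and podFitsAnyNodeWithThreshold returns false.
	used := resource.NewMilliQuantity(1000, resource.DecimalSI)
	used.Add(*resource.NewMilliQuantity(1500, resource.DecimalSI))
	threshold := resource.NewMilliQuantity(2000, resource.DecimalSI)
	fmt.Println(used.Cmp(*threshold) > 0) // true: the node would become over-utilized
}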
