@@ -33,7 +33,6 @@ import (
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/types"
-	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/util/workqueue"
 	"k8s.io/klog/v2"
 	controllerruntime "sigs.k8s.io/controller-runtime"
@@ -235,8 +234,7 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat
 	nodePoolList.OrderByWeight()
 
 	instanceTypes := map[string][]*cloudprovider.InstanceType{}
-	domains := map[string]sets.Set[string]{}
-	var notReadyNodePools []string
+	domainGroups := map[string]scheduler.TopologyDomainGroup{}
 	for _, nodePool := range nodePoolList.Items {
 		// Get instance type options
 		instanceTypeOptions, err := p.cloudProvider.GetInstanceTypes(ctx, lo.ToPtr(nodePool))
@@ -252,6 +250,7 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat
 			continue
 		}
 		instanceTypes[nodePool.Name] = append(instanceTypes[nodePool.Name], instanceTypeOptions...)
+		nodePoolTaints := nodePool.Spec.Template.Spec.Taints
 
 		// Construct Topology Domains
 		for _, instanceType := range instanceTypeOptions {
@@ -261,15 +260,12 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat
 			requirements.Add(scheduling.NewLabelRequirements(nodePool.Spec.Template.Labels).Values()...)
 			requirements.Add(instanceType.Requirements.Values()...)
 
-			for key, requirement := range requirements {
-				// This code used to execute a Union between domains[key] and requirement.Values().
-				// The downside of this is that Union is immutable and takes a copy of the set it is executed upon.
-				// This resulted in a lot of memory pressure on the heap and poor performance
-				// https://github.com/aws/karpenter/issues/3565
-				if domains[key] == nil {
-					domains[key] = sets.New(requirement.Values()...)
-				} else {
-					domains[key].Insert(requirement.Values()...)
+			for topologyKey, requirement := range requirements {
+				if _, ok := domainGroups[topologyKey]; !ok {
+					domainGroups[topologyKey] = scheduler.NewTopologyDomainGroup()
+				}
+				for _, domain := range requirement.Values() {
+					domainGroups[topologyKey].Insert(domain, nodePoolTaints...)
 				}
 			}
 		}
@@ -278,23 +274,20 @@ func (p *Provisioner) NewScheduler(ctx context.Context, pods []*corev1.Pod, stat
 		requirements.Add(scheduling.NewLabelRequirements(nodePool.Spec.Template.Labels).Values()...)
 		for key, requirement := range requirements {
 			if requirement.Operator() == corev1.NodeSelectorOpIn {
-				// The following is a performance optimisation, for the explanation see the comment above
-				if domains[key] == nil {
-					domains[key] = sets.New(requirement.Values()...)
-				} else {
-					domains[key].Insert(requirement.Values()...)
+				if _, ok := domainGroups[key]; !ok {
+					domainGroups[key] = scheduler.NewTopologyDomainGroup()
+				}
+				for _, value := range requirement.Values() {
+					domainGroups[key].Insert(value, nodePoolTaints...)
 				}
 			}
 		}
 	}
-	if len(notReadyNodePools) > 0 {
-		log.FromContext(ctx).WithValues("nodePools", nodePoolList).Info("skipped nodePools, not ready")
-	}
 	// inject topology constraints
 	pods = p.injectVolumeTopologyRequirements(ctx, pods)
 
 	// Calculate cluster topology
-	topology, err := scheduler.NewTopology(ctx, p.kubeClient, p.cluster, domains, pods)
+	topology, err := scheduler.NewTopology(ctx, p.kubeClient, p.cluster, domainGroups, pods)
 	if err != nil {
 		return nil, fmt.Errorf("tracking topology counts, %w", err)
 	}
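
The diff swaps the flat `domains` map (`map[string]sets.Set[string]`) for per-key `scheduler.TopologyDomainGroup` values so that each topology domain also carries the taints of the NodePool that can provide it. The type itself is defined outside these hunks; below is a minimal sketch of a shape that would satisfy the calls shown here (`NewTopologyDomainGroup` and `Insert(domain, taints...)`). The body is an illustrative assumption, not the actual Karpenter implementation.

```go
package scheduler

import corev1 "k8s.io/api/core/v1"

// TopologyDomainGroup (sketch, assumed shape): for each topology domain it keeps
// the taint sets of the NodePools able to produce nodes in that domain, so later
// topology logic can check whether a pod's tolerations match at least one of the
// providing NodePools before counting the domain as available.
type TopologyDomainGroup map[string][][]corev1.Taint

// NewTopologyDomainGroup returns an empty group.
func NewTopologyDomainGroup() TopologyDomainGroup {
	return TopologyDomainGroup{}
}

// Insert records that a NodePool carrying the given taints offers this domain.
func (t TopologyDomainGroup) Insert(domain string, taints ...corev1.Taint) {
	t[domain] = append(t[domain], taints)
}
```

With a shape like this, the two call sites in the diff stay symmetrical: both the instance-type loop and the NodePool-requirements loop create the group lazily per topology key, then insert every requirement value together with `nodePoolTaints`, and the aggregated `domainGroups` map is handed to `scheduler.NewTopology` in place of the old taint-unaware `domains` map.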