Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9888acc
First version of scheduling cache.
bsalmon-goog Sep 29, 2025
80fe07e
Bug fixes, clarity edits.
bsalmon-goog Sep 29, 2025
da3fa73
More cleanup and adjusting.
bsalmon-goog Sep 29, 2025
4b390c4
Switch to default linked list implementation.
bsalmon-goog Sep 30, 2025
34e9083
Commants and legibility.
bsalmon-goog Sep 30, 2025
a6708e2
New unit tests courtesty of Gemini.
bsalmon-goog Sep 30, 2025
367a339
Comments.
bsalmon-goog Sep 30, 2025
ef516f8
Update signatures to use json because I know it is stable now.
bsalmon-goog Sep 30, 2025
14c9733
Undo logging changes I did for debugging.
bsalmon-goog Sep 30, 2025
e01a929
More logging cleanup.
bsalmon-goog Sep 30, 2025
0633e26
Comments on attributes we need to look at...
bsalmon-goog Sep 30, 2025
920ddbd
More comments.
bsalmon-goog Sep 30, 2025
1090dc7
`
bsalmon-goog Sep 30, 2025
507eea9
Comment typeo.
bsalmon-goog Sep 30, 2025
640d693
Tweak interfaces to make it easier to call from plugins.
bsalmon-goog Oct 1, 2025
9286f49
First pass of plugin signature implementations.
bsalmon-goog Oct 1, 2025
7564199
Comment improvements.
bsalmon-goog Oct 1, 2025
d6213d3
Pass 2 on plugins.
bsalmon-goog Oct 1, 2025
2f0bdb3
Comments.
bsalmon-goog Oct 1, 2025
5eb8e89
Forgot a plugin.
bsalmon-goog Oct 1, 2025
e06cd30
Update the signature interface so we can share elements.
bsalmon-goog Oct 2, 2025
c6ce675
Comment change.
bsalmon-goog Oct 2, 2025
2bca4f0
Add type checks to the plugins.
bsalmon-goog Oct 3, 2025
b5d8a15
Clean up legacy handling, add option to scheduler profiles.
bsalmon-goog Oct 3, 2025
83afe71
Add non-plugin elements to the signature.
bsalmon-goog Oct 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pkg/scheduler/apis/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ type KubeSchedulerProfile struct {
// global PercentageOfNodesToScore will be used.
PercentageOfNodesToScore *int32

// If set, this enables the scheduler cache. This cache dramatically improves the performance
// when many pods are scheduled on large clusters. For 1-pod-per-node environments this
// cache has no downsides, but for multi-pod-per-node environments it will degrade the scoring
// of nodes to some degree.
CacheEnabled bool

// Plugins specify the set of plugins that should be enabled or disabled.
// Enabled plugins are the ones that should be enabled in addition to the
// default plugins. Disabled plugins are any of the default plugins that
Expand Down Expand Up @@ -176,6 +182,9 @@ type Plugins struct {

// MultiPoint is a simplified config field for enabling plugins for all valid extension points
MultiPoint PluginSet

// Signature plugins contribute to a pod's scheduling signature
Signature PluginSet
}

// PluginSet specifies enabled and disabled plugins for an extension point.
Expand Down
1 change: 1 addition & 0 deletions pkg/scheduler/extender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ func TestSchedulerWithExtenders(t *testing.T) {
percentageOfNodesToScore: schedulerapi.DefaultPercentageOfNodesToScore,
Extenders: extenders,
logger: logger,
podHostCache: NewPodHostCache(),
}
sched.applyDefaultHandlers()

Expand Down
42 changes: 42 additions & 0 deletions pkg/scheduler/framework/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,29 @@ type PreFilterExtensions interface {
RemovePod(ctx context.Context, state fwk.CycleState, podToSchedule *v1.Pod, podInfoToRemove fwk.PodInfo, nodeInfo fwk.NodeInfo) *fwk.Status
}

type PodSignatureMaker interface {
Unsignable()
AddElement(elementName, sigString string)
AddElementFromObj(elementName string, obj any) error
HasElement(elementName string) bool
}

// SignaturePlugin is an interface that should be implemented by plugins that either filter or score
// pods to enable result caching and gang scheduling optimizations. If an enabled plugin that does Scoring,
// Prescoring, Filtering or Prefiltering does not implement this interface we will turn off signatures for all pods.
// For now we leave this optional, but in the future we may make it mandatory for all filtering and scoring plugins
// to implement the interface (but of course plugins may choose to always return "unsignable").
type SignaturePlugin interface {
Plugin
// This is called before PreFilter. The return value can be:
// - A string that represents the signature of this pod from this plugin's perspective. All pods with the same signature should see the same feasibility and
// scoring for the same set of nodes in the same state, from the perspective of this plugin.
// - The assertion that this pod cannot be signed by this plugin; i.e. the scoring of this pod is dependent on more than the node and the pod. This
// will disable caching and gang scheduling optimizations for the given pod, hurting performance.
// - An internal error condition passed back through the scheduling code.
PodSignature(pod *v1.Pod, signature PodSignatureMaker) error
}

// PreFilterPlugin is an interface that must be implemented by "PreFilter" plugins.
// These plugins are called at the beginning of the scheduling cycle.
type PreFilterPlugin interface {
Expand Down Expand Up @@ -488,6 +511,19 @@ type BindPlugin interface {
Bind(ctx context.Context, state fwk.CycleState, p *v1.Pod, nodeName string) *fwk.Status
}

// Results from a PodSignature call.
type PodSignatureResult struct {
// Can we create a scheduling signature for this pod?
Signable bool

// The pod scheduling signature. Two pods with the same signature should
// get the same feasibility and scores for the same set of nodes.
Signature string

// Non-nil if the signature process encountered an internal error.
Error error
}

// Framework manages the set of plugins in use by the scheduling framework.
// Configured plugins are called at specified points in a scheduling context.
type Framework interface {
Expand All @@ -502,6 +538,12 @@ type Framework interface {
// QueueSortFunc returns the function to sort pods in scheduling queue
QueueSortFunc() LessFunc

// Create a scheduling signature for a given pod, if possible. Two pods with the same signature
// should get the same feasibility and scores for any given set of nodes. If some plugins
// are unable to create a signature, the pod may be "unsignable" which disables results caching
// and gang scheduling optimizations.
PodSignature(ctx context.Context, pod *v1.Pod) *PodSignatureResult

// RunPreFilterPlugins runs the set of configured PreFilter plugins. It returns
// *fwk.Status and its code is set to non-success if any of the plugins returns
// anything but Success. If a non-success status is returned, then the scheduling
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,22 @@ var _ framework.PostFilterPlugin = &DynamicResources{}
var _ framework.ReservePlugin = &DynamicResources{}
var _ framework.EnqueueExtensions = &DynamicResources{}
var _ framework.PreBindPlugin = &DynamicResources{}
var _ framework.SignaturePlugin = &DynamicResources{}

// Name returns name of the plugin. It is used in logs, etc.
func (pl *DynamicResources) Name() string {
return Name
}

// Because it isn't simple to determine if DRA claims are single host or more complex,
// we exclude any pod with a DRA claim from signatures. We should improve this.
func (pl *DynamicResources) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
if len(pod.Spec.ResourceClaims) > 0 {
signature.Unsignable()
}
return nil
}

// EventsToRegister returns the possible events that may make a Pod
// failed by this plugin schedulable.
func (pl *DynamicResources) EventsToRegister(_ context.Context) ([]fwk.ClusterEventWithHint, error) {
Expand Down
15 changes: 15 additions & 0 deletions pkg/scheduler/framework/plugins/helper/signature.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package helper

import v1 "k8s.io/api/core/v1"

// Common signature element: the pod's Volumes. Note that
// we exclude ConfigMap and Secret volumes because they are synthetic.
func SignatureVolumes(pod *v1.Pod) any {
volumes := []v1.Volume{}
for _, volume := range pod.Spec.Volumes {
if volume.VolumeSource.ConfigMap == nil && volume.VolumeSource.Secret == nil {
volumes = append(volumes, volume)
}
}
return volumes
}
16 changes: 16 additions & 0 deletions pkg/scheduler/framework/plugins/imagelocality/image_locality.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ type ImageLocality struct {
}

var _ framework.ScorePlugin = &ImageLocality{}
var _ framework.SignaturePlugin = &ImageLocality{}

// Name is the name of the plugin used in the plugin registry and configurations.
const Name = names.ImageLocality
Expand All @@ -50,6 +51,21 @@ func (pl *ImageLocality) Name() string {
return Name
}

// Image locality filtering and scoring depends on images for the pod's containers.
func (pl *ImageLocality) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
imageNames := []string{}

for _, container := range pod.Spec.InitContainers {
imageNames = append(imageNames, normalizedImageName(container.Image))
}

for _, container := range pod.Spec.Containers {
imageNames = append(imageNames, normalizedImageName(container.Image))
}

return signature.AddElementFromObj(pl.Name(), imageNames)
}

// Score invoked at the score extension point.
func (pl *ImageLocality) Score(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) (int64, *fwk.Status) {
nodeInfos, err := pl.handle.SnapshotSharedLister().NodeInfos().List()
Expand Down
11 changes: 11 additions & 0 deletions pkg/scheduler/framework/plugins/interpodaffinity/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var _ framework.FilterPlugin = &InterPodAffinity{}
var _ framework.PreScorePlugin = &InterPodAffinity{}
var _ framework.ScorePlugin = &InterPodAffinity{}
var _ framework.EnqueueExtensions = &InterPodAffinity{}
var _ framework.SignaturePlugin = &InterPodAffinity{}

// InterPodAffinity is a plugin that checks inter pod affinity
type InterPodAffinity struct {
Expand All @@ -58,6 +59,16 @@ func (pl *InterPodAffinity) Name() string {
return Name
}

// Inter pod affinity make feasibility and scoring dependent on the placement of other
// pods in addition the current pod and node, so we cannot sign pods with these
// constraints.
func (pl *InterPodAffinity) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
if pod.Spec.Affinity != nil && (pod.Spec.Affinity.PodAffinity != nil || pod.Spec.Affinity.PodAntiAffinity != nil) {
signature.Unsignable()
}
return nil
}

// EventsToRegister returns the possible events that may make a failed Pod
// schedulable
func (pl *InterPodAffinity) EventsToRegister(_ context.Context) ([]fwk.ClusterEventWithHint, error) {
Expand Down
14 changes: 14 additions & 0 deletions pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ var _ framework.FilterPlugin = &NodeAffinity{}
var _ framework.PreScorePlugin = &NodeAffinity{}
var _ framework.ScorePlugin = &NodeAffinity{}
var _ framework.EnqueueExtensions = &NodeAffinity{}
var _ framework.SignaturePlugin = &NodeAffinity{}

const (
// Name is the name of the plugin used in the plugin registry and configurations.
Expand All @@ -75,6 +76,19 @@ func (pl *NodeAffinity) Name() string {
return Name
}

// Node affinity filtering and scoring depend on NodeAffinity and NodeSelectors.
func (pl *NodeAffinity) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
objs := []any{"affinity"}
if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil {
objs = append(objs, pod.Spec.Affinity.NodeAffinity)
}

objs = append(objs, "selector")
objs = append(objs, pod.Spec.NodeSelector)

return signature.AddElementFromObj(pl.Name(), objs)
}

type preFilterState struct {
requiredNodeSelectorAndAffinity nodeaffinity.RequiredNodeAffinity
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/scheduler/framework/plugins/nodename/node_name.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ type NodeName struct {

var _ framework.FilterPlugin = &NodeName{}
var _ framework.EnqueueExtensions = &NodeName{}
var _ framework.SignaturePlugin = &NodeName{}

const (
// Name is the name of the plugin used in the plugin registry and configurations.
Expand Down Expand Up @@ -68,6 +69,12 @@ func (pl *NodeName) Name() string {
return Name
}

// NodeName scoring and feasibility are dependent on the NodeName field.
func (pl *NodeName) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
signature.AddElement(pl.Name(), pod.Spec.NodeName)
return nil
}

// Filter invoked at the filter extension point.
func (pl *NodeName) Filter(ctx context.Context, _ fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status {

Expand Down
6 changes: 6 additions & 0 deletions pkg/scheduler/framework/plugins/nodeports/node_ports.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type NodePorts struct {
var _ framework.PreFilterPlugin = &NodePorts{}
var _ framework.FilterPlugin = &NodePorts{}
var _ framework.EnqueueExtensions = &NodePorts{}
var _ framework.SignaturePlugin = &NodePorts{}

const (
// Name is the name of the plugin used in the plugin registry and configurations.
Expand All @@ -64,6 +65,11 @@ func (pl *NodePorts) Name() string {
return Name
}

// NodePort feasibility and scheduling is based on the host ports for the containers.
func (pl *NodePorts) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
return signature.AddElementFromObj(pl.Name(), util.GetHostPorts(pod))
}

// PreFilter invoked at the prefilter extension point.
func (pl *NodePorts) PreFilter(ctx context.Context, cycleState fwk.CycleState, pod *v1.Pod, nodes []fwk.NodeInfo) (*framework.PreFilterResult, *fwk.Status) {
s := util.GetHostPorts(pod)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type BalancedAllocation struct {

var _ framework.PreScorePlugin = &BalancedAllocation{}
var _ framework.ScorePlugin = &BalancedAllocation{}
var _ framework.SignaturePlugin = &BalancedAllocation{}

// BalancedAllocationName is the name of the plugin used in the plugin registry and configurations.
const (
Expand Down Expand Up @@ -96,6 +97,12 @@ func (ba *BalancedAllocation) Name() string {
return BalancedAllocationName
}

// Feasibilty and scoring are based on a set of resources considered by BA. We
// reuse the internal function used to determine the relevant resources.
func (ba *BalancedAllocation) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
return signature.AddElementFromObj(ba.Name(), ba.calculatePodResourceRequestList(pod, ba.resources))
}

// Score invoked at the score extension point.
func (ba *BalancedAllocation) Score(ctx context.Context, state fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) (int64, *fwk.Status) {
s, err := getBalancedAllocationPreScoreState(state)
Expand Down
10 changes: 10 additions & 0 deletions pkg/scheduler/framework/plugins/noderesources/fit.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var _ framework.FilterPlugin = &Fit{}
var _ framework.EnqueueExtensions = &Fit{}
var _ framework.PreScorePlugin = &Fit{}
var _ framework.ScorePlugin = &Fit{}
var _ framework.SignaturePlugin = &Fit{}

const (
// Name is the name of the plugin used in the plugin registry and configurations.
Expand Down Expand Up @@ -154,6 +155,15 @@ func (f *Fit) Name() string {
return Name
}

// Fit is based on the node resources for the pod. We reuse the function used
// internally to compute the final resource list.
func (f *Fit) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
return signature.AddElementFromObj(
f.Name(),
computePodResourceRequest(pod, ResourceRequestsOptions{EnablePodLevelResources: f.enablePodLevelResources}),
)
}

// NewFit initializes a new plugin and returns it.
func NewFit(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
args, ok := plArgs.(*config.NodeResourcesFitArgs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type NodeUnschedulable struct {

var _ framework.FilterPlugin = &NodeUnschedulable{}
var _ framework.EnqueueExtensions = &NodeUnschedulable{}
var _ framework.SignaturePlugin = &NodeUnschedulable{}

// Name is the name of the plugin used in the plugin registry and configurations.
const Name = names.NodeUnschedulable
Expand Down Expand Up @@ -129,6 +130,14 @@ func (pl *NodeUnschedulable) Name() string {
return Name
}

// Feasibility and scoring based on the pod's tolerations.
func (pl *NodeUnschedulable) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
if !signature.HasElement("Tolerations") {
return signature.AddElementFromObj("Tolerations", pod.Spec.Tolerations)
}
return nil
}

// Filter invoked at the filter extension point.
func (pl *NodeUnschedulable) Filter(ctx context.Context, _ fwk.CycleState, pod *v1.Pod, nodeInfo fwk.NodeInfo) *fwk.Status {
node := nodeInfo.Node()
Expand Down
9 changes: 9 additions & 0 deletions pkg/scheduler/framework/plugins/nodevolumelimits/csi.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
fwk "k8s.io/kube-scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/names"
"k8s.io/kubernetes/pkg/scheduler/util"
)
Expand Down Expand Up @@ -73,6 +74,7 @@ type CSILimits struct {
var _ framework.PreFilterPlugin = &CSILimits{}
var _ framework.FilterPlugin = &CSILimits{}
var _ framework.EnqueueExtensions = &CSILimits{}
var _ framework.SignaturePlugin = &CSILimits{}

// CSIName is the name of the plugin used in the plugin registry and configurations.
const CSIName = names.NodeVolumeLimits
Expand Down Expand Up @@ -620,3 +622,10 @@ func (pl *CSILimits) getNodeVolumeAttachmentInfo(logger klog.Logger, nodeName st
func getVolumeUniqueName(driverName, volumeHandle string) string {
return fmt.Sprintf("%s/%s", driverName, volumeHandle)
}

func (pl *CSILimits) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
if !signature.HasElement("Volumes") {
return signature.AddElementFromObj("Volumes", helper.SignatureVolumes(pod))
}
return nil
}
12 changes: 12 additions & 0 deletions pkg/scheduler/framework/plugins/podtopologyspread/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ var _ framework.FilterPlugin = &PodTopologySpread{}
var _ framework.PreScorePlugin = &PodTopologySpread{}
var _ framework.ScorePlugin = &PodTopologySpread{}
var _ framework.EnqueueExtensions = &PodTopologySpread{}
var _ framework.SignaturePlugin = &PodTopologySpread{}

// Name is the name of the plugin used in the plugin registry and configurations.
const Name = names.PodTopologySpread
Expand All @@ -87,6 +88,17 @@ func (pl *PodTopologySpread) Name() string {
return Name
}

// Pod topology spread is not localized to a pod and node, so we cannot
// sign pods that have topology spread constraints, either explicit or
// defaulted.
// XXX need to fix the case where the controller type doesn't get a default...
func (pl *PodTopologySpread) PodSignature(pod *v1.Pod, signature framework.PodSignatureMaker) error {
if len(pod.Spec.TopologySpreadConstraints) > 0 || pl.systemDefaulted {
signature.Unsignable()
}
return nil
}

// New initializes a new plugin and returns it.
func New(_ context.Context, plArgs runtime.Object, h framework.Handle, fts feature.Features) (framework.Plugin, error) {
if h.SnapshotSharedLister() == nil {
Expand Down
Loading