Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cordon option for highnodeutilization strategy #1

Open
wants to merge 32 commits into
base: add-cordon
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
.PHONY: test

# VERSION is based on a date stamp plus the last commit
VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags --match "v*")
VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags)
BRANCH?=$(shell git branch --show-current)
SHA1?=$(shell git rev-parse HEAD)
BUILD=$(shell date +%FT%T%z)
Expand All @@ -24,7 +24,7 @@ ARCHS = amd64 arm arm64

LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitbranch=${BRANCH} -X ${LDFLAG_LOCATION}.gitsha1=${SHA1}"

GOLANGCI_VERSION := v1.43.0
GOLANGCI_VERSION := v1.46.1
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint 2> /dev/null)

# REGISTRY is the container registry to push
Expand Down Expand Up @@ -135,13 +135,18 @@ ifndef HAS_GOLANGCI
endif
./_output/bin/golangci-lint run

lint-chart: ensure-helm-install
helm lint ./charts/descheduler

test-helm: ensure-helm-install
./test/run-helm-tests.sh
# helm

ensure-helm-install:
ifndef HAS_HELM
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && chmod 700 ./get_helm.sh && ./get_helm.sh
endif
endif

lint-chart: ensure-helm-install
helm lint ./charts/descheduler

build-helm:
helm package ./charts/descheduler --dependency-update --destination ./bin/chart

test-helm: ensure-helm-install
./test/run-helm-tests.sh
61 changes: 47 additions & 14 deletions pkg/descheduler/evictions/evictions.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ import (
"context"
"fmt"
"strings"
"time"
"encoding/json"

v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -118,7 +121,7 @@ func (pe *PodEvictor) TotalEvicted() uint {
// EvictPod returns non-nil error only when evicting a pod on a node is not
// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod
// is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, strategy string, reasons ...string) (bool, error) {
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, strategy string, reasons ...string) (bool, bool, error) {
reason := strategy
if len(reasons) > 0 {
reason += " (" + strings.Join(reasons, ", ") + ")"
Expand All @@ -127,24 +130,24 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node,
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, node.Name)
return false, false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, node.Name)
}

if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace)
return false, false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace)
}

err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
eviction, err := evictPod(ctx, pe.client, pod, node, pe.policyGroupVersion)
if err != nil {
// err is used only for logging purposes
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, nil
return false, eviction, nil
}

pe.nodepodCount[node]++
Expand All @@ -164,10 +167,10 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node,
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
}
return true, nil
return true, false, nil
}

func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, currentNode *v1.Node, policyGroupVersion string) (bool, error) {
deleteOptions := &metav1.DeleteOptions{}
// GracePeriodSeconds ?
eviction := &policy.Eviction{
Expand All @@ -181,15 +184,45 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
},
DeleteOptions: deleteOptions,
}
err := client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(ctx, eviction)

if apierrors.IsTooManyRequests(err) {
return fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err)
}
if apierrors.IsNotFound(err) {
return fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err)
deadline := time.Now().Add(1800 * time.Second)
for {
err := client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(ctx, eviction)
if apierrors.IsNotFound(err) {
return false, fmt.Errorf("pod not found when evicting %q: %v", pod.Name, err)
break
}
if apierrors.IsTooManyRequests(err) && time.Now().After(deadline) {
err = uncordonNode(ctx, client, currentNode)
if err != nil {
klog.ErrorS(err, "Failed to uncordon node", "node", klog.KObj(currentNode))
}
return true, fmt.Errorf("Not possible to evict pod %q. Node is now uncordon again.", pod.Name, err)
break
}
if err == nil {
break
}
klog.V(1).InfoS("Retrying to remove", "pod", klog.KObj(pod))
time.Sleep(60 * time.Second)
}
return err
return false, fmt.Errorf("Ready to evict %q", pod.Name)
}

func uncordonNode(ctx context.Context, client clientset.Interface, node *v1.Node) error {
klog.InfoS("Uncordoning node", "node", klog.KObj(node))

patch := struct {
Spec struct {
Unschedulable bool `json:"unschedulable"`
} `json:"spec"`
}{}
patch.Spec.Unschedulable = false

patchJson, _ := json.Marshal(patch)
options := metav1.PatchOptions{}
_, error := client.CoreV1().Nodes().Patch(ctx, node.Name, types.StrategicMergePatchType, patchJson, options)
return error
}

type Options struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/descheduler/strategies/duplicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ func RemoveDuplicatePods(
// It's assumed all duplicated pods are in the same priority class
// TODO(jchaloup): check if the pod has a different node to lend to
for _, pod := range pods[upperAvg-1:] {
if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil {
if _, _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/descheduler/strategies/failedpods.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ func RemoveFailedPods(
continue
}

if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil {
if _, _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/descheduler/strategies/node_affinity.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Inter
for _, pod := range pods {
if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
if _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil {
if _, _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil {
klog.ErrorS(err, "Error evicting pod")
break
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/descheduler/strategies/node_taint.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
taintFilterFnc,
) {
klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
if _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil {
if _, _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil {
klog.ErrorS(err, "Error evicting pod")
break
}
Expand Down
Loading