From cc006655c398fe8b631318f68fc88d5b03eea04f Mon Sep 17 00:00:00 2001 From: Joseph Irving Date: Mon, 25 Mar 2019 10:09:58 +0000 Subject: [PATCH] change taint format and events (#7) * change taint format and events --- README.md | 14 ++++++++-- pkg/nidhogg/handler.go | 61 ++++++++++++++++++++++++++++-------------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 14a03b76..b70a4144 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Nidhogg was built using [Kubebuilder](https://github.com/kubernetes-sigs/kubebui ## Usage Nidhogg requires a json config file to tell it what Daemonsets to watch and what nodes to act on. -`nodeSelector` is a map of keys/values corresponding to node labels. `daemonsets` is an array of Daemonsets to watch, each containing two fields `name` and `namespace`. Nodes are tainted with taint that follows the format of `-not-ready:NoSchedule`. +`nodeSelector` is a map of keys/values corresponding to node labels. `daemonsets` is an array of Daemonsets to watch, each containing two fields `name` and `namespace`. Nodes are tainted with a taint that follows the format of `nidhogg.uswitch.com/namespace.name:NoSchedule`. Example: @@ -27,7 +27,17 @@ Example: } ``` This example will taint any nodes that have the label `node-role.kubernetes.io/node=""` if they do not have a running and ready pod from the `kiam` daemonset in the `kube-system` namespace. -It will add a taint of `kiam-not-ready:NoSchedule` until there is a ready kiam pod on the node. +It will add a taint of `nidhogg.uswitch.com/kube-system.kiam:NoSchedule` until there is a ready kiam pod on the node. 
+ +If you want pods to be able to run on the nidhogg tainted nodes you can add a toleration: + +```yaml +spec: + tolerations: + - key: nidhogg.uswitch.com/kube-system.kiam + operator: "Exists" + effect: NoSchedule +``` ## Deploying Docker images can be found at https://quay.io/uswitch/nidhogg diff --git a/pkg/nidhogg/handler.go b/pkg/nidhogg/handler.go index 0047312b..f4a94bee 100644 --- a/pkg/nidhogg/handler.go +++ b/pkg/nidhogg/handler.go @@ -14,6 +14,8 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/runtime/log" ) +const taintKey = "nidhogg.uswitch.com" + // Handler performs the main business logic of the Wave controller type Handler struct { client.Client @@ -33,6 +35,11 @@ type Daemonset struct { Namespace string `json:"namespace"` } +type taintChanges struct { + taintsAdded []string + taintsRemoved []string +} + // NewHandler constructs a new instance of Handler func NewHandler(c client.Client, r record.EventRecorder, conf HandlerConfig) *Handler { return &Handler{Client: c, recorder: r, config: conf} @@ -49,34 +56,19 @@ func (h *Handler) HandleNode(instance *corev1.Node) (reconcile.Result, error) { return reconcile.Result{}, nil } - copy := instance.DeepCopy() - - for _, daemonset := range h.config.Daemonsets { - taintName := daemonset.Name + "-not-ready" - // Get Pod for node - pod, err := h.getDaemonsetPod(instance.Name, daemonset) - if err != nil { - return reconcile.Result{}, fmt.Errorf("error fetching pods: %v", err) - } - - if pod == nil || podNotReady(pod) { - if !taintPresent(copy, taintName) { - copy.Spec.Taints = addTaint(copy.Spec.Taints, taintName) - } - } else { - copy.Spec.Taints = removeTaint(copy.Spec.Taints, taintName) - } - + copy, taintChanges, err := h.caclulateTaints(instance) + if err != nil { + return reconcile.Result{}, fmt.Errorf("error caluclating taints for node: %v", err) } if !reflect.DeepEqual(copy, instance) { instance = copy - log.Info("Updating Node taints", "instance", instance.Name, "taints", instance.Spec.Taints) + 
log.Info("Updating Node taints", "instance", instance.Name, "taints added", taintChanges.taintsAdded, "taints removed", taintChanges.taintsRemoved) err := h.Update(context.TODO(), instance) // this is a hack to make the event work on a non-namespaced object copy.UID = types.UID(copy.Name) - h.recorder.Eventf(copy, corev1.EventTypeNormal, "TaintsChanged", "Taints updated to %s", copy.Spec.Taints) + h.recorder.Eventf(copy, corev1.EventTypeNormal, "TaintsChanged", "Taints added: %s, Taints removed: %s", taintChanges.taintsAdded, taintChanges.taintsRemoved) if err != nil { return reconcile.Result{}, err } @@ -85,6 +77,35 @@ func (h *Handler) HandleNode(instance *corev1.Node) (reconcile.Result, error) { return reconcile.Result{}, nil } +func (h *Handler) caclulateTaints(instance *corev1.Node) (*corev1.Node, taintChanges, error) { + + copy := instance.DeepCopy() + + var changes taintChanges + + for _, daemonset := range h.config.Daemonsets { + + taint := fmt.Sprintf("%s/%s.%s", taintKey, daemonset.Namespace, daemonset.Name) + // Get Pod for node + pod, err := h.getDaemonsetPod(instance.Name, daemonset) + if err != nil { + return nil, taintChanges{}, fmt.Errorf("error fetching pods: %v", err) + } + + if pod == nil || podNotReady(pod) { + if !taintPresent(copy, taint) { + copy.Spec.Taints = addTaint(copy.Spec.Taints, taint) + changes.taintsAdded = append(changes.taintsAdded, taint) + } + } else if taintPresent(copy, taint) { + copy.Spec.Taints = removeTaint(copy.Spec.Taints, taint) + changes.taintsRemoved = append(changes.taintsRemoved, taint) + } + + } + return copy, changes, nil +} + func (h *Handler) getDaemonsetPod(nodeName string, ds Daemonset) (*corev1.Pod, error) { opts := client.InNamespace(ds.Namespace) pods := &corev1.PodList{}