Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement operator logic #25

20 changes: 14 additions & 6 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/openshift-kni/performance-addon-operators/pkg/apis"
"github.com/openshift-kni/performance-addon-operators/pkg/controller"
"github.com/openshift-kni/performance-addon-operators/pkg/controller/performanceprofile/components"
"github.com/openshift-kni/performance-addon-operators/version"

configv1 "github.com/openshift/api/config/v1"
Expand All @@ -22,13 +23,15 @@ import (
"github.com/spf13/pflag"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog"

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"

"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client/config"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
Expand Down Expand Up @@ -66,6 +69,14 @@ func main() {
os.Exit(1)
}

// we have two namespaces that we need to watch
// 1. tuned namespace
// 2. None namespace
namespaces := []string{
components.NamespaceNodeTuningOperator,
metav1.NamespaceNone,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please add a one-line comment to remind our future selves what "None namespace" means in this context? (e.g. watch all the namespaces, even though IIRC this is not the case)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

// Get a config to talk to the apiserver
cfg, err := config.GetConfig()
if err != nil {
Expand All @@ -75,6 +86,7 @@ func main() {

// Create a new Cmd to provide shared dependencies and start components
mgr, err := manager.New(cfg, manager.Options{
NewCache: cache.MultiNamespacedCacheBuilder(namespaces),
LeaderElection: true,
LeaderElectionID: "performance-addon-operators",
LeaderElectionNamespace: namespace,
Expand Down Expand Up @@ -162,13 +174,9 @@ func serveCRMetrics(cfg *rest.Config) error {
if err != nil {
return err
}
// Get the namespace the operator is currently deployed in.
operatorNs, err := k8sutil.GetOperatorNamespace()
if err != nil {
return err
}

// To generate metrics in other namespaces, add the values below.
ns := []string{operatorNs}
ns := []string{metav1.NamespaceNone}
// Generate and serve custom resource specific metrics.
err = kubemetrics.GenerateAndServeCRMetrics(cfg, ns, filteredGVK, metricsHost, operatorMetricsPort)
if err != nil {
Expand Down
144 changes: 101 additions & 43 deletions pkg/controller/performanceprofile/performanceprofile_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package performanceprofile
import (
"context"
"fmt"
"reflect"

performancev1alpha1 "github.com/openshift-kni/performance-addon-operators/pkg/apis/performance/v1alpha1"
"github.com/openshift-kni/performance-addon-operators/pkg/controller/performanceprofile/components"
Expand All @@ -15,15 +16,18 @@ import (
tunedv1 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/tuned/v1"
mcov1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"

apiequality "k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog"

"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/handler"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"
)
Expand All @@ -43,7 +47,11 @@ func Add(mgr manager.Manager) error {

// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager) reconcile.Reconciler {
return &ReconcilePerformanceProfile{client: mgr.GetClient(), scheme: mgr.GetScheme()}
return &ReconcilePerformanceProfile{
client: mgr.GetClient(),
scheme: mgr.GetScheme(),
assetsDir: components.AssetsDir,
}
}

// add adds a new Controller to mgr with r as the reconcile.Reconciler
Expand All @@ -60,11 +68,36 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
return err
}

// we want to initate reconcile loop only on change under labels or spec of the object
p := predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
if e.MetaOld == nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where did you get this from? I'm a bit surprised that this is necessary, looking at sigs.k8s.io/controller-runtime/pkg/source/internal/eventsource.go OnUpdate() I think these fields are always filled?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I unsure when we will get this, but the default update predicate has same checks so I left it as it is

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ack, works for me

klog.Error("Update event has no old metadata")
return false
}
if e.ObjectOld == nil {
klog.Error("Update event has no old runtime object to update")
return false
}
if e.ObjectNew == nil {
klog.Error("Update event has no new runtime object for update")
return false
}
if e.MetaNew == nil {
klog.Error("Update event has no new metadata")
return false
}

return e.MetaNew.GetGeneration() != e.MetaOld.GetGeneration() ||
!apiequality.Semantic.DeepEqual(e.MetaNew.GetLabels(), e.MetaOld.GetLabels())
},
}

// Watch for changes to machine configs owned by our controller
err = c.Watch(&source.Kind{Type: &mcov1.MachineConfig{}}, &handler.EnqueueRequestForOwner{
IsController: true,
OwnerType: &performancev1alpha1.PerformanceProfile{},
})
}, p)
if err != nil {
return err
}
Expand All @@ -73,34 +106,65 @@ func add(mgr manager.Manager, r reconcile.Reconciler) error {
err = c.Watch(&source.Kind{Type: &mcov1.KubeletConfig{}}, &handler.EnqueueRequestForOwner{
IsController: true,
OwnerType: &performancev1alpha1.PerformanceProfile{},
})
}, p)
if err != nil {
return err
}

// Watch for changes to machine config pools owned by our controller
err = c.Watch(&source.Kind{Type: &mcov1.MachineConfigPool{}}, &handler.EnqueueRequestForOwner{
// Watch for changes to feature gates owned by our controller
err = c.Watch(&source.Kind{Type: &configv1.FeatureGate{}}, &handler.EnqueueRequestForOwner{
IsController: true,
OwnerType: &performancev1alpha1.PerformanceProfile{},
})
}, p)
if err != nil {
return err
}

// Watch for changes to feature gates owned by our controller
err = c.Watch(&source.Kind{Type: &configv1.FeatureGate{}}, &handler.EnqueueRequestForOwner{
// Watch for changes to tuned owned by our controller
err = c.Watch(&source.Kind{Type: &tunedv1.Tuned{}}, &handler.EnqueueRequestForOwner{
IsController: true,
OwnerType: &performancev1alpha1.PerformanceProfile{},
})
}, p)
if err != nil {
return err
}

// Watch for changes to tuned owned by our controller
err = c.Watch(&source.Kind{Type: &tunedv1.Tuned{}}, &handler.EnqueueRequestForOwner{
// we do not want initiate reconcile loop on the pause of machine config pool
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pause and configuration?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

mcpPredicates := predicate.Funcs{
UpdateFunc: func(e event.UpdateEvent) bool {
if e.MetaOld == nil {
klog.Error("Update event has no old metadata")
return false
}
if e.ObjectOld == nil {
klog.Error("Update event has no old runtime object to update")
return false
}
if e.ObjectNew == nil {
klog.Error("Update event has no new runtime object for update")
return false
}
if e.MetaNew == nil {
klog.Error("Update event has no new metadata")
return false
}

mcpNew := e.ObjectOld.(*mcov1.MachineConfigPool)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the result is the same here, but assigning old to new looks confusing?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is a mistake, fixed

mcpOld := e.ObjectNew.(*mcov1.MachineConfigPool)

mcpNew.Spec.Paused = mcpOld.Spec.Paused
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this mutating a pointer that's going to be cached by the informer? We might need to do a deepcopy on the mcpNew object here. I'm not 100% certain though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will add deep copy just to be sure:)

mcpNew.Spec.Configuration = mcpOld.Spec.Configuration

return !reflect.DeepEqual(mcpOld.Spec, mcpNew.Spec) ||
!apiequality.Semantic.DeepEqual(e.MetaNew.GetLabels(), e.MetaOld.GetLabels())
},
}

// Watch for changes to machine config pools owned by our controller
err = c.Watch(&source.Kind{Type: &mcov1.MachineConfigPool{}}, &handler.EnqueueRequestForOwner{
IsController: true,
OwnerType: &performancev1alpha1.PerformanceProfile{},
})
}, mcpPredicates)
if err != nil {
return err
}
Expand All @@ -115,8 +179,9 @@ var _ reconcile.Reconciler = &ReconcilePerformanceProfile{}
type ReconcilePerformanceProfile struct {
// This client, initialized using mgr.Client() above, is a split client
// that reads objects from the cache and writes to the apiserver
client client.Client
scheme *runtime.Scheme
client client.Client
scheme *runtime.Scheme
assetsDir string
}

// Reconcile reads that state of the cluster for a PerformanceProfile object and makes changes based on the state read
Expand Down Expand Up @@ -145,7 +210,7 @@ func (r *ReconcilePerformanceProfile) Reconcile(request reconcile.Request) (reco

if instance.DeletionTimestamp != nil {
// delete components
if err := r.deleteComponents(); err != nil {
if err := r.deleteComponents(instance); err != nil {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should wait until both all components are deleted and removed from the cluster before removing the finalizer. Right now I believe we're just issuing the delete. We should also be waiting for the components to be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mm I do not remember that under some operator we waited for the real deletion, but if you insist I can add it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mm I do not remember that under some operator we waited for the real deletion, but if you insist I can add it.

Waiting for deletion guarantees us that we have 100% successfully cleaned up all dependent resources before the parent resource is completely removed. Without this, sometimes issues during tear down are masked. CI wouldn't necessarily catch this because the parent resource was deleted while dependent resources might still remain in a pending deletion type phase.

so basically, it ensures our teardown logic works. otherwise these issues go un-noticed and only manifest themselves in odd situations where people attempt to delete a resource and then re-post it again.

Copy link
Contributor Author

@cynepco3hahue cynepco3hahue Jan 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

klog.Errorf("failed to delete components: %v", err)
return reconcile.Result{}, err
}
Expand All @@ -160,6 +225,7 @@ func (r *ReconcilePerformanceProfile) Reconcile(request reconcile.Request) (reco
return reconcile.Result{}, nil
}
}

// TODO: we need to check if all under performance profiles values != nil
// first we need to decide if each of values required and we should move the check into validation webhook
// for now let's assume that all parameters needed for assets scrips are required
Expand Down Expand Up @@ -191,10 +257,10 @@ func (r *ReconcilePerformanceProfile) Reconcile(request reconcile.Request) (reco
return reconcile.Result{}, nil
}

func (r *ReconcilePerformanceProfile) applyComponents(performanceProfile *performancev1alpha1.PerformanceProfile) error {
func (r *ReconcilePerformanceProfile) applyComponents(profle *performancev1alpha1.PerformanceProfile) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo in arg name profile

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// deploy machine config pool
mcp := machineconfigpool.NewPerformance(performanceProfile.Spec.NodeSelector)
if err := controllerutil.SetControllerReference(performanceProfile, mcp, r.scheme); err != nil {
mcp := machineconfigpool.NewPerformance(profle)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe NewPerformancePool is a better name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we already have machineconfigpool in the name of the package, see a nice link that @fedepaol sent me - https://blog.golang.org/package-names

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, I see, interesting read about package names and New(). Thanks!
But NewPerformance still sounds a bit wrong to me... 🤔
Maybe go into the other direction and make the func name even shorter, only New()?
Or sth different than New, e.g. FromProfile()?
WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only New() works for me, will change it

if err := controllerutil.SetControllerReference(profle, mcp, r.scheme); err != nil {
return err
}
if err := r.createOrUpdateMachineConfigPool(mcp); err != nil {
Expand All @@ -207,18 +273,12 @@ func (r *ReconcilePerformanceProfile) applyComponents(performanceProfile *perfor
}

// deploy machine config
data := &machineconfig.Data{
HugePages: performanceProfile.Spec.HugePages,
IsolatedCPUs: performanceProfile.Spec.CPU.Isolated,
NonIsolatedCpus: performanceProfile.Spec.CPU.NonIsolated,
RTRepoURL: *performanceProfile.Spec.RealTimeKernel.RepoURL,
}
mc, err := machineconfig.NewPerformance(components.AssetsDir, data)
mc, err := machineconfig.NewPerformance(r.assetsDir, profle)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NewPerformanceConfig?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see the link above

if err != nil {
return err
}

if err := controllerutil.SetControllerReference(performanceProfile, mc, r.scheme); err != nil {
if err := controllerutil.SetControllerReference(profle, mc, r.scheme); err != nil {
return err
}

Expand All @@ -227,8 +287,8 @@ func (r *ReconcilePerformanceProfile) applyComponents(performanceProfile *perfor
}

// deploy kubelet config
kc := kubeletconfig.NewPerformance(performanceProfile.Spec.CPU.Reserved)
if err := controllerutil.SetControllerReference(performanceProfile, kc, r.scheme); err != nil {
kc := kubeletconfig.NewPerformance(profle)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NewPerformanceConfig?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same for following NewXyz() funcs...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see the link above

if err := controllerutil.SetControllerReference(profle, kc, r.scheme); err != nil {
return err
}
if err := r.createOrUpdateKubeletConfig(kc); err != nil {
Expand All @@ -237,20 +297,20 @@ func (r *ReconcilePerformanceProfile) applyComponents(performanceProfile *perfor

// deploy feature gate
fg := featuregate.NewLatencySensitive()
if err := controllerutil.SetControllerReference(performanceProfile, fg, r.scheme); err != nil {
if err := controllerutil.SetControllerReference(profle, fg, r.scheme); err != nil {
return err
}
if err := r.createOrUpdateFeatureGate(fg); err != nil {
return err
}

// deploy network latency tuned
networkLatencyTuned, err := tuned.NewNetworkLatency(components.AssetsDir)
networkLatencyTuned, err := tuned.NewNetworkLatency(r.assetsDir)
if err != nil {
return err
}

if err := controllerutil.SetControllerReference(performanceProfile, networkLatencyTuned, r.scheme); err != nil {
if err := controllerutil.SetControllerReference(profle, networkLatencyTuned, r.scheme); err != nil {
return err
}

Expand All @@ -259,16 +319,12 @@ func (r *ReconcilePerformanceProfile) applyComponents(performanceProfile *perfor
}

// deploy real time kernel tuned
realTimeKernelTuned, err := tuned.NewWorkerRealTimeKernel(
components.AssetsDir,
performanceProfile.Spec.NodeSelector,
performanceProfile.Spec.CPU.Isolated,
)
realTimeKernelTuned, err := tuned.NewWorkerRealTimeKernel(r.assetsDir, profle)
if err != nil {
return err
}

if err := controllerutil.SetControllerReference(performanceProfile, realTimeKernelTuned, r.scheme); err != nil {
if err := controllerutil.SetControllerReference(profle, realTimeKernelTuned, r.scheme); err != nil {
return err
}

Expand Down Expand Up @@ -323,12 +379,14 @@ func (r *ReconcilePerformanceProfile) verifyPerformanceProfileParameters(perform
return nil
}

func (r *ReconcilePerformanceProfile) deleteComponents() error {
if err := r.pauseMachineConfigPool(components.RoleWorkerPerformance, true); err != nil {
func (r *ReconcilePerformanceProfile) deleteComponents(profile *performancev1alpha1.PerformanceProfile) error {
name := components.GetComponentName(profile.Name, components.RoleWorkerPerformance)
if err := r.pauseMachineConfigPool(name, true); err != nil {
return err
}

if err := r.deleteTuned(components.ProfileNameWorkerRT, components.NamespaceNodeTuningOperator); err != nil {
tunedName := components.GetComponentName(profile.Name, components.ProfileNameWorkerRT)
if err := r.deleteTuned(tunedName, components.NamespaceNodeTuningOperator); err != nil {
return err
}

Expand All @@ -340,15 +398,15 @@ func (r *ReconcilePerformanceProfile) deleteComponents() error {
return err
}

if err := r.deleteKubeletConfig(components.RoleWorkerPerformance); err != nil {
if err := r.deleteKubeletConfig(name); err != nil {
return err
}

if err := r.deleteMachineConfig(components.RoleWorkerPerformance); err != nil {
if err := r.deleteMachineConfig(name); err != nil {
return err
}

return r.deleteMachineConfigPool(components.RoleWorkerPerformance)
return r.deleteMachineConfigPool(name)
}

func hasFinalizer(profile *performancev1alpha1.PerformanceProfile, finalizer string) bool {
Expand Down