diff --git a/cmd/deletiondefender/main.go b/cmd/deletiondefender/main.go index 5ddc5542b4..16ce3e3623 100644 --- a/cmd/deletiondefender/main.go +++ b/cmd/deletiondefender/main.go @@ -76,13 +76,14 @@ func main() { log.Fatal(err) } + opts := manager.Options{} + // WARNING: It is CRITICAL that we do not use a cache for the client for the deletion defender. + // Doing so could give us stale reads when checking the deletion timestamp of CRDs, negating + // the Kubernetes API Server's strong consistency guarantees. + nocache.TurnOffAllCaching(&opts) + // Create a new Manager to provide shared dependencies and start components - mgr, err := manager.New(cfg, manager.Options{ - // WARNING: It is CRITICAL that we do not use a cache for the client for the deletion defender. - // Doing so could give us stale reads when checking the deletion timestamp of CRDs, negating - // the Kubernetes API Server's strong consistency guarantees. - NewClient: nocache.NoCacheClientFunc, - }) + mgr, err := manager.New(cfg, opts) if err != nil { log.Fatal(err) } diff --git a/cmd/unmanageddetector/main.go b/cmd/unmanageddetector/main.go index 2c40006cf2..be38c279c1 100644 --- a/cmd/unmanageddetector/main.go +++ b/cmd/unmanageddetector/main.go @@ -73,14 +73,16 @@ func main() { logging.Fatal(err, "error getting config to talk to API server") } + opts := manager.Options{} + + // Disable cache to avoid stale reads (e.g. of pods, which we need do + // to determine if a controller pod exists for a namespace). + // TODO(jcanseco): Determine if disabling the cache for this controller + // is really necessary. Disable it for now to play it safe. + nocache.TurnOffAllCaching(&opts) + // Create a new Manager to provide shared dependencies and start components - mgr, err := manager.New(cfg, manager.Options{ - // Disable cache to avoid stale reads (e.g. of pods, which we need do - // to determine if a controller pod exists for a namespace). - // TODO(jcanseco): Determine if disabling the cache for this controller - // is really necessary. Disable it for now to play it safe. - NewClient: nocache.NoCacheClientFunc, - }) + mgr, err := manager.New(cfg, opts) if err != nil { logging.Fatal(err, "error creating the manager") } diff --git a/config/tests/samples/create/harness.go b/config/tests/samples/create/harness.go index a8fd4ac032..2d90ce110d 100644 --- a/config/tests/samples/create/harness.go +++ b/config/tests/samples/create/harness.go @@ -122,8 +122,8 @@ func NewHarness(ctx context.Context, t *testing.T) *Harness { // creating multiple managers for tests will fail if more than one // manager tries to bind to the same port. kccConfig.ManagerOptions.HealthProbeBindAddress = "0" - // supply a concrete client to disable the default behavior of caching - kccConfig.ManagerOptions.Cache.ByObject = nocache.ByCCandCCC + // configure caching + nocache.OnlyCacheCCAndCCC(&kccConfig.ManagerOptions) kccConfig.StateIntoSpecDefaultValue = k8s.StateIntoSpecDefaultValueV1Beta1 var webhooks []cnrmwebhook.Config diff --git a/operator/cmd/manager/main.go b/operator/cmd/manager/main.go index 47d6d02d8b..9953b54bfb 100644 --- a/operator/cmd/manager/main.go +++ b/operator/cmd/manager/main.go @@ -76,15 +76,17 @@ func main() { scheme := controllers.BuildScheme() - mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ + opts := ctrl.Options{ Scheme: scheme, MetricsBindAddress: metricsAddr, LeaderElection: enableLeaderElection, Port: 9443, - // Disable the caching for the client. The cached reader will lazily list structured resources cross namespaces. - // The operator mostly only cares about resources in cnrm-system namespace. - NewClient: nocache.NoCacheClientFunc, - }) + } + // Disable the caching for the client. The cached reader will lazily list structured resources cross namespaces. + // The operator mostly only cares about resources in cnrm-system namespace. + nocache.TurnOffAllCaching(&opts) + + mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), opts) if err != nil { setupLog.Error(err, "unable to start manager") os.Exit(1) diff --git a/operator/pkg/test/main/testmain.go b/operator/pkg/test/main/testmain.go index 777f9f4870..8376805851 100644 --- a/operator/pkg/test/main/testmain.go +++ b/operator/pkg/test/main/testmain.go @@ -79,9 +79,7 @@ func StartTestEnv() (*rest.Config, func()) { func StartTestManager(cfg *rest.Config) (manager.Manager, func(), error) { scheme := controllers.BuildScheme() - mgr, err := manager.New(cfg, manager.Options{ - // Supply a concrete client to disable the default behavior of caching - NewClient: nocache.NoCacheClientFunc, + opts := manager.Options{ // Prevent manager from binding to a port to serve prometheus metrics // since creating multiple managers for tests will fail if more than // one manager tries to bind to the same port. @@ -91,7 +89,11 @@ func StartTestManager(cfg *rest.Config) (manager.Manager, func(), error) { // manager tries to bind to the same port. HealthProbeBindAddress: "0", Scheme: scheme, - }) + } + // Supply a concrete client to disable the default behavior of caching + nocache.TurnOffAllCaching(&opts) + + mgr, err := manager.New(cfg, opts) if err != nil { return nil, nil, fmt.Errorf("error creating manager: %w", err) } diff --git a/pkg/controller/kccmanager/kccmanager.go b/pkg/controller/kccmanager/kccmanager.go index 85d092aee4..c871240967 100644 --- a/pkg/controller/kccmanager/kccmanager.go +++ b/pkg/controller/kccmanager/kccmanager.go @@ -92,7 +92,7 @@ func New(ctx context.Context, restConfig *rest.Config, config Config) (manager.M } // only cache CC and CCC resources - opts.Cache.ByObject = nocache.ByCCandCCC + nocache.OnlyCacheCCAndCCC(&opts) mgr, err := manager.New(restConfig, opts) if err != nil { return nil, fmt.Errorf("error creating new manager: %w", err) diff --git a/pkg/controller/kccmanager/nocache/clientbuilder.go b/pkg/controller/kccmanager/nocache/clientbuilder.go index d109297637..f1cbbe1170 100644 --- a/pkg/controller/kccmanager/nocache/clientbuilder.go +++ b/pkg/controller/kccmanager/nocache/clientbuilder.go @@ -15,20 +15,60 @@ package nocache import ( - opv1beta1 "github.com/GoogleCloudPlatform/k8s-config-connector/operator/pkg/apis/core/v1beta1" - + iamv1beta1 "github.com/GoogleCloudPlatform/k8s-config-connector/pkg/apis/iam/v1beta1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/rest" - "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" ) -var NoCacheClientFunc = func(config *rest.Config, options client.Options) (client.Client, error) { +var newClientOnlyCacheCCAndCCC = func(config *rest.Config, options client.Options) (client.Client, error) { + kind := func(gvk schema.GroupVersionKind) *unstructured.Unstructured { + u := &unstructured.Unstructured{} + u.SetGroupVersionKind(gvk) + return u + } + + // We cache some objects because we read them often: + // + // * CC and CCC: read reconciliation mode, default state-into-spec, etc + // * CRDs: Build our schema (TODO: should we just list and then restart if these change?) + // + // Everything else, we don't want to cache. + // Unstructured objects don't get cached, but we need an exclude list for some objects we read using typed clients. + + options.Cache.DisableFor = []client.Object{ + kind(schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Namespace"}), + kind(schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Secret"}), + kind(schema.GroupVersionKind{Group: "", Version: "v1", Kind: "ConfigMap"}), + &iamv1beta1.IAMAuditConfig{}, + &iamv1beta1.IAMPartialPolicy{}, + &iamv1beta1.IAMPolicy{}, + &iamv1beta1.IAMPolicyMember{}, + } + + // Don't cache unstructured objects (this is the default anyway) + options.Cache.Unstructured = false + + return client.New(config, options) +} + +// OnlyCacheCCAndCCC turns off caching for most objects, except for CC and CCC objects. +// We do this so that our memory usage should not grow with the size of objects in the cluster, +// only those we are actively reconciling. +func OnlyCacheCCAndCCC(mgr *manager.Options) { + mgr.NewClient = newClientOnlyCacheCCAndCCC +} + +var newClientCacheNothing = func(config *rest.Config, options client.Options) (client.Client, error) { options.Cache = nil return client.New(config, options) } -// Fine grained cache controls for ConfigConnector and ConfigConnectorContext. -var ByCCandCCC = map[client.Object]cache.ByObject{ - &opv1beta1.ConfigConnector{}: {}, - &opv1beta1.ConfigConnectorContext{}: {}, +// TurnOffAllCaching turns off caching for all objects (including CC and CCC objects). +// We do this so that our memory usage should not grow with the size of objects in the cluster, +// only those we are actively reconciling. +func TurnOffAllCaching(mgr *manager.Options) { + mgr.NewClient = newClientCacheNothing } diff --git a/pkg/test/controller/reconcile.go b/pkg/test/controller/reconcile.go index 2ef38098a4..cbabcfb419 100644 --- a/pkg/test/controller/reconcile.go +++ b/pkg/test/controller/reconcile.go @@ -64,15 +64,17 @@ func StartTestManagerInstance(env *envtest.Environment, testType test.Type, whCf } func startTestManager(env *envtest.Environment, testType test.Type, whCfgs []cnrmwebhook.Config) (manager.Manager, func(), error) { - opt := manager.Options{ + opts := manager.Options{ Port: env.WebhookInstallOptions.LocalServingPort, Host: env.WebhookInstallOptions.LocalServingHost, CertDir: env.WebhookInstallOptions.LocalServingCertDir, // Disable metrics server for testing MetricsBindAddress: "0", } - opt.Cache.ByObject = nocache.ByCCandCCC - mgr, err := manager.New(env.Config, opt) + // supply a concrete client to disable the default behavior of caching + nocache.OnlyCacheCCAndCCC(&opts) + + mgr, err := manager.New(env.Config, opts) if err != nil { return nil, nil, fmt.Errorf("error creating manager: %w", err) }