diff --git a/build/yaml/samples/nsx_v1alpha1_networkinfo.yaml b/build/yaml/samples/nsx_v1alpha1_networkinfo.yaml index 1368d2e6a..06752bcc6 100644 --- a/build/yaml/samples/nsx_v1alpha1_networkinfo.yaml +++ b/build/yaml/samples/nsx_v1alpha1_networkinfo.yaml @@ -2,8 +2,6 @@ apiVersion: crd.nsx.vmware.com/v1alpha1 kind: NetworkInfo metadata: creationTimestamp: "2024-05-14T02:14:18Z" - finalizers: - - networkinfo.crd.nsx.vmware.com/finalizer generation: 2 name: kube-system namespace: kube-system diff --git a/pkg/controllers/networkinfo/networkinfo_controller.go b/pkg/controllers/networkinfo/networkinfo_controller.go index d3f856362..277c8ece4 100644 --- a/pkg/controllers/networkinfo/networkinfo_controller.go +++ b/pkg/controllers/networkinfo/networkinfo_controller.go @@ -7,8 +7,10 @@ import ( "context" "errors" "fmt" + "time" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" apimachineryruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" @@ -17,7 +19,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "github.com/vmware-tanzu/nsx-operator/pkg/apis/vpc/v1alpha1" "github.com/vmware-tanzu/nsx-operator/pkg/controllers/common" @@ -45,224 +46,194 @@ type NetworkInfoReconciler struct { } func (r *NetworkInfoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - obj := &v1alpha1.NetworkInfo{} - log.Info("reconciling NetworkInfo CR", "NetworkInfo", req.NamespacedName) + startTime := time.Now() + defer func() { + log.Info("Finished reconciling NetworkInfo", "NetworkInfo", req.NamespacedName, "duration(ms)", time.Since(startTime).Milliseconds()) + }() metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerSyncTotal, common.MetricResTypeNetworkInfo) - if err := r.Client.Get(ctx, req.NamespacedName, obj); err != nil { - log.Error(err, "unable to fetch NetworkInfo CR", "req", req.NamespacedName) - return common.ResultNormal, client.IgnoreNotFound(err) - } - if obj.ObjectMeta.DeletionTimestamp.IsZero() { - metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerUpdateTotal, common.MetricResTypeNetworkInfo) - if !controllerutil.ContainsFinalizer(obj, commonservice.NetworkInfoFinalizerName) { - controllerutil.AddFinalizer(obj, commonservice.NetworkInfoFinalizerName) - if err := r.Client.Update(ctx, obj); err != nil { - log.Error(err, "add finalizer", "NetworkInfo", req.NamespacedName) - updateFail(r, ctx, obj, &err, r.Client, nil) + networkInfoCR := &v1alpha1.NetworkInfo{} + if err := r.Client.Get(ctx, req.NamespacedName, networkInfoCR); err != nil { + if apierrors.IsNotFound(err) { + if err := r.deleteStaleVPCs(ctx, req.Namespace, ""); err != nil { + log.Error(err, "Failed to delete stale NSX VPC", "NetworkInfo", req.NamespacedName) return common.ResultRequeue, err } - log.V(1).Info("added finalizer on NetworkInfo CR", "NetworkInfo", req.NamespacedName) - } - // TODO: - // 1. check whether the logic to get VPC network config can be replaced by GetVPCNetworkConfigByNamespace - // 2. sometimes the variable nc points to a VPCNetworkInfo, sometimes it's a VPCNetworkConfiguration, we need to distinguish between them. - ncName, err := r.Service.GetNetworkconfigNameFromNS(obj.Namespace) - if err != nil { - log.Error(err, "failed to get network config name for VPC when creating NSX VPC", "VPC", obj.Name) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err - } - nc, _exist := r.Service.GetVPCNetworkConfig(ncName) - if !_exist { - message := fmt.Sprintf("failed to read network config %s when creating NSX VPC", ncName) - log.Info(message) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, errors.New(message) - } - log.Info("got network config from store", "NetworkConfig", ncName) - vpcNetworkConfiguration := &v1alpha1.VPCNetworkConfiguration{} - err = r.Client.Get(ctx, types.NamespacedName{Name: commonservice.SystemVPCNetworkConfigurationName}, vpcNetworkConfiguration) - if err != nil { - log.Error(err, "failed to get system VPCNetworkConfiguration") - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err - } - gatewayConnectionReady, _, err := getGatewayConnectionStatus(ctx, vpcNetworkConfiguration) - if err != nil { - log.Error(err, "failed to get the gateway connection status", "req", req.NamespacedName) - return common.ResultRequeueAfter10sec, err - } - - gatewayConnectionReason := "" - if !gatewayConnectionReady { - if ncName == commonservice.SystemVPCNetworkConfigurationName { - gatewayConnectionReady, gatewayConnectionReason, err = r.Service.ValidateGatewayConnectionStatus(&nc) - log.Info("got the gateway connection status", "gatewayConnectionReady", gatewayConnectionReady, "gatewayConnectionReason", gatewayConnectionReason) - if err != nil { - log.Error(err, "failed to validate the edge and gateway connection", "org", nc.Org, "project", nc.NSXProject) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err - } - setVPCNetworkConfigurationStatusWithGatewayConnection(ctx, r.Client, vpcNetworkConfiguration, gatewayConnectionReady, gatewayConnectionReason) - } else { - log.Info("skipping reconciling the network info because the system gateway connection is not ready", "NetworkInfo", req.NamespacedName) - return common.ResultRequeueAfter60sec, nil - } - } - lbProvider := r.Service.GetLBProvider() - createdVpc, err := r.Service.CreateOrUpdateVPC(obj, &nc, lbProvider) - if err != nil { - log.Error(err, "create vpc failed, would retry exponentially", "VPC", req.NamespacedName) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err + return common.ResultNormal, nil } + log.Error(err, "Unable to fetch NetworkInfo CR", "NetworkInfo", req.NamespacedName) + return common.ResultRequeue, err + } - var privateIPs []string - var vpcConnectivityProfilePath string - var nsxLBSPath string - isPreCreatedVPC := vpc.IsPreCreatedVPC(nc) - if isPreCreatedVPC { - privateIPs = createdVpc.PrivateIps - vpcConnectivityProfilePath = *createdVpc.VpcConnectivityProfile - // Retrieve NSX lbs path if Avi is not used with the pre-created VPC. - if createdVpc.LoadBalancerVpcEndpoint == nil || createdVpc.LoadBalancerVpcEndpoint.Enabled == nil || - !*createdVpc.LoadBalancerVpcEndpoint.Enabled { - nsxLBSPath, err = r.Service.GetLBSsFromNSXByVPC(*createdVpc.Path) - if err != nil { - log.Error(err, "failed to get NSX LBS path with pre-created VPC", "VPC", createdVpc.Path) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err - } - } - } else { - privateIPs = nc.PrivateIPs - vpcConnectivityProfilePath = nc.VPCConnectivityProfile + // Check if the CR is marked for deletion + if !networkInfoCR.ObjectMeta.DeletionTimestamp.IsZero() { + metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerDeleteTotal, common.MetricResTypeNetworkInfo) + if err := r.deleteStaleVPCs(ctx, networkInfoCR.GetNamespace(), string(networkInfoCR.UID)); err != nil { + deleteFail(r, ctx, networkInfoCR, &err, r.Client) + log.Error(err, "Failed to delete stale NSX VPC, retrying", "NetworkInfo", req.NamespacedName) + return common.ResultRequeue, err } + deleteSuccess(r, ctx, networkInfoCR) + return common.ResultNormal, nil + } + metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerUpdateTotal, common.MetricResTypeNetworkInfo) + // TODO: + // 1. check whether the logic to get VPC network config can be replaced by GetVPCNetworkConfigByNamespace + // 2. sometimes the variable nc points to a VPCNetworkInfo, sometimes it's a VPCNetworkConfiguration, we need to distinguish between them. + ncName, err := r.Service.GetNetworkconfigNameFromNS(networkInfoCR.Namespace) + if err != nil { + log.Error(err, "Failed to get network config name for VPC when creating NSX VPC", "NetworkInfo", networkInfoCR.Name) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) + return common.ResultRequeueAfter10sec, err + } + nc, _exist := r.Service.GetVPCNetworkConfig(ncName) + if !_exist { + message := fmt.Sprintf("Failed to read network config %s when creating NSX VPC", ncName) + log.Info(message) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) + return common.ResultRequeueAfter10sec, errors.New(message) + } + log.Info("Fetched network config from store", "NetworkConfig", ncName) + vpcNetworkConfiguration := &v1alpha1.VPCNetworkConfiguration{} + err = r.Client.Get(ctx, types.NamespacedName{Name: commonservice.SystemVPCNetworkConfigurationName}, vpcNetworkConfiguration) + if err != nil { + log.Error(err, "Failed to get system VPCNetworkConfiguration") + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) + return common.ResultRequeueAfter10sec, err + } + gatewayConnectionReady, _, err := getGatewayConnectionStatus(ctx, vpcNetworkConfiguration) + if err != nil { + log.Error(err, "Failed to get the gateway connection status", "NetworkInfo", req.NamespacedName) + return common.ResultRequeueAfter10sec, err + } - snatIP, path, cidr := "", "", "" - - vpcConnectivityProfile, err := r.Service.GetVpcConnectivityProfile(&nc, vpcConnectivityProfilePath) - if err != nil { - log.Error(err, "get VpcConnectivityProfile failed, would retry exponentially", "VPC", req.NamespacedName) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err - } - hasExternalIPs := true + gatewayConnectionReason := "" + if !gatewayConnectionReady { if ncName == commonservice.SystemVPCNetworkConfigurationName { - if len(vpcConnectivityProfile.ExternalIpBlocks) == 0 { - hasExternalIPs = false - log.Error(err, "there is no ExternalIPBlock in VPC ConnectivityProfile", "VPC", req.NamespacedName) - } - setVPCNetworkConfigurationStatusWithNoExternalIPBlock(ctx, r.Client, vpcNetworkConfiguration, hasExternalIPs) - } - // currently, auto snat is not exposed, and use default value True - // checking autosnat to support future extension in vpc configuration - autoSnatEnabled := r.Service.IsEnableAutoSNAT(vpcConnectivityProfile) - if autoSnatEnabled { - snatIP, err = r.Service.GetDefaultSNATIP(*createdVpc) + gatewayConnectionReady, gatewayConnectionReason, err = r.Service.ValidateGatewayConnectionStatus(&nc) + log.Info("got the gateway connection status", "gatewayConnectionReady", gatewayConnectionReady, "gatewayConnectionReason", gatewayConnectionReason) if err != nil { - log.Error(err, "failed to read default SNAT ip from VPC", "VPC", createdVpc.Id) - state := &v1alpha1.VPCState{ - Name: *createdVpc.DisplayName, - DefaultSNATIP: "", - LoadBalancerIPAddresses: "", - PrivateIPs: privateIPs, - } - updateFail(r, ctx, obj, &err, r.Client, state) + log.Error(err, "Failed to validate the edge and gateway connection", "Org", nc.Org, "Project", nc.NSXProject) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) return common.ResultRequeueAfter10sec, err } + setVPCNetworkConfigurationStatusWithGatewayConnection(ctx, r.Client, vpcNetworkConfiguration, gatewayConnectionReady, gatewayConnectionReason) + } else { + log.Info("Skipping reconciliation due to unready system gateway connection", "NetworkInfo", req.NamespacedName) + return common.ResultRequeueAfter60sec, nil } - if ncName == commonservice.SystemVPCNetworkConfigurationName { - vpcNetworkConfiguration := &v1alpha1.VPCNetworkConfiguration{} - err := r.Client.Get(ctx, types.NamespacedName{Name: ncName}, vpcNetworkConfiguration) - if err != nil { - log.Error(err, "failed to get VPCNetworkConfiguration", "Name", ncName) - updateFail(r, ctx, obj, &err, r.Client, nil) - return common.ResultRequeueAfter10sec, err - } - log.Info("got the AutoSnat status", "autoSnatEnabled", autoSnatEnabled, "req", req.NamespacedName) - setVPCNetworkConfigurationStatusWithSnatEnabled(ctx, r.Client, vpcNetworkConfiguration, autoSnatEnabled) - } + } + lbProvider := r.Service.GetLBProvider() + createdVpc, err := r.Service.CreateOrUpdateVPC(networkInfoCR, &nc, lbProvider) + if err != nil { + log.Error(err, "Failed to create or update VPC", "NetworkInfo", req.NamespacedName) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) + return common.ResultRequeueAfter10sec, err + } - // if lb vpc enabled, read avi subnet path and cidr - // nsx bug, if set LoadBalancerVpcEndpoint.Enabled to false, when read this vpc back, - // LoadBalancerVpcEndpoint.Enabled will become a nil pointer. - if lbProvider == vpc.AVILB && createdVpc.LoadBalancerVpcEndpoint != nil && createdVpc.LoadBalancerVpcEndpoint.Enabled != nil && *createdVpc.LoadBalancerVpcEndpoint.Enabled { - path, cidr, err = r.Service.GetAVISubnetInfo(*createdVpc) + var privateIPs []string + var vpcConnectivityProfilePath, nsxLBSPath string + isPreCreatedVPC := vpc.IsPreCreatedVPC(nc) + if isPreCreatedVPC { + privateIPs = createdVpc.PrivateIps + vpcConnectivityProfilePath = *createdVpc.VpcConnectivityProfile + // Retrieve NSX lbs path if Avi is not used with the pre-created VPC. + if createdVpc.LoadBalancerVpcEndpoint == nil || createdVpc.LoadBalancerVpcEndpoint.Enabled == nil || + !*createdVpc.LoadBalancerVpcEndpoint.Enabled { + nsxLBSPath, err = r.Service.GetLBSsFromNSXByVPC(*createdVpc.Path) if err != nil { - log.Error(err, "failed to read lb subnet path and cidr", "VPC", createdVpc.Id) - state := &v1alpha1.VPCState{ - Name: *createdVpc.DisplayName, - DefaultSNATIP: snatIP, - LoadBalancerIPAddresses: "", - PrivateIPs: privateIPs, - } - updateFail(r, ctx, obj, &err, r.Client, state) + log.Error(err, "Failed to get NSX LBS path with pre-created VPC", "VPC", createdVpc.Path) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) return common.ResultRequeueAfter10sec, err } } + } else { + privateIPs = nc.PrivateIPs + vpcConnectivityProfilePath = nc.VPCConnectivityProfile + } - state := &v1alpha1.VPCState{ - Name: *createdVpc.DisplayName, - DefaultSNATIP: snatIP, - LoadBalancerIPAddresses: cidr, - PrivateIPs: privateIPs, - VPCPath: *createdVpc.Path, - } + snatIP, path, cidr := "", "", "" - if !isPreCreatedVPC { - nsxLBSPath = r.Service.GetDefaultNSXLBSPathByVPC(*createdVpc.Id) - } - // AKO needs to know the AVI subnet path created by NSX - setVPCNetworkConfigurationStatusWithLBS(ctx, r.Client, ncName, state.Name, path, nsxLBSPath, *createdVpc.Path) - updateSuccess(r, ctx, obj, r.Client, state, nc.Name, path) - if ncName == commonservice.SystemVPCNetworkConfigurationName && (!gatewayConnectionReady || !autoSnatEnabled || !hasExternalIPs) { - log.Info("requeuing the NetworkInfo CR because VPCNetworkConfiguration system is not ready", "gatewayConnectionReason", gatewayConnectionReason, "autoSnatEnabled", autoSnatEnabled, "hasExternalIPs", hasExternalIPs, "req", req) - return common.ResultRequeueAfter60sec, nil + vpcConnectivityProfile, err := r.Service.GetVpcConnectivityProfile(&nc, vpcConnectivityProfilePath) + if err != nil { + log.Error(err, "Failed to get VPC connectivity profile", "NetworkInfo", req.NamespacedName) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) + return common.ResultRequeueAfter10sec, err + } + hasExternalIPs := true + if ncName == commonservice.SystemVPCNetworkConfigurationName { + if len(vpcConnectivityProfile.ExternalIpBlocks) == 0 { + hasExternalIPs = false + log.Error(err, "There is no ExternalIPBlock in VPC ConnectivityProfile", "NetworkInfo", req.NamespacedName) } - } else { - if controllerutil.ContainsFinalizer(obj, commonservice.NetworkInfoFinalizerName) { - metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerDeleteTotal, common.MetricResTypeNetworkInfo) - isShared, err := r.Service.IsSharedVPCNamespaceByNS(obj.GetNamespace()) - if err != nil { - log.Error(err, "failed to check if namespace is shared", "Namespace", obj.GetNamespace()) - return common.ResultRequeue, err - } - vpcs := r.Service.GetVPCsByNamespace(obj.GetNamespace()) - // if nsx resource do not exist, continue to remove finalizer, or the crd can not be removed - if len(vpcs) == 0 { - // when nsx vpc not found in vpc store, skip deleting NSX VPC - log.Info("can not find VPC in store, skip deleting NSX VPC, remove finalizer from NetworkInfo CR") - } else if !isShared { - for _, vpc := range vpcs { - // first delete vpc and then ipblock or else it will fail arguing it is being referenced by other objects - if err := r.Service.DeleteVPC(*vpc.Path); err != nil { - log.Error(err, "failed to delete nsx VPC, would retry exponentially", "NetworkInfo", req.NamespacedName) - deleteFail(r, ctx, obj, &err, r.Client) - return common.ResultRequeueAfter10sec, err - } - } + setVPCNetworkConfigurationStatusWithNoExternalIPBlock(ctx, r.Client, vpcNetworkConfiguration, hasExternalIPs) + } + // currently, auto snat is not exposed, and use default value True + // checking autosnat to support future extension in VPC configuration + autoSnatEnabled := r.Service.IsEnableAutoSNAT(vpcConnectivityProfile) + if autoSnatEnabled { + snatIP, err = r.Service.GetDefaultSNATIP(*createdVpc) + if err != nil { + log.Error(err, "Failed to read default SNAT IP from VPC", "VPC", createdVpc.Id) + state := &v1alpha1.VPCState{ + Name: *createdVpc.DisplayName, + DefaultSNATIP: "", + LoadBalancerIPAddresses: "", + PrivateIPs: privateIPs, } + updateFail(r, ctx, networkInfoCR, &err, r.Client, state) + return common.ResultRequeueAfter10sec, err + } + } + if ncName == commonservice.SystemVPCNetworkConfigurationName { + vpcNetworkConfiguration := &v1alpha1.VPCNetworkConfiguration{} + err := r.Client.Get(ctx, types.NamespacedName{Name: ncName}, vpcNetworkConfiguration) + if err != nil { + log.Error(err, "Failed to get VPCNetworkConfiguration", "Name", ncName) + updateFail(r, ctx, networkInfoCR, &err, r.Client, nil) + return common.ResultRequeueAfter10sec, err + } + log.Info("Got the AutoSnat status", "autoSnatEnabled", autoSnatEnabled, "NetworkInfo", req.NamespacedName) + setVPCNetworkConfigurationStatusWithSnatEnabled(ctx, r.Client, vpcNetworkConfiguration, autoSnatEnabled) + } - controllerutil.RemoveFinalizer(obj, commonservice.NetworkInfoFinalizerName) - if err := r.Client.Update(ctx, obj); err != nil { - deleteFail(r, ctx, obj, &err, r.Client) - return common.ResultRequeue, err - } - ncName, err := r.Service.GetNetworkconfigNameFromNS(obj.Namespace) - if err != nil { - log.Error(err, "failed to get network config name for VPC when deleting NetworkInfo CR", "NetworkInfo", obj.Name) - return common.ResultRequeueAfter10sec, err + // if lb VPC enabled, read avi subnet path and cidr + // nsx bug, if set LoadBalancerVpcEndpoint.Enabled to false, when read this VPC back, + // LoadBalancerVpcEndpoint.Enabled will become a nil pointer. + if lbProvider == vpc.AVILB && createdVpc.LoadBalancerVpcEndpoint != nil && createdVpc.LoadBalancerVpcEndpoint.Enabled != nil && *createdVpc.LoadBalancerVpcEndpoint.Enabled { + path, cidr, err = r.Service.GetAVISubnetInfo(*createdVpc) + if err != nil { + log.Error(err, "Failed to read LB Subnet path and CIDR", "VPC", createdVpc.Id) + state := &v1alpha1.VPCState{ + Name: *createdVpc.DisplayName, + DefaultSNATIP: snatIP, + LoadBalancerIPAddresses: "", + PrivateIPs: privateIPs, } - log.V(1).Info("removed finalizer", "NetworkInfo", req.NamespacedName) - deleteVPCNetworkConfigurationStatus(ctx, r.Client, ncName, vpcs, r.Service.ListVPC()) - deleteSuccess(r, ctx, obj) - } else { - // only print a message because it's not a normal case - log.Info("finalizers cannot be recognized", "NetworkInfo", req.NamespacedName) + updateFail(r, ctx, networkInfoCR, &err, r.Client, state) + return common.ResultRequeueAfter10sec, err } } + + state := &v1alpha1.VPCState{ + Name: *createdVpc.DisplayName, + DefaultSNATIP: snatIP, + LoadBalancerIPAddresses: cidr, + PrivateIPs: privateIPs, + VPCPath: *createdVpc.Path, + } + + if !isPreCreatedVPC { + nsxLBSPath = r.Service.GetDefaultNSXLBSPathByVPC(*createdVpc.Id) + } + // AKO needs to know the AVI subnet path created by NSX + setVPCNetworkConfigurationStatusWithLBS(ctx, r.Client, ncName, state.Name, path, nsxLBSPath, *createdVpc.Path) + updateSuccess(r, ctx, networkInfoCR, r.Client, state, nc.Name, path) + if ncName == commonservice.SystemVPCNetworkConfigurationName && (!gatewayConnectionReady || !autoSnatEnabled || !hasExternalIPs) { + log.Info("Requeue NetworkInfo CR because VPCNetworkConfiguration system is not ready", "gatewayConnectionReason", gatewayConnectionReason, "autoSnatEnabled", autoSnatEnabled, "hasExternalIPs", hasExternalIPs, "req", req) + return common.ResultRequeueAfter60sec, nil + } + return common.ResultNormal, nil } @@ -274,10 +245,10 @@ func (r *NetworkInfoReconciler) setupWithManager(mgr ctrl.Manager) error { MaxConcurrentReconciles: common.NumReconcile(), }). Watches( - // For created/removed network config, add/remove from vpc network config cache, + // For created/removed network config, add/remove from VPC network config cache, // and update IPBlocksInfo. // For modified network config, currently only support appending ips to public ip blocks, - // update network config in cache and update nsx vpc object. + // update network config in cache and update nsx VPC object. &v1alpha1.VPCNetworkConfiguration{}, &VPCNetworkConfigurationHandler{ Client: mgr.GetClient(), @@ -297,45 +268,114 @@ func (r *NetworkInfoReconciler) Start(mgr ctrl.Manager) error { return nil } -// CollectGarbage logic for nsx-vpc is that: +func (r *NetworkInfoReconciler) listNamespaceCRsName(ctx context.Context) (sets.Set[string], sets.Set[string], error) { + // read all Namespaces from K8s + namespaces := &corev1.NamespaceList{} + err := r.Client.List(ctx, namespaces) + if err != nil { + return nil, nil, err + } + nsSet := sets.Set[string]{} + idSet := sets.Set[string]{} + for _, ns := range namespaces.Items { + nsSet.Insert(ns.Name) + idSet.Insert(string(ns.UID)) + } + return nsSet, idSet, nil +} + +// CollectGarbage logic for NSX VPC is that: // 1. list all current existing namespace in kubernetes -// 2. list all the nsx-vpc in vpcStore -// 3. loop all the nsx-vpc to get its namespace, check if the namespace still exist -// 4. if ns do not exist anymore, delete the nsx-vpc resource +// 2. list all the NSX VPC in vpcStore +// 3. loop all the NSX VPC to get its namespace, check if the namespace still exist +// 4. if ns do not exist anymore, delete the NSX VPC resource // it implements the interface GarbageCollector method. func (r *NetworkInfoReconciler) CollectGarbage(ctx context.Context) { - log.Info("VPC garbage collector started") - // read all nsx-vpc from vpc store + startTime := time.Now() + defer func() { + log.Info("VPC garbage collection completed", "time", time.Since(startTime)) + }() + // read all NSX VPC from VPC store nsxVPCList := r.Service.ListVPC() if len(nsxVPCList) == 0 { + log.Info("No NSX VPCs found in the store, skipping garbage collection") return } - // read all namespaces from k8s - namespaces := &corev1.NamespaceList{} - err := r.Client.List(ctx, namespaces) + nsSet, idSet, err := r.listNamespaceCRsName(ctx) if err != nil { - log.Error(err, "failed to list k8s namespaces") + log.Error(err, "Failed to list Kubernetes Namespaces") return } - nsSet := sets.NewString() - for _, ns := range namespaces.Items { - nsSet.Insert(ns.Name) - } - for i := len(nsxVPCList) - 1; i >= 0; i-- { - nsxVPCNamespace := getNamespaceFromNSXVPC(&nsxVPCList[i]) - if nsSet.Has(nsxVPCNamespace) { + for i, nsxVPC := range nsxVPCList { + nsxVPCNamespaceName := filterTagFromNSXVPC(&nsxVPCList[i], commonservice.TagScopeNamespace) + nsxVPCNamespaceID := filterTagFromNSXVPC(&nsxVPCList[i], commonservice.TagScopeNamespaceUID) + if nsSet.Has(nsxVPCNamespaceName) && idSet.Has(nsxVPCNamespaceID) { continue } - elem := nsxVPCList[i] - log.Info("GC collected nsx VPC object", "ID", elem.Id, "Namespace", nsxVPCNamespace) + log.Info("Garbage collecting NSX VPC object", "VPC", nsxVPC.Id, "Namespace", nsxVPCNamespaceName) metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerDeleteTotal, common.MetricResTypeNetworkInfo) - err = r.Service.DeleteVPC(*elem.Path) - if err != nil { + + if err = r.Service.DeleteVPC(*nsxVPC.Path); err != nil { + log.Error(err, "Failed to delete NSX VPC", "VPC", nsxVPC.Id, "Namespace", nsxVPCNamespaceName) metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerDeleteFailTotal, common.MetricResTypeNetworkInfo) - } else { - metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerDeleteSuccessTotal, common.MetricResTypeNetworkInfo) + continue } + + metrics.CounterInc(r.Service.NSXConfig, metrics.ControllerDeleteSuccessTotal, common.MetricResTypeNetworkInfo) + log.Info("Successfully deleted NSX VPC", "VPC", nsxVPC.Id) + } +} + +func (r *NetworkInfoReconciler) deleteStaleVPCs(ctx context.Context, ns, id string) error { + isShared, err := r.Service.IsSharedVPCNamespaceByNS(ns) + if err != nil { + return fmt.Errorf("failed to check if Namespace is shared for NS %s: %w", ns, err) + } + if isShared { + log.Info("Shared Namespace, skipping deletion of NSX VPC", "Namespace", ns) + return nil } + nsSet, idSet, err := r.listNamespaceCRsName(ctx) + if err != nil { + log.Error(err, "Failed to list Kubernetes Namespaces") + return fmt.Errorf("failed to list Kubernetes Namespaces while deleting VPCs: %v", err) + } + // Retrieve stale VPCs associated with the Namespace + staleVPCs := r.Service.GetVPCsByNamespace(ns) + if len(staleVPCs) == 0 { + log.Info("There is no VPCs found in store, skipping deletion of NSX VPC", "Namespace", ns) + return nil + } + var deleteErrs []error + for _, nsxVPC := range staleVPCs { + namespaceIDofVPC := filterTagFromNSXVPC(nsxVPC, commonservice.TagScopeNamespaceUID) + namespaceNameofVPC := filterTagFromNSXVPC(nsxVPC, commonservice.TagScopeNamespace) + // if delete with Name, the id is "", `id != namespaceIDofVPC` will always be true + // if delete with Name and id, namespaceNameofVPC and namespaceIDofVPC will in K8s, if id == namespaceIDofVPC, we should delete it, otherwise skip deleting it + if nsSet.Has(namespaceNameofVPC) && idSet.Has(namespaceIDofVPC) && id != namespaceIDofVPC { + log.Info("Namespace still exists in Kubernetes, skipping deletion of NSX VPC", "Namespace", ns) + continue + } + if nsxVPC.Path == nil { + log.Error(nil, "VPC path is nil, skipping", "VPC", nsxVPC) + continue + } + if err := r.Service.DeleteVPC(*nsxVPC.Path); err != nil { + log.Error(err, "Failed to delete VPC in NSX", "VPC", nsxVPC.Path) + deleteErrs = append(deleteErrs, fmt.Errorf("failed to delete VPC %s: %w", *nsxVPC.Path, err)) + } + } + if len(deleteErrs) > 0 { + return fmt.Errorf("multiple errors occurred while deleting VPCs: %v", deleteErrs) + } + + // Update the VPCNetworkConfiguration Status + ncName, err := r.Service.GetNetworkconfigNameFromNS(ns) + if err != nil { + return fmt.Errorf("failed to get VPCNetworkConfiguration for Namespace when deleting stale VPCs %s: %w", ns, err) + } + deleteVPCNetworkConfigurationStatus(ctx, r.Client, ncName, staleVPCs, r.Service.ListVPC()) + return nil } diff --git a/pkg/controllers/networkinfo/networkinfo_controller_test.go b/pkg/controllers/networkinfo/networkinfo_controller_test.go index f89cb51c5..0e5463426 100644 --- a/pkg/controllers/networkinfo/networkinfo_controller_test.go +++ b/pkg/controllers/networkinfo/networkinfo_controller_test.go @@ -5,11 +5,14 @@ package networkinfo import ( "context" + "errors" + "fmt" "reflect" "testing" "github.com/agiledragon/gomonkey" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/vmware/vsphere-automation-sdk-go/services/nsxt/model" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -21,7 +24,6 @@ import ( "github.com/vmware-tanzu/nsx-operator/pkg/config" "github.com/vmware-tanzu/nsx-operator/pkg/controllers/common" "github.com/vmware-tanzu/nsx-operator/pkg/nsx" - servicecommon "github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/common" "github.com/vmware-tanzu/nsx-operator/pkg/nsx/services/vpc" ) @@ -93,11 +95,28 @@ func TestNetworkInfoReconciler_Reconcile(t *testing.T) { wantErr bool }{ { - name: "Empty", - prepareFunc: nil, - args: requestArgs, - want: common.ResultNormal, - wantErr: false, + name: "Empty", + prepareFunc: func(t *testing.T, r *NetworkInfoReconciler, ctx context.Context) (patches *gomonkey.Patches) { + patches = gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "IsSharedVPCNamespaceByNS", func(_ *vpc.VPCService, _ string) (bool, error) { + return false, nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "GetVPCsByNamespace", func(_ *vpc.VPCService, _ string) []*model.Vpc { + return nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "GetNetworkconfigNameFromNS", func(_ *vpc.VPCService, _ string) (string, error) { + return "", nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "ListVPC", func(_ *vpc.VPCService) []model.Vpc { + return nil + }) + patches.ApplyFunc(deleteVPCNetworkConfigurationStatus, func(ctx context.Context, client client.Client, ncName string, staleVPCs []*model.Vpc, aliveVPCs []model.Vpc) { + return + }) + return patches + }, + args: requestArgs, + want: common.ResultNormal, + wantErr: false, }, { name: "GatewayConnectionReadyInSystemVPC", @@ -722,3 +741,123 @@ func TestNetworkInfoReconciler_Reconcile(t *testing.T) { }) } } + +func TestNetworkInfoReconciler_deleteStaleVPCs(t *testing.T) { + r := createNetworkInfoReconciler() + + ctx := context.TODO() + namespace := "test-ns" + + t.Run("shared namespace, skip deletion", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "IsSharedVPCNamespaceByNS", func(_ *vpc.VPCService, _ string) (bool, error) { + return true, nil + }) + defer patches.Reset() + + err := r.deleteStaleVPCs(ctx, namespace, "") + require.NoError(t, err) + }) + + t.Run("non-shared namespace, no VPCs found", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "IsSharedVPCNamespaceByNS", func(_ *vpc.VPCService, _ string) (bool, error) { + return false, nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "GetVPCsByNamespace", func(_ *vpc.VPCService, _ string) []*model.Vpc { + return nil + }) + defer patches.Reset() + + err := r.deleteStaleVPCs(ctx, namespace, "") + require.NoError(t, err) + }) + + t.Run("failed to delete VPC", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "IsSharedVPCNamespaceByNS", func(_ *vpc.VPCService, _ string) (bool, error) { + return false, nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "GetVPCsByNamespace", func(_ *vpc.VPCService, _ string) []*model.Vpc { + vpcPath := "/vpc/1" + return []*model.Vpc{{Path: &vpcPath}} + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "DeleteVPC", func(_ *vpc.VPCService, _ string) error { + return fmt.Errorf("delete failed") + }) + defer patches.Reset() + + err := r.deleteStaleVPCs(ctx, namespace, "") + assert.Error(t, err) + assert.Contains(t, err.Error(), "delete failed") + }) + + t.Run("successful deletion of VPCs", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "IsSharedVPCNamespaceByNS", func(_ *vpc.VPCService, _ string) (bool, error) { + return false, nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "GetVPCsByNamespace", func(_ *vpc.VPCService, _ string) []*model.Vpc { + vpcPath1 := "/vpc/1" + vpcPath2 := "/vpc/2" + return []*model.Vpc{{Path: &vpcPath1}, {Path: &vpcPath2}} + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "DeleteVPC", func(_ *vpc.VPCService, _ string) error { + return nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "ListVPC", func(_ *vpc.VPCService) []model.Vpc { + return nil + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "GetNetworkconfigNameFromNS", func(_ *vpc.VPCService, _ string) (string, error) { + return "", nil + }) + patches.ApplyFunc(deleteVPCNetworkConfigurationStatus, func(ctx context.Context, client client.Client, ncName string, staleVPCs []*model.Vpc, aliveVPCs []model.Vpc) { + return + }) + defer patches.Reset() + + err := r.deleteStaleVPCs(ctx, namespace, "") + require.NoError(t, err) + }) +} + +func TestNetworkInfoReconciler_CollectGarbage(t *testing.T) { + r := createNetworkInfoReconciler() + + ctx := context.TODO() + + t.Run("no VPCs found in the store", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "ListVPC", func(_ *vpc.VPCService) []model.Vpc { + return nil + }) + defer patches.Reset() + + r.CollectGarbage(ctx) + // No errors expected + }) + + t.Run("successful garbage collection", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "ListVPC", func(_ *vpc.VPCService) []model.Vpc { + vpcPath1 := "/vpc/1" + vpcPath2 := "/vpc/2" + return []model.Vpc{{Path: &vpcPath1}, {Path: &vpcPath2}} + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "DeleteVPC", func(_ *vpc.VPCService, _ string) error { + return nil + }) + defer patches.Reset() + + r.CollectGarbage(ctx) + }) + + t.Run("failed to delete VPC", func(t *testing.T) { + patches := gomonkey.ApplyMethod(reflect.TypeOf(r.Service), "ListVPC", func(_ *vpc.VPCService) []model.Vpc { + vpcPath1 := "/vpc/1" + vpcPath2 := "/vpc/2" + return []model.Vpc{{Path: &vpcPath1}, {Path: &vpcPath2}} + }) + patches.ApplyMethod(reflect.TypeOf(r.Service), "DeleteVPC", func(_ *vpc.VPCService, _ string) error { + return errors.New("deletion error") + }) + defer patches.Reset() + + r.CollectGarbage(ctx) + }) + +} diff --git a/pkg/controllers/networkinfo/networkinfo_utils.go b/pkg/controllers/networkinfo/networkinfo_utils.go index 2a033ed3e..4933f657f 100644 --- a/pkg/controllers/networkinfo/networkinfo_utils.go +++ b/pkg/controllers/networkinfo/networkinfo_utils.go @@ -237,10 +237,10 @@ func deleteVPCNetworkConfigurationStatus(ctx context.Context, client client.Clie log.Info("Deleted stale VPCNetworkConfiguration status", "Name", ncName, "nc.Status.VPCs", nc.Status.VPCs, "staleVPCs", staleVPCNames) } -func getNamespaceFromNSXVPC(nsxVPC *model.Vpc) string { +func filterTagFromNSXVPC(nsxVPC *model.Vpc, tagName string) string { tags := nsxVPC.Tags for _, tag := range tags { - if *tag.Scope == svccommon.TagScopeNamespace { + if *tag.Scope == tagName { return *tag.Tag } } diff --git a/pkg/nsx/services/common/types.go b/pkg/nsx/services/common/types.go index 02a559e67..13971d30f 100644 --- a/pkg/nsx/services/common/types.go +++ b/pkg/nsx/services/common/types.go @@ -106,7 +106,6 @@ const ( T1SecurityPolicyFinalizerName = "securitypolicy.nsx.vmware.com/finalizer" StaticRouteFinalizerName = "staticroute.crd.nsx.vmware.com/finalizer" SubnetPortFinalizerName = "subnetport.crd.nsx.vmware.com/finalizer" - NetworkInfoFinalizerName = "networkinfo.crd.nsx.vmware.com/finalizer" PodFinalizerName = "pod.crd.nsx.vmware.com/finalizer" IPPoolFinalizerName = "ippool.crd.nsx.vmware.com/finalizer" IPAddressAllocationFinalizerName = "ipaddressallocation.crd.nsx.vmware.com/finalizer" diff --git a/pkg/nsx/services/vpc/vpc.go b/pkg/nsx/services/vpc/vpc.go index ed525790f..0495fe499 100644 --- a/pkg/nsx/services/vpc/vpc.go +++ b/pkg/nsx/services/vpc/vpc.go @@ -698,13 +698,13 @@ func (s *VPCService) IsLBProviderChanged(existingVPC *model.Vpc, lbProvider LBPr return false } func (s *VPCService) CreateOrUpdateVPC(obj *v1alpha1.NetworkInfo, nc *common.VPCNetworkConfigInfo, lbProvider LBProvider) (*model.Vpc, error) { - // check from VPC store if vpc already exist + // check from VPC store if VPC already exist ns := obj.Namespace updateVpc := false nsObj := &v1.Namespace{} - // get name obj + // get Namespace if err := s.Client.Get(ctx, types.NamespacedName{Name: obj.Namespace}, nsObj); err != nil { - log.Error(err, "unable to fetch namespace", "name", obj.Namespace) + log.Error(err, "unable to fetch Namespace", "Name", obj.Namespace) return nil, err } diff --git a/pkg/util/utils.go b/pkg/util/utils.go index d1eebde26..8d1e06b4e 100644 --- a/pkg/util/utils.go +++ b/pkg/util/utils.go @@ -30,9 +30,7 @@ import ( ) const ( - wcpSystemResource = "vmware-system-shared-t1" - SubnetTypeSubnet = "subnet" - SubnetTypeSubnetSet = "subnetset" + wcpSystemResource = "vmware-system-shared-t1" ) var ( @@ -312,24 +310,6 @@ func If(condition bool, trueVal, falseVal interface{}) interface{} { } } -func GetMapValues(in interface{}) []string { - if in == nil { - return make([]string, 0) - } - switch in.(type) { - case map[string]string: - ssMap := in.(map[string]string) - values := make([]string, 0, len(ssMap)) - for _, v := range ssMap { - values = append(values, v) - } - return values - default: - log.Info("Unsupported map format") - return nil - } -} - // the changes map contains key/value map that you want to change. // if giving empty value for a key in changes map like: "mykey":"", that means removing this annotation from k8s resource func UpdateK8sResourceAnnotation(client client.Client, ctx context.Context, k8sObj client.Object, changes map[string]string) error { @@ -423,10 +403,6 @@ func GenerateTruncName(limit int, resName string, prefix, suffix, project, clust return generateDisplayName(common.ConnectorUnderline, resName, prefix, suffix, project, cluster) } -func CombineNamespaceName(name, namespace string) string { - return fmt.Sprintf("%s/%s", namespace, name) -} - func BuildBasicTags(cluster string, obj interface{}, namespaceID types.UID) []model.Tag { tags := []model.Tag{ {