-
Notifications
You must be signed in to change notification settings - Fork 259
fix: fixing Stateless CNI delete in SwiftV2 scenario #3967
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,6 @@ import ( | |
| "github.com/Azure/azure-container-networking/cni/util" | ||
| "github.com/Azure/azure-container-networking/cns" | ||
| cnscli "github.com/Azure/azure-container-networking/cns/client" | ||
| "github.com/Azure/azure-container-networking/cns/fsnotify" | ||
| "github.com/Azure/azure-container-networking/common" | ||
| "github.com/Azure/azure-container-networking/dhcp" | ||
| "github.com/Azure/azure-container-networking/iptables" | ||
|
|
@@ -716,7 +715,7 @@ func (plugin *NetPlugin) createEpInfo(opt *createEpInfoOpt) (*network.EndpointIn | |
| *opt.infraSeen = true | ||
| } else { | ||
| ifName = "eth" + strconv.Itoa(opt.endpointIndex) | ||
| endpointID = plugin.nm.GetEndpointID(opt.args.ContainerID, ifName) | ||
| endpointID = plugin.nm.GetEndpointIDByNicType(opt.args.ContainerID, ifName, opt.ifInfo.NICType) | ||
| } | ||
|
|
||
| endpointInfo := network.EndpointInfo{ | ||
|
|
@@ -1069,32 +1068,45 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { | |
| if plugin.nm.IsStatelessCNIMode() { | ||
| // network ID is passed in and used only for migration | ||
| // otherwise, in stateless, we don't need the network id for deletion | ||
| epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID) | ||
| // if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found | ||
| epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID, args.Netns) | ||
| // if stateless CNI fails to get the endpoint from CNS for any reason other than Endpoint Not found or CNS connection failure | ||
| // return a retriable error so the container runtime will retry this DEL later | ||
| // the implementation of this function returns nil if the endpoint doesn't exist, so | ||
| // we don't have to check that here | ||
| if err != nil { | ||
| if errors.Is(err, network.ErrConnectionFailure) { | ||
| logger.Info("failed to connect to CNS", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| addErr := fsnotify.AddFile(args.ContainerID, args.ContainerID, watcherPath) | ||
| logger.Info("add containerid file for Asynch delete", zap.String("containerID", args.ContainerID), zap.Error(addErr)) | ||
| if addErr != nil { | ||
| logger.Error("failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr)) | ||
| return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID)) | ||
| switch { | ||
| case errors.Is(err, network.ErrConnectionFailure): | ||
| logger.Error("Failed to connect to CNS", zap.Error(err)) | ||
| logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID)) | ||
| // In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS, | ||
| // we asynchronously remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state. | ||
| // This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations. | ||
| // When that happens, kubelet and containerd hang during sandbox creation or teardown. | ||
| // The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace, | ||
| // triggering hot-unplug/re-register events and leaving the node in an unhealthy state. | ||
| // This workaround mitigates the issue by removing the secondary NIC from the pod netns when CNS is unreachable during DEL and cannot provide the endpoint state. | ||
| if err = plugin.nm.RemoveSecondaryEndpointFromPodNetNS(args.IfName, args.Netns); err != nil { | ||
| logger.Error("Failed to remove secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err)) | ||
| return plugin.RetriableError(fmt.Errorf("failed to remove secondary endpoint from pod netns: %w", err)) | ||
| } | ||
| return nil | ||
| } | ||
| if errors.Is(err, network.ErrEndpointStateNotFound) { | ||
| case errors.Is(err, network.ErrEndpointStateNotFound): | ||
| logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return nil | ||
| default: | ||
| logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) | ||
| } | ||
| } else { | ||
| for _, epInfo := range epInfos { | ||
| logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType)) | ||
| } | ||
| logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err)) | ||
| return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) | ||
| } | ||
| } else { | ||
| epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID) | ||
| } | ||
|
|
||
| // for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra) | ||
| // this block is not applied to stateless CNI | ||
| // for Stateful CNI when the endpoint is not created, but the ips are already allocated (only works if single network, single infra) | ||
| // this block is applied to stateless CNI only if there was a connection failure in the previous block and asynchronous delete by CNS will remove the endpoint from the state file | ||
|
||
| if len(epInfos) == 0 { | ||
| endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName) | ||
| if !nwCfg.MultiTenancy { | ||
|
|
@@ -1120,7 +1132,7 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error { | |
| if err = plugin.nm.DeleteEndpoint(epInfo.NetworkID, epInfo.EndpointID, epInfo); err != nil { | ||
| // An error will not be returned if the endpoint is not found | ||
| // return a retriable error so the container runtime will retry this DEL later | ||
| // the implementation of this function returns nil if the endpoint doens't exist, so | ||
| // the implementation of this function returns nil if the endpoint doesn't exist, so | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // we don't have to check that here | ||
| return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err)) | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -116,11 +116,13 @@ type NetworkManager interface { | |
| UpdateEndpoint(networkID string, existingEpInfo *EndpointInfo, targetEpInfo *EndpointInfo) error | ||
| GetNumberOfEndpoints(ifName string, networkID string) int | ||
| GetEndpointID(containerID, ifName string) string | ||
| GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: GetEndpointIDByNICType, as NIC is an acronym |
||
| IsStatelessCNIMode() bool | ||
| SaveState(eps []*endpoint) error | ||
| DeleteState(epInfos []*EndpointInfo) error | ||
| GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo | ||
| GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) | ||
| GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) | ||
| RemoveSecondaryEndpointFromPodNetNS(ifName string, netns string) error | ||
| } | ||
|
|
||
| // Creates a new network manager. | ||
|
|
@@ -455,7 +457,7 @@ func validateUpdateEndpointState(endpointID string, ifNameToIPInfoMap map[string | |
| // GetEndpointState will make a call to CNS GetEndpointState API in the stateless CNI mode to fetch the endpointInfo | ||
| // TODO unit tests need to be added, WorkItem: 26606939 | ||
| // In stateless cni, container id is the endpoint id, so you can pass in either | ||
| func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*EndpointInfo, error) { | ||
| func (nm *networkManager) GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error) { | ||
| endpointResponse, err := nm.CnsClient.GetEndpoint(context.TODO(), containerID) | ||
| if err != nil { | ||
| if endpointResponse.Response.ReturnCode == types.NotFound { | ||
|
|
@@ -466,7 +468,7 @@ func (nm *networkManager) GetEndpointState(networkID, containerID string) ([]*En | |
| } | ||
| return nil, ErrGetEndpointStateFailure | ||
| } | ||
| epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID) | ||
| epInfos := cnsEndpointInfotoCNIEpInfos(endpointResponse.EndpointInfo, containerID, netns) | ||
|
|
||
| for i := 0; i < len(epInfos); i++ { | ||
| if epInfos[i].NICType == cns.InfraNIC { | ||
|
|
@@ -514,7 +516,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint | |
| nw := &network{ | ||
| Id: networkID, // currently unused in stateless cni | ||
| HnsId: epInfo.HNSNetworkID, | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Mode: opModeTransparentVlan, | ||
| Mode: opModeTransparent, | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| SnatBridgeIP: "", | ||
| NetNs: dummyGUID, // to trigger hns v2, windows | ||
| extIf: &externalInterface{ | ||
|
|
@@ -529,6 +531,7 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint | |
| HNSNetworkID: epInfo.HNSNetworkID, // unused (we use nw.HnsId for deleting the network) | ||
| HostIfName: epInfo.HostIfName, | ||
| LocalIP: "", | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| IPAddresses: epInfo.IPAddresses, | ||
| VlanID: 0, | ||
| AllowInboundFromHostToNC: false, // stateless currently does not support apipa | ||
| AllowInboundFromNCToHost: false, | ||
|
|
@@ -537,11 +540,12 @@ func (nm *networkManager) DeleteEndpointState(networkID string, epInfo *Endpoint | |
| NetworkContainerID: epInfo.NetworkContainerID, // we don't use this as long as AllowInboundFromHostToNC and AllowInboundFromNCToHost are false | ||
| NetNs: dummyGUID, // to trigger hnsv2, windows | ||
| NICType: epInfo.NICType, | ||
| NetworkNameSpace: epInfo.NetNsPath, | ||
| IfName: epInfo.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client | ||
behzad-mir marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
| logger.Info("Deleting endpoint with", zap.String("Endpoint Info: ", epInfo.PrettyString()), zap.String("HNISID : ", ep.HnsId)) | ||
|
|
||
| err := nw.deleteEndpointImpl(netlink.NewNetlink(), platform.NewExecClient(logger), nil, nil, nil, nil, nil, ep) | ||
| err := nw.deleteEndpointImpl(nm.netlink, nm.plClient, nil, nm.netio, nm.nsClient, nm.iptablesClient, nm.dhcpClient, ep) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -562,7 +566,7 @@ func (nm *networkManager) GetEndpointInfo(networkID, endpointID string) (*Endpoi | |
|
|
||
| if nm.IsStatelessCNIMode() { | ||
| logger.Info("calling cns getEndpoint API") | ||
| epInfos, err := nm.GetEndpointState(networkID, endpointID) | ||
| epInfos, err := nm.GetEndpointState(networkID, endpointID, "") | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
@@ -745,6 +749,16 @@ func (nm *networkManager) GetEndpointID(containerID, ifName string) string { | |
| return containerID + "-" + ifName | ||
| } | ||
|
|
||
| // GetEndpointIDByNicType returns a unique endpoint ID based on the CNI mode and NIC type. | ||
| func (nm *networkManager) GetEndpointIDByNicType(containerID, ifName string, nicType cns.NICType) string { | ||
| // For stateless CNI, secondary NICs use containerID-ifName as endpointID. | ||
| if nm.IsStatelessCNIMode() && nicType != cns.InfraNIC { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For stateful CNI, is this not an issue? What's the impact if we remove the IsStatelessCNIMode check here? |
||
| return containerID + "-" + ifName | ||
| } | ||
| // For InfraNIC, use GetEndpointID() logic. | ||
| return nm.GetEndpointID(containerID, ifName) | ||
| } | ||
|
|
||
| // saves the map of network ids to endpoints to the state file | ||
| func (nm *networkManager) SaveState(eps []*endpoint) error { | ||
| nm.Lock() | ||
|
|
@@ -779,7 +793,7 @@ func (nm *networkManager) DeleteState(_ []*EndpointInfo) error { | |
| } | ||
|
|
||
| // called to convert a cns restserver EndpointInfo into a network EndpointInfo | ||
| func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID string) []*EndpointInfo { | ||
| func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointID, netns string) []*EndpointInfo { | ||
| ret := []*EndpointInfo{} | ||
|
|
||
| for ifName, ipInfo := range endpointInfo.IfnameToIPMap { | ||
|
|
@@ -809,6 +823,10 @@ func cnsEndpointInfotoCNIEpInfos(endpointInfo restserver.EndpointInfo, endpointI | |
| epInfo.NICType = ipInfo.NICType | ||
| epInfo.HNSNetworkID = ipInfo.HnsNetworkID | ||
| epInfo.MacAddress = net.HardwareAddr(ipInfo.MacAddress) | ||
| // fill out the netns if it is empty via args passed by container runtime | ||
| if epInfo.NetNsPath == "" { | ||
| epInfo.NetNsPath = netns | ||
| } | ||
| ret = append(ret, epInfo) | ||
| } | ||
| return ret | ||
|
|
@@ -847,3 +865,14 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo { | |
|
|
||
| return ifNametoIPInfoMap | ||
| } | ||
|
|
||
| // RemoveSecondaryEndpointFromPodNetNS removes the secondary endpoint from the pod netns | ||
| func (nm *networkManager) RemoveSecondaryEndpointFromPodNetNS(ifName, netns string) error { | ||
| ep := &endpoint{ | ||
| NetworkNameSpace: netns, | ||
| IfName: ifName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client | ||
| } | ||
| logger.Info("Removing Secondary Endpoint from", zap.String("NetworkNameSpace: ", netns)) | ||
| err := ep.removeSecondaryEndpointFromPodNetNSImpl(nm.nsClient) | ||
| return err | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should still create the file so that CNS can release the IP allocated for the pod.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Addressed in the new commit