Skip to content

Commit 6b96a7c

Browse files
committed
fix: generating endpoint locally when CNS is not reachable.
1 parent 6fbcfd0 commit 6b96a7c

File tree

7 files changed

+172
-160
lines changed

7 files changed

+172
-160
lines changed

cni/network/network.go

Lines changed: 22 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,49 +1064,11 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
10641064
}
10651065
logger.Info("Retrieved network info, populating endpoint infos with container id", zap.String("containerID", args.ContainerID))
10661066

1067-
var epInfos []*network.EndpointInfo
1068-
if plugin.nm.IsStatelessCNIMode() {
1069-
// network ID is passed in and used only for migration
1070-
// otherwise, in stateless, we don't need the network id for deletion
1071-
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
1072-
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found or CNS connection failure
1073-
// return a retriable error so the container runtime will retry this DEL later
1074-
// the implementation of this function returns nil if the endpoint doesn't exist, so
1075-
// we don't have to check that here
1076-
if err != nil {
1077-
switch {
1078-
case errors.Is(err, network.ErrConnectionFailure):
1079-
logger.Error("Failed to connect to CNS", zap.Error(err))
1080-
logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
1081-
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
1082-
// we asynchronously remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state.
1083-
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
1084-
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
1085-
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
1086-
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
1087-
// This workaround mitigates the issue by removing the secondary NIC from the pod netns when CNS is unreachable during DEL to provide the endpoint state.
1088-
if err = plugin.nm.RemoveSecondaryEndpointFromPodNetNS(args.IfName, args.Netns); err != nil {
1089-
logger.Error("Failed to remove secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err))
1090-
return plugin.RetriableError(fmt.Errorf("failed to remove secondary endpoint from pod netns: %w", err))
1091-
}
1092-
case errors.Is(err, network.ErrEndpointStateNotFound):
1093-
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
1094-
return nil
1095-
default:
1096-
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
1097-
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
1098-
}
1099-
} else {
1100-
for _, epInfo := range epInfos {
1101-
logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType))
1102-
}
1103-
}
1104-
} else {
1105-
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
1067+
epInfos, err := plugin.nm.GetEndpoint(networkID, args)
1068+
if err != nil {
1069+
return plugin.RetriableError(fmt.Errorf("failed to retrieve endpoint: %w", err))
11061070
}
1107-
1108-
// for Stateful CNI when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
1109-
// this block is applied to stateless CNI only if there was a connection failure in previous block and asynchronous delete by CNS will remover the endpoint from state file
1071+
// when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
11101072
if len(epInfos) == 0 {
11111073
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
11121074
if !nwCfg.MultiTenancy {
@@ -1144,15 +1106,25 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
11441106
zap.String("endpointID", epInfo.EndpointID))
11451107
telemetryClient.SendEvent("Deleting endpoint: " + epInfo.EndpointID)
11461108

1109+
// Delegated/secondary nic ips are statically allocated so we don't need to release
1110+
// Call into IPAM plugin to release the endpoint's addresses.
11471111
if !nwCfg.MultiTenancy && (epInfo.NICType == cns.InfraNIC || epInfo.NICType == "") {
1148-
// Delegated/secondary nic ips are statically allocated so we don't need to release
1149-
// Call into IPAM plugin to release the endpoint's addresses.
1150-
for i := range epInfo.IPAddresses {
1151-
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
1152-
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
1153-
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
1154-
if err != nil {
1155-
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
1112+
// This is a special case for stateless CNI when an asynchronous DEL to CNS will take place
1113+
// At this point the endpoint is already deleted in the previous block and CNS will release the IP whenever it is up
1114+
if epInfo.IPAddresses == nil && plugin.nm.IsStatelessCNIMode() {
1115+
logger.Warn("Release ip Asynchronously by CNS",
1116+
zap.String("containerID", args.ContainerID))
1117+
if err = plugin.ipamInvoker.Delete(nil, nwCfg, args, nwInfo.Options); err != nil {
1118+
return plugin.RetriableError(fmt.Errorf("failed to release address(no endpoint): %w", err))
1119+
}
1120+
} else {
1121+
for i := range epInfo.IPAddresses {
1122+
logger.Info("Release ip", zap.String("ip", epInfo.IPAddresses[i].IP.String()))
1123+
telemetryClient.SendEvent(fmt.Sprintf("Release ip: %s container id: %s endpoint id: %s", epInfo.IPAddresses[i].IP.String(), args.ContainerID, epInfo.EndpointID))
1124+
err = plugin.ipamInvoker.Delete(&epInfo.IPAddresses[i], nwCfg, args, nwInfo.Options)
1125+
if err != nil {
1126+
return plugin.RetriableError(fmt.Errorf("failed to release address: %w", err))
1127+
}
11561128
}
11571129
}
11581130
} else if epInfo.EnableInfraVnet { // remove in future PR

network/endpoint_linux.go

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,28 @@ func (epInfo *EndpointInfo) GetEndpointInfoByIPImpl(_ []net.IPNet, _ string) (*E
548548
return epInfo, nil
549549
}
550550

551-
// removeSecondaryEndpointFromPodNetNSImpl deletes an existing secondary endpoint from the pod network namespace.
552-
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(nsc NamespaceClientInterface) error {
553-
secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nsc, nil, ep)
554-
if err := secondaryepClient.RemoveInterfacesFromNetnsPath(ep.IfName, ep.NetworkNameSpace); err != nil {
555-
return err
551+
// getEndpointInfoByIfNameImpl returns an array of EndpointInfo for the given endpoint based on the IfName(s) found in the network namespace.
552+
func (nm *networkManager) getEndpointInfoByIfNameImpl(ep *endpoint) ([]*EndpointInfo, error) {
553+
epInfo := &EndpointInfo{
554+
EndpointID: ep.Id,
555+
NetNsPath: ep.NetworkNameSpace,
556+
NICType: cns.InfraNIC,
557+
IfName: ep.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
556558
}
557-
return nil
559+
ret := []*EndpointInfo{}
560+
ret = append(ret, epInfo)
561+
logger.Info("Fetching Secondary Endpoint from", zap.String("NetworkNameSpace: ", ep.NetworkNameSpace))
562+
secondaryepClient := NewSecondaryEndpointClient(nil, nil, nil, nm.nsClient, nil, ep)
563+
ifnames, err := secondaryepClient.FetchInterfacesFromNetnsPath(ep.IfName, ep.NetworkNameSpace)
564+
if err != nil {
565+
return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err)
566+
}
567+
for _, ifName := range ifnames {
568+
ret = append(ret, &EndpointInfo{
569+
NetNsPath: ep.NetworkNameSpace,
570+
IfName: ifName,
571+
NICType: cns.NodeNetworkInterfaceFrontendNIC,
572+
})
573+
}
574+
return ret, nil
558575
}

network/endpoint_windows.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -747,7 +747,6 @@ func getPnpDeviceState(instanceID string, plc platform.ExecClient) (string, stri
747747
return devpkeyDeviceIsPresent, devpkeyDeviceProblemCode, nil
748748
}
749749

750-
// removeSecondaryEndpointFromPodNetNSImpl removes an existing secondary endpoint from the pod network namespace.
751-
func (ep *endpoint) removeSecondaryEndpointFromPodNetNSImpl(_ NamespaceClientInterface) error {
752-
return nil
750+
func (nm *networkManager) getEndpointInfoByIfNameImpl(_ *endpoint) ([]*EndpointInfo, error) {
751+
return nil, nil
753752
}

network/errors.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ package network
33
import "errors"
44

55
var (
6-
errSubnetV6NotFound = errors.New("Couldn't find ipv6 subnet in network info") // nolint
7-
errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
8-
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
9-
ErrConnectionFailure = errors.New("couldn't connect to CNS")
10-
ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state")
6+
errSubnetV6NotFound = errors.New("Couldn't find ipv6 subnet in network info") // nolint
7+
errV6SnatRuleNotSet = errors.New("ipv6 snat rule not set. Might be VM ipv6 address missing") // nolint
8+
ErrEndpointStateNotFound = errors.New("endpoint state could not be found in the statefile")
9+
ErrConnectionFailure = errors.New("couldn't connect to CNS")
10+
ErrEndpointRemovalFailure = errors.New("Failed to remove endpoint")
11+
ErrEndpointRetrievalFailure = errors.New("Failed to obtain endpoint")
12+
ErrGetEndpointStateFailure = errors.New("failure to obtain the endpoint state")
1113
)

network/manager.go

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package network
55

66
import (
77
"context"
8+
"fmt"
89
"net"
910
"sync"
1011
"time"
@@ -19,6 +20,7 @@ import (
1920
"github.com/Azure/azure-container-networking/netlink"
2021
"github.com/Azure/azure-container-networking/platform"
2122
"github.com/Azure/azure-container-networking/store"
23+
cniSkel "github.com/containernetworking/cni/pkg/skel"
2224
"github.com/pkg/errors"
2325
"go.uber.org/zap"
2426
)
@@ -120,9 +122,9 @@ type NetworkManager interface {
120122
IsStatelessCNIMode() bool
121123
SaveState(eps []*endpoint) error
122124
DeleteState(epInfos []*EndpointInfo) error
125+
GetEndpoint(networkID string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error)
123126
GetEndpointInfosFromContainerID(containerID string) []*EndpointInfo
124127
GetEndpointState(networkID, containerID, netns string) ([]*EndpointInfo, error)
125-
RemoveSecondaryEndpointFromPodNetNS(ifName string, netns string) error
126128
}
127129

128130
// Creates a new network manager.
@@ -866,13 +868,53 @@ func generateCNSIPInfoMap(eps []*endpoint) map[string]*restserver.IPInfo {
866868
return ifNametoIPInfoMap
867869
}
868870

869-
// RemoveSecondaryEndpointFromPodNetNS removes the secondary endpoint from the pod netns
870-
func (nm *networkManager) RemoveSecondaryEndpointFromPodNetNS(ifName, netns string) error {
871+
func (nm *networkManager) GetEndpoint(networkID string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) {
872+
if nm.IsStatelessCNIMode() {
873+
logger.Info("calling cns getEndpoint API")
874+
epInfos, err := nm.GetEndpointState(networkID, args.ContainerID, args.Netns)
875+
if err != nil {
876+
switch {
877+
case errors.Is(err, ErrConnectionFailure):
878+
logger.Error("Failed to connect to CNS", zap.Error(err))
879+
logger.Info("Endpoint will be deleted from state file asynchronously", zap.String("containerID", args.ContainerID))
880+
// In SwiftV2 Linux stateless CNI mode, if the plugin cannot connect to CNS,
881+
// we still have to remove the secondary (delegated) interface from the pod’s network namespace in the absence of the endpoint state.
882+
// This is necessary because leaving the delegated NIC in the pod netns can cause the kernel to block rtnetlink operations.
883+
// When that happens, kubelet and containerd hang during sandbox creation or teardown.
884+
// The delegated NIC (SR-IOV VF) used by SwiftV2 for multitenant pods remains tied to the pod namespace,
885+
// triggering hot-unplug/re-register events and leaving the node in an unhealthy state.
886+
// This workaround mitigates the issue by generating a minimal endpointInfo via containerd args and netlink APIs that can be then passed to DeleteEndpoint API.
887+
epInfos, err = nm.generateEndpointLocally(args)
888+
if err != nil {
889+
logger.Error("Failed to fetch secondary endpoint from pod netns", zap.String("netns", args.Netns), zap.Error(err))
890+
return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err)
891+
}
892+
case errors.Is(err, ErrEndpointStateNotFound):
893+
logger.Info("Endpoint Not found", zap.String("containerID", args.ContainerID), zap.Error(err))
894+
return nil, nil
895+
default:
896+
logger.Error("Get Endpoint State API returned error", zap.String("containerID", args.ContainerID), zap.Error(err))
897+
return nil, ErrEndpointRetrievalFailure
898+
}
899+
}
900+
for _, epInfo := range epInfos {
901+
logger.Info("Found endpoint to delete", zap.String("IfName", epInfo.IfName), zap.String("EndpointID", epInfo.EndpointID), zap.Any("NICType", epInfo.NICType))
902+
}
903+
return epInfos, nil
904+
}
905+
return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil
906+
}
907+
908+
// generateEndpointLocally fetches the endpoint information using containerd args and netlink APIs
909+
func (nm *networkManager) generateEndpointLocally(args *cniSkel.CmdArgs) ([]*EndpointInfo, error) {
871910
ep := &endpoint{
872-
NetworkNameSpace: netns,
873-
IfName: ifName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
911+
Id: args.ContainerID,
912+
NetworkNameSpace: args.Netns,
913+
IfName: args.IfName, // TODO: For stateless cni linux populate IfName here to use in deletion in secondary endpoint client
874914
}
875-
logger.Info("Removing Secondary Endpoint from", zap.String("NetworkNameSpace: ", netns))
876-
err := ep.removeSecondaryEndpointFromPodNetNSImpl(nm.nsClient)
877-
return err
915+
epInfo, err := nm.getEndpointInfoByIfNameImpl(ep)
916+
if err != nil {
917+
return nil, fmt.Errorf("failed to fetch secondary interfaces: %w", err)
918+
}
919+
return epInfo, nil
878920
}

network/manager_mock.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package network
33
import (
44
"github.com/Azure/azure-container-networking/cns"
55
"github.com/Azure/azure-container-networking/common"
6+
cniSkel "github.com/containernetworking/cni/pkg/skel"
67
)
78

89
// MockNetworkManager is a mock structure for Network Manager
@@ -222,6 +223,6 @@ func (nm *MockNetworkManager) GetEndpointState(_, _, _ string) ([]*EndpointInfo,
222223
return []*EndpointInfo{}, nil
223224
}
224225

225-
func (nm *MockNetworkManager) RemoveSecondaryEndpointFromPodNetNS(_, _ string) error {
226-
return nil
226+
func (nm *MockNetworkManager) GetEndpoint(_ string, args *cniSkel.CmdArgs) ([]*EndpointInfo, error) {
227+
return nm.GetEndpointInfosFromContainerID(args.ContainerID), nil
227228
}

0 commit comments

Comments
 (0)