Skip to content

Commit

Permalink
DHCP lease maintenance should terminate when interface no longer exists.
Browse files Browse the repository at this point in the history
Due to oberservations that threads can grow and the dhcp daemon uses an increasing amount of memory.

This situation can happen organically when using say, bridge CNI, and the bridge has been removed outside of the bridge CNI lifecycle, and an interface no longer exists on a pod.

Does so on a retry loop using the `backoffRetry()` method.

Signed-off-by: dougbtv <[email protected]>
  • Loading branch information
dougbtv committed Jan 21, 2025
1 parent e4ca66b commit 0834450
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions plugins/ipam/dhcp/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ const (
leaseStateRebinding
)

// Timing for retrying link existence check
const (
linkCheckRetryMax = 10 * time.Second
linkCheckTotalTimeout = 30 * time.Second
)

// This implementation uses 1 OS thread per lease. This is because
// all the network operations have to be done in network namespace
// of the interface. This can be improved by switching to the proper
Expand Down Expand Up @@ -292,6 +298,14 @@ func (l *DHCPLease) maintain() {
for {
var sleepDur time.Duration

linkCheckCtx, cancel := context.WithTimeoutCause(l.ctx, l.resendTimeout, errNoMoreTries)
defer cancel()
err := l.checkLinkExistsWithBackoff(linkCheckCtx)
if err != nil {
log.Printf("%v: interface %s no longer exists or check failed, terminating lease maintenance (last encountered: %v)", l.clientID, l.link.Attrs().Name, err)
return
}

switch state {
case leaseStateBound:
sleepDur = time.Until(l.renewalTime)
Expand Down Expand Up @@ -344,6 +358,23 @@ func (l *DHCPLease) maintain() {
}
}

// checkLinkExistsWithBackoff uses backoffRetry to check if a network link exists
func (l *DHCPLease) checkLinkExistsWithBackoff(ctx context.Context) error {
checkFunc := func() (*nclient4.Lease, error) {
// Returns the error to trigger a retry.
if _, err := netlink.LinkByName(l.link.Attrs().Name); err != nil {
return nil, err
}
return nil, nil
}

ctx, cancel := context.WithTimeout(ctx, linkCheckTotalTimeout)
defer cancel()
_, err := backoffRetry(ctx, linkCheckRetryMax, checkFunc)
return err
}


func (l *DHCPLease) downIface() {
if err := netlink.LinkSetDown(l.link); err != nil {
log.Printf("%v: failed to bring %v interface DOWN: %v", l.clientID, l.link.Attrs().Name, err)
Expand Down

0 comments on commit 0834450

Please sign in to comment.