Skip to content

Commit f3e691b

Browse files
fix: Ensure I process the initial IPPool state correctly on leader change
Previously, my IPPool controller would only signal that its initial synchronization was complete upon processing an UPDATE event for its target IPPool. This caused me to hang if the IPPool existed at startup (generating an ADD event) but wasn't immediately updated. This commit modifies my IPPool controller to also signal initial sync completion when processing an ADD event or when a DELETE event confirms the target IPPool is no longer present. Additionally, my IPPool event listener's informer is now scoped to watch only the specific IPPool it is responsible for, rather than all IPPools in the namespace. Event handlers in the listener are also updated to correctly queue ADD and DELETE events.
1 parent b60e3ea commit f3e691b

File tree

2 files changed

+138
-8
lines changed

2 files changed

+138
-8
lines changed

pkg/agent/ippool/controller.go

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,81 @@ func (c *Controller) sync(event Event) (err error) {
9797
}
9898
// If update was successful, this is a good place to signal initial sync
9999
c.initialSyncOnce.Do(func() {
100-
logrus.Infof("Initial sync completed for IPPool %s/%s, signaling DHCP server.", ipPool.Namespace, ipPool.Name)
100+
logrus.Infof("Initial sync UPDATE completed for IPPool %s/%s, signaling DHCP server.", ipPool.Namespace, ipPool.Name)
101+
close(c.initialSyncDone)
102+
})
103+
case ADD: // Handle ADD for initial sync signal
104+
ipPool, ok := obj.(*networkv1.IPPool)
105+
if !ok {
106+
logrus.Errorf("(controller.sync) failed to assert obj during ADD for key %s", event.key)
107+
return err // Return error to requeue
108+
}
109+
logrus.Infof("(controller.sync) ADD %s/%s", ipPool.Namespace, ipPool.Name)
110+
if err := c.Update(ipPool); err != nil { // Update leases based on this added IPPool
111+
logrus.Errorf("(controller.sync) failed to update DHCP lease store for newly added IPPool %s/%s: %s", ipPool.Namespace, ipPool.Name, err.Error())
112+
return err // Return error to requeue
113+
}
114+
// Signal initial sync because our target pool has been added and processed.
115+
c.initialSyncOnce.Do(func() {
116+
logrus.Infof("Initial sync ADD completed for IPPool %s/%s, signaling DHCP server.", ipPool.Namespace, ipPool.Name)
117+
close(c.initialSyncDone)
118+
})
119+
case DELETE:
120+
// If our target IPPool is deleted.
121+
// If cache.WaitForCacheSync is done, and then we get a DELETE for our pool,
122+
// it means it *was* there.
123+
// If the pool is *not* found by GetByKey (exists == false) and action is DELETE,
124+
// it implies it was already deleted from the cache by the informer.
125+
poolNamespace, poolNameFromKey, keyErr := cache.SplitMetaNamespaceKey(event.key)
126+
if keyErr != nil {
127+
logrus.Errorf("(controller.sync) failed to split key %s for DELETE: %v", event.key, keyErr)
128+
return keyErr
129+
}
130+
131+
// Ensure this delete event is for the specific pool this controller instance is managing.
132+
// This check is already done above with `event.poolName != c.poolRef.Name`,
133+
// but double-checking with `event.key` against `c.poolRef` is more robust for DELETE.
134+
if !(poolNamespace == c.poolRef.Namespace && poolNameFromKey == c.poolRef.Name) {
135+
logrus.Debugf("(controller.sync) DELETE event for key %s is not for our target pool %s. Skipping.", event.key, c.poolRef.String())
136+
return nil
137+
}
138+
139+
logrus.Infof("(controller.sync) DELETE %s. Clearing leases.", event.key)
140+
c.clearLeasesForPool(c.poolRef.String()) // poolRefString is "namespace/name"
141+
142+
// After deletion processing, if this was our target pool, it's now "synced" (as deleted/absent).
143+
c.initialSyncOnce.Do(func() {
144+
logrus.Infof("Initial sync DELETE (target processed for deletion) completed for IPPool %s, signaling DHCP server.", event.key)
101145
close(c.initialSyncDone)
102146
})
103147
}
104148

105149
return
106150
}
107151

152+
// clearLeasesForPool clears all leases for a specific IPPool from the DHCPAllocator and local cache.
153+
func (c *Controller) clearLeasesForPool(poolRefStr string) {
154+
logrus.Infof("(%s) Clearing all leases from DHCPAllocator and local cache due to IPPool deletion or full resync.", poolRefStr)
155+
// Iterate over a copy of keys if modifying map during iteration, or collect keys first
156+
hwAddrsToDelete := []string{}
157+
for hwAddr := range c.poolCache {
158+
// Assuming c.poolCache only contains MACs for *this* controller's poolRef.
159+
// This assumption needs to be true for this to work correctly.
160+
// If c.poolCache could contain MACs from other pools (e.g. if it was shared, which it isn't here),
161+
// we would need to verify that the lease belongs to this poolRefStr before deleting.
162+
// However, since each EventHandler/Controller has its own poolCache for its specific poolRef, this is safe.
163+
hwAddrsToDelete = append(hwAddrsToDelete, hwAddr)
164+
}
165+
166+
for _, hwAddr := range hwAddrsToDelete {
167+
if err := c.dhcpAllocator.DeleteLease(poolRefStr, hwAddr); err != nil {
168+
logrus.Warnf("(%s) Failed to delete lease for MAC %s during clear: %v (may already be gone or belong to a different pool if logic changes)", poolRefStr, hwAddr, err)
169+
}
170+
delete(c.poolCache, hwAddr)
171+
}
172+
logrus.Infof("(%s) Finished clearing leases.", poolRefStr)
173+
}
174+
108175
// Update processes the IPPool and updates the DHCPAllocator's leases.
109176
func (c *Controller) Update(ipPool *networkv1.IPPool) error {
110177
if ipPool == nil {

pkg/agent/ippool/event.go

Lines changed: 70 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,20 +105,83 @@ func (e *EventHandler) EventListener(ctx context.Context) {
105105
logrus.Info("(eventhandler.EventListener) starting IPPool event listener")
106106

107107
// TODO: could be more specific on what namespaces we want to watch and what fields we need
108-
watcher := cache.NewListWatchFromClient(e.k8sClientset.NetworkV1alpha1().RESTClient(), "ippools", e.poolRef.Namespace, fields.Everything())
108+
// Watch only the specific IPPool this EventHandler is responsible for.
109+
nameSelector := fields.OneTermEqualSelector("metadata.name", e.poolRef.Name)
110+
watcher := cache.NewListWatchFromClient(e.k8sClientset.NetworkV1alpha1().RESTClient(), "ippools", e.poolRef.Namespace, nameSelector)
109111

110112
queue := workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter())
111113

112114
indexer, informer := cache.NewIndexerInformer(watcher, &networkv1.IPPool{}, 0, cache.ResourceEventHandlerFuncs{
115+
AddFunc: func(obj interface{}) {
116+
key, err := cache.MetaNamespaceKeyFunc(obj)
117+
if err == nil {
118+
ipPool := obj.(*networkv1.IPPool)
119+
// Ensure we only queue events for the specific IPPool this handler is for,
120+
// even though the watcher is now scoped. This is a good safeguard.
121+
if ipPool.Name == e.poolRef.Name && ipPool.Namespace == e.poolRef.Namespace {
122+
queue.Add(Event{
123+
key: key,
124+
action: ADD,
125+
poolName: ipPool.ObjectMeta.Name,
126+
poolNetworkName: ipPool.Spec.NetworkName,
127+
})
128+
}
129+
}
130+
},
113131
UpdateFunc: func(old interface{}, new interface{}) {
114132
key, err := cache.MetaNamespaceKeyFunc(new)
115133
if err == nil {
116-
queue.Add(Event{
117-
key: key,
118-
action: UPDATE,
119-
poolName: new.(*networkv1.IPPool).ObjectMeta.Name,
120-
poolNetworkName: new.(*networkv1.IPPool).Spec.NetworkName,
121-
})
134+
ipPool := new.(*networkv1.IPPool)
135+
// Ensure we only queue events for the specific IPPool this handler is for.
136+
if ipPool.Name == e.poolRef.Name && ipPool.Namespace == e.poolRef.Namespace {
137+
queue.Add(Event{
138+
key: key,
139+
action: UPDATE,
140+
poolName: ipPool.ObjectMeta.Name,
141+
poolNetworkName: ipPool.Spec.NetworkName,
142+
})
143+
}
144+
}
145+
},
146+
DeleteFunc: func(obj interface{}) {
147+
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj) // Important for handling deleted objects
148+
if err == nil {
149+
var poolName, poolNamespace string
150+
// Attempt to get name and namespace from the object if possible
151+
if ipPool, ok := obj.(*networkv1.IPPool); ok {
152+
poolName = ipPool.ObjectMeta.Name
153+
poolNamespace = ipPool.ObjectMeta.Namespace
154+
} else if dslu, ok := obj.(cache.DeletedFinalStateUnknown); ok {
155+
// Try to get original object
156+
if ipPoolOrig, okOrig := dslu.Obj.(*networkv1.IPPool); okOrig {
157+
poolName = ipPoolOrig.ObjectMeta.Name
158+
poolNamespace = ipPoolOrig.ObjectMeta.Namespace
159+
} else { // Fallback to splitting the key
160+
ns, name, keyErr := cache.SplitMetaNamespaceKey(key)
161+
if keyErr == nil {
162+
poolName = name
163+
poolNamespace = ns
164+
}
165+
}
166+
} else { // Fallback to splitting the key if obj is not IPPool or DeletedFinalStateUnknown
167+
ns, name, keyErr := cache.SplitMetaNamespaceKey(key)
168+
if keyErr == nil {
169+
poolName = name
170+
poolNamespace = ns
171+
}
172+
}
173+
174+
// Ensure we only queue events for the specific IPPool this handler is for.
175+
if poolName == e.poolRef.Name && poolNamespace == e.poolRef.Namespace {
176+
// For DELETE, poolNetworkName might not be available or relevant in the Event struct
177+
// if the controller's delete logic primarily uses the key/poolRef.
178+
queue.Add(Event{
179+
key: key,
180+
action: DELETE,
181+
poolName: poolName,
182+
// poolNetworkName could be omitted or fetched if truly needed for DELETE logic
183+
})
184+
}
122185
}
123186
},
124187
}, cache.Indexers{})

0 commit comments

Comments
 (0)