From cfdaf799a6761799828108febe2c6a6fd5747470 Mon Sep 17 00:00:00 2001
From: Boris Zbarsky <bzbarsky@apple.com>
Date: Fri, 13 Dec 2024 19:41:07 -0500
Subject: [PATCH] Ensure we create a clean subscription on
 _deviceMayBeReachable. (#36842)

Just clear out all existing subscription/session state when this API is called.
Callers should ensure there are no spurious calls.
---
 .../Framework/CHIP/MTRDevice_Concrete.mm      | 72 +++++++++++++------
 1 file changed, 51 insertions(+), 21 deletions(-)

diff --git a/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm b/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm
index 996e41ff12cf2d..264a268e253163 100644
--- a/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm
+++ b/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm
@@ -871,13 +871,7 @@ - (void)invalidate
         // tear down the subscription.  We will get no more callbacks from
         // the subscription after this point.
         std::lock_guard lock(self->_lock);
-        self->_currentReadClient = nullptr;
-        if (self->_currentSubscriptionCallback) {
-            delete self->_currentSubscriptionCallback;
-        }
-        self->_currentSubscriptionCallback = nullptr;
-
-        [self _changeInternalState:MTRInternalDeviceStateUnsubscribed];
+        [self _resetSubscription];
     }
                                      errorHandler:nil];
 
@@ -938,8 +932,8 @@ - (void)_triggerResubscribeWithReason:(NSString *)reason nodeLikelyReachable:(BO
         }
         readClientToResubscribe->TriggerResubscribeIfScheduled(reason.UTF8String);
     } else if (_internalDeviceState == MTRInternalDeviceStateSubscribing && nodeLikelyReachable) {
-        // If we have reason to suspect that the node is now reachable and we haven’t established a
-        // CASE session yet, let’s consider it to be stalled and invalidate the pairing session.
+        // If we have reason to suspect that the node is now reachable and we haven't established a
+        // CASE session yet, let's consider it to be stalled and invalidate the pairing session.
         [self._concreteController asyncGetCommissionerOnMatterQueue:^(Controller::DeviceCommissioner * commissioner) {
             auto caseSessionMgr = commissioner->CASESessionMgr();
             VerifyOrDie(caseSessionMgr != nullptr);
@@ -1164,7 +1158,7 @@ - (void)_handleSubscriptionError:(NSError *)error
     [self _doHandleSubscriptionError:error];
 }
 
-- (void)_doHandleSubscriptionError:(NSError *)error
+- (void)_doHandleSubscriptionError:(nullable NSError *)error
 {
     assertChipStackLockedByCurrentThread();
 
@@ -1305,7 +1299,13 @@ - (void)_handleResubscriptionNeededWithDelay:(NSNumber *)resubscriptionDelayMs
 
     // Change our state before going async.
     [self _changeState:MTRDeviceStateUnknown];
-    [self _changeInternalState:MTRInternalDeviceStateResubscribing];
+
+    // If we have never had a subscription established, stay in the Subscribing
+    // state; don't transition to Resubscribing just because our attempt at
+    // subscribing failed.
+    if (HadSubscriptionEstablishedOnce(self->_internalDeviceState)) {
+        [self _changeInternalState:MTRInternalDeviceStateResubscribing];
+    }
 
     dispatch_async(self.queue, ^{
         [self _handleResubscriptionNeededWithDelayOnDeviceQueue:resubscriptionDelayMs];
@@ -2444,19 +2444,29 @@ - (void)_resetSubscriptionWithReasonString:(NSString *)reasonString
         MTR_LOG("%@ subscription reset disconnecting ReadClient and SubscriptionCallback", self);
 
         std::lock_guard lock(self->_lock);
-        self->_currentReadClient = nullptr;
-        if (self->_currentSubscriptionCallback) {
-            delete self->_currentSubscriptionCallback;
-        }
-        self->_currentSubscriptionCallback = nullptr;
 
-        [self _doHandleSubscriptionError:nil];
+        [self _resetSubscription];
+
         // Use nil reset delay so that this keeps existing backoff timing
         [self _doHandleSubscriptionReset:nil];
     }
                                      errorHandler:nil];
 }
 
+- (void)_resetSubscription
+{
+    assertChipStackLockedByCurrentThread();
+    os_unfair_lock_assert_owner(&_lock);
+
+    _currentReadClient = nullptr;
+    if (_currentSubscriptionCallback) {
+        delete _currentSubscriptionCallback;
+        _currentSubscriptionCallback = nullptr;
+    }
+
+    [self _doHandleSubscriptionError:nil];
+}
+
 #ifdef DEBUG
 - (void)unitTestResetSubscription
 {
@@ -4322,11 +4332,31 @@ - (BOOL)_deviceHasActiveSubscription
 
 - (void)_deviceMayBeReachable
 {
-    MTR_LOG("%@ _deviceMayBeReachable called", self);
+    MTR_LOG("%@ _deviceMayBeReachable called, resetting subscription", self);
     // TODO: This should only be allowed for thread devices
-    [_deviceController asyncDispatchToMatterQueue:^{
-        [self _triggerResubscribeWithReason:@"SPI client indicated the device may now be reachable"
-                        nodeLikelyReachable:YES];
+    [self._concreteController asyncGetCommissionerOnMatterQueue:^(Controller::DeviceCommissioner * commissioner) {
+        // Reset all of our subscription/session state and re-establish it all
+        // from the start.  Reset our subscription first, before tearing
+        // down the session, so we don't have to worry about the
+        // notifications from the latter coming through async and
+        // complicating the situation.  Unfortunately, we do not want to
+        // hold the lock when destroying the session, just in case it still
+        // ends up calling into us somehow, so we have to break the work up
+        // into two separate locked sections...
+        {
+            std::lock_guard lock(self->_lock);
+            [self _clearSubscriptionPoolWork];
+            [self _resetSubscription];
+        }
+
+        auto caseSessionMgr = commissioner->CASESessionMgr();
+        VerifyOrDie(caseSessionMgr != nullptr);
+        caseSessionMgr->ReleaseSession(commissioner->GetPeerScopedId(self->_nodeID.unsignedLongLongValue));
+
+        std::lock_guard lock(self->_lock);
+        // Use _ensureSubscriptionForExistingDelegates so that the subscriptions
+        // will go through the pool as needed, not necessarily happen immediately.
+        [self _ensureSubscriptionForExistingDelegates:@"SPI client indicated the device may now be reachable"];
     } errorHandler:nil];
 }