From cfdaf799a6761799828108febe2c6a6fd5747470 Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Fri, 13 Dec 2024 19:41:07 -0500 Subject: [PATCH] Ensure we create a clean subscription on _deviceMayBeReachable. (#36842) Just clear out all existing subscription/session state when this API is called. Callers should ensure there are no spurious calls. --- .../Framework/CHIP/MTRDevice_Concrete.mm | 72 +++++++++++++------ 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm b/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm index 996e41ff12cf2d..264a268e253163 100644 --- a/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm +++ b/src/darwin/Framework/CHIP/MTRDevice_Concrete.mm @@ -871,13 +871,7 @@ - (void)invalidate // tear down the subscription. We will get no more callbacks from // the subscription after this point. std::lock_guard lock(self->_lock); - self->_currentReadClient = nullptr; - if (self->_currentSubscriptionCallback) { - delete self->_currentSubscriptionCallback; - } - self->_currentSubscriptionCallback = nullptr; - - [self _changeInternalState:MTRInternalDeviceStateUnsubscribed]; + [self _resetSubscription]; } errorHandler:nil]; @@ -938,8 +932,8 @@ - (void)_triggerResubscribeWithReason:(NSString *)reason nodeLikelyReachable:(BO } readClientToResubscribe->TriggerResubscribeIfScheduled(reason.UTF8String); } else if (_internalDeviceState == MTRInternalDeviceStateSubscribing && nodeLikelyReachable) { - // If we have reason to suspect that the node is now reachable and we haven’t established a - // CASE session yet, let’s consider it to be stalled and invalidate the pairing session. + // If we have reason to suspect that the node is now reachable and we haven't established a + // CASE session yet, let's consider it to be stalled and invalidate the pairing session. [self._concreteController asyncGetCommissionerOnMatterQueue:^(Controller::DeviceCommissioner * commissioner) { auto caseSessionMgr = commissioner->CASESessionMgr(); VerifyOrDie(caseSessionMgr != nullptr); @@ -1164,7 +1158,7 @@ - (void)_handleSubscriptionError:(NSError *)error [self _doHandleSubscriptionError:error]; } -- (void)_doHandleSubscriptionError:(NSError *)error +- (void)_doHandleSubscriptionError:(nullable NSError *)error { assertChipStackLockedByCurrentThread(); @@ -1305,7 +1299,13 @@ - (void)_handleResubscriptionNeededWithDelay:(NSNumber *)resubscriptionDelayMs // Change our state before going async. [self _changeState:MTRDeviceStateUnknown]; - [self _changeInternalState:MTRInternalDeviceStateResubscribing]; + + // If we have never had a subscription established, stay in the Subscribing + // state; don't transition to Resubscribing just because our attempt at + // subscribing failed. + if (HadSubscriptionEstablishedOnce(self->_internalDeviceState)) { + [self _changeInternalState:MTRInternalDeviceStateResubscribing]; + } dispatch_async(self.queue, ^{ [self _handleResubscriptionNeededWithDelayOnDeviceQueue:resubscriptionDelayMs]; @@ -2444,19 +2444,29 @@ - (void)_resetSubscriptionWithReasonString:(NSString *)reasonString MTR_LOG("%@ subscription reset disconnecting ReadClient and SubscriptionCallback", self); std::lock_guard lock(self->_lock); - self->_currentReadClient = nullptr; - if (self->_currentSubscriptionCallback) { - delete self->_currentSubscriptionCallback; - } - self->_currentSubscriptionCallback = nullptr; - [self _doHandleSubscriptionError:nil]; + [self _resetSubscription]; + // Use nil reset delay so that this keeps existing backoff timing [self _doHandleSubscriptionReset:nil]; } errorHandler:nil]; } +- (void)_resetSubscription +{ + assertChipStackLockedByCurrentThread(); + os_unfair_lock_assert_owner(&_lock); + + _currentReadClient = nullptr; + if (_currentSubscriptionCallback) { + delete _currentSubscriptionCallback; + _currentSubscriptionCallback = nullptr; + } + + [self _doHandleSubscriptionError:nil]; +} + #ifdef DEBUG - (void)unitTestResetSubscription { @@ -4322,11 +4332,31 @@ - (BOOL)_deviceHasActiveSubscription - (void)_deviceMayBeReachable { - MTR_LOG("%@ _deviceMayBeReachable called", self); + MTR_LOG("%@ _deviceMayBeReachable called, resetting subscription", self); // TODO: This should only be allowed for thread devices - [_deviceController asyncDispatchToMatterQueue:^{ - [self _triggerResubscribeWithReason:@"SPI client indicated the device may now be reachable" - nodeLikelyReachable:YES]; + [self._concreteController asyncGetCommissionerOnMatterQueue:^(Controller::DeviceCommissioner * commissioner) { + // Reset all of our subscription/session state and re-establish it all + // from the start. Reset our subscription first, before tearing + // down the session, so we don't have to worry about the + // notifications from the latter coming through async and + // complicating the situation. Unfortunately, we do not want to + // hold the lock when destroying the session, just in case it still + // ends up calling into us somehow, so we have to break the work up + // into two separate locked sections... + { + std::lock_guard lock(self->_lock); + [self _clearSubscriptionPoolWork]; + [self _resetSubscription]; + } + + auto caseSessionMgr = commissioner->CASESessionMgr(); + VerifyOrDie(caseSessionMgr != nullptr); + caseSessionMgr->ReleaseSession(commissioner->GetPeerScopedId(self->_nodeID.unsignedLongLongValue)); + + std::lock_guard lock(self->_lock); + // Use _ensureSubscriptionForExistingDelegates so that the subscriptions + // will go through the pool as needed, not necessarily happen immediately. + [self _ensureSubscriptionForExistingDelegates:@"SPI client indicated the device may now be reachable"]; } errorHandler:nil]; }