@@ -174,7 +174,7 @@ class gil_safe_call_once_and_store {
174174 template <typename Callable>
175175 gil_safe_call_once_and_store &call_once_and_store_result (Callable &&fn,
176176 void (*finalize_fn)(T &) = nullptr) {
177- if (!is_last_storage_tls_valid ()) {
177+ if (!is_last_storage_valid ()) {
178178 // Multiple threads may enter here, because the GIL is released in the next line and
179179 // CPython API calls in the `fn()` call below may release and reacquire the GIL.
180180 gil_scoped_release gil_rel; // Needed to establish lock ordering.
@@ -201,25 +201,24 @@ class gil_safe_call_once_and_store {
201201 ::new (value->storage ) T (fn ());
202202 value->finalize = finalize_fn;
203203 value->is_initialized = true ;
204+ last_storage_ptr_ = reinterpret_cast <T *>(value->storage );
204205 is_initialized_by_at_least_one_interpreter_ = true ;
205206 });
206207 // All threads will observe `is_initialized_by_at_least_one_interpreter_` as true here.
207- update_storage_tls_cache (reinterpret_cast <T *>(value->storage ));
208208 }
209209 // Intentionally not returning `T &` to ensure the calling code is self-documenting.
210210 return *this ;
211211 }
212212
213213 // This must only be called after `call_once_and_store_result()` was called.
214214 T &get_stored () {
215- T *result = get_storage_tls_cache () ;
216- if (!is_last_storage_tls_valid ()) {
215+ T *result = last_storage_ptr_ ;
216+ if (!is_last_storage_valid ()) {
217217 gil_scoped_acquire gil_acq;
218218 const void *const key = reinterpret_cast <const void *>(this );
219219 auto &storage_map = *get_or_create_call_once_storage_map ();
220220 auto *value = static_cast <call_once_storage<T> *>(storage_map.at (key));
221- result = reinterpret_cast <T *>(value->storage );
222- update_storage_tls_cache (result);
221+ result = last_storage_ptr_ = reinterpret_cast <T *>(value->storage );
223222 }
224223 assert (result != nullptr );
225224 return *result;
@@ -232,55 +231,9 @@ class gil_safe_call_once_and_store {
232231 PYBIND11_DTOR_CONSTEXPR ~gil_safe_call_once_and_store () = default ;
233232
234233private:
235- // Fast local cache to avoid repeated lookups when the interpreter has not changed on the
236- // current thread. Similar to `internals_pp_manager::{internals_p_tls,last_istate_tls}`.
237- static T *&last_storage_ptr_tls () {
238- static thread_local T *last_storage_ptr = nullptr ;
239- return last_storage_ptr;
240- }
241-
242- static PyInterpreterState *&last_istate_tls () {
243- static thread_local PyInterpreterState *last_istate = nullptr ;
244- return last_istate;
245- }
246-
247- // See also: internals_pp_manager::get_pp()
248- T *get_storage_tls_cache () const {
249- // The caller should be aware that the cached pointer may be invalid.
250- // It can only be used after checking `is_last_storage_tls_valid()`.
251- if (detail::get_num_interpreters_seen () > 1 ) {
252- return last_storage_ptr_tls ();
253- }
254- return last_storage_ptr_singleton_;
255- }
256-
257- void update_storage_tls_cache (T *ptr) {
258- gil_scoped_acquire_simple gil;
259- if (detail::get_num_interpreters_seen () > 1 ) {
260- auto *tstate = detail::get_thread_state_unchecked ();
261- if (tstate) {
262- last_istate_tls () = tstate->interp ;
263- }
264- last_storage_ptr_tls () = ptr;
265- } else {
266- last_storage_ptr_singleton_ = ptr;
267- }
268- }
269-
270- bool is_last_storage_tls_valid () const {
271- if (!is_initialized_by_at_least_one_interpreter_) {
272- return false ;
273- }
274- if (detail::get_num_interpreters_seen () > 1 ) {
275- // Whenever the interpreter changes on the current thread we need to invalidate the
276- // cached storage pointer so that it can be pulled from the interpreter's state dict.
277- auto *tstate = detail::get_thread_state_unchecked ();
278- if (!tstate || tstate->interp != last_istate_tls ()) {
279- return false ;
280- }
281- return last_storage_ptr_tls () != nullptr ;
282- }
283- return last_storage_ptr_singleton_ != nullptr ;
234+ bool is_last_storage_valid () const {
235+ return is_initialized_by_at_least_one_interpreter_
236+ && detail::get_num_interpreters_seen () == 1 ;
284237 }
285238
286239 static call_once_storage_map_type *get_or_create_call_once_storage_map () {
@@ -327,9 +280,10 @@ class gil_safe_call_once_and_store {
327280
328281 // Fast local cache to avoid repeated lookups when there are no multiple interpreters.
329282 // This is only valid if there is a single interpreter. Otherwise, it is not used.
330- // This is separate from the thread-local cache above and maybe not initialized by the main
331- // interpreter.
332- T *last_storage_ptr_singleton_ = nullptr ;
283+ // WARNING: We cannot use thread local cache similar to `internals_pp_manager::internals_p_tls`
284+ // because the thread local storage cannot be explicitly invalidated when interpreters
285+ // are destroyed (unlike `internals_pp_manager` which has explicit hooks for that).
286+ T *last_storage_ptr_ = nullptr ;
333287 // This flag is true if the value has been initialized by any interpreter (may not be the
334288 // current one).
335289 detail::atomic_bool is_initialized_by_at_least_one_interpreter_{false };
0 commit comments