Lift checking for init to ThreadAlloc

The check init code was tightly integrated into LocalAllocator. This commit pull that code out into ThreadAlloc, and passes a template parameter into the remaining LocalAllocator to perform the relevant TLS manipulations. This removes some of the awkward layering around register_clean_up.
microsoft · Feb 20, 2025 · 001d2e4 · 001d2e4
1 parent 8ee8026
commit 001d2e4
Show file tree

Hide file tree

Showing 10 changed files with 266 additions and 274 deletions.
diff --git a/src/snmalloc/backend/fixedglobalconfig.h b/src/snmalloc/backend/fixedglobalconfig.h
@@ -75,15 +75,6 @@ namespace snmalloc
       return opts;
     }();
 
-    // This needs to be a forward reference as the
-    // thread local state will need to know about this.
-    // This may allocate, so must be called once a thread
-    // local allocator exists.
-    static void register_clean_up()
-    {
-      snmalloc::register_clean_up();
-    }
-
     static void init(LocalState* local_state, void* base, size_t length)
     {
       UNUSED(local_state);

diff --git a/src/snmalloc/backend/globalconfig.h b/src/snmalloc/backend/globalconfig.h
@@ -8,9 +8,6 @@
 
 namespace snmalloc
 {
-  // Forward reference to thread local cleanup.
-  void register_clean_up();
-
   /**
    * The default configuration for a global snmalloc.  It contains all the
    * datastructures to manage the memory from the OS.  It had several internal
@@ -159,14 +156,5 @@ namespace snmalloc
     {
       return initialised;
     }
-
-    // This needs to be a forward reference as the
-    // thread local state will need to know about this.
-    // This may allocate, so should only be called once
-    // a thread local allocator is available.
-    static void register_clean_up()
-    {
-      snmalloc::register_clean_up();
-    }
   };
 } // namespace snmalloc
diff --git a/src/snmalloc/backend_helpers/commonconfig.h b/src/snmalloc/backend_helpers/commonconfig.h
@@ -4,9 +4,6 @@
 
 namespace snmalloc
 {
-  // Forward reference to thread local cleanup.
-  void register_clean_up();
-
   /**
    * Options for a specific snmalloc configuration.  Every globals object must
    * have one `constexpr` instance of this class called `Options`.  This should

diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h
@@ -323,44 +323,44 @@ namespace snmalloc
   template<size_t size, ZeroMem zero_mem = NoZero, size_t align = 1>
   SNMALLOC_FAST_PATH_INLINE void* alloc()
   {
-    return ThreadAlloc::get().alloc<zero_mem>(aligned_size(align, size));
+    return ThreadAlloc::get().alloc<zero_mem, ThreadAlloc::CheckInit>(aligned_size(align, size));
   }
 
   template<ZeroMem zero_mem = NoZero, size_t align = 1>
   SNMALLOC_FAST_PATH_INLINE void* alloc(size_t size)
   {
-    return ThreadAlloc::get().alloc<zero_mem>(aligned_size(align, size));
+    return ThreadAlloc::get().alloc<zero_mem, ThreadAlloc::CheckInit>(aligned_size(align, size));
   }
 
   template<ZeroMem zero_mem = NoZero>
   SNMALLOC_FAST_PATH_INLINE void* alloc_aligned(size_t align, size_t size)
   {
-    return ThreadAlloc::get().alloc<zero_mem>(aligned_size(align, size));
+    return ThreadAlloc::get().alloc<zero_mem, ThreadAlloc::CheckInit>(aligned_size(align, size));
   }
 
   SNMALLOC_FAST_PATH_INLINE void dealloc(void* p)
   {
-    ThreadAlloc::get().dealloc(p);
+    ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
   }
 
   SNMALLOC_FAST_PATH_INLINE void dealloc(void* p, size_t size)
   {
     check_size(p, size);
-    ThreadAlloc::get().dealloc(p);
+    ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
   }
 
   template<size_t size>
   SNMALLOC_FAST_PATH_INLINE void dealloc(void* p)
   {
     check_size(p, size);
-    ThreadAlloc::get().dealloc(p);
+    ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
   }
 
   SNMALLOC_FAST_PATH_INLINE void dealloc(void* p, size_t size, size_t align)
   {
     auto rsize = aligned_size(align, size);
     check_size(p, rsize);
-    ThreadAlloc::get().dealloc(p);
+    ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
   }
 
   SNMALLOC_FAST_PATH_INLINE void debug_teardown()

diff --git a/src/snmalloc/global/threadalloc.h b/src/snmalloc/global/threadalloc.h
@@ -38,36 +38,22 @@ namespace snmalloc
    */
   class ThreadAlloc
   {
-  protected:
-    static void register_cleanup() {}
-
   public:
     static SNMALLOC_FAST_PATH Alloc& get()
     {
       return ThreadAllocExternal::get();
     }
+
+    // This will always call the success path as the client is responsible
+    // handling the initialisation.
+    using CheckInit = CheckInitDefault;
   };
 
-  /**
-   * Function passed as a template parameter to `Allocator` to allow lazy
-   * replacement.  There is nothing to initialise in this case, so we expect
-   * this to never be called.
-   */
-#  ifdef _MSC_VER
-// 32Bit Windows release MSVC is determining this as having unreachable code for
-// f(nullptr), which is true.  But other platforms don't. Disabling the warning
-// seems simplist.
-#    pragma warning(push)
-#    pragma warning(disable : 4702)
-#  endif
-  inline void register_clean_up()
-  {
-    error("Critical Error: This should never be called.");
-  }
-#  ifdef _MSC_VER
-#    pragma warning(pop)
-#  endif
 #else
+
+  class CheckInitPthread;
+  class CheckInitCXX;
+
   /**
    * Holds the thread local state for the allocator.  The state is constant
    * initialised, and has no direct dectructor.  Instead snmalloc will call
@@ -77,6 +63,14 @@ namespace snmalloc
    */
   class ThreadAlloc
   {
+    SNMALLOC_REQUIRE_CONSTINIT static inline thread_local Alloc alloc{};
+
+    // As allocation and deallocation can occur during thread teardown
+    // we need to record if we are already in that state as we will not
+    // receive another teardown call, so each operation needs to release
+    // the underlying data structures after the call.
+    static inline thread_local bool teardown_called{false};
+
   public:
     /**
      * Handle on thread local allocator
@@ -87,76 +81,160 @@ namespace snmalloc
      */
     static SNMALLOC_FAST_PATH Alloc& get()
     {
-      SNMALLOC_REQUIRE_CONSTINIT static thread_local Alloc alloc;
       return alloc;
     }
+
+    template<typename Subclass>
+    class CheckInitBase
+    {
+      template<typename Restart, typename... Args>
+      SNMALLOC_SLOW_PATH static auto check_init_slow(Restart r, Args... args)
+      {
+        bool post_teardown = teardown_called;
+
+        if constexpr (!Config::Options.LocalAllocSupportsLazyInit)
+        {
+          SNMALLOC_CHECK(
+            false &&
+            "lazy_init called on an allocator that doesn't support lazy "
+            "initialisation");
+          // Unreachable, but needed to keep the type checker happy in deducing
+          // the return type of this function.
+          return static_cast<decltype(action(args...))>(nullptr);
+        }
+        else
+        {
+          // Initialise the thread local allocator
+          if constexpr (Config::Options.CoreAllocOwnsLocalState)
+          {
+            alloc.init();
+          }
+
+          // register_clean_up must be called after init.  register clean up
+          // may be implemented with allocation, so need to ensure we have a
+          // valid allocator at this point.
+          if (!post_teardown)
+          {
+            // Must be called at least once per thread.
+            // A pthread implementation only calls the thread destruction handle
+            // if the key has been set.
+            Subclass::register_clean_up();
+
+            // Perform underlying operation
+            return r(args...);
+          }
+
+          OnDestruct od([post_teardown]() {
+#  ifdef SNMALLOC_TRACING
+            message<1024>("post_teardown flush()");
+#  endif
+            // We didn't have an allocator because the thread is being torndown.
+            // We need to return any local state, so we don't leak it.
+            alloc.teardown();
+          });
+
+          // Perform underlying operation
+          return r(args...);
+        }
+      }
+
+    public:
+      template<typename Success, typename Restart, typename... Args>
+      SNMALLOC_FAST_PATH static auto
+      check_init(Success s, Restart r, Args... args)
+      {
+        if (alloc.is_init())
+        {
+          return s();
+        }
+
+        return check_init_slow(r, args...);
+      }
+    };
+#  ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS
+    using CheckInit = CheckInitPthread;
+#  elif defined(SNMALLOC_USE_CXX_THREAD_DESTRUCTORS)
+    using CheckInit = CheckInitCXX;
+#  else
+    using CheckInit = CheckInitDefault;
+#  endif
   };
 
 #  ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS
-  /**
-   * Used to give correct signature to teardown required by pthread_key.
-   */
-  inline void pthread_cleanup(void*)
+  class CheckInitPthread : public ThreadAlloc::CheckInitBase<CheckInitPthread>
   {
-    ThreadAlloc::get().teardown();
-  }
+  private:
+    /**
+     * Used to give correct signature to teardown required by pthread_key.
+     */
+    static void pthread_cleanup(void*)
+    {
+      ThreadAlloc::get().teardown();
+    }
 
-  /**
-   * Used to give correct signature to teardown required by atexit.
-   */
-  inline void pthread_cleanup_main_thread()
-  {
-    ThreadAlloc::get().teardown();
-  }
+    /**
+     * Used to give correct signature to teardown required by atexit.
+     */
+    static void pthread_cleanup_main_thread()
+    {
+      ThreadAlloc::get().teardown();
+    }
 
-  /**
-   * Used to give correct signature to the pthread call for the Singleton class.
-   */
-  inline void pthread_create(pthread_key_t* key) noexcept
-  {
-    pthread_key_create(key, &pthread_cleanup);
-    // Main thread does not call pthread_cleanup if `main` returns or `exit` is
-    // called, so use an atexit handler to guarantee that the cleanup is run at
-    // least once.  If the main thread exits with `pthread_exit` then it will be
-    // called twice but this case is already handled because other destructors
-    // can cause the per-thread allocator to be recreated.
-    atexit(&pthread_cleanup_main_thread);
-  }
+    /**
+     * Used to give correct signature to the pthread call for the Singleton
+     * class.
+     */
+    static void pthread_create(pthread_key_t* key) noexcept
+    {
+      pthread_key_create(key, &pthread_cleanup);
+      // Main thread does not call pthread_cleanup if `main` returns or `exit`
+      // is called, so use an atexit handler to guarantee that the cleanup is
+      // run at least once.  If the main thread exits with `pthread_exit` then
+      // it will be called twice but this case is already handled because other
+      // destructors can cause the per-thread allocator to be recreated.
+      atexit(&pthread_cleanup_main_thread);
+    }
 
-  /**
-   * Performs thread local teardown for the allocator using the pthread library.
-   *
-   * This removes the dependence on the C++ runtime.
-   */
-  inline void register_clean_up()
-  {
-    Singleton<pthread_key_t, &pthread_create> p_key;
-    // We need to set a non-null value, so that the destructor is called,
-    // we never look at the value.
-    static char p_teardown_val = 1;
-    pthread_setspecific(p_key.get(), &p_teardown_val);
+  public:
+    /**
+     * Performs thread local teardown for the allocator using the pthread
+     * library.
+     *
+     * This removes the dependence on the C++ runtime.
+     */
+    static void register_clean_up()
+    {
+      Singleton<pthread_key_t, &pthread_create> p_key;
+      // We need to set a non-null value, so that the destructor is called,
+      // we never look at the value.
+      static char p_teardown_val = 1;
+      pthread_setspecific(p_key.get(), &p_teardown_val);
 #    ifdef SNMALLOC_TRACING
-    message<1024>("Using pthread clean up");
+      message<1024>("Using pthread clean up");
 #    endif
-  }
+    }
+  };
 #  elif defined(SNMALLOC_USE_CXX_THREAD_DESTRUCTORS)
-  /**
-   * This function is called by each thread once it starts using the
-   * thread local allocator.
-   *
-   * This implementation depends on nothing outside of a working C++
-   * environment and so should be the simplest for initial bringup on an
-   * unsupported platform.
-   */
-  inline void register_clean_up()
+  class CheckInitCXX : public ThreadAlloc::CheckInitBase<CheckInitCXX>
   {
-    static thread_local OnDestruct dummy(
-      []() { ThreadAlloc::get().teardown(); });
-    UNUSED(dummy);
+  public:
+    /**
+     * This function is called by each thread once it starts using the
+     * thread local allocator.
+     *
+     * This implementation depends on nothing outside of a working C++
+     * environment and so should be the simplest for initial bringup on an
+     * unsupported platform.
+     */
+    inline void register_clean_up()
+    {
+      static thread_local OnDestruct dummy([]() { ThreadAlloc::teardown(); });
+      UNUSED(dummy);
 #    ifdef SNMALLOC_TRACING
-    message<1024>("Using C++ destructor clean up");
+      message<1024>("Using C++ destructor clean up");
 #    endif
-  }
+    }
+  };
 #  endif
 #endif
 } // namespace snmalloc
@@ -171,13 +249,4 @@ inline void _malloc_thread_cleanup()
 {
   snmalloc::ThreadAlloc::get().teardown();
 }
-
-namespace snmalloc
-{
-  /**
-   * No-op version of register_clean_up.  This is called unconditionally by
-   * globalconfig but is not necessary when using a libc hook.
-   */
-  inline void register_clean_up() {}
-}
 #endif