Skip to content

Commit

Permalink
Lift checking for init to ThreadAlloc
Browse files Browse the repository at this point in the history
The check init code was tightly integrated into LocalAllocator.  This commit pulls that code out into ThreadAlloc, and passes a template parameter into the remaining LocalAllocator to perform the relevant TLS manipulations.  This removes some of the awkward layering around register_clean_up.
  • Loading branch information
mjp41 committed Feb 20, 2025
1 parent 8ee8026 commit 001d2e4
Show file tree
Hide file tree
Showing 10 changed files with 266 additions and 274 deletions.
9 changes: 0 additions & 9 deletions src/snmalloc/backend/fixedglobalconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,6 @@ namespace snmalloc
return opts;
}();

// This needs to be a forward reference as the
// thread local state will need to know about this.
// This may allocate, so must be called once a thread
// local allocator exists.
static void register_clean_up()
{
snmalloc::register_clean_up();
}

static void init(LocalState* local_state, void* base, size_t length)
{
UNUSED(local_state);
Expand Down
12 changes: 0 additions & 12 deletions src/snmalloc/backend/globalconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

namespace snmalloc
{
// Forward reference to thread local cleanup.
void register_clean_up();

/**
* The default configuration for a global snmalloc. It contains all the
* datastructures to manage the memory from the OS. It had several internal
Expand Down Expand Up @@ -159,14 +156,5 @@ namespace snmalloc
{
return initialised;
}

// This needs to be a forward reference as the
// thread local state will need to know about this.
// This may allocate, so should only be called once
// a thread local allocator is available.
static void register_clean_up()
{
snmalloc::register_clean_up();
}
};
} // namespace snmalloc
3 changes: 0 additions & 3 deletions src/snmalloc/backend_helpers/commonconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

namespace snmalloc
{
// Forward reference to thread local cleanup.
void register_clean_up();

/**
* Options for a specific snmalloc configuration. Every globals object must
* have one `constexpr` instance of this class called `Options`. This should
Expand Down
14 changes: 7 additions & 7 deletions src/snmalloc/global/globalalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,44 +323,44 @@ namespace snmalloc
template<size_t size, ZeroMem zero_mem = NoZero, size_t align = 1>
SNMALLOC_FAST_PATH_INLINE void* alloc()
{
return ThreadAlloc::get().alloc<zero_mem>(aligned_size(align, size));
return ThreadAlloc::get().alloc<zero_mem, ThreadAlloc::CheckInit>(aligned_size(align, size));
}

template<ZeroMem zero_mem = NoZero, size_t align = 1>
SNMALLOC_FAST_PATH_INLINE void* alloc(size_t size)
{
return ThreadAlloc::get().alloc<zero_mem>(aligned_size(align, size));
return ThreadAlloc::get().alloc<zero_mem, ThreadAlloc::CheckInit>(aligned_size(align, size));
}

template<ZeroMem zero_mem = NoZero>
SNMALLOC_FAST_PATH_INLINE void* alloc_aligned(size_t align, size_t size)
{
return ThreadAlloc::get().alloc<zero_mem>(aligned_size(align, size));
return ThreadAlloc::get().alloc<zero_mem, ThreadAlloc::CheckInit>(aligned_size(align, size));
}

SNMALLOC_FAST_PATH_INLINE void dealloc(void* p)
{
ThreadAlloc::get().dealloc(p);
ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
}

SNMALLOC_FAST_PATH_INLINE void dealloc(void* p, size_t size)
{
check_size(p, size);
ThreadAlloc::get().dealloc(p);
ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
}

template<size_t size>
SNMALLOC_FAST_PATH_INLINE void dealloc(void* p)
{
check_size(p, size);
ThreadAlloc::get().dealloc(p);
ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
}

SNMALLOC_FAST_PATH_INLINE void dealloc(void* p, size_t size, size_t align)
{
auto rsize = aligned_size(align, size);
check_size(p, rsize);
ThreadAlloc::get().dealloc(p);
ThreadAlloc::get().dealloc<ThreadAlloc::CheckInit>(p);
}

SNMALLOC_FAST_PATH_INLINE void debug_teardown()
Expand Down
241 changes: 155 additions & 86 deletions src/snmalloc/global/threadalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,36 +38,22 @@ namespace snmalloc
*/
class ThreadAlloc
{
protected:
static void register_cleanup() {}

public:
static SNMALLOC_FAST_PATH Alloc& get()
{
return ThreadAllocExternal::get();
}

// This will always call the success path as the client is responsible
// handling the initialisation.
using CheckInit = CheckInitDefault;
};

/**
* Function passed as a template parameter to `Allocator` to allow lazy
* replacement. There is nothing to initialise in this case, so we expect
* this to never be called.
*/
# ifdef _MSC_VER
// 32Bit Windows release MSVC is determining this as having unreachable code for
// f(nullptr), which is true. But other platforms don't. Disabling the warning
// seems simplist.
# pragma warning(push)
# pragma warning(disable : 4702)
# endif
inline void register_clean_up()
{
error("Critical Error: This should never be called.");
}
# ifdef _MSC_VER
# pragma warning(pop)
# endif
#else

class CheckInitPthread;
class CheckInitCXX;

/**
* Holds the thread local state for the allocator. The state is constant
* initialised, and has no direct destructor. Instead snmalloc will call
Expand All @@ -77,6 +63,14 @@ namespace snmalloc
*/
class ThreadAlloc
{
SNMALLOC_REQUIRE_CONSTINIT static inline thread_local Alloc alloc{};

// As allocation and deallocation can occur during thread teardown
// we need to record if we are already in that state as we will not
// receive another teardown call, so each operation needs to release
// the underlying data structures after the call.
static inline thread_local bool teardown_called{false};

public:
/**
* Handle on thread local allocator
Expand All @@ -87,76 +81,160 @@ namespace snmalloc
*/
static SNMALLOC_FAST_PATH Alloc& get()
{
SNMALLOC_REQUIRE_CONSTINIT static thread_local Alloc alloc;
return alloc;
}

/**
 * Shared implementation of the "is the thread-local allocator initialised?"
 * check that is passed into the allocator as a template parameter.
 *
 * `Subclass` (CRTP) must provide a static `register_clean_up()` which
 * arranges for the thread-local allocator to be torn down when the thread
 * exits.
 */
template<typename Subclass>
class CheckInitBase
{
  /**
   * Slow path taken when the thread-local allocator is not yet initialised.
   *
   * Initialises the allocator where the configuration requires it, and then
   * re-runs the requested operation via `r(args...)`, returning its result.
   *
   * If the thread has already been torn down, no clean-up is registered;
   * instead the allocator is torn down again as soon as the operation
   * completes, so that no local state is leaked.
   */
  template<typename Restart, typename... Args>
  SNMALLOC_SLOW_PATH static auto check_init_slow(Restart r, Args... args)
  {
    if constexpr (!Config::Options.LocalAllocSupportsLazyInit)
    {
      SNMALLOC_CHECK(
        false &&
        "lazy_init called on an allocator that doesn't support lazy "
        "initialisation");
      // Unreachable, but needed to keep the type checker happy in deducing
      // the return type of this function.  The type must be derived from the
      // restart callable `r`, as that is what the other branch returns.
      return static_cast<decltype(r(args...))>(nullptr);
    }
    else
    {
      // Sample the teardown state before touching the allocator.
      const bool post_teardown = teardown_called;

      // Initialise the thread local allocator
      if constexpr (Config::Options.CoreAllocOwnsLocalState)
      {
        alloc.init();
      }

      // register_clean_up must be called after init. register clean up
      // may be implemented with allocation, so need to ensure we have a
      // valid allocator at this point.
      if (!post_teardown)
      {
        // Must be called at least once per thread.
        // A pthread implementation only calls the thread destruction handle
        // if the key has been set.
        Subclass::register_clean_up();

        // Perform underlying operation
        return r(args...);
      }

      // The guard is declared before the operation so that, going by its
      // name, the teardown runs when `od` is destroyed at the end of this
      // scope, i.e. after `r(args...)` has been evaluated.
      OnDestruct od([]() {
#  ifdef SNMALLOC_TRACING
        message<1024>("post_teardown flush()");
#  endif
        // We didn't have an allocator because the thread is being torn down.
        // We need to return any local state, so we don't leak it.
        alloc.teardown();
      });

      // Perform underlying operation
      return r(args...);
    }
  }

public:
  /**
   * Fast-path check.
   *
   * If the thread-local allocator is initialised, returns `s()`.
   * Otherwise falls through to the slow path, which initialises the
   * allocator and returns `r(args...)`.  Both callables must yield the same
   * type for the deduced return type to be consistent.
   */
  template<typename Success, typename Restart, typename... Args>
  SNMALLOC_FAST_PATH static auto
  check_init(Success s, Restart r, Args... args)
  {
    if (alloc.is_init())
    {
      return s();
    }

    return check_init_slow(r, args...);
  }
};
# ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS
using CheckInit = CheckInitPthread;
# elif defined(SNMALLOC_USE_CXX_THREAD_DESTRUCTORS)
using CheckInit = CheckInitCXX;
# else
using CheckInit = CheckInitDefault;
# endif
};

# ifdef SNMALLOC_USE_PTHREAD_DESTRUCTORS
/**
* Used to give correct signature to teardown required by pthread_key.
*/
inline void pthread_cleanup(void*)
class CheckInitPthread : public ThreadAlloc::CheckInitBase<CheckInitPthread>
{
ThreadAlloc::get().teardown();
}
private:
/**
* Used to give correct signature to teardown required by pthread_key.
*/
static void pthread_cleanup(void*)
{
ThreadAlloc::get().teardown();
}

/**
* Used to give correct signature to teardown required by atexit.
*/
inline void pthread_cleanup_main_thread()
{
ThreadAlloc::get().teardown();
}
/**
* Used to give correct signature to teardown required by atexit.
*/
static void pthread_cleanup_main_thread()
{
ThreadAlloc::get().teardown();
}

/**
* Used to give correct signature to the pthread call for the Singleton class.
*/
inline void pthread_create(pthread_key_t* key) noexcept
{
pthread_key_create(key, &pthread_cleanup);
// Main thread does not call pthread_cleanup if `main` returns or `exit` is
// called, so use an atexit handler to guarantee that the cleanup is run at
// least once. If the main thread exits with `pthread_exit` then it will be
// called twice but this case is already handled because other destructors
// can cause the per-thread allocator to be recreated.
atexit(&pthread_cleanup_main_thread);
}
/**
* Used to give correct signature to the pthread call for the Singleton
* class.
*/
static void pthread_create(pthread_key_t* key) noexcept
{
pthread_key_create(key, &pthread_cleanup);
// Main thread does not call pthread_cleanup if `main` returns or `exit`
// is called, so use an atexit handler to guarantee that the cleanup is
// run at least once. If the main thread exits with `pthread_exit` then
// it will be called twice but this case is already handled because other
// destructors can cause the per-thread allocator to be recreated.
atexit(&pthread_cleanup_main_thread);
}

/**
* Performs thread local teardown for the allocator using the pthread library.
*
* This removes the dependence on the C++ runtime.
*/
inline void register_clean_up()
{
Singleton<pthread_key_t, &pthread_create> p_key;
// We need to set a non-null value, so that the destructor is called,
// we never look at the value.
static char p_teardown_val = 1;
pthread_setspecific(p_key.get(), &p_teardown_val);
public:
/**
* Performs thread local teardown for the allocator using the pthread
* library.
*
* This removes the dependence on the C++ runtime.
*/
static void register_clean_up()
{
Singleton<pthread_key_t, &pthread_create> p_key;
// We need to set a non-null value, so that the destructor is called,
// we never look at the value.
static char p_teardown_val = 1;
pthread_setspecific(p_key.get(), &p_teardown_val);
# ifdef SNMALLOC_TRACING
message<1024>("Using pthread clean up");
message<1024>("Using pthread clean up");
# endif
}
}
};
# elif defined(SNMALLOC_USE_CXX_THREAD_DESTRUCTORS)
/**
* This function is called by each thread once it starts using the
* thread local allocator.
*
* This implementation depends on nothing outside of a working C++
* environment and so should be the simplest for initial bringup on an
* unsupported platform.
*/
inline void register_clean_up()
class CheckInitCXX : public ThreadAlloc::CheckInitBase<CheckInitCXX>
{
static thread_local OnDestruct dummy(
[]() { ThreadAlloc::get().teardown(); });
UNUSED(dummy);
public:
/**
* This function is called by each thread once it starts using the
* thread local allocator.
*
* This implementation depends on nothing outside of a working C++
* environment and so should be the simplest for initial bringup on an
* unsupported platform.
*/
inline void register_clean_up()
{
static thread_local OnDestruct dummy([]() { ThreadAlloc::teardown(); });
UNUSED(dummy);
# ifdef SNMALLOC_TRACING
message<1024>("Using C++ destructor clean up");
message<1024>("Using C++ destructor clean up");
# endif
}
}
};
# endif
#endif
} // namespace snmalloc
Expand All @@ -171,13 +249,4 @@ inline void _malloc_thread_cleanup()
{
snmalloc::ThreadAlloc::get().teardown();
}

namespace snmalloc
{
/**
* No-op version of register_clean_up. This is called unconditionally by
* globalconfig but is not necessary when using a libc hook.
*/
inline void register_clean_up() {}
}
#endif
Loading

0 comments on commit 001d2e4

Please sign in to comment.