Remove pageheap_lock from ThreadCache operations.
PiperOrigin-RevId: 707072769
Change-Id: Ib5bb6aaa994b604e6f64a8f88fd0fd70a7bc7702
ckennelly authored and copybara-github committed Dec 17, 2024
1 parent 8318ab3 commit 252891f
Showing 5 changed files with 40 additions and 35 deletions.
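
What the commit does overall: ThreadCache's linked-list bookkeeping moves off the global pageheap_lock onto a new class-private threadcache_lock_, and the frequently read overall_thread_cache_size_ becomes a lock-free atomic. A minimal sketch of that pattern, under illustrative names (CacheRegistry, lock_, and total_size_ are not tcmalloc identifiers):

```cpp
#include <atomic>
#include <cstddef>

#include "absl/base/attributes.h"
#include "absl/base/const_init.h"
#include "absl/base/internal/spinlock.h"
#include "absl/base/thread_annotations.h"

// Sketch: a private spinlock guards the mutable bookkeeping, while the
// frequently read total is an atomic that readers load without any lock.
class CacheRegistry {
 public:
  // Hot read path: a single relaxed load, no lock taken.
  static size_t total_size() {
    return total_size_.load(std::memory_order_relaxed);
  }

  // Writers serialize on the private lock before publishing a new value.
  static void set_total_size(size_t n) ABSL_LOCKS_EXCLUDED(lock_) {
    absl::base_internal::SpinLockHolder l(&lock_);
    total_size_.store(n, std::memory_order_relaxed);
    // ... recompute derived per-cache limits while still holding lock_ ...
  }

 private:
  static absl::base_internal::SpinLock lock_;
  static std::atomic<size_t> total_size_;
};

ABSL_CONST_INIT absl::base_internal::SpinLock CacheRegistry::lock_(
    absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
std::atomic<size_t> CacheRegistry::total_size_{0};
```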
tcmalloc/global_stats.cc: 3 changes (1 addition, 2 deletions)
@@ -118,10 +118,10 @@ void ExtractStats(TCMallocStats* r, uint64_t* class_count,
   r->span_stats = tc_globals.span_allocator().stats();
   r->stack_stats = tc_globals.sampledallocation_allocator().stats();
   r->linked_sample_stats = tc_globals.linked_sample_allocator().stats();
+  r->tc_stats = ThreadCache::GetStats(&r->thread_bytes, class_count);
 
   { // scope
     PageHeapSpinLockHolder l;
-    r->tc_stats = ThreadCache::GetStats(&r->thread_bytes, class_count);
     r->metadata_bytes = tc_globals.metadata_bytes();
     r->pagemap_bytes = tc_globals.pagemap().bytes();
     r->pageheap = tc_globals.page_allocator().stats();
@@ -925,7 +925,6 @@ bool GetNumericProperty(const char* name_data, size_t name_size,
   }
 
   if (name == "tcmalloc.max_total_thread_cache_bytes") {
-    PageHeapSpinLockHolder l;
     *value = ThreadCache::overall_thread_cache_size();
     return true;
   }
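
Both hunks shrink for the same reason: GetStats and overall_thread_cache_size no longer require the caller to hold pageheap_lock; they synchronize internally. A hedged sketch of the two annotation contracts, with illustrative names (stats_lock, counter, and the Read* functions are mine):

```cpp
#include "absl/base/attributes.h"
#include "absl/base/const_init.h"
#include "absl/base/internal/spinlock.h"
#include "absl/base/thread_annotations.h"

ABSL_CONST_INIT static absl::base_internal::SpinLock stats_lock(
    absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
static int counter ABSL_GUARDED_BY(stats_lock) = 0;

// Caller-locks contract (the old style): every call site must wrap the call
// in its own lock scope, as ExtractStats and GetNumericProperty used to.
static int ReadCounterLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(stats_lock) {
  return counter;
}

// Callee-locks contract (the new style): the function takes the lock itself;
// the annotation also rejects callers that already hold it (no self-deadlock).
static int ReadCounter() ABSL_LOCKS_EXCLUDED(stats_lock) {
  absl::base_internal::SpinLockHolder l(&stats_lock);
  return counter;
}
```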
tcmalloc/parameters.cc: 3 changes (0 additions, 3 deletions)
@@ -542,9 +542,6 @@ void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v) {
 
 void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v) {
   Parameters::max_total_thread_cache_bytes_.store(v, std::memory_order_relaxed);
-
-  tcmalloc::tcmalloc_internal::AllocationGuardSpinLockHolder l(
-      &tcmalloc::tcmalloc_internal::pageheap_lock);
   tcmalloc::tcmalloc_internal::ThreadCache::set_overall_thread_cache_size(v);
 }
tcmalloc/tcmalloc.cc: 7 changes (1 addition, 6 deletions)
@@ -427,13 +427,8 @@ extern "C" void MallocExtension_Internal_GetProperties(
   (*result)["tcmalloc.thread_cache_free"].value = stats.thread_bytes;
   (*result)["tcmalloc.local_bytes"].value = LocalBytes(stats);
 
-  size_t overall_thread_cache_size;
-  {
-    PageHeapSpinLockHolder l;
-    overall_thread_cache_size = ThreadCache::overall_thread_cache_size();
-  }
   (*result)["tcmalloc.max_total_thread_cache_bytes"].value =
-      overall_thread_cache_size;
+      ThreadCache::overall_thread_cache_size();
 
   // Page Unmapped
   (*result)["tcmalloc.pageheap_unmapped_bytes"].value =
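
With the getter reduced to a relaxed atomic load, this property read needs no lock at all. Relaxed ordering is defensible here because the value is a standalone tuning knob: readers only need some recent value of this one variable and infer nothing about other memory from it. A small sketch of when relaxed is and is not enough (names are illustrative):

```cpp
#include <atomic>
#include <cstddef>

std::atomic<size_t> cache_budget{64 << 20};

// Fine with relaxed: the reader uses the value by itself.
size_t ReadBudget() { return cache_budget.load(std::memory_order_relaxed); }

void SetBudget(size_t n) {
  // Also relaxed: nothing else is published alongside the store. If the
  // store instead signaled "a freshly built table is ready", it would need
  // memory_order_release paired with acquire loads on the reader side.
  cache_budget.store(n, std::memory_order_relaxed);
}
```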
tcmalloc/thread_cache.cc: 24 changes (17 additions, 7 deletions)
@@ -15,10 +15,12 @@
 #include "tcmalloc/thread_cache.h"
 
 #include <algorithm>
+#include <atomic>
 #include <cstddef>
 #include <cstdint>
 
 #include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
 #include "absl/base/macros.h"
 #include "absl/base/optimization.h"
 #include "absl/types/span.h"
@@ -34,7 +36,8 @@ namespace tcmalloc {
 namespace tcmalloc_internal {
 
 size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
-size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
+std::atomic<size_t> ThreadCache::overall_thread_cache_size_ =
+    kDefaultOverallThreadCacheSize;
 int64_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
 ThreadCache* ThreadCache::thread_heaps_ = nullptr;
 int ThreadCache::thread_heap_count_ = 0;
@@ -43,6 +46,8 @@ ABSL_CONST_INIT thread_local ThreadCache* ThreadCache::thread_local_data_
     ABSL_ATTRIBUTE_INITIAL_EXEC = nullptr;
 ABSL_CONST_INIT bool ThreadCache::tsd_inited_ = false;
 pthread_key_t ThreadCache::heap_key_;
+ABSL_CONST_INIT absl::base_internal::SpinLock ThreadCache::threadcache_lock_(
+    absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
 
 ThreadCache::ThreadCache(pthread_t tid) {
   size_ = 0;
@@ -214,7 +219,7 @@ void ThreadCache::DeallocateSlow(void* ptr, FreeList* list, size_t size_class) {
 }
 
 void ThreadCache::IncreaseCacheLimit() {
-  PageHeapSpinLockHolder l;
+  AllocationGuardSpinLockHolder l(&threadcache_lock_);
   IncreaseCacheLimitLocked();
 }

@@ -267,7 +272,7 @@ ThreadCache* ThreadCache::CreateCacheIfNecessary() {
   }
 
   {
-    PageHeapSpinLockHolder l;
+    AllocationGuardSpinLockHolder l(&threadcache_lock_);
     const pthread_t me = pthread_self();
 
     // This may be a recursive malloc call from pthread_setspecific()
@@ -356,7 +361,7 @@ void ThreadCache::DeleteCache(ThreadCache* heap) {
 
   // Remove from linked list
   {
-    PageHeapSpinLockHolder l;
+    AllocationGuardSpinLockHolder l(&threadcache_lock_);
     if (heap->next_ != nullptr) heap->next_->prev_ = heap->prev_;
     if (heap->prev_ != nullptr) heap->prev_->next_ = heap->next_;
     if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
@@ -373,7 +378,7 @@ void ThreadCache::DeleteCache(ThreadCache* heap) {
 void ThreadCache::RecomputePerThreadCacheSize() {
   // Divide available space across threads
   int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
-  size_t space = overall_thread_cache_size_ / n;
+  size_t space = overall_thread_cache_size_.load(std::memory_order_relaxed) / n;
 
   // Limit to allowed range
   if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
@@ -389,12 +394,15 @@
     }
     claimed += h->max_size_;
   }
-  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
+  unclaimed_cache_space_ =
+      overall_thread_cache_size_.load(std::memory_order_relaxed) - claimed;
   per_thread_cache_size_ = space;
 }
 
 AllocatorStats ThreadCache::GetStats(uint64_t* total_bytes,
                                      uint64_t* class_count) {
+  AllocationGuardSpinLockHolder l(&threadcache_lock_);
+
   for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) {
     *total_bytes += h->size_;
     if (class_count) {
@@ -409,7 +417,9 @@ AllocatorStats ThreadCache::GetStats(uint64_t* total_bytes,
 void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
   // Clip the value to a reasonable minimum
   if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
-  overall_thread_cache_size_ = new_size;
+
+  AllocationGuardSpinLockHolder l(&threadcache_lock_);
+  overall_thread_cache_size_.store(new_size, std::memory_order_relaxed);
 
   RecomputePerThreadCacheSize();
 }
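
A note on the new lock's construction: inside an allocator, a lock must be usable before main() runs, since other static initializers may already be calling malloc. That is why threadcache_lock_ is constant-initialized with absl::kConstInit; SCHEDULE_KERNEL_ONLY additionally opts the lock out of absl's cooperative-scheduling support, which matters in low-level code like this. A sketch of the idiom (early_lock and TouchEarlyState are illustrative):

```cpp
#include "absl/base/attributes.h"
#include "absl/base/const_init.h"
#include "absl/base/internal/spinlock.h"

// kConstInit selects SpinLock's constexpr constructor, so the lock is
// initialized at compile time rather than by a runtime static initializer.
// That makes it safe to use from code that runs arbitrarily early, which is
// essential for a malloc implementation.
ABSL_CONST_INIT static absl::base_internal::SpinLock early_lock(
    absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);

void TouchEarlyState() {
  absl::base_internal::SpinLockHolder l(&early_lock);
  // ... mutate state guarded by early_lock ...
}
```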
tcmalloc/thread_cache.h: 38 changes (21 additions, 17 deletions)
@@ -40,7 +40,8 @@ namespace tcmalloc_internal {
 class ABSL_CACHELINE_ALIGNED ThreadCache {
  public:
  explicit ThreadCache(pthread_t tid)
-      ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+      ABSL_EXCLUSIVE_LOCKS_REQUIRED(threadcache_lock_);
+
   void Cleanup();
 
   // Allocate an object of the given size class.
@@ -59,16 +60,16 @@ class ABSL_CACHELINE_ALIGNED ThreadCache {
   // and this function will increment each element of class_count by the number
   // of items in all thread-local freelists of the corresponding size class.
   static AllocatorStats GetStats(uint64_t* total_bytes, uint64_t* class_count)
-      ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+      ABSL_LOCKS_EXCLUDED(threadcache_lock_);
 
   // Sets the total thread cache size to new_size, recomputing the
   // individual thread cache sizes as necessary.
   static void set_overall_thread_cache_size(size_t new_size)
-      ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+      ABSL_LOCKS_EXCLUDED(threadcache_lock_);
 
   static size_t overall_thread_cache_size()
-      ABSL_SHARED_LOCKS_REQUIRED(pageheap_lock) {
-    return overall_thread_cache_size_;
+      ABSL_LOCKS_EXCLUDED(threadcache_lock_) {
+    return overall_thread_cache_size_.load(std::memory_order_relaxed);
   }
 
  private:
@@ -140,8 +141,11 @@ class ABSL_CACHELINE_ALIGNED ThreadCache {
   // the delta is kStealAmount.
   void IncreaseCacheLimit();
 
-  // Same as above but called with pageheap_lock held.
-  void IncreaseCacheLimitLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+  static absl::base_internal::SpinLock threadcache_lock_;
+
+  // Same as above but called with threadcache_lock_ held.
+  void IncreaseCacheLimitLocked()
+      ABSL_EXCLUSIVE_LOCKS_REQUIRED(threadcache_lock_);

void Scavenge();
static ThreadCache* CreateCacheIfNecessary();
@@ -171,24 +175,24 @@ class ABSL_CACHELINE_ALIGNED ThreadCache {
   static pthread_key_t heap_key_;
 
   // Linked list of heap objects.
-  static ThreadCache* thread_heaps_ ABSL_GUARDED_BY(pageheap_lock);
-  static int thread_heap_count_ ABSL_GUARDED_BY(pageheap_lock);
+  static ThreadCache* thread_heaps_ ABSL_GUARDED_BY(threadcache_lock_);
+  static int thread_heap_count_ ABSL_GUARDED_BY(threadcache_lock_);
 
   // A pointer to one of the objects in thread_heaps_. Represents
   // the next ThreadCache from which a thread over its max_size_ should
   // steal memory limit. Round-robin through all of the objects in
   // thread_heaps_.
-  static ThreadCache* next_memory_steal_ ABSL_GUARDED_BY(pageheap_lock);
+  static ThreadCache* next_memory_steal_ ABSL_GUARDED_BY(threadcache_lock_);
 
   // Overall thread cache size.
-  static size_t overall_thread_cache_size_ ABSL_GUARDED_BY(pageheap_lock);
+  static std::atomic<size_t> overall_thread_cache_size_;
 
   // Global per-thread cache size.
-  static size_t per_thread_cache_size_ ABSL_GUARDED_BY(pageheap_lock);
+  static size_t per_thread_cache_size_ ABSL_GUARDED_BY(threadcache_lock_);
 
   // Represents overall_thread_cache_size_ minus the sum of max_size_
   // across all ThreadCaches.
-  static int64_t unclaimed_cache_space_ ABSL_GUARDED_BY(pageheap_lock);
+  static int64_t unclaimed_cache_space_ ABSL_GUARDED_BY(threadcache_lock_);

// This class is laid out with the most frequently used fields
// first so that hot elements are placed on the same cache line.
@@ -203,18 +207,18 @@ class ABSL_CACHELINE_ALIGNED ThreadCache {
 
   // Allocate a new heap.
   static ThreadCache* NewHeap(pthread_t tid)
-      ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+      ABSL_EXCLUSIVE_LOCKS_REQUIRED(threadcache_lock_);
 
   // Use only as pthread thread-specific destructor function.
   static void DestroyThreadCache(void* ptr);
 
   static void DeleteCache(ThreadCache* heap);
   static void RecomputePerThreadCacheSize()
-      ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+      ABSL_EXCLUSIVE_LOCKS_REQUIRED(threadcache_lock_);
 
   // All ThreadCache objects are kept in a linked list (for stats collection)
-  ThreadCache* next_ ABSL_GUARDED_BY(pageheap_lock);
-  ThreadCache* prev_ ABSL_GUARDED_BY(pageheap_lock);
+  ThreadCache* next_ ABSL_GUARDED_BY(threadcache_lock_);
+  ThreadCache* prev_ ABSL_GUARDED_BY(threadcache_lock_);
};

inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(
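Most of the header churn is Clang thread-safety annotations following the data: each ABSL_GUARDED_BY now names threadcache_lock_ (or drops away where the field became atomic), so unlocked access fails to compile under -Wthread-safety. A tiny illustration of the check, using absl::Mutex for brevity (my example, not tcmalloc code):

```cpp
#include "absl/base/thread_annotations.h"
#include "absl/synchronization/mutex.h"

absl::Mutex mu;
int hits ABSL_GUARDED_BY(mu) = 0;

void Good() {
  absl::MutexLock l(&mu);
  ++hits;  // OK: the analysis sees that mu is held.
}

void Bad() {
  ++hits;  // -Wthread-safety: writing variable 'hits' requires holding
           // mutex 'mu' exclusively.
}
```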
