Skip to content
124 changes: 88 additions & 36 deletions system/lib/emmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@
#include <emscripten/heap.h>
#include <emscripten/threading.h>

void *sbrk64(int64_t numBytes);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's call this `_sbrk64` to avoid polluting the global namespace.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it will be important for end users to be able to call this function as well. I am already thinking that at Unity we'll patch bdwgc to call this fixed sbrk64() function.


#ifdef __EMSCRIPTEN_TRACING__
#include <emscripten/trace.h>
#endif
Expand Down Expand Up @@ -137,6 +139,7 @@ static volatile uint8_t multithreadingLock = 0;

#define IS_POWER_OF_2(val) (((val) & ((val)-1)) == 0)
#define ALIGN_UP(ptr, alignment) ((uint8_t*)((((uintptr_t)(ptr)) + ((alignment)-1)) & ~((alignment)-1)))
#define ALIGN_DOWN(ptr, alignment) ((uint8_t*)(((uintptr_t)(ptr)) & ~((alignment)-1)))
#define HAS_ALIGNMENT(ptr, alignment) ((((uintptr_t)(ptr)) & ((alignment)-1)) == 0)

static_assert(IS_POWER_OF_2(MALLOC_ALIGNMENT), "MALLOC_ALIGNMENT must be a power of two value!");
Expand Down Expand Up @@ -467,7 +470,8 @@ static bool claim_more_memory(size_t numBytes) {
numBytes = (size_t)ALIGN_UP(numBytes, MALLOC_ALIGNMENT);

// Claim memory via sbrk
uint8_t *startPtr = (uint8_t*)sbrk(numBytes);
assert((int64_t)numBytes >= 0);
uint8_t *startPtr = (uint8_t*)sbrk64((int64_t)numBytes);
if ((intptr_t)startPtr == -1) {
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(err('claim_more_memory: sbrk failed!'));
Expand All @@ -483,6 +487,9 @@ static bool claim_more_memory(size_t numBytes) {
// Create a sentinel region at the end of the new heap block
Region *endSentinelRegion = (Region*)(endPtr - sizeof(Region));
create_used_region(endSentinelRegion, sizeof(Region));
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('claim_more_memory: created a sentinel memory region at address ' + ptrToString($0)), endSentinelRegion);
#endif

// If we are the sole user of sbrk(), it will feed us continuous/consecutive memory addresses - take advantage
// of that if so: instead of creating two disjoint memory regions blocks, expand the previous one to a larger size.
Expand Down Expand Up @@ -1165,7 +1172,7 @@ struct mallinfo emmalloc_mallinfo() {
struct mallinfo info;
// Non-mmapped space allocated (bytes): For emmalloc,
// let's define this as the difference between heap size and dynamic top end.
info.arena = emscripten_get_heap_size() - (size_t)sbrk(0);
info.arena = emscripten_get_heap_size() - (size_t)sbrk64(0);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this line can be reverted?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is for emmalloc to only refer to this new sbrk64(). This way if user code only uses emmalloc and nothing else (doesn't sbrk() manually), then emmalloc will not pull in the flawed middle-man sbrk() function at all, but just the sbrk64() variant.

So that is why I changed emmalloc to only call the "fixed" sbrk64().

// Number of "ordinary" blocks. Let's define this as the number of highest
// size blocks. (subtract one from each, since there is a sentinel node in each list)
info.ordblks = count_linked_list_size(&freeRegionBuckets[NUM_FREE_BUCKETS-1])-1;
Expand Down Expand Up @@ -1227,73 +1234,118 @@ struct mallinfo emmalloc_mallinfo() {
}
EMMALLOC_ALIAS(mallinfo, emmalloc_mallinfo);

#if 0
// Note! This function is not fully multithreading safe: while this function is running, other threads should not be
// allowed to call sbrk()!
static int trim_dynamic_heap_reservation(size_t pad) {
ASSERT_MALLOC_IS_ACQUIRED();

if (!listOfAllRegions) {
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): cannot trim memory, emmalloc is currently not initialized to manage any dynamic memory at all.'));
#endif
return 0; // emmalloc is not controlling any dynamic memory at all - cannot release memory.
}
uint8_t *previousSbrkEndAddress = listOfAllRegions->endPtr;
assert(sbrk(0) == previousSbrkEndAddress);
void *sbrk_addr = sbrk64(0);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this can just be sbrk(0). Hopefully in the longer run we can remove sbrk64 so the fewer references to it the better.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the opposite direction: sbrk() is flawed, so we must migrate to using sbrk64() even in 4GB builds.

(2GB builds don't care, since there the problem doesn't exist)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we agree that even if sbrk() is limited in how much it can allocate we need to fix our current sbrk() so that it can shrink memory, right? That is what #25138 is for I guess?

So is the idea that rather than updating all our malloc implementations to chop up sbrk() calls into 2GB chunks, we would instead move them all over to using sbrk64?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should land this change using _sbrk64() and then we can bikeshed the final solution we want in #25138.

Once we make the new function public we can declare it in a header and remove the underscore prefix.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> But we agree that even if sbrk() is limited in how much it can allocate we need to fix our current sbrk() so that it can shrink memory, right? That is what #25138 is for I guess?

Yeah, that's right. 25138 means that interpretation of existing 32-bit sbrk() must be turned back to signed again.

> So is the idea that rather than updating all our malloc implementations to chop up sbrk() calls into 2GB chunks, we would instead move them all over to using sbrk64?

That's true. At first I was thinking that callers can just chop their allocs to 2gb pieces, but the issue there is that then there won't be a guarantee that such sbrk() allocs are contiguous.

For example, in a multithreaded program with bdwgc, different threads can race to sbrk() for native malloc(), and bdwgc races to sbrk() for itself. So if it was patched to alloc in 2GB slices, another thread could come in between the slices, and the chopped alloc would end up receiving a noncontiguous >2GB memory area, which would be catastrophic.

So it is best to just provide an sbrk64() API that takes an int64_t instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> I think we should land this change using _sbrk64() and then we can bikeshed the final solution we want in #25138.
>
> Once we make the new function public we can declare it in a header and remove the underscore prefix.

Ok, updated.

#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): sbrk64(0) = ' + ptrToString($0) + ', previousSbrkEndAddress = ' + ptrToString($1)), sbrk_addr, previousSbrkEndAddress);
#endif
assert(sbrk_addr == previousSbrkEndAddress);
size_t lastMemoryRegionSize = ((size_t*)previousSbrkEndAddress)[-1];
assert(lastMemoryRegionSize == 16); // // The last memory region should be a sentinel node of exactly 16 bytes in size.
Region *endSentinelRegion = (Region*)(previousSbrkEndAddress - sizeof(Region));
Region *endSentinelRegion = (Region*)(previousSbrkEndAddress - lastMemoryRegionSize);
Region *lastActualRegion = prev_region(endSentinelRegion);

// Round padding up to multiple of 4 bytes to keep sbrk() and memory region alignment intact.
// Also have at least 8 bytes of payload so that we can form a full free region.
size_t newRegionSize = (size_t)ALIGN_UP(pad, 4);
if (pad > 0) {
newRegionSize += sizeof(Region) - (newRegionSize - pad);
if (!region_is_free(lastActualRegion)) {
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Last actual region ' + ptrToString($0) + ' is in use, there is nothing to trim from.'), lastActualRegion);
#endif
return 0;
}

if (!region_is_free(lastActualRegion) || lastActualRegion->size <= newRegionSize) {
return 0; // Last actual region is in use, or caller desired to leave more free memory intact than there is.
// Sanitize odd alignments for padding values - this is the minimum alignment
// that emmalloc could handle. Align up to be conservative towards caller.
pad = (size_t)ALIGN_UP(pad, 4);

// Calculate how many bytes we can shrink the sbrk() reservation by.
// Is the last free region smaller than what was requested to be left behind?
// If so, then there is nothing we can trim.
if (pad >= lastActualRegion->size) {
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Last actual region does not have enough space to leave ' + Number($0) + ' bytes of free memory in it.'), pad);
#endif
return 0;
}

// This many bytes will be shrunk away.
size_t shrinkAmount = lastActualRegion->size - newRegionSize;
assert(HAS_ALIGNMENT(shrinkAmount, 4));
// Subtract region size members off to calculate the excess bytes in payload.
size_t shrinkAmount = lastActualRegion->size - pad - 2*sizeof(size_t);
// sbrk() alignment is multiple of __alignof__(max_align_t), so round the
// trimming down to ensure that alignment is preserved.
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): shrinkAmount ' + Number($0) + '.'), shrinkAmount);
#endif
shrinkAmount = (size_t)ALIGN_DOWN(shrinkAmount, __alignof__(max_align_t));
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): shrinkAmount2 ' + Number($0) + '.'), shrinkAmount);
#endif
// Nothing left to trim?
if (!shrinkAmount) {
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Aligning for sbrk() requirements removed opportunity to trim.'));
#endif
return 0;
}

unlink_from_free_list(lastActualRegion);
// If pad == 0, we should delete the last free region altogether. If pad > 0,
// shrink the last free region to the desired size.
if (newRegionSize > 0) {

size_t newRegionSize = lastActualRegion->size - shrinkAmount;

#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Shrinking ' + Number($0) + ' bytes off the free heap end region. New free heap end region size: ' + Number($1) + ' bytes.'), shrinkAmount, newRegionSize);
#endif
// If we can't fit a free Region in the shrunk space, we should delete the
// last free region altogether.
if (newRegionSize >= sizeof(Region)) {
create_free_region(lastActualRegion, newRegionSize);
link_to_free_list(lastActualRegion);
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Created new free heap end region at ' + ptrToString($0) + '. Size: ' + Number($1) + ' bytes.'), lastActualRegion, newRegionSize);
#endif
} else {
#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Not enough room to fit a free heap end region. Discarding it altogether.'));
#endif
newRegionSize = 0;
}

// Recreate the sentinel region at the end of the last free region
endSentinelRegion = (Region*)((uint8_t*)lastActualRegion + newRegionSize);
create_used_region(endSentinelRegion, sizeof(Region));
// Call sbrk() to shrink the memory area.
void *oldSbrk = sbrk64(-(int64_t)shrinkAmount);
assert((intptr_t)oldSbrk != -1); // Shrinking with sbrk() should never fail.

// And update the size field of the whole region block.
listOfAllRegions->endPtr = (uint8_t*)endSentinelRegion + sizeof(Region);
// Ask where sbrk() got us at.
void *sbrkNow = sbrk64(0);

// Finally call sbrk() to shrink the memory area.
void *oldSbrk = sbrk(-(intptr_t)shrinkAmount);
assert((intptr_t)oldSbrk != -1); // Shrinking with sbrk() should never fail.
assert(oldSbrk == previousSbrkEndAddress); // Another thread should not have raced to increase sbrk() on us!
// Recreate the sentinel region at the end of the last free region.
Region *newEndSentinelRegion = (Region*)((uint8_t*)lastActualRegion + newRegionSize);
size_t newEndSentinelRegionSize = (uintptr_t)sbrkNow - (uintptr_t)newEndSentinelRegion;

#ifdef EMMALLOC_VERBOSE
MAIN_THREAD_ASYNC_EM_ASM(out('emmalloc_trim(): Created new sentinel end region at ' + ptrToString($0) + '. Size: ' + Number($1) + ' bytes.'), newEndSentinelRegion, newEndSentinelRegionSize);
#endif

create_used_region(newEndSentinelRegion, newEndSentinelRegionSize);

// And update the size field of the whole region block.
listOfAllRegions->endPtr = (uint8_t*)newEndSentinelRegion + newEndSentinelRegionSize;

// All successful, and we actually trimmed memory!
return 1;
}
#endif

// Heap-trimming entry point (registered under the name malloc_trim by the
// EMMALLOC_ALIAS line that follows this definition).
//
// Trimming is currently disabled: `pad` is ignored, no heap state is touched
// and the function unconditionally returns 0. The helper that would do the
// work, trim_dynamic_heap_reservation(), is compiled out with `#if 0`.
int emmalloc_trim(size_t pad) {
// Reducing the size of the sbrk region is currently broken.
// See https://github.com/emscripten-core/emscripten/issues/23343
// And https://github.com/emscripten-core/emscripten/pull/13442
return 0;
// Disabled implementation, kept for reference: it would acquire the malloc
// lock, call trim_dynamic_heap_reservation(pad), release the lock and return
// that call's result.
/*
MALLOC_ACQUIRE();
int success = trim_dynamic_heap_reservation(pad);
MALLOC_RELEASE();
return success;
*/
}
EMMALLOC_ALIAS(malloc_trim, emmalloc_trim)

Expand Down Expand Up @@ -1376,5 +1428,5 @@ void emmalloc_dump_free_dynamic_memory_fragmentation_map() {
}

size_t emmalloc_unclaimed_heap_memory(void) {
return emscripten_get_heap_max() - (size_t)sbrk(0);
return emscripten_get_heap_max() - (size_t)sbrk64(0);
}
39 changes: 32 additions & 7 deletions system/lib/libc/sbrk.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,20 +56,25 @@ uintptr_t* emscripten_get_sbrk_ptr() {
#define READ_SBRK_PTR(sbrk_ptr) (*(sbrk_ptr))
#endif

void *sbrk(intptr_t increment_) {
uintptr_t increment = (uintptr_t)increment_;
increment = (increment + (SBRK_ALIGNMENT-1)) & ~(SBRK_ALIGNMENT-1);
void *sbrk64(int64_t increment) {
if (increment >= 0) {
increment = (increment + (SBRK_ALIGNMENT-1)) & ~((int64_t)SBRK_ALIGNMENT-1);
} else {
increment = -(-increment & ~((int64_t)SBRK_ALIGNMENT-1));
}

uintptr_t *sbrk_ptr = (uintptr_t*)emscripten_get_sbrk_ptr();

// To make sbrk thread-safe, implement a CAS loop to update the
// value of sbrk_ptr.
while (1) {
uintptr_t old_brk = READ_SBRK_PTR(sbrk_ptr);
uintptr_t new_brk = old_brk + increment;
// Check for a) an overflow, which would indicate that we are trying to
// allocate over maximum addressable memory. and b) if necessary,
int64_t new_brk64 = (int64_t)old_brk + increment;
uintptr_t new_brk = (uintptr_t)new_brk64;
// Check for a) an over/underflow, which would indicate that we are
// allocating over maximum addressable memory. and b) if necessary,
// increase the WebAssembly Memory size, and abort if that fails.
if ((increment > 0 && new_brk <= old_brk)
if (new_brk < 0 || new_brk64 != (int64_t)new_brk
|| (new_brk > emscripten_get_heap_size() && !emscripten_resize_heap(new_brk))) {
errno = ENOMEM;
return (void*)-1;
Expand All @@ -93,6 +98,26 @@ void *sbrk(intptr_t increment_) {
}
}

void *sbrk(intptr_t increment_) {
#if defined(__wasm64__) // TODO || !defined(wasm2gb)
// In the correct https://linux.die.net/man/2/sbrk spec, sbrk() parameter is
// intended to be treated as signed, meaning that it is not possible in a
// 32-bit program to sbrk alloc (or dealloc) more than 2GB of memory at once.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is indeed the case (I have no reason to believe it's not), wouldn't it be better to fix the below bug rather than do this dance?

Maybe at least open a bug so that we can remove this code, since it seems like it's only needed to work around the bug?

Copy link
Collaborator Author

@juj juj Sep 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> wouldn't it be better to fix the below bug rather than do this dance?

Yes, totally agree. A later PR. (possibly.. I don't know how hard it will be to do)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you include the full bug URL in the code here so we can clean this up in the future?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

check


// Treat sbrk() parameter as signed.
return sbrk64((int64_t)increment_);
#else
// BUG: Currently the Emscripten test suite codifies expectations that sbrk()
// values passed to this function are to be treated as unsigned, which means
// that in 2GB and 4GB build modes, it is not possible to shrink memory.
// To satisfy that mode, treat sbrk() parameters in 32-bit builds as unsigned.
// https://github.com/emscripten-core/emscripten/issues/25138

// Treat sbrk() parameter as unsigned.
return sbrk64((int64_t)(uintptr_t)increment_);
#endif
}

int brk(void* ptr) {
#ifdef __EMSCRIPTEN_SHARED_MEMORY__
// FIXME
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_ctors1.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 19754,
"a.out.js.gz": 8162,
"a.out.nodebug.wasm": 129504,
"a.out.nodebug.wasm.gz": 49232,
"total": 149258,
"total_gz": 57394,
"a.out.nodebug.wasm": 129509,
"a.out.nodebug.wasm.gz": 49243,
"total": 149263,
"total_gz": 57405,
"sent": [
"__cxa_throw",
"_abort_js",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_ctors2.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 19732,
"a.out.js.gz": 8148,
"a.out.nodebug.wasm": 128931,
"a.out.nodebug.wasm.gz": 48876,
"total": 148663,
"total_gz": 57024,
"a.out.nodebug.wasm": 128936,
"a.out.nodebug.wasm.gz": 48884,
"total": 148668,
"total_gz": 57032,
"sent": [
"__cxa_throw",
"_abort_js",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_except.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 23415,
"a.out.js.gz": 9145,
"a.out.nodebug.wasm": 171266,
"a.out.nodebug.wasm.gz": 57323,
"total": 194681,
"total_gz": 66468,
"a.out.nodebug.wasm": 171271,
"a.out.nodebug.wasm.gz": 57338,
"total": 194686,
"total_gz": 66483,
"sent": [
"__cxa_begin_catch",
"__cxa_end_catch",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_except_wasm.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 19643,
"a.out.js.gz": 8112,
"a.out.nodebug.wasm": 144625,
"a.out.nodebug.wasm.gz": 54883,
"total": 164268,
"total_gz": 62995,
"a.out.nodebug.wasm": 144630,
"a.out.nodebug.wasm.gz": 54894,
"total": 164273,
"total_gz": 63006,
"sent": [
"_abort_js",
"_tzset_js",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_except_wasm_legacy.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 19643,
"a.out.js.gz": 8112,
"a.out.nodebug.wasm": 142214,
"a.out.nodebug.wasm.gz": 54349,
"total": 161857,
"total_gz": 62461,
"a.out.nodebug.wasm": 142219,
"a.out.nodebug.wasm.gz": 54358,
"total": 161862,
"total_gz": 62470,
"sent": [
"_abort_js",
"_tzset_js",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_lto.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 19082,
"a.out.js.gz": 7841,
"a.out.nodebug.wasm": 106458,
"a.out.nodebug.wasm.gz": 42588,
"total": 125540,
"total_gz": 50429,
"a.out.nodebug.wasm": 106463,
"a.out.nodebug.wasm.gz": 42596,
"total": 125545,
"total_gz": 50437,
"sent": [
"a (emscripten_resize_heap)",
"b (_setitimer_js)",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_mangle.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 23465,
"a.out.js.gz": 9164,
"a.out.nodebug.wasm": 235307,
"a.out.nodebug.wasm.gz": 78924,
"total": 258772,
"total_gz": 88088,
"a.out.nodebug.wasm": 235312,
"a.out.nodebug.wasm.gz": 78933,
"total": 258777,
"total_gz": 88097,
"sent": [
"__cxa_begin_catch",
"__cxa_end_catch",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_noexcept.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 19754,
"a.out.js.gz": 8162,
"a.out.nodebug.wasm": 131921,
"a.out.nodebug.wasm.gz": 50229,
"total": 151675,
"total_gz": 58391,
"a.out.nodebug.wasm": 131926,
"a.out.nodebug.wasm.gz": 50238,
"total": 151680,
"total_gz": 58400,
"sent": [
"__cxa_throw",
"_abort_js",
Expand Down
8 changes: 4 additions & 4 deletions test/code_size/test_codesize_cxx_wasmfs.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"a.out.js": 7143,
"a.out.js.gz": 3338,
"a.out.nodebug.wasm": 169792,
"a.out.nodebug.wasm.gz": 63078,
"total": 176935,
"total_gz": 66416,
"a.out.nodebug.wasm": 169797,
"a.out.nodebug.wasm.gz": 63087,
"total": 176940,
"total_gz": 66425,
"sent": [
"__cxa_throw",
"_abort_js",
Expand Down
Loading