docs/userguide/src/migration/prefix.md (33 additions, 0 deletions)
@@ -32,6 +32,39 @@ Notes for the mmtk-core developers:

## 0.32.0

### Allocation options changed

```admonish tldr
`AllocationOptions` now has multiple boolean fields instead of a single `OnAllocationFail` field.
Polling can no longer be disabled; instead, an allocation can now both poll and over-commit.
```

API changes:

- module `util::alloc::allocator`
+ `OnAllocationFail`: Removed.
+ `AllocationOptions`: It now has three boolean fields:
* `allow_overcommit`
* `at_safepoint`
* `allow_oom_call`

Variants of the old `enum OnAllocationFail` should be migrated to the new API according to the
following table:

| variant | `allow_overcommit` | `at_safepoint` | `allow_oom_call` |
|-----------------|--------------------|----------------|------------------|
| `RequestGC` | `false` | `true` | `true` |
| `ReturnFailure` | `false` | `false` | `false` |
| `OverCommit` | `true` | `false` | `false` |

Note that MMTk now always polls before trying to get more pages from the page resource, and the
poll may trigger a GC. The old `OnAllocationFail::OverCommit` used to prevent polling, but that is
no longer possible.
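
For illustration, here is the same mapping in code (a sketch; the function wrapper and variable
names are only for illustration):

```rust
use mmtk::util::alloc::AllocationOptions;

fn migration_examples() {
    // `RequestGC` (the old default) is now simply the default options:
    let _request_gc = AllocationOptions::default();

    // `ReturnFailure` maps to all three fields being `false`:
    let _return_failure = AllocationOptions {
        allow_overcommit: false,
        at_safepoint: false,
        allow_oom_call: false,
    };

    // `OverCommit` maps to `allow_overcommit: true`. Note that polling can no
    // longer be suppressed, so a GC may still be triggered before over-committing.
    let _over_commit = AllocationOptions {
        allow_overcommit: true,
        at_safepoint: false,
        allow_oom_call: false,
    };
}
```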

See also:

- PR: <https://github.com/mmtk/mmtk-core/pull/1400>

### Removed the notion of "mmap chunk"

src/policy/lockfreeimmortalspace.rs (1 addition, 1 deletion)

```diff
@@ -151,7 +151,7 @@ impl<VM: VMBinding> Space<VM> for LockFreeImmortalSpace<VM> {
            })
            .expect("update cursor failed");
        if start + bytes > self.limit {
-            if alloc_options.on_fail.allow_oom_call() {
+            if alloc_options.allow_oom_call {
                panic!("OutOfMemory");
            } else {
                return Address::ZERO;
```
src/policy/space.rs (34 additions, 21 deletions)

```diff
@@ -85,7 +85,7 @@ pub trait Space<VM: VMBinding>: 'static + SFT + Sync + Downcast {
        alloc_options: AllocationOptions,
    ) -> bool {
        if self.will_oom_on_acquire(size) {
-            if alloc_options.on_fail.allow_oom_call() {
+            if alloc_options.allow_oom_call {
                VM::VMCollection::out_of_memory(
                    tls,
                    crate::util::alloc::AllocationError::HeapOutOfMemory,
@@ -108,35 +108,42 @@ pub trait Space<VM: VMBinding>: 'static + SFT + Sync + Downcast {
"The requested pages is larger than the max heap size. Is will_go_oom_on_acquire used before acquring memory?"
);

-        // Should we poll to attempt to GC?
-        // - If tls is collector, we cannot attempt a GC.
-        // - If gc is disabled, we cannot attempt a GC.
-        // - If overcommit is allowed, we don't attempt a GC.
-        // FIXME: We should allow polling while also allowing over-committing.
-        //        We should change the allocation interface.
-        let should_poll = VM::VMActivePlan::is_mutator(tls)
-            && VM::VMCollection::is_collection_enabled()
-            && !alloc_options.on_fail.allow_overcommit();

        trace!("Reserving pages");
        let pr = self.get_page_resource();
        let pages_reserved = pr.reserve_pages(pages);
        trace!("Pages reserved");
-        trace!("Polling ..");

-        // The actual decision tree.
-        if should_poll && self.get_gc_trigger().poll(false, Some(self.as_space())) {
-            self.not_acquiring(tls, alloc_options, pr, pages_reserved, false);
-            Address::ZERO
-        } else {
-            debug!("Collection not required");
+        // Should we poll before acquiring pages from page resources so that it can trigger a GC?
+        // - If tls is collector, we cannot attempt a GC.
+        // - If gc is disabled, we cannot attempt a GC.
+        let should_poll =
+            VM::VMActivePlan::is_mutator(tls) && VM::VMCollection::is_collection_enabled();

+        // If we should poll, do it now. Record whether it has triggered a GC.
+        // If we should not poll, GC is not triggered.
+        let gc_triggered = should_poll && {
+            trace!("Polling ..");
+            self.get_gc_trigger().poll(false, Some(self.as_space()))
+        };

+        // We can try to get pages if
+        // - GC is not triggered, or
+        // - GC is triggered, but we allow over-committing.
+        let should_get_pages = !gc_triggered || alloc_options.allow_overcommit;

+        // Get new pages if we should. If we didn't get new pages from the page resource for any
+        // reason (if we decided not to, or if we tried and failed), this function shall return a
+        // null address.
+        if should_get_pages {
            if let Some(addr) = self.get_new_pages_and_initialize(tls, pages, pr, pages_reserved) {
                addr
            } else {
-                self.not_acquiring(tls, alloc_options, pr, pages_reserved, true);
+                self.not_acquiring(tls, alloc_options, pr, pages_reserved, false);
                Address::ZERO
            }
+        } else {
+            self.not_acquiring(tls, alloc_options, pr, pages_reserved, true);
+            Address::ZERO
        }
    }

@@ -268,11 +275,17 @@ pub trait Space<VM: VMBinding>: 'static + SFT + Sync + Downcast {
        pages_reserved: usize,
        attempted_allocation_and_failed: bool,
    ) {
+        assert!(
+            VM::VMActivePlan::is_mutator(tls),
+            "A non-mutator thread failed to get pages from page resource. \
+            Copying GC plans should compute the copying headroom carefully to prevent this."
+        );
+
        // Clear the request
        pr.clear_request(pages_reserved);

-        // If we do not want GC on fail, just return.
-        if !alloc_options.on_fail.allow_gc() {
+        // If we are not at a safepoint, return immediately.
+        if !alloc_options.at_safepoint {
            return;
        }
```

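The hunks above restructure `acquire` so that polling is no longer skipped for over-committing
allocations. Distilled into a sketch (illustrative code, not part of this diff):

```rust
/// Whether `acquire` should ask the page resource for pages, given the result of
/// polling and the allocation options (a distillation of the hunk above).
fn should_get_pages(gc_triggered: bool, allow_overcommit: bool) -> bool {
    // Over-committing no longer prevents the poll; it only lets the allocation
    // proceed even though the poll decided to trigger a GC.
    !gc_triggered || allow_overcommit
}

fn main() {
    assert!(should_get_pages(false, false)); // no GC triggered: try to get pages
    assert!(should_get_pages(true, true)); // GC triggered, but over-commit is allowed
    assert!(!should_get_pages(true, false)); // GC triggered: don't get pages, wait for the GC
}
```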
src/util/alloc/allocator.rs (109 additions, 43 deletions)

```diff
@@ -6,7 +6,7 @@ use crate::util::heap::gc_trigger::GCTrigger;
use crate::util::options::Options;
use crate::MMTK;

-use atomic::Atomic;
+use std::cell::RefCell;
use std::sync::atomic::Ordering;
use std::sync::Arc;

@@ -28,39 +28,51 @@ pub enum AllocationError {
    MmapOutOfMemory,
}

-/// Behavior when an allocation fails, and a GC is expected.
-#[repr(u8)]
-#[derive(Copy, Clone, Default, PartialEq, bytemuck::NoUninit, Debug)]
-pub enum OnAllocationFail {
-    /// Request the GC. This is the default behavior.
-    #[default]
-    RequestGC,
-    /// Instead of requesting GC, the allocation request returns with a failure value.
-    ReturnFailure,
-    /// Instead of requesting GC, the allocation request simply overcommits the memory,
-    /// and return a valid result at its best efforts.
-    OverCommit,
-}
-
-impl OnAllocationFail {
-    pub(crate) fn allow_oom_call(&self) -> bool {
-        *self == Self::RequestGC
-    }
-    pub(crate) fn allow_gc(&self) -> bool {
-        *self == Self::RequestGC
-    }
-    pub(crate) fn allow_overcommit(&self) -> bool {
-        *self == Self::OverCommit
-    }
-}

/// Allow specifying different behaviors with [`Allocator::alloc_with_options`].
#[repr(C)]
-#[derive(Copy, Clone, Default, PartialEq, bytemuck::NoUninit, Debug)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct AllocationOptions {
-    /// When the allocation fails and a GC is originally expected, on_fail
-    /// allows a different behavior to avoid the GC.
-    pub on_fail: OnAllocationFail,
+    /// Whether over-committing is allowed at this allocation site. Over-committing means the
+    /// allocation is allowed to go beyond the current heap size. But it is not guaranteed to
+    /// succeed.
+    ///
+    /// **The default is `false`**.
+    ///
+    /// Note that regardless of the value of `allow_overcommit`, the allocation may trigger GC if
+    /// the GC trigger considers it needed.
+    pub allow_overcommit: bool,
+
+    /// Whether the allocation is at a safepoint.
+    ///
+    /// **The default is `true`**.
+    ///
+    /// If `true`, the allocation is allowed to block for GC.
+    ///
+    /// If `false`, the allocation will immediately return a null address if the allocation cannot
+    /// be satisfied without a GC.
+    pub at_safepoint: bool,
+
+    /// Whether the allocation is allowed to call [`Collection::out_of_memory`].
+    ///
+    /// **The default is `true`**.
+    ///
+    /// If `true`, the allocation will call [`Collection::out_of_memory`] when out of memory, and
+    /// then return null.
+    ///
+    /// If `false`, the allocation will return null immediately when out of memory.
+    pub allow_oom_call: bool,
}

+/// The default value for `AllocationOptions` has the same semantics as calling [`Allocator::alloc`]
+/// directly.
+impl Default for AllocationOptions {
+    fn default() -> Self {
+        Self {
+            allow_overcommit: false,
+            at_safepoint: true,
+            allow_oom_call: true,
+        }
+    }
+}

impl AllocationOptions {
@@ -69,6 +81,56 @@ impl AllocationOptions {
    }
}

+/// A wrapper for [`AllocatorContext`] to hold an [`AllocationOptions`] that can be modified by the
+/// same mutator thread.
+///
+/// All [`Allocator`] instances in `Allocators` share one `AllocationOptions` instance, and it will
+/// only be accessed by the mutator (via `Mutator::allocators`) or the GC worker (via
+/// `GCWorker::copy`) that owns it. Rust doesn't like multiple mutable references pointing to a
+/// shared data structure. We cannot use [`atomic::Atomic`] because `AllocationOptions` has
+/// multiple fields. We wrap it in a `RefCell` to make it internally mutable.
+///
+/// Note: the allocation options are read every time [`Allocator::alloc_with_options`] is called.
+/// Because such API functions should only be called on allocation slow paths, we believe that
+/// `RefCell` should be good enough for performance. If this is too slow, we may consider
+/// `UnsafeCell`. If that's still too slow, we should consider changing the API to make the
+/// allocation options a persistent per-mutator value, and allow the VM binding to set its value
+/// via a new API function.
+struct AllocationOptionsHolder {
+    alloc_options: RefCell<AllocationOptions>,
+}
+
+/// Strictly speaking, `AllocationOptionsHolder` isn't `Sync`. Two threads cannot set or clear the
+/// same `AllocationOptionsHolder` at the same time. However, both `Mutator` and `GCWorker` are
+/// `Send`, and both of them own `Allocators` and require its field `Arc<AllocatorContext>` to be
+/// `Send`, which requires `AllocatorContext` to be `Sync`, which requires
+/// `AllocationOptionsHolder` to be `Sync`. (Note that `Arc<T>` can be cloned and given to another
+/// thread, and Rust expects `T` to be `Sync`, too. But we never share `AllocatorContext` between
+/// threads, only between multiple `Allocator` instances within the same `Allocators` instance.
+/// Rust can't figure this out.)
+unsafe impl Sync for AllocationOptionsHolder {}
+
+impl AllocationOptionsHolder {
+    pub fn new(alloc_options: AllocationOptions) -> Self {
+        Self {
+            alloc_options: RefCell::new(alloc_options),
+        }
+    }
+
+    pub fn set_alloc_options(&self, options: AllocationOptions) {
+        let mut alloc_options = self.alloc_options.borrow_mut();
+        *alloc_options = options;
+    }
+
+    pub fn clear_alloc_options(&self) {
+        let mut alloc_options = self.alloc_options.borrow_mut();
+        *alloc_options = AllocationOptions::default();
+    }
+
+    pub fn get_alloc_options(&self) -> AllocationOptions {
+        let alloc_options = self.alloc_options.borrow();
+        *alloc_options
+    }
+}

pub fn align_allocation_no_fill<VM: VMBinding>(
    region: Address,
    alignment: usize,
@@ -180,7 +242,7 @@ pub(crate) fn assert_allocation_args<VM: VMBinding>(size: usize, align: usize, o

/// The context an allocator needs to access in order to perform allocation.
pub struct AllocatorContext<VM: VMBinding> {
-    pub alloc_options: Atomic<AllocationOptions>,
+    alloc_options: AllocationOptionsHolder,
    pub state: Arc<GlobalState>,
    pub options: Arc<Options>,
    pub gc_trigger: Arc<GCTrigger<VM>>,
@@ -191,7 +253,7 @@ pub struct AllocatorContext<VM: VMBinding> {
impl<VM: VMBinding> AllocatorContext<VM> {
    pub fn new(mmtk: &MMTK<VM>) -> Self {
        Self {
-            alloc_options: Atomic::new(AllocationOptions::default()),
+            alloc_options: AllocationOptionsHolder::new(AllocationOptions::default()),
            state: mmtk.state.clone(),
            options: mmtk.options.clone(),
            gc_trigger: mmtk.gc_trigger.clone(),
@@ -201,16 +263,15 @@ impl<VM: VMBinding> AllocatorContext<VM> {
    }

    pub fn set_alloc_options(&self, options: AllocationOptions) {
-        self.alloc_options.store(options, Ordering::Relaxed);
+        self.alloc_options.set_alloc_options(options);
    }

    pub fn clear_alloc_options(&self) {
-        self.alloc_options
-            .store(AllocationOptions::default(), Ordering::Relaxed);
+        self.alloc_options.clear_alloc_options();
    }

    pub fn get_alloc_options(&self) -> AllocationOptions {
-        self.alloc_options.load(Ordering::Relaxed)
+        self.alloc_options.get_alloc_options()
    }
}

@@ -367,12 +428,6 @@ pub trait Allocator<VM: VMBinding>: Downcast {
            return result;
        }

-        if result.is_zero()
-            && self.get_context().get_alloc_options().on_fail == OnAllocationFail::ReturnFailure
-        {
-            return result;
-        }

        if !result.is_zero() {
            // Report allocation success to assist OutOfMemory handling.
            if !self
@@ -427,6 +482,17 @@ pub trait Allocator<VM: VMBinding>: Downcast {
            return result;
        }

+        // From here on, we handle the case where `alloc_once` failed.
+        assert!(result.is_zero());
+
+        if !self.get_context().get_alloc_options().at_safepoint {
+            // If the allocation is not at a safepoint, it will not be able to block for GC. But
+            // the code beyond this point tests OOM conditions and, if not OOM, tries to allocate
+            // again. Since we didn't block for GC, the allocation would fail again if we tried
+            // again. So we return null immediately.
+            return Address::ZERO;
+        }
+
        // It is possible to have cases where a thread is blocked for another GC (non emergency)
        // immediately after being blocked for a GC (emergency) (e.g. in stress test), that is,
        // the thread does not leave this loop between the two GCs. The local var 'emergency_collection'
```
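The early return added above is what makes non-blocking allocation requests fail fast. For
illustration, a hypothetical binding-side helper (assuming the `memory_manager::alloc_with_options`
entry point; the helper itself is not part of this diff):

```rust
use mmtk::memory_manager;
use mmtk::util::alloc::AllocationOptions;
use mmtk::util::Address;
use mmtk::vm::VMBinding;
use mmtk::{AllocationSemantics, Mutator};

/// Attempt an allocation outside a safepoint, reporting failure to the caller
/// instead of blocking for GC or calling `Collection::out_of_memory`.
fn alloc_non_blocking<VM: VMBinding>(mutator: &mut Mutator<VM>, size: usize) -> Option<Address> {
    let opts = AllocationOptions {
        at_safepoint: false,   // take the early-return path above instead of blocking
        allow_oom_call: false, // report failure rather than raising an OOM error
        ..Default::default()
    };
    let addr =
        memory_manager::alloc_with_options(mutator, size, 8, 0, AllocationSemantics::Default, opts);
    (!addr.is_zero()).then_some(addr)
}
```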
src/util/alloc/mod.rs (0 additions, 1 deletion)

```diff
@@ -6,7 +6,6 @@ pub use allocator::fill_alignment_gap
pub use allocator::AllocationError;
pub use allocator::AllocationOptions;
pub use allocator::Allocator;
-pub use allocator::OnAllocationFail;

/// A list of all the allocators, embedded in Mutator
pub(crate) mod allocators;
```