From 65e8c352aa3a887ee08761f893f300097f0a6e7a Mon Sep 17 00:00:00 2001 From: Nathan Howell Date: Tue, 24 Feb 2026 18:12:08 -0800 Subject: [PATCH 1/3] Fix excessive memory usage in bulk_load due to Vec over-capacity The OMT bulk loading algorithm uses Vec::split_off to partition elements into slabs. split_off leaves the original Vec with its full capacity despite now holding only slab_size elements. The first slab at each partitioning level inherits this over-capacity, and it cascades through the recursive partitioning. At the leaf level, Rust's in-place collect optimization (triggered when size_of::() == size_of::>()) reuses the allocation, permanently storing the over-sized buffer in the final tree node. For large inputs (e.g. 18.6M elements) this wastes ~20 GB of memory in a geometric pattern of halving-size, doubling-count allocations. Fix by calling shrink_to_fit() in two places: - ClusterGroupIterator: after split_off, before returning the slab - bulk_load_recursive: at the leaf level, before the into_iter().collect() --- rstar/CHANGELOG.md | 3 + .../bulk_load/bulk_load_sequential.rs | 55 ++++++++++++++++++- .../bulk_load/cluster_group_iterator.rs | 28 +++++++++- 3 files changed, 83 insertions(+), 3 deletions(-) diff --git a/rstar/CHANGELOG.md b/rstar/CHANGELOG.md index 70967e5..68832b1 100644 --- a/rstar/CHANGELOG.md +++ b/rstar/CHANGELOG.md @@ -6,6 +6,9 @@ - Added `root` doc examples and traversal docs - Implemented `RTreeObject` for `Arc` and `Rc` +## Fixed +- Fix excessive memory retention in `bulk_load` from `Vec::split_off` over-capacity + ## Changed - Made `RStar` methods take `Point` and `Envelope` as owned values where it makes sense ([PR](https://github.com/georust/rstar/pull/189)) - Fix Clippy warning (surfaced in Rust 1.89) related to lifetime elision diff --git a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs index ca75d8a..12fea7e 100644 --- a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs +++ b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs @@ -12,7 +12,7 @@ use num_traits::Float; use super::cluster_group_iterator::{calculate_number_of_clusters_on_axis, ClusterGroupIterator}; -fn bulk_load_recursive(elements: Vec) -> ParentNode +fn bulk_load_recursive(mut elements: Vec) -> ParentNode where T: RTreeObject, ::Point: Point, @@ -20,7 +20,10 @@ where { let m = Params::MAX_SIZE; if elements.len() <= m { - // Reached leaf level + // Reached leaf level. Shrink excess capacity so the in-place collect + // (which reuses the allocation when size_of:: == size_of::>) + // doesn't preserve a massively over-sized buffer in the final tree node. + elements.shrink_to_fit(); let elements: Vec<_> = elements.into_iter().map(RTreeNode::Leaf).collect(); return ParentNode::new_parent(elements); } @@ -146,4 +149,52 @@ mod test { assert_eq!(set1, set2); assert_eq!(tree.size(), points.len()); } + + /// Verify that bulk-loaded tree nodes don't retain excessive Vec capacity. + /// + /// The OMT partitioning uses `Vec::split_off` which leaves the original Vec + /// with its full capacity despite holding fewer elements. Without shrinking, + /// this cascades through the recursive partitioning: each first-slab inherits + /// its parent's over-sized allocation, and Rust's in-place collect optimization + /// (triggered when `size_of::() == size_of::>()`) preserves + /// that allocation in the final tree nodes. For large inputs this can waste + /// many gigabytes of memory. + #[test] + fn test_bulk_load_no_excess_capacity() { + use crate::node::RTreeNode; + + const N: usize = 10_000; + let points: Vec<[i32; 2]> = (0..N as i32).map(|i| [i, i * 3]).collect(); + let tree = RTree::bulk_load(points); + assert_eq!(tree.size(), N); + + // Walk all internal nodes and check that children Vecs are not + // drastically over-allocated. Allow 2x as headroom for normal + // allocator rounding. + let max_allowed_ratio = 2.0_f64; + let mut checked = 0usize; + let mut stack: Vec<&crate::node::ParentNode<[i32; 2]>> = vec![tree.root()]; + while let Some(node) = stack.pop() { + let len = node.children.len(); + let cap = node.children.capacity(); + assert!( + len > 0, + "empty internal node should not exist" + ); + let ratio = cap as f64 / len as f64; + assert!( + ratio <= max_allowed_ratio, + "node children Vec has excessive capacity: len={len}, cap={cap}, ratio={ratio:.1}x \ + (max {max_allowed_ratio}x). This indicates split_off over-capacity is leaking \ + into the tree." + ); + checked += 1; + for child in &node.children { + if let RTreeNode::Parent(ref p) = child { + stack.push(p); + } + } + } + assert!(checked > 1, "expected multiple internal nodes for N={N}"); + } } diff --git a/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs b/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs index 6200442..895b9d1 100644 --- a/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs +++ b/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs @@ -39,7 +39,12 @@ impl Iterator for ClusterGroupIterator { let slab_axis = self.cluster_dimension; T::Envelope::partition_envelopes(slab_axis, &mut self.remaining, self.slab_size); let off_split = self.remaining.split_off(self.slab_size); - ::core::mem::replace(&mut self.remaining, off_split).into() + let mut slab = ::core::mem::replace(&mut self.remaining, off_split); + // split_off leaves the original Vec with its full capacity despite now + // holding only slab_size elements. Shrink to avoid cascading over-capacity + // through the recursive bulk-load partitioning. + slab.shrink_to_fit(); + slab.into() } } } @@ -96,4 +101,25 @@ mod test { } assert_eq!(total_size, SIZE); } + + /// Verify that slabs produced by ClusterGroupIterator don't retain the + /// parent Vec's excess capacity after split_off. + #[test] + fn test_cluster_group_iterator_no_excess_capacity() { + const SIZE: usize = 10_000; + const NUMBER_OF_CLUSTERS_ON_AXIS: usize = 5; + let elements: Vec<_> = (0..SIZE as i32).map(|i| [-i, -i]).collect(); + let slabs: Vec<_> = + ClusterGroupIterator::new(elements, NUMBER_OF_CLUSTERS_ON_AXIS, 0).collect(); + + for (i, slab) in slabs.iter().enumerate() { + let ratio = slab.capacity() as f64 / slab.len() as f64; + assert!( + ratio <= 2.0, + "slab {i} has excessive capacity: len={}, cap={}, ratio={ratio:.1}x", + slab.len(), + slab.capacity(), + ); + } + } } From 0fd04ae040651568948bd3ff65960794d7765201 Mon Sep 17 00:00:00 2001 From: Nathan Howell Date: Wed, 25 Feb 2026 07:01:07 -0800 Subject: [PATCH 2/3] Apply rustfmt --- rstar/src/algorithm/bulk_load/bulk_load_sequential.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs index 12fea7e..bcfd81e 100644 --- a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs +++ b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs @@ -177,10 +177,7 @@ mod test { while let Some(node) = stack.pop() { let len = node.children.len(); let cap = node.children.capacity(); - assert!( - len > 0, - "empty internal node should not exist" - ); + assert!(len > 0, "empty internal node should not exist"); let ratio = cap as f64 / len as f64; assert!( ratio <= max_allowed_ratio, From e258ca743b5f8291026b869aeef89f610efdd889 Mon Sep 17 00:00:00 2001 From: Nathan Howell Date: Wed, 25 Feb 2026 11:00:20 -0800 Subject: [PATCH 3/3] Simplify slab partitioning with drain from end Instead of split_off + mem::replace + shrink_to_fit, invert the partition so slab elements end up at the tail and drain them into a new Vec with exact capacity. This eliminates the need for shrink_to_fit on every non-last slab while keeping the code simpler. --- .../bulk_load/bulk_load_sequential.rs | 12 +++--- .../bulk_load/cluster_group_iterator.rs | 39 +++++++++++-------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs index bcfd81e..3ea6789 100644 --- a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs +++ b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs @@ -152,13 +152,11 @@ mod test { /// Verify that bulk-loaded tree nodes don't retain excessive Vec capacity. /// - /// The OMT partitioning uses `Vec::split_off` which leaves the original Vec - /// with its full capacity despite holding fewer elements. Without shrinking, - /// this cascades through the recursive partitioning: each first-slab inherits - /// its parent's over-sized allocation, and Rust's in-place collect optimization - /// (triggered when `size_of::() == size_of::>()`) preserves - /// that allocation in the final tree nodes. For large inputs this can waste - /// many gigabytes of memory. + /// Without shrinking over-sized allocations during partitioning, Rust's + /// in-place collect optimization (triggered when + /// `size_of::() == size_of::>()`) can preserve them in + /// the final tree nodes. For large inputs this can waste many gigabytes + /// of memory. #[test] fn test_bulk_load_no_excess_capacity() { use crate::node::RTreeNode; diff --git a/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs b/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs index 895b9d1..da64549 100644 --- a/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs +++ b/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs @@ -34,17 +34,24 @@ impl Iterator for ClusterGroupIterator { fn next(&mut self) -> Option { match self.remaining.len() { 0 => None, - len if len <= self.slab_size => ::core::mem::take(&mut self.remaining).into(), - _ => { + len if len <= self.slab_size => { + let mut last = ::core::mem::take(&mut self.remaining); + // self.remaining retains its full original capacity across iterations + // (drain doesn't shrink), so the final slab needs shrinking. + last.shrink_to_fit(); + last.into() + } + len => { let slab_axis = self.cluster_dimension; - T::Envelope::partition_envelopes(slab_axis, &mut self.remaining, self.slab_size); - let off_split = self.remaining.split_off(self.slab_size); - let mut slab = ::core::mem::replace(&mut self.remaining, off_split); - // split_off leaves the original Vec with its full capacity despite now - // holding only slab_size elements. Shrink to avoid cascading over-capacity - // through the recursive bulk-load partitioning. - slab.shrink_to_fit(); - slab.into() + let partition_point = len - self.slab_size; + // Partition so that the slab elements end up at the tail. + T::Envelope::partition_envelopes(slab_axis, &mut self.remaining, partition_point); + // Drain from the end into a new Vec with exact capacity. + // self.remaining keeps its allocation for reuse on the next iteration. + self.remaining + .drain(partition_point..) + .collect::>() + .into() } } } @@ -92,18 +99,18 @@ mod test { assert_eq!(slab.len(), slab_size); } let mut total_size = 0; - let mut max_element_for_last_slab = i32::MIN; + let mut min_element_for_last_slab = i32::MAX; for slab in &slabs { total_size += slab.len(); - let current_max = slab.iter().max_by_key(|point| point[0]).unwrap(); - assert!(current_max[0] > max_element_for_last_slab); - max_element_for_last_slab = current_max[0]; + let current_min = slab.iter().min_by_key(|point| point[0]).unwrap(); + assert!(current_min[0] < min_element_for_last_slab); + min_element_for_last_slab = current_min[0]; } assert_eq!(total_size, SIZE); } - /// Verify that slabs produced by ClusterGroupIterator don't retain the - /// parent Vec's excess capacity after split_off. + /// Verify that slabs produced by ClusterGroupIterator don't retain + /// excessive capacity from the parent Vec. #[test] fn test_cluster_group_iterator_no_excess_capacity() { const SIZE: usize = 10_000;