Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions rstar/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
- Added `root` doc examples and traversal docs
- Implemented `RTreeObject` for `Arc<T>` and `Rc<T>`

## Fixed
- Fixed excessive memory retention in `bulk_load` caused by `Vec::split_off` leaving over-sized capacity in partitioned slabs

## Changed
- Made `RStar` methods take `Point` and `Envelope` as owned values where it makes sense ([PR](https://github.com/georust/rstar/pull/189))
- Fix Clippy warning (surfaced in Rust 1.89) related to lifetime elision
Expand Down
52 changes: 50 additions & 2 deletions rstar/src/algorithm/bulk_load/bulk_load_sequential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,18 @@ use num_traits::Float;

use super::cluster_group_iterator::{calculate_number_of_clusters_on_axis, ClusterGroupIterator};

fn bulk_load_recursive<T, Params>(elements: Vec<T>) -> ParentNode<T>
fn bulk_load_recursive<T, Params>(mut elements: Vec<T>) -> ParentNode<T>
where
T: RTreeObject,
<T::Envelope as Envelope>::Point: Point,
Params: RTreeParams,
{
let m = Params::MAX_SIZE;
if elements.len() <= m {
// Reached leaf level
// Reached leaf level. Shrink excess capacity so the in-place collect
// (which reuses the allocation when size_of::<T> == size_of::<RTreeNode<T>>)
// doesn't preserve a massively over-sized buffer in the final tree node.
elements.shrink_to_fit();
let elements: Vec<_> = elements.into_iter().map(RTreeNode::Leaf).collect();
return ParentNode::new_parent(elements);
}
Expand Down Expand Up @@ -146,4 +149,49 @@ mod test {
assert_eq!(set1, set2);
assert_eq!(tree.size(), points.len());
}

/// Verify that bulk-loaded tree nodes do not hold on to excessive Vec capacity.
///
/// OMT partitioning relies on `Vec::split_off`, which leaves the source Vec
/// with its original capacity even though it now holds fewer elements. If
/// that capacity is never shrunk, it cascades through the recursive
/// partitioning — every first slab inherits its parent's over-sized buffer —
/// and the in-place collect optimization (active whenever
/// `size_of::<T>() == size_of::<RTreeNode<T>>()`) carries the oversized
/// allocation straight into the finished tree nodes, wasting potentially
/// gigabytes on large inputs.
#[test]
fn test_bulk_load_no_excess_capacity() {
    use crate::node::RTreeNode;

    const N: usize = 10_000;
    let points: Vec<[i32; 2]> = (0..N as i32).map(|i| [i, i * 3]).collect();
    let tree = RTree::bulk_load(points);
    assert_eq!(tree.size(), N);

    // Traverse every internal node, verifying its children Vec is not
    // wildly over-allocated. A 2x factor leaves room for ordinary
    // allocator rounding.
    let max_allowed_ratio = 2.0_f64;
    let mut visited = 0usize;
    let mut pending: Vec<&crate::node::ParentNode<[i32; 2]>> = vec![tree.root()];
    while let Some(parent) = pending.pop() {
        let (len, cap) = (parent.children.len(), parent.children.capacity());
        assert!(len > 0, "empty internal node should not exist");
        let ratio = cap as f64 / len as f64;
        assert!(
            ratio <= max_allowed_ratio,
            "node children Vec has excessive capacity: len={len}, cap={cap}, ratio={ratio:.1}x \
            (max {max_allowed_ratio}x). This indicates split_off over-capacity is leaking \
            into the tree."
        );
        visited += 1;
        // Queue up nested parents for the next round of checks.
        pending.extend(parent.children.iter().filter_map(|child| match child {
            RTreeNode::Parent(p) => Some(p),
            RTreeNode::Leaf(_) => None,
        }));
    }
    assert!(visited > 1, "expected multiple internal nodes for N={N}");
}
}
28 changes: 27 additions & 1 deletion rstar/src/algorithm/bulk_load/cluster_group_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ impl<T: RTreeObject> Iterator for ClusterGroupIterator<T> {
let slab_axis = self.cluster_dimension;
T::Envelope::partition_envelopes(slab_axis, &mut self.remaining, self.slab_size);
let off_split = self.remaining.split_off(self.slab_size);
::core::mem::replace(&mut self.remaining, off_split).into()
let mut slab = ::core::mem::replace(&mut self.remaining, off_split);
// split_off leaves the original Vec with its full capacity despite now
// holding only slab_size elements. Shrink to avoid cascading over-capacity
// through the recursive bulk-load partitioning.
slab.shrink_to_fit();
slab.into()
}
}
}
Expand Down Expand Up @@ -96,4 +101,25 @@ mod test {
}
assert_eq!(total_size, SIZE);
}

/// Verify that slabs emitted by `ClusterGroupIterator` do not keep the
/// excess capacity left behind by `split_off` on the parent Vec.
#[test]
fn test_cluster_group_iterator_no_excess_capacity() {
    const SIZE: usize = 10_000;
    const NUMBER_OF_CLUSTERS_ON_AXIS: usize = 5;

    let elements: Vec<_> = (0..SIZE as i32).map(|i| [-i, -i]).collect();

    // Inspect each slab as the iterator yields it: capacity must stay
    // within 2x of the slab's length (headroom for allocator rounding).
    let slab_iter = ClusterGroupIterator::new(elements, NUMBER_OF_CLUSTERS_ON_AXIS, 0);
    for (i, slab) in slab_iter.enumerate() {
        let (len, cap) = (slab.len(), slab.capacity());
        let ratio = cap as f64 / len as f64;
        assert!(
            ratio <= 2.0,
            "slab {i} has excessive capacity: len={}, cap={}, ratio={ratio:.1}x",
            len,
            cap,
        );
    }
}
}