Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions rstar/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
- Added `root` doc examples and traversal docs
- Implemented `RTreeObject` for `Arc<T>` and `Rc<T>`

## Fixed
- Fix excessive memory retention in `bulk_load` from `Vec::split_off` over-capacity

## Changed
- Made `RStar` methods take `Point` and `Envelope` as owned values where it makes sense ([PR](https://github.com/georust/rstar/pull/189))
- Fix Clippy warning (surfaced in Rust 1.89) related to lifetime elision
Expand Down
50 changes: 48 additions & 2 deletions rstar/src/algorithm/bulk_load/bulk_load_sequential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,18 @@ use num_traits::Float;

use super::cluster_group_iterator::{calculate_number_of_clusters_on_axis, ClusterGroupIterator};

fn bulk_load_recursive<T, Params>(elements: Vec<T>) -> ParentNode<T>
fn bulk_load_recursive<T, Params>(mut elements: Vec<T>) -> ParentNode<T>
where
T: RTreeObject,
<T::Envelope as Envelope>::Point: Point,
Params: RTreeParams,
{
let m = Params::MAX_SIZE;
if elements.len() <= m {
// Reached leaf level
// Reached leaf level. Shrink excess capacity so the in-place collect
// (which reuses the allocation when size_of::<T> == size_of::<RTreeNode<T>>)
// doesn't preserve a massively over-sized buffer in the final tree node.
elements.shrink_to_fit();
let elements: Vec<_> = elements.into_iter().map(RTreeNode::Leaf).collect();
return ParentNode::new_parent(elements);
}
Expand Down Expand Up @@ -146,4 +149,47 @@ mod test {
assert_eq!(set1, set2);
assert_eq!(tree.size(), points.len());
}

/// Verify that bulk-loaded tree nodes don't retain excessive Vec capacity.
///
/// Without shrinking over-sized allocations during partitioning, Rust's
/// in-place collect optimization (triggered when
/// `size_of::<T>() == size_of::<RTreeNode<T>>()`) can preserve them in
/// the final tree nodes. For large inputs this can waste many gigabytes
/// of memory.
#[test]
fn test_bulk_load_no_excess_capacity() {
    use crate::node::RTreeNode;

    const N: usize = 10_000;
    let points: Vec<[i32; 2]> = (0..N as i32).map(|i| [i, i * 3]).collect();
    let tree = RTree::bulk_load(points);
    assert_eq!(tree.size(), N);

    // Depth-first traversal of every internal node, asserting that no
    // children Vec is drastically over-allocated. A factor of 2 leaves
    // headroom for ordinary allocator rounding.
    let max_allowed_ratio = 2.0_f64;
    let mut checked = 0usize;
    let mut pending: Vec<&crate::node::ParentNode<[i32; 2]>> = vec![tree.root()];
    while let Some(parent) = pending.pop() {
        let (len, cap) = (parent.children.len(), parent.children.capacity());
        assert!(len > 0, "empty internal node should not exist");
        let ratio = cap as f64 / len as f64;
        assert!(
            ratio <= max_allowed_ratio,
            "node children Vec has excessive capacity: len={len}, cap={cap}, ratio={ratio:.1}x \
            (max {max_allowed_ratio}x). This indicates split_off over-capacity is leaking \
            into the tree."
        );
        checked += 1;
        // Queue up nested parents; leaves carry no children Vec of their own.
        pending.extend(parent.children.iter().filter_map(|child| match child {
            RTreeNode::Parent(p) => Some(p),
            RTreeNode::Leaf(_) => None,
        }));
    }
    assert!(checked > 1, "expected multiple internal nodes for N={N}");
}
}
51 changes: 42 additions & 9 deletions rstar/src/algorithm/bulk_load/cluster_group_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,24 @@ impl<T: RTreeObject> Iterator for ClusterGroupIterator<T> {
fn next(&mut self) -> Option<Self::Item> {
match self.remaining.len() {
0 => None,
len if len <= self.slab_size => ::core::mem::take(&mut self.remaining).into(),
_ => {
len if len <= self.slab_size => {
let mut last = ::core::mem::take(&mut self.remaining);
// self.remaining retains its full original capacity across iterations
// (drain doesn't shrink), so the final slab needs shrinking.
last.shrink_to_fit();
last.into()
}
len => {
let slab_axis = self.cluster_dimension;
T::Envelope::partition_envelopes(slab_axis, &mut self.remaining, self.slab_size);
let off_split = self.remaining.split_off(self.slab_size);
::core::mem::replace(&mut self.remaining, off_split).into()
let partition_point = len - self.slab_size;
// Partition so that the slab elements end up at the tail.
T::Envelope::partition_envelopes(slab_axis, &mut self.remaining, partition_point);
// Drain from the end into a new Vec with exact capacity.
// self.remaining keeps its allocation for reuse on the next iteration.
self.remaining
.drain(partition_point..)
.collect::<Vec<_>>()
.into()
}
}
}
Expand Down Expand Up @@ -87,13 +99,34 @@ mod test {
assert_eq!(slab.len(), slab_size);
}
let mut total_size = 0;
let mut max_element_for_last_slab = i32::MIN;
let mut min_element_for_last_slab = i32::MAX;
for slab in &slabs {
total_size += slab.len();
let current_max = slab.iter().max_by_key(|point| point[0]).unwrap();
assert!(current_max[0] > max_element_for_last_slab);
max_element_for_last_slab = current_max[0];
let current_min = slab.iter().min_by_key(|point| point[0]).unwrap();
assert!(current_min[0] < min_element_for_last_slab);
min_element_for_last_slab = current_min[0];
}
assert_eq!(total_size, SIZE);
}

/// Verify that slabs produced by ClusterGroupIterator don't retain
/// excessive capacity from the parent Vec.
#[test]
fn test_cluster_group_iterator_no_excess_capacity() {
    const SIZE: usize = 10_000;
    const NUMBER_OF_CLUSTERS_ON_AXIS: usize = 5;

    // Descending coordinates force the partition step to actually move data.
    let elements: Vec<_> = (0..SIZE as i32).map(|i| [-i, -i]).collect();
    let slabs: Vec<_> =
        ClusterGroupIterator::new(elements, NUMBER_OF_CLUSTERS_ON_AXIS, 0).collect();

    // Each slab should be sized close to its length; 2x allows for normal
    // allocator rounding.
    for (i, slab) in slabs.iter().enumerate() {
        let (len, cap) = (slab.len(), slab.capacity());
        let ratio = cap as f64 / len as f64;
        assert!(
            ratio <= 2.0,
            "slab {i} has excessive capacity: len={}, cap={}, ratio={ratio:.1}x",
            len,
            cap,
        );
    }
}
}