-
Notifications
You must be signed in to change notification settings - Fork 321
feat: recognize and use over sized allocations #523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
c1ddda2
a983f30
e8370e0
274ae5f
88919fc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| #![feature(test)] | ||
|
|
||
| extern crate test; | ||
|
|
||
| use hashbrown::HashMap; | ||
| use test::{black_box, Bencher}; | ||
|
|
||
| type Map<K, V> = HashMap<K, V>; | ||
|
|
||
| macro_rules! bench_with_capacity { | ||
| ($name:ident, $cap:expr) => { | ||
| #[bench] | ||
| fn $name(b: &mut Bencher) { | ||
| b.iter(|| { | ||
| // Construct a new empty map with a given capacity and return it to avoid | ||
| // being optimized away. Dropping it measures allocation + minimal setup. | ||
| let m: Map<usize, usize> = Map::with_capacity($cap); | ||
| black_box(m) | ||
| }); | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| bench_with_capacity!(with_capacity_000000, 0); | ||
| bench_with_capacity!(with_capacity_000001, 1); | ||
| bench_with_capacity!(with_capacity_000003, 3); | ||
| bench_with_capacity!(with_capacity_000007, 7); | ||
| bench_with_capacity!(with_capacity_000008, 8); | ||
| bench_with_capacity!(with_capacity_000016, 16); | ||
| bench_with_capacity!(with_capacity_000032, 32); | ||
| bench_with_capacity!(with_capacity_000064, 64); | ||
| bench_with_capacity!(with_capacity_000128, 128); | ||
| bench_with_capacity!(with_capacity_000256, 256); | ||
| bench_with_capacity!(with_capacity_000512, 512); | ||
| bench_with_capacity!(with_capacity_001024, 1024); | ||
| bench_with_capacity!(with_capacity_004096, 4096); | ||
| bench_with_capacity!(with_capacity_016384, 16384); | ||
| bench_with_capacity!(with_capacity_065536, 65536); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1442,6 +1442,40 @@ impl RawTableInner { | |
| } | ||
| } | ||
|
|
||
| /// Find the previous power of 2. If it's already a power of 2, it's unchanged. | ||
| /// Passing zero is undefined behavior. | ||
| pub(crate) fn prev_pow2(z: usize) -> usize { | ||
| let shift = mem::size_of::<usize>() * 8 - 1; | ||
| 1 << (shift - (z.leading_zeros() as usize)) | ||
| } | ||
|
|
||
| fn maximum_buckets_in( | ||
| allocation_size: usize, | ||
| table_layout: TableLayout, | ||
| group_width: usize, | ||
| ) -> usize { | ||
| // Given an equation like: | ||
| // z >= x * y + x + g | ||
| // x can be maximized by doing: | ||
| // x = (z - g) / (y + 1) | ||
| // If you squint: | ||
| // x is the number of buckets | ||
| // y is the table_layout.size | ||
| // z is the size of the allocation | ||
| // g is the group width | ||
| // But this is ignoring the padding needed for ctrl_align. | ||
| // If we remember these restrictions: | ||
| // x is always a power of 2 | ||
| // Layout size for T must always be a multiple of T | ||
| // Then the alignment can be ignored if we add the constraint: | ||
| // x * y >= table_layout.ctrl_align | ||
| // This is taken care of by `capacity_to_buckets`. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is brittle: currently
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since #615, it does guarantee this. Well, assuming I didn't make a mistake anyway ^_^. I tried finding issues with it but I don't see any, let me know if you see something. I think adding assertions can be helpful to make this more obvious and be more resilient to future changes, so I've added |
||
| let numerator = allocation_size - group_width; | ||
| let denominator = table_layout.size + 1; // todo: ZSTs? | ||
morrisonlevi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| let quotient = numerator / denominator; | ||
| prev_pow2(quotient) | ||
| } | ||
|
|
||
| impl RawTableInner { | ||
| /// Allocates a new [`RawTableInner`] with the given number of buckets. | ||
| /// The control bytes and buckets are left uninitialized. | ||
|
|
@@ -1459,7 +1493,7 @@ impl RawTableInner { | |
| unsafe fn new_uninitialized<A>( | ||
| alloc: &A, | ||
| table_layout: TableLayout, | ||
| buckets: usize, | ||
| mut buckets: usize, | ||
| fallibility: Fallibility, | ||
| ) -> Result<Self, TryReserveError> | ||
| where | ||
|
|
@@ -1468,13 +1502,31 @@ impl RawTableInner { | |
| debug_assert!(buckets.is_power_of_two()); | ||
|
|
||
| // Avoid `Option::ok_or_else` because it bloats LLVM IR. | ||
| let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) { | ||
| let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) { | ||
| Some(lco) => lco, | ||
| None => return Err(fallibility.capacity_overflow()), | ||
| }; | ||
|
|
||
| let ptr: NonNull<u8> = match do_alloc(alloc, layout) { | ||
| Ok(block) => block.cast(), | ||
| Ok(block) => { | ||
| if block.len() > layout.size() { | ||
morrisonlevi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // Utilize over-sized allocations. | ||
| let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); | ||
| debug_assert!(x >= buckets); | ||
| // Calculate the new ctrl_offset. | ||
| let (_oversized_layout, oversized_ctrl_offset) = | ||
morrisonlevi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| match table_layout.calculate_layout_for(x) { | ||
| Some(lco) => lco, | ||
| None => unsafe { hint::unreachable_unchecked() }, | ||
| }; | ||
| debug_assert!(_oversized_layout.size() <= block.len()); | ||
| debug_assert!(oversized_ctrl_offset >= ctrl_offset); | ||
| ctrl_offset = oversized_ctrl_offset; | ||
| buckets = x; | ||
| } | ||
|
|
||
| block.cast() | ||
| } | ||
| Err(_) => return Err(fallibility.alloc_err(layout)), | ||
| }; | ||
|
|
||
|
|
@@ -4168,6 +4220,23 @@ impl<T, A: Allocator> RawExtractIf<'_, T, A> { | |
| mod test_map { | ||
| use super::*; | ||
|
|
||
| #[test] | ||
| fn test_prev_pow2() { | ||
| // Skip 0, not defined for that input. | ||
| let mut pow2: usize = 1; | ||
| while (pow2 << 1) > 0 { | ||
| let next_pow2 = pow2 << 1; | ||
| assert_eq!(pow2, prev_pow2(pow2)); | ||
| // Need to skip 2, because it's also a power of 2, so it doesn't | ||
| // return the previous power of 2. | ||
| if next_pow2 > 2 { | ||
| assert_eq!(pow2, prev_pow2(pow2 + 1)); | ||
| assert_eq!(pow2, prev_pow2(next_pow2 - 1)); | ||
| } | ||
| pow2 = next_pow2; | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_minimum_capacity_for_small_types() { | ||
| #[track_caller] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I had to spend some time to work out why this was correct. The key insight here is that
ctrl_offset = align(x * y, ctrl_align)in the layout calculation. It might be worth adding a line to the comment with this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I left a comment, take a look and suggest more if you can think of something more to note.