Skip to content

Commit

Permalink
Perform single copy + pointer select instead of double write for small types
Browse files Browse the repository at this point in the history

A previous optimization is no longer deemed worth it: new test results
and understanding suggest that a single call to ptr::copy_nonoverlapping
per element is more efficient for large inputs that don't fit into the
last-level cache, as well as for types like f128.
  • Loading branch information
Voultapher committed Nov 19, 2023
1 parent 83958e9 commit 59d110e
Showing 1 changed file with 10 additions and 5 deletions.
15 changes: 10 additions & 5 deletions src/quicksort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,12 @@ impl<T> StablePartitionTypeImpl for T {
scratch_rev = scratch_rev.sub(1);

let is_less_than_pivot = is_less(&*scan, &*pivot);
let dst = if is_less_than_pivot {
scratch_base.add(num_lt) // i + num_lt
let dst_base = if is_less_than_pivot {
scratch_base // i + num_lt
} else {
scratch_rev.add(num_lt) // len - (i + 1) + num_lt = len - 1 - num_ge
scratch_rev // len - (i + 1) + num_lt = len - 1 - num_ge
};
let dst = dst_base.add(num_lt);

// Save pivot location in scratch for later.
if const { crate::has_direct_interior_mutability::<T>() }
Expand Down Expand Up @@ -292,8 +293,12 @@ where
state.scratch_rev = state.scratch_rev.sub(1);

let is_less_than_pivot = is_less(&*state.scan, &*pivot);
ptr::copy_nonoverlapping(state.scan, scratch_base.add(state.num_lt), 1);
ptr::copy_nonoverlapping(state.scan, state.scratch_rev.add(state.num_lt), 1);
let dst_base = if is_less_than_pivot {
scratch_base // i + num_lt
} else {
state.scratch_rev // len - (i + 1) + num_lt = len - 1 - num_ge
};
ptr::copy_nonoverlapping(state.scan, dst_base.add(state.num_lt), 1);

state.num_lt += is_less_than_pivot as usize;
state.scan = state.scan.add(1);
Expand Down

0 comments on commit 59d110e

Please sign in to comment.