Skip to content

Commit

Permalink
Improve binary-size
Browse files Browse the repository at this point in the history
Avoid the special insertion-sort branch for len < 8 and re-use the same merge
logic instead. Limit the use of sort8 to integer-like types, because they really
benefit from the additional branch-miss avoidance, while others such as
strings play inline lotto and don't really benefit in terms of run-time,
while paying for it with binary-size.
  • Loading branch information
Voultapher committed Mar 4, 2024
1 parent 9c2d26b commit 5f2edfe
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 13 deletions.
3 changes: 0 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,3 @@ const fn has_direct_interior_mutability<T>() -> bool {
// could lead to double free.
!T::IS_FREEZE
}

trait IsTrue<const B: bool> {}
impl IsTrue<true> for () {}
19 changes: 9 additions & 10 deletions src/smallsort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,16 @@ fn sort_small_general<T: crate::Freeze, F: FnMut(&T, &T) -> bool>(
intrinsics::abort();
}

if len < 8 {
insertion_sort_shift_left(v, 1, is_less);
return;
}

let v_base = v.as_mut_ptr();
let len_div_2 = len / 2;

// SAFETY: See individual comments.
unsafe {
let scratch_base = scratch.as_mut_ptr() as *mut T;

let presorted_len = if len >= 16 {
let presorted_len = if const { mem::size_of::<T>() <= 16 } && len >= 16 {
// SAFETY: scratch_base is valid and has enough space.
sort8_stable(v_base, scratch_base, scratch_base.add(len), is_less);

sort8_stable(
v_base.add(len_div_2),
scratch_base.add(len_div_2),
Expand All @@ -94,12 +89,17 @@ fn sort_small_general<T: crate::Freeze, F: FnMut(&T, &T) -> bool>(
);

8
} else {
} else if len >= 8 {
// SAFETY: scratch_base is valid and has enough space.
sort4_stable(v_base, scratch_base, is_less);
sort4_stable(v_base.add(len_div_2), scratch_base.add(len_div_2), is_less);

4
} else {
ptr::copy_nonoverlapping(v_base, scratch_base, 1);
ptr::copy_nonoverlapping(v_base.add(len_div_2), scratch_base.add(len_div_2), 1);

1
};

for offset in [0, len_div_2] {
Expand Down Expand Up @@ -278,7 +278,7 @@ pub unsafe fn sort4_stable<T, F: FnMut(&T, &T) -> bool>(
}

#[inline(always)]
pub fn select<T>(cond: bool, if_true: *const T, if_false: *const T) -> *const T {
fn select<T>(cond: bool, if_true: *const T, if_false: *const T) -> *const T {
if cond {
if_true
} else {
Expand All @@ -289,7 +289,6 @@ pub unsafe fn sort4_stable<T, F: FnMut(&T, &T) -> bool>(

/// SAFETY: The caller MUST guarantee that `v_base` is valid for 8 reads and writes, `scratch_base`
/// and `dst` MUST be valid for 8 writes. The result will be stored in `dst[0..8]`.
#[inline(never)]
unsafe fn sort8_stable<T: crate::Freeze, F: FnMut(&T, &T) -> bool>(
v_base: *mut T,
dst: *mut T,
Expand Down

0 comments on commit 5f2edfe

Please sign in to comment.