From 5f2edfe56ad40c35f8a606d9591c850a52f7f7a1 Mon Sep 17 00:00:00 2001 From: Lukas Bergdoll Date: Mon, 4 Mar 2024 20:50:27 +0100 Subject: [PATCH] Improve binary-size Avoid special insertion-sort branch for len < 8, re-use the same merge logic. Limit use of sort8 to integer-like types, because they really benefit from the additional branch-miss avoidance, while others, like strings, play inline lotto and don't really benefit in terms of run-time while paying for it with binary-size. --- src/lib.rs | 3 --- src/smallsort.rs | 19 +++++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 849d879..3a4d59b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -196,6 +196,3 @@ const fn has_direct_interior_mutability() -> bool { // could lead to double free. !T::IS_FREEZE } - -trait IsTrue {} -impl IsTrue for () {} diff --git a/src/smallsort.rs b/src/smallsort.rs index 97965ea..8382378 100644 --- a/src/smallsort.rs +++ b/src/smallsort.rs @@ -71,21 +71,16 @@ fn sort_small_general bool>( intrinsics::abort(); } - if len < 8 { - insertion_sort_shift_left(v, 1, is_less); - return; - } - let v_base = v.as_mut_ptr(); let len_div_2 = len / 2; + // SAFETY: See individual comments. unsafe { let scratch_base = scratch.as_mut_ptr() as *mut T; - let presorted_len = if len >= 16 { + let presorted_len = if const { mem::size_of::() <= 16 } && len >= 16 { // SAFETY: scratch_base is valid and has enough space. sort8_stable(v_base, scratch_base, scratch_base.add(len), is_less); - sort8_stable( v_base.add(len_div_2), scratch_base.add(len_div_2), @@ -94,12 +89,17 @@ fn sort_small_general bool>( ); 8 - } else { + } else if len >= 8 { // SAFETY: scratch_base is valid and has enough space. 
sort4_stable(v_base, scratch_base, is_less); sort4_stable(v_base.add(len_div_2), scratch_base.add(len_div_2), is_less); 4 + } else { + ptr::copy_nonoverlapping(v_base, scratch_base, 1); + ptr::copy_nonoverlapping(v_base.add(len_div_2), scratch_base.add(len_div_2), 1); + + 1 }; for offset in [0, len_div_2] { @@ -278,7 +278,7 @@ pub unsafe fn sort4_stable bool>( } #[inline(always)] - pub fn select(cond: bool, if_true: *const T, if_false: *const T) -> *const T { + fn select(cond: bool, if_true: *const T, if_false: *const T) -> *const T { if cond { if_true } else { @@ -289,7 +289,6 @@ pub unsafe fn sort4_stable bool>( /// SAFETY: The caller MUST guarantee that `v_base` is valid for 8 reads and writes, `scratch_base` /// and `dst` MUST be valid for 8 writes. The result will be stored in `dst[0..8]`. -#[inline(never)] unsafe fn sort8_stable bool>( v_base: *mut T, dst: *mut T,