Move sort_small_general to standalone function (#36)
This greatly improves release compile times.
Before:
Benchmark 1: cargo build --release
    Time (mean ± σ):      9.065 s ±  0.080 s    [User: 22.421 s, System: 0.309 s]
    Range (min … max):    8.998 s …  9.154 s    3 runs
After:
Benchmark 1: cargo build --release
    Time (mean ± σ):      6.594 s ±  0.043 s    [User: 20.360 s, System: 0.264 s]
    Range (min … max):    6.562 s …  6.644 s    3 runs
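(The timings above are in hyperfine's output format; assuming that tool was used, a command along the lines of hyperfine --runs 3 --prepare 'cargo clean' 'cargo build --release' reproduces this kind of measurement.)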

It does so while avoiding significant binary-size regressions. The split-up will be
necessary anyway once this function is shared with ipnsort.
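
For readers skimming the diff below: the #[inline(always)] trait method stays, but it shrinks to a thin delegating wrapper while the large generic body moves into a free function, so the body is plausibly no longer force-inlined into every call site. A minimal sketch of the pattern, with hypothetical names rather than the crate's real items:

    use std::mem::MaybeUninit;

    trait SmallSort: Sized {
        fn small_sort<F: FnMut(&Self, &Self) -> bool>(
            v: &mut [Self],
            scratch: &mut [MaybeUninit<Self>],
            is_less: &mut F,
        );
    }

    impl<T> SmallSort for T {
        #[inline(always)]
        fn small_sort<F: FnMut(&T, &T) -> bool>(
            v: &mut [T],
            scratch: &mut [MaybeUninit<T>],
            is_less: &mut F,
        ) {
            // Thin wrapper: trivially cheap to inline at every call site.
            sort_small_general(v, scratch, is_less);
        }
    }

    // The heavy body is still monomorphized once per T, but it is now a
    // single standalone symbol instead of being duplicated into each
    // inlined caller.
    fn sort_small_general<T, F: FnMut(&T, &T) -> bool>(
        v: &mut [T],
        _scratch: &mut [MaybeUninit<T>],
        is_less: &mut F,
    ) {
        // Stand-in body (plain insertion sort); the real function is far
        // larger, which is exactly why outlining it helps compile times.
        for i in 1..v.len() {
            let mut j = i;
            while j > 0 && is_less(&v[j], &v[j - 1]) {
                v.swap(j, j - 1);
                j -= 1;
            }
        }
    }

Call sites keep the same shape, T::small_sort(v, scratch, is_less); only where the body lands in the generated code changes.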
Voultapher authored Feb 28, 2024
1 parent d3e325e · commit 248bec8
Showing 1 changed file: src/smallsort.rs (80 additions, 75 deletions)
@@ -39,10 +39,7 @@ impl<T> SmallSortTypeImpl for T {
 
 pub const MIN_SMALL_SORT_SCRATCH_LEN: usize = i32::SMALL_SORT_THRESHOLD + 16;
 
-impl<T> SmallSortTypeImpl for T
-where
-    T: crate::Freeze,
-{
+impl<T: crate::Freeze> SmallSortTypeImpl for T {
     const SMALL_SORT_THRESHOLD: usize = 20;
 
     #[inline(always)]
@@ -51,85 +48,93 @@ where
         scratch: &mut [MaybeUninit<T>],
         is_less: &mut F,
     ) {
-        let len = v.len();
-
-        if len >= 2 {
-            if scratch.len() < MIN_SMALL_SORT_SCRATCH_LEN {
-                intrinsics::abort();
-            }
-
-            let v_base = v.as_mut_ptr();
-
-            let offset = if len >= 8 {
-                let len_div_2 = len / 2;
-
-                // SAFETY: TODO
-                unsafe {
-                    let scratch_base = scratch.as_mut_ptr() as *mut T;
-
-                    let presorted_len = if len >= 16 {
-                        // SAFETY: scratch_base is valid and has enough space.
-                        sort8_stable(
-                            v_base,
-                            scratch_base.add(T::SMALL_SORT_THRESHOLD),
-                            scratch_base,
-                            is_less,
-                        );
-
-                        sort8_stable(
-                            v_base.add(len_div_2),
-                            scratch_base.add(T::SMALL_SORT_THRESHOLD + 8),
-                            scratch_base.add(len_div_2),
-                            is_less,
-                        );
-
-                        8
-                    } else {
-                        // SAFETY: scratch_base is valid and has enough space.
-                        sort4_stable(v_base, scratch_base, is_less);
-                        sort4_stable(v_base.add(len_div_2), scratch_base.add(len_div_2), is_less);
-
-                        4
-                    };
-
-                    for offset in [0, len_div_2] {
-                        let src = scratch_base.add(offset);
-                        let dst = v_base.add(offset);
-
-                        for i in presorted_len..len_div_2 {
-                            ptr::copy_nonoverlapping(dst.add(i), src.add(i), 1);
-                            insert_tail(src, src.add(i), is_less);
-                        }
-                    }
-
-                    let even_len = len - (len % 2);
-
-                    // SAFETY: scratch_base is initialized with even_len elements,
-                    // and v_base is large enough to copy to.
-                    let drop_guard = CopyOnDrop {
-                        src: scratch_base,
-                        dst: v_base,
-                        len: even_len,
-                    };
-
-                    // It's faster to merge directly into `v` and copy over the 'safe' elements of
-                    // `scratch` into v only if there was a panic. This technique is similar to
-                    // ping-pong merging.
-                    bi_directional_merge_even(
-                        &*ptr::slice_from_raw_parts(drop_guard.src, drop_guard.len),
-                        drop_guard.dst,
-                        is_less,
-                    );
-                    mem::forget(drop_guard);
-
-                    even_len
-                }
-            } else {
-                1
-            };
-
-            insertion_sort_shift_left(v, offset, is_less);
-        }
-    }
-}
+        sort_small_general(v, scratch, is_less);
+    }
+}
+
+fn sort_small_general<T: crate::Freeze, F: FnMut(&T, &T) -> bool>(
+    v: &mut [T],
+    scratch: &mut [MaybeUninit<T>],
+    is_less: &mut F,
+) {
+    let len = v.len();
+
+    if len >= 2 {
+        if scratch.len() < MIN_SMALL_SORT_SCRATCH_LEN {
+            intrinsics::abort();
+        }
+
+        let v_base = v.as_mut_ptr();
+
+        let offset = if len >= 8 {
+            let len_div_2 = len / 2;
+
+            // SAFETY: TODO
+            unsafe {
+                let scratch_base = scratch.as_mut_ptr() as *mut T;
+
+                let presorted_len = if len >= 16 {
+                    // SAFETY: scratch_base is valid and has enough space.
+                    sort8_stable(
+                        v_base,
+                        scratch_base.add(T::SMALL_SORT_THRESHOLD),
+                        scratch_base,
+                        is_less,
+                    );
+
+                    sort8_stable(
+                        v_base.add(len_div_2),
+                        scratch_base.add(T::SMALL_SORT_THRESHOLD + 8),
+                        scratch_base.add(len_div_2),
+                        is_less,
+                    );
+
+                    8
+                } else {
+                    // SAFETY: scratch_base is valid and has enough space.
+                    sort4_stable(v_base, scratch_base, is_less);
+                    sort4_stable(v_base.add(len_div_2), scratch_base.add(len_div_2), is_less);
+
+                    4
+                };
+
+                for offset in [0, len_div_2] {
+                    let src = scratch_base.add(offset);
+                    let dst = v_base.add(offset);
+
+                    for i in presorted_len..len_div_2 {
+                        ptr::copy_nonoverlapping(dst.add(i), src.add(i), 1);
+                        insert_tail(src, src.add(i), is_less);
+                    }
+                }
+
+                let even_len = len - (len % 2);
+
+                // SAFETY: scratch_base is initialized with even_len elements,
+                // and v_base is large enough to copy to.
+                let drop_guard = CopyOnDrop {
+                    src: scratch_base,
+                    dst: v_base,
+                    len: even_len,
+                };
+
+                // It's faster to merge directly into `v` and copy over the 'safe' elements of
+                // `scratch` into v only if there was a panic. This technique is similar to
+                // ping-pong merging.
+                bi_directional_merge_even(
+                    &*ptr::slice_from_raw_parts(drop_guard.src, drop_guard.len),
+                    drop_guard.dst,
+                    is_less,
+                );
+                mem::forget(drop_guard);
+
+                even_len
+            }
+        } else {
+            1
+        };
+
+        insertion_sort_shift_left(v, offset, is_less);
+    }
+}
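
The hunk relies on a CopyOnDrop guard whose definition lies outside this diff. A rough sketch of what such a guard typically looks like (an assumption based on the field names above, not the file's actual code): the merge writes directly into v, and only if the comparator panics does Drop copy the still-consistent scratch contents back; the success path disarms the guard with mem::forget.

    use std::ptr;

    // Assumed shape of the panic-safety guard referenced in the diff; the
    // real definition lives elsewhere in smallsort.rs.
    struct CopyOnDrop<T> {
        src: *const T,
        dst: *mut T,
        len: usize,
    }

    impl<T> Drop for CopyOnDrop<T> {
        fn drop(&mut self) {
            // Only reached on unwind, because the success path calls
            // mem::forget on the guard. Restores the `len` initialized
            // elements from scratch (`src`) over the possibly torn `v`
            // (`dst`).
            // SAFETY: the creator guarantees src..src+len is initialized,
            // dst is valid for len writes, and the ranges do not overlap.
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dst, self.len);
            }
        }
    }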

