diff --git a/lib.rs b/lib.rs
index 9b1e17ca6..cd0d37f89 100644
--- a/lib.rs
+++ b/lib.rs
@@ -46,6 +46,7 @@ pub mod src {
     mod fg_apply;
     mod filmgrain;
     mod getbits;
+    mod unstable_extensions;
     pub(crate) mod wrap_fn_ptr; // TODO(kkysen) Temporarily `pub(crate)` due to a `pub use` until TAIT.
 
     pub(super) mod internal;
diff --git a/src/lf_apply.rs b/src/lf_apply.rs
index a5624bea8..b646e7cf2 100644
--- a/src/lf_apply.rs
+++ b/src/lf_apply.rs
@@ -8,6 +8,8 @@ use crate::src::lf_mask::Av1Filter;
 use crate::src::lr_apply::LR_RESTORE_U;
 use crate::src::lr_apply::LR_RESTORE_V;
 use crate::src::lr_apply::LR_RESTORE_Y;
+use crate::src::unstable_extensions::as_chunks;
+use crate::src::unstable_extensions::flatten;
 use libc::ptrdiff_t;
 use std::cmp;
 use std::ffi::c_int;
@@ -341,6 +343,18 @@ pub(crate) unsafe fn rav1d_copy_lpf<BD: BitDepth>(
     }
 }
 
+/// Slice `[u8; 4]`s from `lvl`, but "unaligned",
+/// meaning the `[u8; 4]`s can straddle
+/// adjacent `[u8; 4]`s in the `lvl` slice.
+///
+/// Note that this does not result in actual unaligned reads,
+/// since `[u8; 4]` has an alignment of 1.
+/// This optimizes to a single slice with a bounds check.
+#[inline(always)]
+fn unaligned_lvl_slice(lvl: &[[u8; 4]], y: usize) -> &[[u8; 4]] {
+    as_chunks(&flatten(lvl)[y..]).0
+}
+
 #[inline]
 unsafe fn filter_plane_cols_y<BD: BitDepth>(
     f: *const Rav1dFrameContext,
@@ -377,7 +391,7 @@ unsafe fn filter_plane_cols_y<BD: BitDepth>(
             dst.offset((x * 4) as isize).cast(),
             ls,
             hmask.as_mut_ptr(),
-            lvl[x as usize][0..].as_ptr() as *const [u8; 4],
+            &lvl[x as usize],
             b4_stride,
             &(*f).lf.lim_lut.0,
             endy4 - starty4,
@@ -416,7 +430,7 @@ unsafe fn filter_plane_rows_y<BD: BitDepth>(
             dst.cast(),
             ls,
             vmask.as_ptr(),
-            lvl[0][1..].as_ptr() as *const [u8; 4],
+            unaligned_lvl_slice(&lvl[0..], 1).as_ptr(),
             b4_stride,
             &(*f).lf.lim_lut.0,
             w,
@@ -462,7 +476,7 @@ unsafe fn filter_plane_cols_uv<BD: BitDepth>(
             u.offset((x * 4) as isize).cast(),
             ls,
             hmask.as_mut_ptr(),
-            lvl[x as usize][2..].as_ptr() as *const [u8; 4],
+            unaligned_lvl_slice(&lvl[x as usize..], 2).as_ptr(),
             b4_stride,
             &(*f).lf.lim_lut.0,
             endy4 - starty4,
@@ -472,7 +486,7 @@
             v.offset((x * 4) as isize).cast(),
             ls,
             hmask.as_mut_ptr(),
-            lvl[x as usize][3..].as_ptr() as *const [u8; 4],
+            unaligned_lvl_slice(&lvl[x as usize..], 3).as_ptr(),
             b4_stride,
             &(*f).lf.lim_lut.0,
             endy4 - starty4,
@@ -512,7 +526,7 @@ unsafe fn filter_plane_rows_uv<BD: BitDepth>(
             u.offset(off_l as isize).cast(),
             ls,
             vmask.as_ptr(),
-            lvl[0][2..].as_ptr() as *const [u8; 4],
+            unaligned_lvl_slice(&lvl[0..], 2).as_ptr(),
             b4_stride,
             &(*f).lf.lim_lut.0,
             w,
@@ -522,7 +536,7 @@
             v.offset(off_l as isize).cast(),
             ls,
             vmask.as_ptr(),
-            lvl[0][3..].as_ptr() as *const [u8; 4],
+            unaligned_lvl_slice(&lvl[0..], 3).as_ptr(),
             b4_stride,
             &(*f).lf.lim_lut.0,
             w,
@@ -687,10 +701,10 @@ pub(crate) unsafe fn rav1d_loopfilter_sbrow_cols<BD: BitDepth>(
         }
     }
     let mut ptr: *mut BD::Pixel;
-    let level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..];
+    let mut level_ptr = &(*f).lf.level[((*f).b4_stride * sby as isize * sbsz as isize) as usize..];
     ptr = p[0];
     have_left = 0 as c_int;
-    for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32)) {
+    for x in 0..(*f).sb128w {
         filter_plane_cols_y::<BD>(
             f,
             have_left,
@@ -705,15 +719,17 @@
         );
         have_left = 1 as c_int;
         ptr = ptr.offset(128);
+        level_ptr = &level_ptr[32..];
     }
     if frame_hdr.loopfilter.level_u == 0 && frame_hdr.loopfilter.level_v == 0 {
         return;
     }
     let mut uv_off: ptrdiff_t;
-    let level_ptr = &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..];
+    let mut level_ptr =
+        &(*f).lf.level[((*f).b4_stride * (sby * sbsz >> ss_ver) as isize) as usize..];
     have_left = 0 as c_int;
     uv_off = 0;
-    for (x, level_ptr) in (0..(*f).sb128w).zip(level_ptr.chunks(32 >> ss_hor)) {
+    for x in 0..(*f).sb128w {
         filter_plane_cols_uv::<BD>(
             f,
             have_left,
@@ -730,6 +746,7 @@
         );
         have_left = 1 as c_int;
         uv_off += 128 >> ss_hor;
+        level_ptr = &level_ptr[32 >> ss_hor..];
     }
 }
 
diff --git a/src/unstable_extensions.rs b/src/unstable_extensions.rs
new file mode 100644
index 000000000..d29a74c29
--- /dev/null
+++ b/src/unstable_extensions.rs
@@ -0,0 +1,55 @@
+//! Unstable `fn`s copied directly from `std`, with the following differences:
+//! * They are free `fn`s now, not methods.
+//! * `self` is replaced by `this`.
+//! * Things only accessible by `std` are replaced with stable counterparts, such as:
+//!     * `exact_div` => `/`
+//!     * `.unchecked_mul` => `*`
+//!     * `const` `.expect` => `match` and `panic!`
+
+use std::mem;
+use std::slice::from_raw_parts;
+
+/// From `1.75.0`.
+pub const fn flatten<T, const N: usize>(this: &[[T; N]]) -> &[T] {
+    let len = if mem::size_of::<T>() == 0 {
+        match this.len().checked_mul(N) {
+            None => panic!("slice len overflow"),
+            Some(it) => it,
+        }
+    } else {
+        // SAFETY: `this.len() * N` cannot overflow because `this` is
+        // already in the address space.
+        /* unsafe */
+        this.len() * N
+    };
+    // SAFETY: `[T]` is layout-identical to `[T; N]`
+    unsafe { from_raw_parts(this.as_ptr().cast(), len) }
+}
+
+/// From `1.75.0`.
+#[inline]
+#[must_use]
+pub const unsafe fn as_chunks_unchecked<T, const N: usize>(this: &[T]) -> &[[T; N]] {
+    // SAFETY: Caller must guarantee that `N` is nonzero and exactly divides the slice length
+    let new_len = /* unsafe */ {
+        assert!(N != 0 && this.len() % N == 0);
+        this.len() / N
+    };
+    // SAFETY: We cast a slice of `new_len * N` elements into
+    // a slice of `new_len` many `N` elements chunks.
+    unsafe { from_raw_parts(this.as_ptr().cast(), new_len) }
+}
+
+/// From `1.75.0`.
+#[inline]
+#[track_caller]
+#[must_use]
+pub const fn as_chunks<T, const N: usize>(this: &[T]) -> (&[[T; N]], &[T]) {
+    assert!(N != 0, "chunk size must be non-zero");
+    let len = this.len() / N;
+    let (multiple_of_n, remainder) = this.split_at(len * N);
+    // SAFETY: We already panicked for zero, and ensured by construction
+    // that the length of the subslice is a multiple of N.
+    let array_slice = unsafe { as_chunks_unchecked(multiple_of_n) };
+    (array_slice, remainder)
+}