Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/arm/itx.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x16, neon));
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x32, neon));
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x64, neon));

static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int bpc) {
static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int bpc,
int *const all_simd)
{
const unsigned flags = dav1d_get_cpu_flags();

if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
Expand Down Expand Up @@ -77,4 +79,5 @@ static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int
assign_itx1_fn (R, 64, 16, neon);
assign_itx1_fn (R, 64, 32, neon);
assign_itx1_fn ( , 64, 64, neon);
*all_simd = 1;
}
6 changes: 5 additions & 1 deletion src/in_range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ where

impl<T, const MIN: u128, const MAX: u128> InRange<T, MIN, MAX>
where
T: TryFrom<u128, Error: Debug> + PartialEq + Eq + PartialOrd + Ord,
T: TryFrom<u128, Error: Debug> + PartialEq + Eq + PartialOrd + Ord + Copy,
{
fn in_bounds(&self) -> bool {
*self >= Self::min() && *self <= Self::max()
Expand All @@ -43,6 +43,10 @@ where
unsafe { assert_unchecked(self.in_bounds()) };
self.0
}

pub const fn const_get(&'static self) -> T {
self.0
}
Comment on lines +47 to +49
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub const fn const_get(&'static self) -> T {
self.0
}
/// This doesn't [`assert_unchecked`] that the value is in bounds (for optimization)
/// because [`Self::in_bounds`] is not a `const fn` due to its usage of `trait`s.
/// However, because this is meant to be called in a `const` context,
/// everything should be known already, and the [`assert_unchecked`]
/// should be unnecessary for optimization.
pub const fn const_get(self) -> T {
self.0
}

}

impl<T, const MIN: u128, const MAX: u128> Default for InRange<T, MIN, MAX>
Expand Down
158 changes: 82 additions & 76 deletions src/itx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ use crate::levels::{
FLIPADST_ADST, FLIPADST_DCT, FLIPADST_FLIPADST, H_ADST, H_DCT, H_FLIPADST, IDTX,
N_TX_TYPES_PLUS_LL, V_ADST, V_DCT, V_FLIPADST, WHT_WHT,
};
use crate::scan::DAV1D_LAST_NONZERO_COL_FROM_EOB;
use crate::strided::Strided as _;
use crate::tables::DAV1D_TXFM_DIMENSIONS;
use crate::wrap_fn_ptr::wrap_fn_ptr;

pub type Itx1dFn = fn(c: &mut [i32], stride: NonZeroUsize, min: i32, max: i32);
Expand All @@ -42,16 +44,18 @@ fn inv_txfm_add<BD: BitDepth>(
dst: Rav1dPictureDataComponentOffset,
coeff: &mut [BD::Coef],
eob: i32,
w: usize,
h: usize,
tx: TxfmSize,
shift: u8,
first_1d_fn: Itx1dFn,
second_1d_fn: Itx1dFn,
has_dc_only: bool,
txtp: TxfmType,
bd: BD,
) {
let bitdepth_max = bd.bitdepth_max().as_::<i32>();

let t_dim = &DAV1D_TXFM_DIMENSIONS[tx as usize];
let w = 4 * t_dim.w as usize;
let h = 4 * t_dim.h as usize;
let has_dc_only = txtp == DCT_DCT;

assert!(w >= 4 && w <= 64);
assert!(h >= 4 && h <= 64);
assert!(eob >= 0);
Expand All @@ -78,6 +82,63 @@ fn inv_txfm_add<BD: BitDepth>(
return;
}

#[derive(PartialEq, Clone, Copy)]
enum Type {
Identity,
Dct,
Adst,
FlipAdst,
}
use Type::*;
// For some reason, this is flipped.
let (second, first) = match txtp {
IDTX => (Identity, Identity),
DCT_DCT => (Dct, Dct),
ADST_DCT => (Adst, Dct),
FLIPADST_DCT => (FlipAdst, Dct),
H_DCT => (Identity, Dct),
DCT_ADST => (Dct, Adst),
ADST_ADST => (Adst, Adst),
FLIPADST_ADST => (FlipAdst, Adst),
DCT_FLIPADST => (Dct, FlipAdst),
ADST_FLIPADST => (Adst, FlipAdst),
FLIPADST_FLIPADST => (FlipAdst, FlipAdst),
V_DCT => (Dct, Identity),
H_ADST => (Identity, Adst),
H_FLIPADST => (Identity, FlipAdst),
V_ADST => (Adst, Identity),
V_FLIPADST => (FlipAdst, Identity),

#[cfg(not(all(feature = "asm", target_feature = "neon")))]
WHT_WHT if (w, h) == (4, 4) => return inv_txfm_add_wht_wht_4x4_rust(dst, coeff, bd),

_ => unreachable!(),
};

fn resolve_1d_fn(r#type: Type, n: usize) -> Itx1dFn {
match (r#type, n) {
(Identity, 4) => rav1d_inv_identity4_1d_c,
(Identity, 8) => rav1d_inv_identity8_1d_c,
(Identity, 16) => rav1d_inv_identity16_1d_c,
(Identity, 32) => rav1d_inv_identity32_1d_c,
(Dct, 4) => rav1d_inv_dct4_1d_c,
(Dct, 8) => rav1d_inv_dct8_1d_c,
(Dct, 16) => rav1d_inv_dct16_1d_c,
(Dct, 32) => rav1d_inv_dct32_1d_c,
(Dct, 64) => rav1d_inv_dct64_1d_c,
(Adst, 4) => rav1d_inv_adst4_1d_c,
(Adst, 8) => rav1d_inv_adst8_1d_c,
(Adst, 16) => rav1d_inv_adst16_1d_c,
(FlipAdst, 4) => rav1d_inv_flipadst4_1d_c,
(FlipAdst, 8) => rav1d_inv_flipadst8_1d_c,
(FlipAdst, 16) => rav1d_inv_flipadst16_1d_c,
Comment on lines +120 to +134
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove the rav1d_* prefixes? I think dav1d removed its dav1d_* prefixes for all of these.

_ => unreachable!(),
}
}

let first_1d_fn = resolve_1d_fn(first, w);
let second_1d_fn = resolve_1d_fn(second, h);

let sh = cmp::min(h, 32);
let sw = cmp::min(w, 32);

Expand All @@ -96,8 +157,18 @@ fn inv_txfm_add<BD: BitDepth>(
let col_clip_max = !col_clip_min;

let mut tmp = [0; 64 * 64];
let mut c = &mut tmp[..];
for y in 0..sh {
let mut c = &mut tmp[..sh * w];
let eob = eob as usize;
// in first 1d itx
let last_nonzero_col = if second == Identity && first != Identity {
std::cmp::min(sh - 1, eob)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::cmp::min(sh - 1, eob)
cmp::min(sh - 1, eob)

} else if first == Identity && second != Identity {
eob >> (t_dim.lw + 2)
} else {
DAV1D_LAST_NONZERO_COL_FROM_EOB[tx as usize][eob as usize] as usize
};
assert!(last_nonzero_col < sh);
for y in 0..=last_nonzero_col {
if is_rect2 {
for x in 0..sw {
c[x] = coeff[y + x * sh].as_::<i32>() * 181 + 128 >> 8;
Expand All @@ -110,6 +181,8 @@ fn inv_txfm_add<BD: BitDepth>(
first_1d_fn(c, 1.try_into().unwrap(), row_clip_min, row_clip_max);
c = &mut c[w..];
}
// fill remaining values in slice `c` with 0
c.fill(0);

coeff.fill(0.into());
for i in 0..w * sh {
Expand Down Expand Up @@ -162,76 +235,9 @@ fn inv_txfm_add_rust<const W: usize, const H: usize, const TYPE: TxfmType, BD: B
(64, 64) => 2,
_ => unreachable!(),
};
let has_dc_only = TYPE == DCT_DCT;

enum Type {
Identity,
Dct,
Adst,
FlipAdst,
}
use Type::*;
// For some reason, this is flipped.
let (second, first) = match TYPE {
IDTX => (Identity, Identity),
DCT_DCT => (Dct, Dct),
ADST_DCT => (Adst, Dct),
FLIPADST_DCT => (FlipAdst, Dct),
H_DCT => (Identity, Dct),
DCT_ADST => (Dct, Adst),
ADST_ADST => (Adst, Adst),
FLIPADST_ADST => (FlipAdst, Adst),
DCT_FLIPADST => (Dct, FlipAdst),
ADST_FLIPADST => (Adst, FlipAdst),
FLIPADST_FLIPADST => (FlipAdst, FlipAdst),
V_DCT => (Dct, Identity),
H_ADST => (Identity, Adst),
H_FLIPADST => (Identity, FlipAdst),
V_ADST => (Adst, Identity),
V_FLIPADST => (FlipAdst, Identity),

#[cfg(not(all(feature = "asm", target_feature = "neon")))]
WHT_WHT if (W, H) == (4, 4) => return inv_txfm_add_wht_wht_4x4_rust(dst, coeff, bd),

_ => unreachable!(),
};

fn resolve_1d_fn(r#type: Type, n: usize) -> Itx1dFn {
match (r#type, n) {
(Identity, 4) => rav1d_inv_identity4_1d_c,
(Identity, 8) => rav1d_inv_identity8_1d_c,
(Identity, 16) => rav1d_inv_identity16_1d_c,
(Identity, 32) => rav1d_inv_identity32_1d_c,
(Dct, 4) => rav1d_inv_dct4_1d_c,
(Dct, 8) => rav1d_inv_dct8_1d_c,
(Dct, 16) => rav1d_inv_dct16_1d_c,
(Dct, 32) => rav1d_inv_dct32_1d_c,
(Dct, 64) => rav1d_inv_dct64_1d_c,
(Adst, 4) => rav1d_inv_adst4_1d_c,
(Adst, 8) => rav1d_inv_adst8_1d_c,
(Adst, 16) => rav1d_inv_adst16_1d_c,
(FlipAdst, 4) => rav1d_inv_flipadst4_1d_c,
(FlipAdst, 8) => rav1d_inv_flipadst8_1d_c,
(FlipAdst, 16) => rav1d_inv_flipadst16_1d_c,
_ => unreachable!(),
}
}

let first_1d_fn = resolve_1d_fn(first, W);
let second_1d_fn = resolve_1d_fn(second, H);

inv_txfm_add(
dst,
coeff,
eob,
W,
H,
shift,
first_1d_fn,
second_1d_fn,
has_dc_only,
bd,
)
let tx = TxfmSize::from_wh(W, H);
inv_txfm_add(dst, coeff, eob, tx, shift, TYPE, bd)
}

/// # Safety
Expand Down
Loading
Loading