diff --git a/src/itx.rs b/src/itx.rs index e512b67e2..a2646d917 100644 --- a/src/itx.rs +++ b/src/itx.rs @@ -18,12 +18,14 @@ use crate::include::common::intops::iclip; use crate::include::dav1d::picture::{ FFISafeRav1dPictureDataComponentOffset, Rav1dPictureDataComponentOffset, }; +#[cfg(not(all(feature = "asm", target_feature = "neon")))] +use crate::itx_1d::rav1d_inv_wht4_1d_c; use crate::itx_1d::{ rav1d_inv_adst16_1d_c, rav1d_inv_adst4_1d_c, rav1d_inv_adst8_1d_c, rav1d_inv_dct16_1d_c, rav1d_inv_dct32_1d_c, rav1d_inv_dct4_1d_c, rav1d_inv_dct64_1d_c, rav1d_inv_dct8_1d_c, rav1d_inv_flipadst16_1d_c, rav1d_inv_flipadst4_1d_c, rav1d_inv_flipadst8_1d_c, rav1d_inv_identity16_1d_c, rav1d_inv_identity32_1d_c, rav1d_inv_identity4_1d_c, - rav1d_inv_identity8_1d_c, rav1d_inv_wht4_1d_c, + rav1d_inv_identity8_1d_c, }; use crate::levels::{ TxfmSize, TxfmType, ADST_ADST, ADST_DCT, ADST_FLIPADST, DCT_ADST, DCT_DCT, DCT_FLIPADST, @@ -187,7 +189,10 @@ fn inv_txfm_add_rust (Identity, FlipAdst), V_ADST => (Adst, Identity), V_FLIPADST => (FlipAdst, Identity), + + #[cfg(not(all(feature = "asm", target_feature = "neon")))] WHT_WHT if (W, H) == (4, 4) => return inv_txfm_add_wht_wht_4x4_rust(dst, coeff, bd), + _ => unreachable!(), }; @@ -288,6 +293,7 @@ pub struct Rav1dInvTxfmDSPContext { pub itxfm_add: [[itxfm::Fn; N_TX_TYPES_PLUS_LL]; TxfmSize::COUNT], } +#[cfg(not(all(feature = "asm", target_feature = "neon")))] fn inv_txfm_add_wht_wht_4x4_rust( dst: Rav1dPictureDataComponentOffset, coeff: &mut [BD::Coef], diff --git a/src/itx_1d.c b/src/itx_1d.c index ca14fc8c4..8f75c653a 100644 --- a/src/itx_1d.c +++ b/src/itx_1d.c @@ -1016,6 +1016,10 @@ void dav1d_inv_identity32_1d_c(int32_t *const c, const ptrdiff_t stride, c[stride * i] *= 4; } +#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \ + ARCH_AARCH64 || \ + (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \ +)) void dav1d_inv_wht4_1d_c(int32_t *const c, const ptrdiff_t stride) { assert(stride > 0); const int in0 = c[0 * stride], in1 = c[1 * stride]; @@ -1032,3 +1036,4 @@ void dav1d_inv_wht4_1d_c(int32_t *const c, const ptrdiff_t stride) { c[2 * stride] = t1; c[3 * stride] = t2 + t1; } +#endif diff --git a/src/itx_1d.rs b/src/itx_1d.rs index 1dd0b453a..7970c335c 100644 --- a/src/itx_1d.rs +++ b/src/itx_1d.rs @@ -1133,6 +1133,7 @@ pub fn rav1d_inv_identity32_1d_c(c: &mut [i32], stride: NonZeroUsize, _min: c_in } } +#[cfg(not(all(feature = "asm", target_feature = "neon")))] pub fn rav1d_inv_wht4_1d_c(c: &mut [i32], stride: NonZeroUsize) { let stride = stride.get(); diff --git a/src/itx_tmpl.c b/src/itx_tmpl.c index d3859892d..1a37c3d54 100644 --- a/src/itx_tmpl.c +++ b/src/itx_tmpl.c @@ -159,6 +159,10 @@ inv_txfm_fn64(64, 16, 2) inv_txfm_fn64(64, 32, 1) inv_txfm_fn64(64, 64, 2) +#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \ + ARCH_AARCH64 || \ + (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \ +)) static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride, coef *const coeff, const int eob HIGHBD_DECL_SUFFIX) @@ -179,6 +183,7 @@ static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride, for (int x = 0; x < 4; x++) dst[x] = iclip_pixel(dst[x] + *c++); } +#endif #if HAVE_ASM #if ARCH_AARCH64 || ARCH_ARM @@ -232,7 +237,12 @@ COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) { c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \ inv_txfm_add_identity_adst_##w##x##h##_c; \ +#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \ + ARCH_AARCH64 || \ + (ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \ +)) c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c; +#endif assign_itx_all_fn84( 4, 4, ); assign_itx_all_fn84( 4, 8, R); assign_itx_all_fn84( 4, 16, R);