Skip to content

Conversation

fbossen
Copy link
Collaborator

@fbossen fbossen commented Jun 30, 2025

Main item is "restrict number of columns iterated over based on EOB" which applies to the generic code for transforms (not asm).

@fbossen fbossen force-pushed the bp-1.5.0-0011 branch 2 times, most recently from 08f9149 to 036b750 Compare June 30, 2025 02:45
@kkysen kkysen self-requested a review July 7, 2025 15:49
@kkysen kkysen added the backports Backports from dav1d label Aug 16, 2025
@kkysen kkysen changed the base branch from main to kkysen/backport-trim-dav1d_inv_wht4_1d_c August 16, 2025 06:37
@kkysen kkysen force-pushed the kkysen/backport-trim-dav1d_inv_wht4_1d_c branch from c7de0bb to 6c66002 Compare August 16, 2025 06:55
@kkysen kkysen force-pushed the kkysen/backport-trim-dav1d_inv_wht4_1d_c branch from 6c66002 to 595b3da Compare August 16, 2025 07:14
Comment on lines +47 to +49
pub const fn const_get(&'static self) -> T {
self.0
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub const fn const_get(&'static self) -> T {
self.0
}
/// This doesn't [`assert_unchecked`] that the value is in bounds (for optimization)
/// because [`Self::in_bounds`] is not a `const fn` due to its usage of `trait`s.
/// However, because this is meant to be called in a `const` context,
/// everything should be known already, and the [`assert_unchecked`]
/// should be unnecessary for optimization.
pub const fn const_get(self) -> T {
self.0
}

let eob = eob as usize;
// in first 1d itx
let last_nonzero_col = if second == Identity && first != Identity {
std::cmp::min(sh - 1, eob)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::cmp::min(sh - 1, eob)
cmp::min(sh - 1, eob)

let mut last_nonzero_col_from_eob: [u8; S] = [0; S];

let mut max_col: u8 = 0;
const_for!(n in 0..S => {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we name S N instead? Unless there's a reason you used S.

const_for!(n in 0..S => {
let rc = scan[n].const_get();
let rcx = (rc & (h - 1) as u16) as u8;
max_col = if rcx > max_col { rcx } else {max_col };
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we have a const_max! macro. Or at least const_min! and we can add const_max!.

last_nonzero_col_from_eob
}

static LAST_NONZERO_COL_FROM_EOB_4X4: [u8; 16] = init_tbl(&SCAN_4X4.0, 4);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you leave the array lengths still in their factored state?

Comment on lines +120 to +134
(Identity, 4) => rav1d_inv_identity4_1d_c,
(Identity, 8) => rav1d_inv_identity8_1d_c,
(Identity, 16) => rav1d_inv_identity16_1d_c,
(Identity, 32) => rav1d_inv_identity32_1d_c,
(Dct, 4) => rav1d_inv_dct4_1d_c,
(Dct, 8) => rav1d_inv_dct8_1d_c,
(Dct, 16) => rav1d_inv_dct16_1d_c,
(Dct, 32) => rav1d_inv_dct32_1d_c,
(Dct, 64) => rav1d_inv_dct64_1d_c,
(Adst, 4) => rav1d_inv_adst4_1d_c,
(Adst, 8) => rav1d_inv_adst8_1d_c,
(Adst, 16) => rav1d_inv_adst16_1d_c,
(FlipAdst, 4) => rav1d_inv_flipadst4_1d_c,
(FlipAdst, 8) => rav1d_inv_flipadst8_1d_c,
(FlipAdst, 16) => rav1d_inv_flipadst16_1d_c,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove the rav1d_* prefixes? I think dav1d removed its dav1d_* prefixes for all of these.

Comment on lines +233 to +245
const fn init_tbl<const S: usize>(scan: &'static [Scan; S], h: u16) -> [u8; S] {
let mut last_nonzero_col_from_eob: [u8; S] = [0; S];

let mut max_col: u8 = 0;
const_for!(n in 0..S => {
let rc = scan[n].const_get();
let rcx = (rc & (h - 1) as u16) as u8;
max_col = if rcx > max_col { rcx } else {max_col };
last_nonzero_col_from_eob[n] = max_col;
});

last_nonzero_col_from_eob
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const fn init_tbl<const S: usize>(scan: &'static [Scan; S], h: u16) -> [u8; S] {
let mut last_nonzero_col_from_eob: [u8; S] = [0; S];
let mut max_col: u8 = 0;
const_for!(n in 0..S => {
let rc = scan[n].const_get();
let rcx = (rc & (h - 1) as u16) as u8;
max_col = if rcx > max_col { rcx } else {max_col };
last_nonzero_col_from_eob[n] = max_col;
});
last_nonzero_col_from_eob
}
const fn init_tbl<const N: usize>(scan: &'static [Scan; N], h: u16) -> [u8; N] {
let mut last_nonzero_col_from_eob = [0; N];
let mut max_col = 0;
const_for!(n in 0..N => {
let rc = scan[n].const_get();
let rcx = (rc & (h - 1)) as u8;
max_col = const_max!(max_col, rcx);
last_nonzero_col_from_eob[n] = max_col;
});
last_nonzero_col_from_eob
}

with

macro_rules! const_max {
    ($a:expr, $b:expr) => {{
        let a = $a;
        let b = $b;
        if a < b {
            a
        } else {
            b
        }
    }};
}

/// [`std::cmp::max`] is not `const` since it would need `const` `trait` `fn`s,
/// so this implements it with a macro instead so it can remain `const`.
pub(crate) use const_max;

Comment on lines +262 to +282
pub static DAV1D_LAST_NONZERO_COL_FROM_EOB: [&'static [u8]; TxfmSize::COUNT] = [
&LAST_NONZERO_COL_FROM_EOB_4X4,
&LAST_NONZERO_COL_FROM_EOB_8X8,
&LAST_NONZERO_COL_FROM_EOB_16X16,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_4X8,
&LAST_NONZERO_COL_FROM_EOB_8X4,
&LAST_NONZERO_COL_FROM_EOB_8X16,
&LAST_NONZERO_COL_FROM_EOB_16X8,
&LAST_NONZERO_COL_FROM_EOB_16X32,
&LAST_NONZERO_COL_FROM_EOB_32X16,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_4X16,
&LAST_NONZERO_COL_FROM_EOB_16X4,
&LAST_NONZERO_COL_FROM_EOB_8X32,
&LAST_NONZERO_COL_FROM_EOB_32X8,
&LAST_NONZERO_COL_FROM_EOB_16X32,
&LAST_NONZERO_COL_FROM_EOB_32X16,
];
Copy link
Collaborator

@kkysen kkysen Aug 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to use enum_map! here so that the order isn't implicit.

Suggested change
pub static DAV1D_LAST_NONZERO_COL_FROM_EOB: [&'static [u8]; TxfmSize::COUNT] = [
&LAST_NONZERO_COL_FROM_EOB_4X4,
&LAST_NONZERO_COL_FROM_EOB_8X8,
&LAST_NONZERO_COL_FROM_EOB_16X16,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_4X8,
&LAST_NONZERO_COL_FROM_EOB_8X4,
&LAST_NONZERO_COL_FROM_EOB_8X16,
&LAST_NONZERO_COL_FROM_EOB_16X8,
&LAST_NONZERO_COL_FROM_EOB_16X32,
&LAST_NONZERO_COL_FROM_EOB_32X16,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_32X32,
&LAST_NONZERO_COL_FROM_EOB_4X16,
&LAST_NONZERO_COL_FROM_EOB_16X4,
&LAST_NONZERO_COL_FROM_EOB_8X32,
&LAST_NONZERO_COL_FROM_EOB_32X8,
&LAST_NONZERO_COL_FROM_EOB_16X32,
&LAST_NONZERO_COL_FROM_EOB_32X16,
];
pub static RAV1D_LAST_NONZERO_COL_FROM_EOB: enum_map_ty!(TxfmSize, &'static [u8]) = enum_map!(TxfmSize => &'static [u8]; match key {
S4x4 => LAST_NONZERO_COL_FROM_EOB_4X4.as_slice(),
S8x8 => LAST_NONZERO_COL_FROM_EOB_8X8.as_slice(),
S16x16 => LAST_NONZERO_COL_FROM_EOB_16X16.as_slice(),
S32x32 => LAST_NONZERO_COL_FROM_EOB_32X32.as_slice(),
S64x64 => LAST_NONZERO_COL_FROM_EOB_32X32.as_slice(),
R4x8 => LAST_NONZERO_COL_FROM_EOB_4X8.as_slice(),
R8x4 => LAST_NONZERO_COL_FROM_EOB_8X4.as_slice(),
R8x16 => LAST_NONZERO_COL_FROM_EOB_8X16.as_slice(),
R16x8 => LAST_NONZERO_COL_FROM_EOB_16X8.as_slice(),
R16x32 => LAST_NONZERO_COL_FROM_EOB_16X32.as_slice(),
R32x16 => LAST_NONZERO_COL_FROM_EOB_32X16.as_slice(),
R32x64 => LAST_NONZERO_COL_FROM_EOB_32X32.as_slice(),
R64x32 => LAST_NONZERO_COL_FROM_EOB_32X32.as_slice(),
R4x16 => LAST_NONZERO_COL_FROM_EOB_4X16.as_slice(),
R16x4 => LAST_NONZERO_COL_FROM_EOB_16X4.as_slice(),
R8x32 => LAST_NONZERO_COL_FROM_EOB_8X32.as_slice(),
R32x8 => LAST_NONZERO_COL_FROM_EOB_32X8.as_slice(),
R16x64 => LAST_NONZERO_COL_FROM_EOB_16X32.as_slice(),
R64x16 => LAST_NONZERO_COL_FROM_EOB_32X16.as_slice(),
});

This requires a couple extra small impls, so I can add those if you want.

Also, we should name it RAV1D_*.

kkysen added a commit that referenced this pull request Aug 26, 2025
kkysen added a commit that referenced this pull request Aug 26, 2025
@kkysen kkysen deleted the branch memorysafety:kkysen/backport-trim-dav1d_inv_wht4_1d_c August 26, 2025 17:45
@kkysen kkysen closed this Aug 26, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backports Backports from dav1d
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants