Skip to content

Commit db89b12

Browse files
Frank Bossen, kkysen
authored and committed
itx: backport restrict number of columns iterated over based on EOB from dav1d 1.5.0
1 parent acc73e8 commit db89b12

File tree

3 files changed

+146
-83
lines changed

3 files changed

+146
-83
lines changed

src/in_range.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ where
2323

2424
impl<T, const MIN: u128, const MAX: u128> InRange<T, MIN, MAX>
2525
where
26-
T: TryFrom<u128, Error: Debug> + PartialEq + Eq + PartialOrd + Ord,
26+
T: TryFrom<u128, Error: Debug> + PartialEq + Eq + PartialOrd + Ord + Copy,
2727
{
2828
fn in_bounds(&self) -> bool {
2929
*self >= Self::min() && *self <= Self::max()
@@ -43,6 +43,10 @@ where
4343
unsafe { assert_unchecked(self.in_bounds()) };
4444
self.0
4545
}
46+
47+
pub const fn const_get(&'static self) -> T {
48+
self.0
49+
}
4650
}
4751

4852
impl<T, const MIN: u128, const MAX: u128> Default for InRange<T, MIN, MAX>

src/itx.rs

Lines changed: 89 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::num::NonZeroUsize;
22
use std::{cmp, slice};
33

4+
use cfg_if::cfg_if;
45
use strum::EnumCount;
56

67
use crate::cpu::CpuFlags;
@@ -32,7 +33,9 @@ use crate::levels::{
3233
FLIPADST_ADST, FLIPADST_DCT, FLIPADST_FLIPADST, H_ADST, H_DCT, H_FLIPADST, IDTX,
3334
N_TX_TYPES_PLUS_LL, V_ADST, V_DCT, V_FLIPADST, WHT_WHT,
3435
};
36+
use crate::scan::DAV1D_LAST_NONZERO_COL_FROM_EOB;
3537
use crate::strided::Strided as _;
38+
use crate::tables::DAV1D_TXFM_DIMENSIONS;
3639
use crate::wrap_fn_ptr::wrap_fn_ptr;
3740

3841
pub type Itx1dFn = fn(c: &mut [i32], stride: NonZeroUsize, min: i32, max: i32);
@@ -42,16 +45,18 @@ fn inv_txfm_add<BD: BitDepth>(
4245
dst: Rav1dPictureDataComponentOffset,
4346
coeff: &mut [BD::Coef],
4447
eob: i32,
45-
w: usize,
46-
h: usize,
48+
tx: TxfmSize,
4749
shift: u8,
48-
first_1d_fn: Itx1dFn,
49-
second_1d_fn: Itx1dFn,
50-
has_dc_only: bool,
50+
txtp: TxfmType,
5151
bd: BD,
5252
) {
5353
let bitdepth_max = bd.bitdepth_max().as_::<i32>();
5454

55+
let t_dim = &DAV1D_TXFM_DIMENSIONS[tx as usize];
56+
let w = 4 * t_dim.w as usize;
57+
let h = 4 * t_dim.h as usize;
58+
let has_dc_only = txtp == DCT_DCT;
59+
5560
assert!(w >= 4 && w <= 64);
5661
assert!(h >= 4 && h <= 64);
5762
assert!(eob >= 0);
@@ -78,6 +83,69 @@ fn inv_txfm_add<BD: BitDepth>(
7883
return;
7984
}
8085

86+
#[derive(PartialEq, Clone, Copy)]
87+
enum Type {
88+
Identity,
89+
Dct,
90+
Adst,
91+
FlipAdst,
92+
}
93+
use Type::*;
94+
// For some reason, this is flipped.
95+
let (second, first) = match txtp {
96+
IDTX => (Identity, Identity),
97+
DCT_DCT => (Dct, Dct),
98+
ADST_DCT => (Adst, Dct),
99+
FLIPADST_DCT => (FlipAdst, Dct),
100+
H_DCT => (Identity, Dct),
101+
DCT_ADST => (Dct, Adst),
102+
ADST_ADST => (Adst, Adst),
103+
FLIPADST_ADST => (FlipAdst, Adst),
104+
DCT_FLIPADST => (Dct, FlipAdst),
105+
ADST_FLIPADST => (Adst, FlipAdst),
106+
FLIPADST_FLIPADST => (FlipAdst, FlipAdst),
107+
V_DCT => (Dct, Identity),
108+
H_ADST => (Identity, Adst),
109+
H_FLIPADST => (Identity, FlipAdst),
110+
V_ADST => (Adst, Identity),
111+
V_FLIPADST => (FlipAdst, Identity),
112+
113+
WHT_WHT if (w, h) == (4, 4) => {
114+
cfg_if! {
115+
if #[cfg(not(all(feature = "asm", target_feature = "neon")))] {
116+
return inv_txfm_add_wht_wht_4x4_rust(dst, coeff, bd)
117+
} else {
118+
unreachable!()
119+
}
120+
}
121+
}
122+
_ => unreachable!(),
123+
};
124+
125+
fn resolve_1d_fn(r#type: Type, n: usize) -> Itx1dFn {
126+
match (r#type, n) {
127+
(Identity, 4) => rav1d_inv_identity4_1d_c,
128+
(Identity, 8) => rav1d_inv_identity8_1d_c,
129+
(Identity, 16) => rav1d_inv_identity16_1d_c,
130+
(Identity, 32) => rav1d_inv_identity32_1d_c,
131+
(Dct, 4) => rav1d_inv_dct4_1d_c,
132+
(Dct, 8) => rav1d_inv_dct8_1d_c,
133+
(Dct, 16) => rav1d_inv_dct16_1d_c,
134+
(Dct, 32) => rav1d_inv_dct32_1d_c,
135+
(Dct, 64) => rav1d_inv_dct64_1d_c,
136+
(Adst, 4) => rav1d_inv_adst4_1d_c,
137+
(Adst, 8) => rav1d_inv_adst8_1d_c,
138+
(Adst, 16) => rav1d_inv_adst16_1d_c,
139+
(FlipAdst, 4) => rav1d_inv_flipadst4_1d_c,
140+
(FlipAdst, 8) => rav1d_inv_flipadst8_1d_c,
141+
(FlipAdst, 16) => rav1d_inv_flipadst16_1d_c,
142+
_ => unreachable!(),
143+
}
144+
}
145+
146+
let first_1d_fn = resolve_1d_fn(first, w);
147+
let second_1d_fn = resolve_1d_fn(second, h);
148+
81149
let sh = cmp::min(h, 32);
82150
let sw = cmp::min(w, 32);
83151

@@ -96,8 +164,18 @@ fn inv_txfm_add<BD: BitDepth>(
96164
let col_clip_max = !col_clip_min;
97165

98166
let mut tmp = [0; 64 * 64];
99-
let mut c = &mut tmp[..];
100-
for y in 0..sh {
167+
let mut c = &mut tmp[..sh * w];
168+
let eob = eob as usize;
169+
// in first 1d itx
170+
let last_nonzero_col = if second == Identity && first != Identity {
171+
std::cmp::min(sh - 1, eob)
172+
} else if first == Identity && second != Identity {
173+
eob >> (t_dim.lw + 2)
174+
} else {
175+
DAV1D_LAST_NONZERO_COL_FROM_EOB[tx as usize][eob as usize] as usize
176+
};
177+
assert!(last_nonzero_col < sh);
178+
for y in 0..=last_nonzero_col {
101179
if is_rect2 {
102180
for x in 0..sw {
103181
c[x] = coeff[y + x * sh].as_::<i32>() * 181 + 128 >> 8;
@@ -110,6 +188,8 @@ fn inv_txfm_add<BD: BitDepth>(
110188
first_1d_fn(c, 1.try_into().unwrap(), row_clip_min, row_clip_max);
111189
c = &mut c[w..];
112190
}
191+
// fill remaining values in slice `c` with 0
192+
c.fill(0);
113193

114194
coeff.fill(0.into());
115195
for i in 0..w * sh {
@@ -162,82 +242,9 @@ fn inv_txfm_add_rust<const W: usize, const H: usize, const TYPE: TxfmType, BD: B
162242
(64, 64) => 2,
163243
_ => unreachable!(),
164244
};
165-
let has_dc_only = TYPE == DCT_DCT;
166-
167-
enum Type {
168-
Identity,
169-
Dct,
170-
Adst,
171-
FlipAdst,
172-
}
173-
use Type::*;
174-
// For some reason, this is flipped.
175-
let (second, first) = match TYPE {
176-
IDTX => (Identity, Identity),
177-
DCT_DCT => (Dct, Dct),
178-
ADST_DCT => (Adst, Dct),
179-
FLIPADST_DCT => (FlipAdst, Dct),
180-
H_DCT => (Identity, Dct),
181-
DCT_ADST => (Dct, Adst),
182-
ADST_ADST => (Adst, Adst),
183-
FLIPADST_ADST => (FlipAdst, Adst),
184-
DCT_FLIPADST => (Dct, FlipAdst),
185-
ADST_FLIPADST => (Adst, FlipAdst),
186-
FLIPADST_FLIPADST => (FlipAdst, FlipAdst),
187-
V_DCT => (Dct, Identity),
188-
H_ADST => (Identity, Adst),
189-
H_FLIPADST => (Identity, FlipAdst),
190-
V_ADST => (Adst, Identity),
191-
V_FLIPADST => (FlipAdst, Identity),
192-
193-
WHT_WHT if (W, H) == (4, 4) => {
194-
cfg_if! {
195-
if #[cfg(not(all(feature = "asm", target_feature = "neon")))] {
196-
return inv_txfm_add_wht_wht_4x4_rust(dst, coeff, bd)
197-
} else {
198-
unreachable!()
199-
}
200-
}
201-
}
202-
_ => unreachable!(),
203-
};
204-
205-
fn resolve_1d_fn(r#type: Type, n: usize) -> Itx1dFn {
206-
match (r#type, n) {
207-
(Identity, 4) => rav1d_inv_identity4_1d_c,
208-
(Identity, 8) => rav1d_inv_identity8_1d_c,
209-
(Identity, 16) => rav1d_inv_identity16_1d_c,
210-
(Identity, 32) => rav1d_inv_identity32_1d_c,
211-
(Dct, 4) => rav1d_inv_dct4_1d_c,
212-
(Dct, 8) => rav1d_inv_dct8_1d_c,
213-
(Dct, 16) => rav1d_inv_dct16_1d_c,
214-
(Dct, 32) => rav1d_inv_dct32_1d_c,
215-
(Dct, 64) => rav1d_inv_dct64_1d_c,
216-
(Adst, 4) => rav1d_inv_adst4_1d_c,
217-
(Adst, 8) => rav1d_inv_adst8_1d_c,
218-
(Adst, 16) => rav1d_inv_adst16_1d_c,
219-
(FlipAdst, 4) => rav1d_inv_flipadst4_1d_c,
220-
(FlipAdst, 8) => rav1d_inv_flipadst8_1d_c,
221-
(FlipAdst, 16) => rav1d_inv_flipadst16_1d_c,
222-
_ => unreachable!(),
223-
}
224-
}
225245

226-
let first_1d_fn = resolve_1d_fn(first, W);
227-
let second_1d_fn = resolve_1d_fn(second, H);
228-
229-
inv_txfm_add(
230-
dst,
231-
coeff,
232-
eob,
233-
W,
234-
H,
235-
shift,
236-
first_1d_fn,
237-
second_1d_fn,
238-
has_dc_only,
239-
bd,
240-
)
246+
let tx = TxfmSize::from_wh(W, H);
247+
inv_txfm_add(dst, coeff, eob, tx, shift, TYPE, bd)
241248
}
242249

243250
/// # Safety

src/scan.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use strum::EnumCount;
22

33
use crate::align::Align32;
4+
use crate::const_fn::const_for;
45
use crate::in_range::InRange;
56
use crate::levels::TxfmSize;
67

@@ -228,3 +229,54 @@ pub static DAV1D_SCANS: [&'static [Scan]; TxfmSize::COUNT] = [
228229
&SCAN_16X32.0,
229230
&SCAN_32X16.0,
230231
];
232+
233+
const fn init_tbl<const S: usize>(scan: &'static [Scan; S], h: u16) -> [u8; S] {
234+
let mut last_nonzero_col_from_eob: [u8; S] = [0; S];
235+
236+
let mut max_col: u8 = 0;
237+
const_for!(n in 0..S => {
238+
let rc = scan[n].const_get();
239+
let rcx = (rc & (h - 1) as u16) as u8;
240+
max_col = if rcx > max_col { rcx } else {max_col };
241+
last_nonzero_col_from_eob[n] = max_col;
242+
});
243+
244+
last_nonzero_col_from_eob
245+
}
246+
247+
static LAST_NONZERO_COL_FROM_EOB_4X4: [u8; 16] = init_tbl(&SCAN_4X4.0, 4);
248+
static LAST_NONZERO_COL_FROM_EOB_8X8: [u8; 64] = init_tbl(&SCAN_8X8.0, 8);
249+
static LAST_NONZERO_COL_FROM_EOB_16X16: [u8; 256] = init_tbl(&SCAN_16X16.0, 16);
250+
static LAST_NONZERO_COL_FROM_EOB_32X32: [u8; 1024] = init_tbl(&SCAN_32X32.0, 32);
251+
static LAST_NONZERO_COL_FROM_EOB_4X8: [u8; 32] = init_tbl(&SCAN_4X8.0, 8);
252+
static LAST_NONZERO_COL_FROM_EOB_8X4: [u8; 32] = init_tbl(&SCAN_8X4.0, 4);
253+
static LAST_NONZERO_COL_FROM_EOB_8X16: [u8; 128] = init_tbl(&SCAN_8X16.0, 16);
254+
static LAST_NONZERO_COL_FROM_EOB_16X8: [u8; 128] = init_tbl(&SCAN_16X8.0, 8);
255+
static LAST_NONZERO_COL_FROM_EOB_16X32: [u8; 512] = init_tbl(&SCAN_16X32.0, 32);
256+
static LAST_NONZERO_COL_FROM_EOB_32X16: [u8; 512] = init_tbl(&SCAN_32X16.0, 16);
257+
static LAST_NONZERO_COL_FROM_EOB_4X16: [u8; 64] = init_tbl(&SCAN_4X16.0, 16);
258+
static LAST_NONZERO_COL_FROM_EOB_16X4: [u8; 64] = init_tbl(&SCAN_16X4.0, 4);
259+
static LAST_NONZERO_COL_FROM_EOB_8X32: [u8; 256] = init_tbl(&SCAN_8X32.0, 32);
260+
static LAST_NONZERO_COL_FROM_EOB_32X8: [u8; 256] = init_tbl(&SCAN_32X8.0, 8);
261+
262+
pub static DAV1D_LAST_NONZERO_COL_FROM_EOB: [&'static [u8]; TxfmSize::COUNT] = [
263+
&LAST_NONZERO_COL_FROM_EOB_4X4,
264+
&LAST_NONZERO_COL_FROM_EOB_8X8,
265+
&LAST_NONZERO_COL_FROM_EOB_16X16,
266+
&LAST_NONZERO_COL_FROM_EOB_32X32,
267+
&LAST_NONZERO_COL_FROM_EOB_32X32,
268+
&LAST_NONZERO_COL_FROM_EOB_4X8,
269+
&LAST_NONZERO_COL_FROM_EOB_8X4,
270+
&LAST_NONZERO_COL_FROM_EOB_8X16,
271+
&LAST_NONZERO_COL_FROM_EOB_16X8,
272+
&LAST_NONZERO_COL_FROM_EOB_16X32,
273+
&LAST_NONZERO_COL_FROM_EOB_32X16,
274+
&LAST_NONZERO_COL_FROM_EOB_32X32,
275+
&LAST_NONZERO_COL_FROM_EOB_32X32,
276+
&LAST_NONZERO_COL_FROM_EOB_4X16,
277+
&LAST_NONZERO_COL_FROM_EOB_16X4,
278+
&LAST_NONZERO_COL_FROM_EOB_8X32,
279+
&LAST_NONZERO_COL_FROM_EOB_32X8,
280+
&LAST_NONZERO_COL_FROM_EOB_16X32,
281+
&LAST_NONZERO_COL_FROM_EOB_32X16,
282+
];

0 commit comments

Comments
 (0)