Skip to content

Commit acc73e8

Browse files
rbultje authored and kkysen committed
itx: restrict number of columns iterated over based on EOB
1 parent 6c66002 commit acc73e8

File tree

8 files changed

+248
-99
lines changed

8 files changed

+248
-99
lines changed

src/arm/itx.h

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@ decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x16, neon));
4949
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x32, neon));
5050
decl_itx_fn(BF(dav1d_inv_txfm_add_dct_dct_64x64, neon));
5151

52-
static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int bpc) {
52+
static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int bpc,
53+
int *const all_simd)
54+
{
5355
const unsigned flags = dav1d_get_cpu_flags();
5456

5557
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
@@ -77,4 +79,5 @@ static ALWAYS_INLINE void itx_dsp_init_arm(Dav1dInvTxfmDSPContext *const c, int
7779
assign_itx1_fn (R, 64, 16, neon);
7880
assign_itx1_fn (R, 64, 32, neon);
7981
assign_itx1_fn ( , 64, 64, neon);
82+
*all_simd = 1;
8083
}

src/itx_1d.c

Lines changed: 65 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -89,8 +89,8 @@ inv_dct4_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
8989
c[3 * stride] = CLIP(t0 - t3);
9090
}
9191

92-
void dav1d_inv_dct4_1d_c(int32_t *const c, const ptrdiff_t stride,
93-
const int min, const int max)
92+
static void inv_dct4_1d_c(int32_t *const c, const ptrdiff_t stride,
93+
const int min, const int max)
9494
{
9595
inv_dct4_1d_internal_c(c, stride, min, max, 0);
9696
}
@@ -142,8 +142,8 @@ inv_dct8_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
142142
c[7 * stride] = CLIP(t0 - t7);
143143
}
144144

145-
void dav1d_inv_dct8_1d_c(int32_t *const c, const ptrdiff_t stride,
146-
const int min, const int max)
145+
static void inv_dct8_1d_c(int32_t *const c, const ptrdiff_t stride,
146+
const int min, const int max)
147147
{
148148
inv_dct8_1d_internal_c(c, stride, min, max, 0);
149149
}
@@ -237,8 +237,8 @@ inv_dct16_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
237237
c[15 * stride] = CLIP(t0 - t15a);
238238
}
239239

240-
void dav1d_inv_dct16_1d_c(int32_t *const c, const ptrdiff_t stride,
241-
const int min, const int max)
240+
static void inv_dct16_1d_c(int32_t *const c, const ptrdiff_t stride,
241+
const int min, const int max)
242242
{
243243
inv_dct16_1d_internal_c(c, stride, min, max, 0);
244244
}
@@ -427,14 +427,14 @@ inv_dct32_1d_internal_c(int32_t *const c, const ptrdiff_t stride,
427427
c[31 * stride] = CLIP(t0 - t31);
428428
}
429429

430-
void dav1d_inv_dct32_1d_c(int32_t *const c, const ptrdiff_t stride,
431-
const int min, const int max)
430+
static void inv_dct32_1d_c(int32_t *const c, const ptrdiff_t stride,
431+
const int min, const int max)
432432
{
433433
inv_dct32_1d_internal_c(c, stride, min, max, 0);
434434
}
435435

436-
void dav1d_inv_dct64_1d_c(int32_t *const c, const ptrdiff_t stride,
437-
const int min, const int max)
436+
static void inv_dct64_1d_c(int32_t *const c, const ptrdiff_t stride,
437+
const int min, const int max)
438438
{
439439
assert(stride > 0);
440440
inv_dct32_1d_internal_c(c, stride << 1, min, max, 1);
@@ -962,13 +962,13 @@ inv_adst16_1d_internal_c(const int32_t *const in, const ptrdiff_t in_s,
962962
}
963963

964964
#define inv_adst_1d(sz) \
965-
void dav1d_inv_adst##sz##_1d_c(int32_t *const c, const ptrdiff_t stride, \
966-
const int min, const int max) \
965+
static void inv_adst##sz##_1d_c(int32_t *const c, const ptrdiff_t stride, \
966+
const int min, const int max) \
967967
{ \
968968
inv_adst##sz##_1d_internal_c(c, stride, min, max, c, stride); \
969969
} \
970-
void dav1d_inv_flipadst##sz##_1d_c(int32_t *const c, const ptrdiff_t stride, \
971-
const int min, const int max) \
970+
static void inv_flipadst##sz##_1d_c(int32_t *const c, const ptrdiff_t stride, \
971+
const int min, const int max) \
972972
{ \
973973
inv_adst##sz##_1d_internal_c(c, stride, min, max, \
974974
&c[(sz - 1) * stride], -stride); \
@@ -980,8 +980,8 @@ inv_adst_1d(16)
980980

981981
#undef inv_adst_1d
982982

983-
void dav1d_inv_identity4_1d_c(int32_t *const c, const ptrdiff_t stride,
984-
const int min, const int max)
983+
static void inv_identity4_1d_c(int32_t *const c, const ptrdiff_t stride,
984+
const int min, const int max)
985985
{
986986
assert(stride > 0);
987987
for (int i = 0; i < 4; i++) {
@@ -990,16 +990,16 @@ void dav1d_inv_identity4_1d_c(int32_t *const c, const ptrdiff_t stride,
990990
}
991991
}
992992

993-
void dav1d_inv_identity8_1d_c(int32_t *const c, const ptrdiff_t stride,
994-
const int min, const int max)
993+
static void inv_identity8_1d_c(int32_t *const c, const ptrdiff_t stride,
994+
const int min, const int max)
995995
{
996996
assert(stride > 0);
997997
for (int i = 0; i < 8; i++)
998998
c[stride * i] *= 2;
999999
}
10001000

1001-
void dav1d_inv_identity16_1d_c(int32_t *const c, const ptrdiff_t stride,
1002-
const int min, const int max)
1001+
static void inv_identity16_1d_c(int32_t *const c, const ptrdiff_t stride,
1002+
const int min, const int max)
10031003
{
10041004
assert(stride > 0);
10051005
for (int i = 0; i < 16; i++) {
@@ -1008,14 +1008,57 @@ void dav1d_inv_identity16_1d_c(int32_t *const c, const ptrdiff_t stride,
10081008
}
10091009
}
10101010

1011-
void dav1d_inv_identity32_1d_c(int32_t *const c, const ptrdiff_t stride,
1012-
const int min, const int max)
1011+
static void inv_identity32_1d_c(int32_t *const c, const ptrdiff_t stride,
1012+
const int min, const int max)
10131013
{
10141014
assert(stride > 0);
10151015
for (int i = 0; i < 32; i++)
10161016
c[stride * i] *= 4;
10171017
}
10181018

1019+
const itx_1d_fn dav1d_tx1d_fns[N_TX_SIZES][N_TX_1D_TYPES] = {
1020+
[TX_4X4] = {
1021+
[DCT] = inv_dct4_1d_c,
1022+
[ADST] = inv_adst4_1d_c,
1023+
[FLIPADST] = inv_flipadst4_1d_c,
1024+
[IDENTITY] = inv_identity4_1d_c,
1025+
}, [TX_8X8] = {
1026+
[DCT] = inv_dct8_1d_c,
1027+
[ADST] = inv_adst8_1d_c,
1028+
[FLIPADST] = inv_flipadst8_1d_c,
1029+
[IDENTITY] = inv_identity8_1d_c,
1030+
}, [TX_16X16] = {
1031+
[DCT] = inv_dct16_1d_c,
1032+
[ADST] = inv_adst16_1d_c,
1033+
[FLIPADST] = inv_flipadst16_1d_c,
1034+
[IDENTITY] = inv_identity16_1d_c,
1035+
}, [TX_32X32] = {
1036+
[DCT] = inv_dct32_1d_c,
1037+
[IDENTITY] = inv_identity32_1d_c,
1038+
}, [TX_64X64] = {
1039+
[DCT] = inv_dct64_1d_c,
1040+
},
1041+
};
1042+
1043+
const uint8_t /* enum Tx1dType */ dav1d_tx1d_types[N_TX_TYPES][2] = {
1044+
[DCT_DCT] = { DCT, DCT },
1045+
[ADST_DCT] = { ADST, DCT },
1046+
[DCT_ADST] = { DCT, ADST },
1047+
[ADST_ADST] = { ADST, ADST },
1048+
[FLIPADST_DCT] = { FLIPADST, DCT },
1049+
[DCT_FLIPADST] = { DCT, FLIPADST },
1050+
[FLIPADST_FLIPADST] = { FLIPADST, FLIPADST },
1051+
[ADST_FLIPADST] = { ADST, FLIPADST },
1052+
[FLIPADST_ADST] = { FLIPADST, ADST },
1053+
[IDTX] = { IDENTITY, IDENTITY },
1054+
[V_DCT] = { DCT, IDENTITY },
1055+
[H_DCT] = { IDENTITY, DCT },
1056+
[V_ADST] = { ADST, IDENTITY },
1057+
[H_ADST] = { IDENTITY, ADST },
1058+
[V_FLIPADST] = { FLIPADST, IDENTITY },
1059+
[H_FLIPADST] = { IDENTITY, FLIPADST },
1060+
};
1061+
10191062
#if !(HAVE_ASM && TRIM_DSP_FUNCTIONS && ( \
10201063
ARCH_AARCH64 || \
10211064
(ARCH_ARM && (defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32))) \

src/itx_1d.h

Lines changed: 12 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -28,31 +28,25 @@
2828
#include <stddef.h>
2929
#include <stdint.h>
3030

31+
#include "src/levels.h"
32+
3133
#ifndef DAV1D_SRC_ITX_1D_H
3234
#define DAV1D_SRC_ITX_1D_H
3335

36+
enum Tx1dType {
37+
DCT,
38+
ADST,
39+
IDENTITY,
40+
FLIPADST,
41+
N_TX_1D_TYPES,
42+
};
43+
3444
#define decl_itx_1d_fn(name) \
3545
void (name)(int32_t *c, ptrdiff_t stride, int min, int max)
3646
typedef decl_itx_1d_fn(*itx_1d_fn);
3747

38-
decl_itx_1d_fn(dav1d_inv_dct4_1d_c);
39-
decl_itx_1d_fn(dav1d_inv_dct8_1d_c);
40-
decl_itx_1d_fn(dav1d_inv_dct16_1d_c);
41-
decl_itx_1d_fn(dav1d_inv_dct32_1d_c);
42-
decl_itx_1d_fn(dav1d_inv_dct64_1d_c);
43-
44-
decl_itx_1d_fn(dav1d_inv_adst4_1d_c);
45-
decl_itx_1d_fn(dav1d_inv_adst8_1d_c);
46-
decl_itx_1d_fn(dav1d_inv_adst16_1d_c);
47-
48-
decl_itx_1d_fn(dav1d_inv_flipadst4_1d_c);
49-
decl_itx_1d_fn(dav1d_inv_flipadst8_1d_c);
50-
decl_itx_1d_fn(dav1d_inv_flipadst16_1d_c);
51-
52-
decl_itx_1d_fn(dav1d_inv_identity4_1d_c);
53-
decl_itx_1d_fn(dav1d_inv_identity8_1d_c);
54-
decl_itx_1d_fn(dav1d_inv_identity16_1d_c);
55-
decl_itx_1d_fn(dav1d_inv_identity32_1d_c);
48+
EXTERN const itx_1d_fn dav1d_tx1d_fns[N_TX_SIZES][N_TX_1D_TYPES];
49+
EXTERN const uint8_t /* enum Tx1dType */ dav1d_tx1d_types[N_TX_TYPES][2];
5650

5751
void dav1d_inv_wht4_1d_c(int32_t *c, ptrdiff_t stride);
5852

0 commit comments

Comments
 (0)