From 585a8144cc101ce0e4a9a1e353082f2167f74773 Mon Sep 17 00:00:00 2001 From: v0lt Date: Sat, 29 Jun 2024 09:17:24 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20ffmpeg=20n7.1-dev-1585-g0b330d8642.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Changelog.Rus.txt | 4 +- docs/Changelog.txt | 4 +- src/ExtLib/ffmpeg/libavcodec/aac/aacdec_lpd.c | 7 +- .../ffmpeg/libavcodec/aac/aacdec_usac.c | 122 ++- src/ExtLib/ffmpeg/libavcodec/aacsbr.h | 11 + .../ffmpeg/libavcodec/aacsbr_template.c | 232 ++++- src/ExtLib/ffmpeg/libavcodec/cbs_h264.h | 20 + src/ExtLib/ffmpeg/libavcodec/cbs_h2645.c | 34 +- .../libavcodec/cbs_h264_syntax_template.c | 34 + src/ExtLib/ffmpeg/libavcodec/cbs_h266.h | 18 +- .../libavcodec/cbs_h266_syntax_template.c | 36 +- src/ExtLib/ffmpeg/libavcodec/cbs_sei.h | 30 + .../libavcodec/cbs_sei_syntax_template.c | 62 ++ src/ExtLib/ffmpeg/libavcodec/dovi_rpudec.c | 2 +- src/ExtLib/ffmpeg/libavcodec/mjpegdec.c | 3 +- src/ExtLib/ffmpeg/libavcodec/mpeg12dec.c | 12 +- src/ExtLib/ffmpeg/libavcodec/sbr.h | 32 +- src/ExtLib/ffmpeg/libavcodec/version.h | 2 +- src/ExtLib/ffmpeg/libavcodec/vvc.h | 3 + src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c | 19 +- src/ExtLib/ffmpeg/libavcodec/vvc/ctu.h | 9 +- src/ExtLib/ffmpeg/libavcodec/vvc/dec.c | 16 +- src/ExtLib/ffmpeg/libavcodec/vvc/dec.h | 9 +- src/ExtLib/ffmpeg/libavcodec/vvc/filter.c | 931 +++++++++--------- src/ExtLib/ffmpeg/libavcodec/vvc/inter.c | 7 - src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c | 21 +- src/ExtLib/ffmpeg/libavcodec/vvc/mvs.h | 4 +- src/ExtLib/ffmpeg/libavcodec/vvc/ps.c | 45 + src/ExtLib/ffmpeg/libavcodec/vvc/ps.h | 6 + src/ExtLib/ffmpeg/libavcodec/vvc/refs.c | 2 +- src/ExtLib/ffmpeg/libavcodec/vvc/thread.c | 9 +- .../ffmpeg/libavcodec/x86/vvc/vvc_alf.asm | 85 +- src/ExtLib/ffmpeg/libavutil/executor.c | 28 +- src/ExtLib/ffmpeg/libavutil/executor.h | 2 +- src/ExtLib/ffmpeg/libavutil/stereo3d.c | 11 + src/ExtLib/ffmpeg/libavutil/stereo3d.h | 22 +- src/ExtLib/ffmpeg/libavutil/version.h | 2 +- src/ExtLib/ffmpeg/libswscale/output.c | 18 +- src/ExtLib/ffmpeg/libswscale/yuv2rgb.c | 813 +++++++-------- 39 files changed, 1600 insertions(+), 1127 deletions(-) diff --git a/docs/Changelog.Rus.txt b/docs/Changelog.Rus.txt index fe5e7159f6..0feccd8a0b 100644 --- a/docs/Changelog.Rus.txt +++ b/docs/Changelog.Rus.txt @@ -11,8 +11,8 @@ Обновлен японский перевод (автор tsubasanouta). Обновлены библиотеки: - ffmpeg git-n7.1-dev-1509-g0c0e7ec81e; - vvdec git-v2.3.0-24-gad8f4bb. + ffmpeg n7.1-dev-1585-g0b330d8642; + vvdec v2.3.0-24-gad8f4bb. 1.7.2 - 2024-06-07 diff --git a/docs/Changelog.txt b/docs/Changelog.txt index 03a36d8fef..6986ee93fb 100644 --- a/docs/Changelog.txt +++ b/docs/Changelog.txt @@ -11,8 +11,8 @@ Updated Korean translation (by Hackjjang). Updated Japanese translation (by tsubasanouta). Updated libraries: - ffmpeg git-n7.1-dev-1509-g0c0e7ec81e; - vvdec git-v2.3.0-24-gad8f4bb. + ffmpeg n7.1-dev-1585-g0b330d8642; + vvdec v2.3.0-24-gad8f4bb. 1.7.2 - 2024-06-07 diff --git a/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_lpd.c b/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_lpd.c index 796edd2ab5..91a3268889 100644 --- a/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_lpd.c +++ b/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_lpd.c @@ -99,7 +99,7 @@ static void parse_qn(GetBitContext *gb, int *qn, int nk_mode, int no_qn) static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv, int nk_mode, int no_qn) { - int idx, n, nk; + int n, nk; int qn[2]; parse_qn(gb, qn, nk_mode, no_qn); @@ -114,7 +114,7 @@ static int parse_codebook_idx(GetBitContext *gb, uint32_t *kv, } } - idx = get_bits(gb, 4*n); + skip_bits(gb, 4*n); if (nk > 0) for (int i = 0; i < 8; i++) @@ -145,7 +145,6 @@ int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac, int k; const uint8_t *mod; int first_ldp_flag; - int first_tcx_flag; ce->ldp.acelp_core_mode = get_bits(gb, 3); ce->ldp.lpd_mode = get_bits(gb, 5); @@ -157,7 +156,6 @@ int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac, mod = ff_aac_lpd_mode_tab[ce->ldp.lpd_mode]; first_ldp_flag = !ce->ldp.core_mode_last; - first_tcx_flag = 1; if (first_ldp_flag) ce->ldp.last_lpd_mode = -1; /* last_ldp_mode is a **STATEFUL** value */ @@ -179,7 +177,6 @@ int ff_aac_ldp_parse_channel_stream(AACDecContext *ac, AACUSACConfig *usac, // parse_tcx_coding(); ce->ldp.last_lpd_mode = mod[k]; k += (1 << (mod[k] - 1)); - first_tcx_flag = 0; } } diff --git a/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c b/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c index e5504117d0..4856c1786b 100644 --- a/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c +++ b/src/ExtLib/ffmpeg/libavcodec/aac/aacdec_usac.c @@ -23,6 +23,8 @@ #include "aacdec_lpd.h" #include "aacdec_ac.h" +#include "libavcodec/aacsbr.h" + #include "libavcodec/aactab.h" #include "libavutil/mem.h" #include "libavcodec/mpeg4audio.h" @@ -145,7 +147,8 @@ static int decode_loudness_set(AACDecContext *ac, AACUSACConfig *usac, return 0; } -static void decode_usac_sbr_data(AACUsacElemConfig *e, GetBitContext *gb) +static int decode_usac_sbr_data(AACDecContext *ac, + AACUsacElemConfig *e, GetBitContext *gb) { uint8_t header_extra1; uint8_t header_extra2; @@ -153,6 +156,10 @@ static void decode_usac_sbr_data(AACUsacElemConfig *e, GetBitContext *gb) e->sbr.harmonic_sbr = get_bits1(gb); /* harmonicSBR */ e->sbr.bs_intertes = get_bits1(gb); /* bs_interTes */ e->sbr.bs_pvc = get_bits1(gb); /* bs_pvc */ + if (e->sbr.harmonic_sbr || e->sbr.bs_intertes || e->sbr.bs_pvc) { + avpriv_report_missing_feature(ac->avctx, "AAC USAC eSBR"); + return AVERROR_PATCHWELCOME; + } e->sbr.dflt.start_freq = get_bits(gb, 4); /* dflt_start_freq */ e->sbr.dflt.stop_freq = get_bits(gb, 4); /* dflt_stop_freq */ @@ -179,6 +186,8 @@ static void decode_usac_sbr_data(AACUsacElemConfig *e, GetBitContext *gb) e->sbr.dflt.interpol_freq = get_bits1(gb); /* dflt_interpol_freq */ e->sbr.dflt.smoothing_mode = get_bits1(gb); /* dflt_smoothing_mode */ } + + return 0; } static void decode_usac_element_core(AACUsacElemConfig *e, @@ -190,13 +199,17 @@ static void decode_usac_element_core(AACUsacElemConfig *e, e->sbr.ratio = sbr_ratio; } -static void decode_usac_element_pair(AACUsacElemConfig *e, GetBitContext *gb) +static int decode_usac_element_pair(AACDecContext *ac, + AACUsacElemConfig *e, GetBitContext *gb) { e->stereo_config_index = 0; if (e->sbr.ratio) { - decode_usac_sbr_data(e, gb); + int ret = decode_usac_sbr_data(ac, e, gb); + if (ret < 0) + return ret; e->stereo_config_index = get_bits(gb, 2); } + if (e->stereo_config_index) { e->mps.freq_res = get_bits(gb, 3); /* bsFreqRes */ e->mps.fixed_gain = get_bits(gb, 3); /* bsFixedGainDMX */ @@ -216,6 +229,8 @@ static void decode_usac_element_pair(AACUsacElemConfig *e, GetBitContext *gb) if (e->mps.temp_shape_config == 2) e->mps.env_quant_mode = get_bits1(gb); /* bsEnvQuantMode */ } + + return 0; } static int decode_usac_extension(AACDecContext *ac, AACUsacElemConfig *e, @@ -294,6 +309,9 @@ int ff_aac_usac_reset_state(AACDecContext *ac, OutputConfiguration *oc) AACUsacStereo *us = &che->us; memset(us, 0, sizeof(*us)); + if (e->sbr.ratio) + ff_aac_sbr_config_usac(ac, che, e); + for (int j = 0; j < ch; j++) { SingleChannelElement *sce = &che->ch[ch]; AACUsacElemData *ue = &sce->ue; @@ -320,6 +338,7 @@ int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx, uint8_t freq_idx; uint8_t channel_config_idx; int nb_channels = 0; + int ratio_mult, ratio_dec; int samplerate; int sbr_ratio; MPEG4AudioConfig *m4ac = &oc->m4ac; @@ -329,6 +348,9 @@ int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx, int map_pos_set = 0; uint8_t layout_map[MAX_ELEM_ID*4][3] = { 0 }; + if (!ac) + return AVERROR_PATCHWELCOME; + memset(usac, 0, sizeof(*usac)); freq_idx = get_bits(gb, 5); /* usacSamplingFrequencyIndex */ @@ -340,8 +362,6 @@ int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx, return AVERROR(EINVAL); } - m4ac->sample_rate = avctx->sample_rate = samplerate; - usac->core_sbr_frame_len_idx = get_bits(gb, 3); /* coreSbrFrameLengthIndex */ m4ac->frame_length_short = usac->core_sbr_frame_len_idx == 0 || usac->core_sbr_frame_len_idx == 2; @@ -354,7 +374,26 @@ int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx, usac->core_sbr_frame_len_idx == 4 ? 1 : 0; + if (sbr_ratio == 2) { + ratio_mult = 8; + ratio_dec = 3; + } else if (sbr_ratio == 3) { + ratio_mult = 2; + ratio_dec = 1; + } else if (sbr_ratio == 4) { + ratio_mult = 4; + ratio_dec = 1; + } else { + ratio_mult = 1; + ratio_dec = 1; + } + + avctx->sample_rate = samplerate; + m4ac->ext_sample_rate = samplerate; + m4ac->sample_rate = (samplerate * ratio_dec) / ratio_mult; + m4ac->sampling_index = ff_aac_sample_rate_idx(m4ac->sample_rate); + m4ac->sbr = sbr_ratio > 0; channel_config_idx = get_bits(gb, 5); /* channelConfigurationIndex */ if (!channel_config_idx) { @@ -426,8 +465,11 @@ int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx, case ID_USAC_SCE: /* SCE */ /* UsacCoreConfig */ decode_usac_element_core(e, gb, sbr_ratio); - if (e->sbr.ratio > 0) - decode_usac_sbr_data(e, gb); + if (e->sbr.ratio > 0) { + ret = decode_usac_sbr_data(ac, e, gb); + if (ret < 0) + return ret; + } layout_map[map_count][0] = TYPE_SCE; layout_map[map_count][1] = elem_id[0]++; if (!map_pos_set) @@ -437,7 +479,9 @@ int ff_aac_usac_config_decode(AACDecContext *ac, AVCodecContext *avctx, case ID_USAC_CPE: /* UsacChannelPairElementConf */ /* UsacCoreConfig */ decode_usac_element_core(e, gb, sbr_ratio); - decode_usac_element_pair(e, gb); + ret = decode_usac_element_pair(ac, e, gb); + if (ret < 0) + return ret; layout_map[map_count][0] = TYPE_CPE; layout_map[map_count][1] = elem_id[1]++; if (!map_pos_set) @@ -1307,13 +1351,14 @@ static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac, int ret; int arith_reset_flag; AACUsacStereo *us = &che->us; + int core_nb_channels = nb_channels; /* Local symbols */ uint8_t global_gain; us->common_window = 0; - for (int ch = 0; ch < nb_channels; ch++) { + for (int ch = 0; ch < core_nb_channels; ch++) { SingleChannelElement *sce = &che->ch[ch]; AACUsacElemData *ue = &sce->ue; @@ -1323,13 +1368,16 @@ static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac, ue->core_mode = get_bits1(gb); } - if (nb_channels == 2) { + if (nb_channels > 1 && ec->stereo_config_index == 1) + core_nb_channels = 1; + + if (core_nb_channels == 2) { ret = decode_usac_stereo_info(ac, usac, ec, che, gb, indep_flag); if (ret) return ret; } - for (int ch = 0; ch < nb_channels; ch++) { + for (int ch = 0; ch < core_nb_channels; ch++) { SingleChannelElement *sce = &che->ch[ch]; IndividualChannelStream *ics = &sce->ics; AACUsacElemData *ue = &sce->ue; @@ -1341,7 +1389,7 @@ static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac, continue; } - if ((nb_channels == 1) || + if ((core_nb_channels == 1) || (che->ch[0].ue.core_mode != che->ch[1].ue.core_mode)) ue->tns_data_present = get_bits1(gb); @@ -1424,7 +1472,29 @@ static int decode_usac_core_coder(AACDecContext *ac, AACUSACConfig *usac, } } - spectrum_decode(ac, usac, che, nb_channels); + if (ec->sbr.ratio) { + int sbr_ch = nb_channels; + if (nb_channels == 2 && + !(ec->stereo_config_index == 0 || ec->stereo_config_index == 3)) + sbr_ch = 1; + + ret = ff_aac_sbr_decode_usac_data(ac, che, ec, gb, sbr_ch, indep_flag); + if (ret < 0) + return ret; + + if (ec->stereo_config_index) { + avpriv_report_missing_feature(ac->avctx, "AAC USAC Mps212"); + return AVERROR_PATCHWELCOME; + } + } + + spectrum_decode(ac, usac, che, core_nb_channels); + + if (ac->oc[1].m4ac.sbr > 0) { + ac->proc.sbr_apply(ac, che, nb_channels == 2 ? TYPE_CPE : TYPE_SCE, + che->ch[0].output, + che->ch[1].output); + } return 0; } @@ -1591,9 +1661,29 @@ int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac, int indep_flag, samples = 0; int audio_found = 0; int elem_id[3 /* SCE, CPE, LFE */] = { 0, 0, 0 }; - AVFrame *frame = ac->frame; + int ratio_mult, ratio_dec; + AACUSACConfig *usac = &ac->oc[1].usac; + int sbr_ratio = usac->core_sbr_frame_len_idx == 2 ? 2 : + usac->core_sbr_frame_len_idx == 3 ? 3 : + usac->core_sbr_frame_len_idx == 4 ? 1 : + 0; + + if (sbr_ratio == 2) { + ratio_mult = 8; + ratio_dec = 3; + } else if (sbr_ratio == 3) { + ratio_mult = 2; + ratio_dec = 1; + } else if (sbr_ratio == 4) { + ratio_mult = 4; + ratio_dec = 1; + } else { + ratio_mult = 1; + ratio_dec = 1; + } + ff_aac_output_configure(ac, ac->oc[1].layout_map, ac->oc[1].layout_map_tags, ac->oc[1].status, 0); @@ -1660,8 +1750,10 @@ int ff_aac_usac_decode_frame(AVCodecContext *avctx, AACDecContext *ac, if (audio_found) samples = ac->oc[1].m4ac.frame_length_short ? 768 : 1024; + samples = (samples * ratio_mult) / ratio_dec; + if (ac->oc[1].status && audio_found) { - avctx->sample_rate = ac->oc[1].m4ac.sample_rate; + avctx->sample_rate = ac->oc[1].m4ac.ext_sample_rate; avctx->frame_size = samples; ac->oc[1].status = OC_LOCKED; } diff --git a/src/ExtLib/ffmpeg/libavcodec/aacsbr.h b/src/ExtLib/ffmpeg/libavcodec/aacsbr.h index b83e18a7f9..cae9aeb78a 100644 --- a/src/ExtLib/ffmpeg/libavcodec/aacsbr.h +++ b/src/ExtLib/ffmpeg/libavcodec/aacsbr.h @@ -88,6 +88,17 @@ int ff_aac_sbr_decode_extension(AACDecContext *ac, ChannelElement *che, int ff_aac_sbr_decode_extension_fixed(AACDecContext *ac, ChannelElement *che, GetBitContext *gb, int crc, int cnt, int id_aac); +/** Due to channel allocation not being known upon SBR parameter transmission, + * supply the parameters separately. + * Functionally identical to ff_aac_sbr_decode_extension() */ +int ff_aac_sbr_config_usac(AACDecContext *ac, ChannelElement *che, + AACUsacElemConfig *ue); + +/** Decode frame SBR data, USAC. */ +int ff_aac_sbr_decode_usac_data(AACDecContext *ac, ChannelElement *che, + AACUsacElemConfig *ue, GetBitContext *gb, + int sbr_ch, int indep_flag); + /** Apply one SBR element to one AAC element. */ void ff_aac_sbr_apply(AACDecContext *ac, ChannelElement *che, int id_aac, void /* float */ *L, void /* float */ *R); diff --git a/src/ExtLib/ffmpeg/libavcodec/aacsbr_template.c b/src/ExtLib/ffmpeg/libavcodec/aacsbr_template.c index 420312d207..8bda251213 100644 --- a/src/ExtLib/ffmpeg/libavcodec/aacsbr_template.c +++ b/src/ExtLib/ffmpeg/libavcodec/aacsbr_template.c @@ -57,6 +57,7 @@ av_cold void AAC_RENAME(ff_aac_sbr_init)(void) /** Places SBR in pure upsampling mode. */ static void sbr_turnoff(SpectralBandReplication *sbr) { sbr->start = 0; + sbr->usac = 0; sbr->ready_for_dequant = 0; // Init defults used in pure upsampling mode sbr->kx[1] = 32; //Typo in spec, kx' inits to 32 @@ -184,7 +185,8 @@ static void sbr_make_f_tablelim(SpectralBandReplication *sbr) } } -static unsigned int read_sbr_header(SpectralBandReplication *sbr, GetBitContext *gb) +static unsigned int read_sbr_header(SpectralBandReplication *sbr, + GetBitContext *gb, int is_usac) { unsigned int cnt = get_bits_count(gb); uint8_t bs_header_extra_1; @@ -194,15 +196,20 @@ static unsigned int read_sbr_header(SpectralBandReplication *sbr, GetBitContext sbr->start = 1; sbr->ready_for_dequant = 0; + sbr->usac = is_usac; // Save last spectrum parameters variables to compare to new ones memcpy(&old_spectrum_params, &sbr->spectrum_params, sizeof(SpectrumParameters)); - sbr->bs_amp_res_header = get_bits1(gb); + if (!is_usac) + sbr->bs_amp_res_header = get_bits1(gb); + sbr->spectrum_params.bs_start_freq = get_bits(gb, 4); sbr->spectrum_params.bs_stop_freq = get_bits(gb, 4); - sbr->spectrum_params.bs_xover_band = get_bits(gb, 3); - skip_bits(gb, 2); // bs_reserved + + if (!is_usac) + sbr->spectrum_params.bs_xover_band = get_bits(gb, 3); + skip_bits(gb, 2); // bs_reserved bs_header_extra_1 = get_bits1(gb); bs_header_extra_2 = get_bits1(gb); @@ -645,7 +652,7 @@ static int read_sbr_grid(AACDecContext *ac, SpectralBandReplication *sbr, switch (bs_frame_class = get_bits(gb, 2)) { case FIXFIX: bs_num_env = 1 << get_bits(gb, 2); - if (bs_num_env > 4) { + if (bs_num_env > (sbr->usac ? 8 : 5)) { av_log(ac->avctx, AV_LOG_ERROR, "Invalid bitstream, too many SBR envelopes in FIXFIX type SBR frame: %d\n", bs_num_env); @@ -793,10 +800,26 @@ static void copy_sbr_grid(SBRData *dst, const SBRData *src) { /// Read how the envelope and noise floor data is delta coded static void read_sbr_dtdf(SpectralBandReplication *sbr, GetBitContext *gb, - SBRData *ch_data) + SBRData *ch_data, int indep_flag) { - get_bits1_vector(gb, ch_data->bs_df_env, ch_data->bs_num_env); - get_bits1_vector(gb, ch_data->bs_df_noise, ch_data->bs_num_noise); + if (sbr->usac) { + if (indep_flag) { + ch_data->bs_df_env[0] = 0; + get_bits1_vector(gb, &ch_data->bs_df_env[1], ch_data->bs_num_env - 1); + } else { + get_bits1_vector(gb, ch_data->bs_df_env, ch_data->bs_num_env); + } + + if (indep_flag) { + ch_data->bs_df_noise[0] = 0; + get_bits1_vector(gb, &ch_data->bs_df_noise[1], ch_data->bs_num_noise - 1); + } else { + get_bits1_vector(gb, ch_data->bs_df_noise, ch_data->bs_num_noise); + } + } else { + get_bits1_vector(gb, ch_data->bs_df_env, ch_data->bs_num_env); + get_bits1_vector(gb, ch_data->bs_df_noise, ch_data->bs_num_noise); + } } /// Read inverse filtering data @@ -811,7 +834,7 @@ static void read_sbr_invf(SpectralBandReplication *sbr, GetBitContext *gb, } static int read_sbr_envelope(AACDecContext *ac, SpectralBandReplication *sbr, GetBitContext *gb, - SBRData *ch_data, int ch) + SBRData *ch_data, int ch) { int bits; int i, j, k; @@ -881,6 +904,13 @@ static int read_sbr_envelope(AACDecContext *ac, SpectralBandReplication *sbr, Ge } } } + if (sbr->usac) { + if (sbr->inter_tes) { + ch_data->temp_shape[i] = get_bits(gb, 1); + if (ch_data->temp_shape[i]) + ch_data->temp_shape_mode[i] = get_bits(gb, 2); + } + } } //assign 0th elements of env_facs_q from last elements @@ -975,7 +1005,7 @@ static int read_sbr_single_channel_element(AACDecContext *ac, if (read_sbr_grid(ac, sbr, gb, &sbr->data[0])) return -1; - read_sbr_dtdf(sbr, gb, &sbr->data[0]); + read_sbr_dtdf(sbr, gb, &sbr->data[0], 0); read_sbr_invf(sbr, gb, &sbr->data[0]); if((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) return ret; @@ -1001,8 +1031,8 @@ static int read_sbr_channel_pair_element(AACDecContext *ac, if (read_sbr_grid(ac, sbr, gb, &sbr->data[0])) return -1; copy_sbr_grid(&sbr->data[1], &sbr->data[0]); - read_sbr_dtdf(sbr, gb, &sbr->data[0]); - read_sbr_dtdf(sbr, gb, &sbr->data[1]); + read_sbr_dtdf(sbr, gb, &sbr->data[0], 0); + read_sbr_dtdf(sbr, gb, &sbr->data[1], 0); read_sbr_invf(sbr, gb, &sbr->data[0]); memcpy(sbr->data[1].bs_invf_mode[1], sbr->data[1].bs_invf_mode[0], sizeof(sbr->data[1].bs_invf_mode[0])); memcpy(sbr->data[1].bs_invf_mode[0], sbr->data[0].bs_invf_mode[0], sizeof(sbr->data[1].bs_invf_mode[0])); @@ -1018,8 +1048,8 @@ static int read_sbr_channel_pair_element(AACDecContext *ac, if (read_sbr_grid(ac, sbr, gb, &sbr->data[0]) || read_sbr_grid(ac, sbr, gb, &sbr->data[1])) return -1; - read_sbr_dtdf(sbr, gb, &sbr->data[0]); - read_sbr_dtdf(sbr, gb, &sbr->data[1]); + read_sbr_dtdf(sbr, gb, &sbr->data[0], 0); + read_sbr_dtdf(sbr, gb, &sbr->data[1], 0); read_sbr_invf(sbr, gb, &sbr->data[0]); read_sbr_invf(sbr, gb, &sbr->data[1]); if((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) @@ -1134,7 +1164,7 @@ int AAC_RENAME(ff_aac_sbr_decode_extension)(AACDecContext *ac, ChannelElement *c num_sbr_bits++; if (get_bits1(gb)) // bs_header_flag - num_sbr_bits += read_sbr_header(sbr, gb); + num_sbr_bits += read_sbr_header(sbr, gb, 0); if (sbr->reset) sbr_reset(ac, sbr); @@ -1153,6 +1183,178 @@ int AAC_RENAME(ff_aac_sbr_decode_extension)(AACDecContext *ac, ChannelElement *c return cnt; } +#if !USE_FIXED +static void copy_usac_default_header(SpectralBandReplication *sbr, + AACUsacElemConfig *ue) +{ + sbr->inter_tes = ue->sbr.bs_intertes; + + sbr->spectrum_params.bs_start_freq = ue->sbr.dflt.start_freq; + sbr->spectrum_params.bs_stop_freq = ue->sbr.dflt.stop_freq; + + sbr->spectrum_params.bs_freq_scale = ue->sbr.dflt.freq_scale; + sbr->spectrum_params.bs_alter_scale = ue->sbr.dflt.alter_scale; + sbr->spectrum_params.bs_noise_bands = ue->sbr.dflt.noise_bands; + + sbr->bs_limiter_bands = ue->sbr.dflt.limiter_bands; + sbr->bs_limiter_gains = ue->sbr.dflt.limiter_gains; + sbr->bs_interpol_freq = ue->sbr.dflt.interpol_freq; + sbr->bs_smoothing_mode = ue->sbr.dflt.smoothing_mode; +} + +int ff_aac_sbr_config_usac(AACDecContext *ac, ChannelElement *che, + AACUsacElemConfig *ue) +{ + SpectralBandReplication *sbr = get_sbr(che); + sbr_turnoff(sbr); + return 0; +} + +int ff_aac_sbr_decode_usac_data(AACDecContext *ac, ChannelElement *che, + AACUsacElemConfig *ue, GetBitContext *gb, + int sbr_ch, int indep_flag) +{ + int ret; + SpectralBandReplication *sbr = get_sbr(che); + int info_present = 1; + int header_present = 1; + + sbr->reset = 0; + sbr->usac = 1; + + sbr->sample_rate = ac->oc[1].m4ac.ext_sample_rate; + sbr->id_aac = sbr_ch == 2 ? TYPE_CPE : TYPE_SCE; + + if (!indep_flag) { + info_present = get_bits1(gb); + if (info_present) + header_present = get_bits1(gb); + else + header_present = 0; + } + + if (info_present) { + /* SbrInfo() */ + sbr->bs_amp_res_header = get_bits1(gb); + sbr->spectrum_params.bs_xover_band = get_bits(gb, 4); + sbr->bs_sbr_preprocessing = get_bits1(gb); + /* if (bs_pvc) ... */ + } + + if (header_present) { + if (get_bits1(gb)) { + int old_bs_limiter_bands = sbr->bs_limiter_bands; + SpectrumParameters old_spectrum_params; + memcpy(&old_spectrum_params, &sbr->spectrum_params, + sizeof(SpectrumParameters)); + + copy_usac_default_header(sbr, ue); + // Check if spectrum parameters changed + if (memcmp(&old_spectrum_params, &sbr->spectrum_params, + sizeof(SpectrumParameters))) + sbr->reset = 1; + + if (sbr->bs_limiter_bands != old_bs_limiter_bands && !sbr->reset) + sbr_make_f_tablelim(sbr); + } else { + read_sbr_header(sbr, gb, 1); + } + + sbr->start = 1; + } + + //Save some state from the previous frame. + sbr->kx[0] = sbr->kx[1]; + sbr->m[0] = sbr->m[1]; + sbr->kx_and_m_pushed = 1; + + if (sbr->reset) + sbr_reset(ac, sbr); + + sbr->ready_for_dequant = 1; + + int start = get_bits_count(gb); + + if (sbr_ch == 1) { /* sbr_single_channel_element */ + /* if (harmonicSBR) ... */ + + if (read_sbr_grid(ac, sbr, gb, &sbr->data[0])) + return -1; + + read_sbr_dtdf(sbr, gb, &sbr->data[0], indep_flag); + read_sbr_invf(sbr, gb, &sbr->data[0]); + + if ((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) + return ret; + + if ((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[0], 0)) < 0) + return ret; + + if ((sbr->data[0].bs_add_harmonic_flag = get_bits1(gb))) + get_bits1_vector(gb, sbr->data[0].bs_add_harmonic, sbr->n[1]); + } else if (get_bits1(gb)) { /* bs_coupling == 1 */ + /* if (harmonicSBR) ... */ + + if (read_sbr_grid(ac, sbr, gb, &sbr->data[0])) + return -1; + copy_sbr_grid(&sbr->data[1], &sbr->data[0]); + + read_sbr_dtdf(sbr, gb, &sbr->data[0], indep_flag); + read_sbr_dtdf(sbr, gb, &sbr->data[1], indep_flag); + + read_sbr_invf(sbr, gb, &sbr->data[0]); + memcpy(sbr->data[1].bs_invf_mode[1], sbr->data[1].bs_invf_mode[0], + sizeof(sbr->data[1].bs_invf_mode[0])); + memcpy(sbr->data[1].bs_invf_mode[0], sbr->data[0].bs_invf_mode[0], + sizeof(sbr->data[1].bs_invf_mode[0])); + + if ((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) + return ret; + if ((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[0], 0)) < 0) + return ret; + + if ((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[1], 1)) < 0) + return ret; + if ((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[1], 1)) < 0) + return ret; + + if ((sbr->data[0].bs_add_harmonic_flag = get_bits1(gb))) + get_bits1_vector(gb, sbr->data[0].bs_add_harmonic, sbr->n[1]); + if ((sbr->data[1].bs_add_harmonic_flag = get_bits1(gb))) + get_bits1_vector(gb, sbr->data[1].bs_add_harmonic, sbr->n[1]); + } else { /* bs_coupling == 0 */ + /* if (harmonicSBR) ... */ + if (read_sbr_grid(ac, sbr, gb, &sbr->data[0])) + return -1; + if (read_sbr_grid(ac, sbr, gb, &sbr->data[1])) + return -1; + + read_sbr_dtdf(sbr, gb, &sbr->data[0], indep_flag); + read_sbr_dtdf(sbr, gb, &sbr->data[1], indep_flag); + + read_sbr_invf(sbr, gb, &sbr->data[0]); + read_sbr_invf(sbr, gb, &sbr->data[1]); + + if ((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[0], 0)) < 0) + return ret; + if ((ret = read_sbr_envelope(ac, sbr, gb, &sbr->data[1], 1)) < 0) + return ret; + + if ((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[0], 0)) < 0) + return ret; + if ((ret = read_sbr_noise(ac, sbr, gb, &sbr->data[1], 1)) < 0) + return ret; + + if ((sbr->data[0].bs_add_harmonic_flag = get_bits1(gb))) + get_bits1_vector(gb, sbr->data[0].bs_add_harmonic, sbr->n[1]); + if ((sbr->data[1].bs_add_harmonic_flag = get_bits1(gb))) + get_bits1_vector(gb, sbr->data[1].bs_add_harmonic, sbr->n[1]); + } + + return 0; +} +#endif + /** * Analysis QMF Bank (14496-3 sp04 p206) * diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_h264.h b/src/ExtLib/ffmpeg/libavcodec/cbs_h264.h index db91231337..3763f2492b 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_h264.h +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_h264.h @@ -293,6 +293,26 @@ typedef struct H264RawFilmGrainCharacteristics { uint8_t film_grain_characteristics_repetition_period; } H264RawFilmGrainCharacteristics; +typedef struct H264RawSEIFramePackingArrangement { + uint32_t frame_packing_arrangement_id; + uint8_t frame_packing_arrangement_cancel_flag; + uint8_t frame_packing_arrangement_type; + uint8_t quincunx_sampling_flag; + uint8_t content_interpretation_type; + uint8_t spatial_flipping_flag; + uint8_t frame0_flipped_flag; + uint8_t field_views_flag; + uint8_t current_frame_is_frame0_flag; + uint8_t frame0_self_contained_flag; + uint8_t frame1_self_contained_flag; + uint8_t frame0_grid_position_x; + uint8_t frame0_grid_position_y; + uint8_t frame1_grid_position_x; + uint8_t frame1_grid_position_y; + uint16_t frame_packing_arrangement_repetition_period; + uint8_t frame_packing_arrangement_extension_flag; +} H264RawSEIFramePackingArrangement; + typedef struct H264RawSEIDisplayOrientation { uint8_t display_orientation_cancel_flag; uint8_t hor_flip; diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_h2645.c b/src/ExtLib/ffmpeg/libavcodec/cbs_h2645.c index e2389f124e..5ec781ddab 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_h2645.c +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_h2645.c @@ -2128,6 +2128,18 @@ static const SEIMessageTypeDescriptor cbs_sei_common_types[] = { sizeof(SEIRawUserDataUnregistered), SEI_MESSAGE_RW(sei, user_data_unregistered), }, + { + SEI_TYPE_FRAME_PACKING_ARRANGEMENT, + 1, 0, + sizeof(SEIRawFramePackingArrangement), + SEI_MESSAGE_RW(sei, frame_packing_arrangement), + }, + { + SEI_TYPE_DECODED_PICTURE_HASH, + 0, 1, + sizeof(SEIRawDecodedPictureHash), + SEI_MESSAGE_RW(sei, decoded_picture_hash), + }, { SEI_TYPE_MASTERING_DISPLAY_COLOUR_VOLUME, 1, 0, @@ -2186,6 +2198,12 @@ static const SEIMessageTypeDescriptor cbs_sei_h264_types[] = { sizeof(H264RawFilmGrainCharacteristics), SEI_MESSAGE_RW(h264, film_grain_characteristics), }, + { + SEI_TYPE_FRAME_PACKING_ARRANGEMENT, + 1, 0, + sizeof(H264RawSEIFramePackingArrangement), + SEI_MESSAGE_RW(h264, sei_frame_packing_arrangement), + }, { SEI_TYPE_DISPLAY_ORIENTATION, 1, 0, @@ -2260,12 +2278,6 @@ static const SEIMessageTypeDescriptor cbs_sei_h265_types[] = { }; static const SEIMessageTypeDescriptor cbs_sei_h266_types[] = { - { - SEI_TYPE_DECODED_PICTURE_HASH, - 0, 1, - sizeof(H266RawSEIDecodedPictureHash), - SEI_MESSAGE_RW(h266, sei_decoded_picture_hash), - }, SEI_MESSAGE_TYPE_END }; @@ -2275,11 +2287,6 @@ const SEIMessageTypeDescriptor *ff_cbs_sei_find_type(CodedBitstreamContext *ctx, const SEIMessageTypeDescriptor *codec_list; int i; - for (i = 0; cbs_sei_common_types[i].type >= 0; i++) { - if (cbs_sei_common_types[i].type == payload_type) - return &cbs_sei_common_types[i]; - } - switch (ctx->codec->codec_id) { case AV_CODEC_ID_H264: codec_list = cbs_sei_h264_types; @@ -2299,5 +2306,10 @@ const SEIMessageTypeDescriptor *ff_cbs_sei_find_type(CodedBitstreamContext *ctx, return &codec_list[i]; } + for (i = 0; cbs_sei_common_types[i].type >= 0; i++) { + if (cbs_sei_common_types[i].type == payload_type) + return &cbs_sei_common_types[i]; + } + return NULL; } diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_h264_syntax_template.c b/src/ExtLib/ffmpeg/libavcodec/cbs_h264_syntax_template.c index 4d2d303722..9d0386c24d 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_h264_syntax_template.c +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_h264_syntax_template.c @@ -801,6 +801,40 @@ SEI_FUNC(film_grain_characteristics, (CodedBitstreamContext *ctx, RWContext *rw, return 0; } +SEI_FUNC(sei_frame_packing_arrangement, (CodedBitstreamContext *ctx, RWContext *rw, + H264RawSEIFramePackingArrangement *current, + SEIMessageState *sei)) +{ + int err; + + HEADER("Frame Packing Arrangement"); + + ue(frame_packing_arrangement_id, 0, MAX_UINT_BITS(31)); + flag(frame_packing_arrangement_cancel_flag); + if (!current->frame_packing_arrangement_cancel_flag) { + u(7, frame_packing_arrangement_type, 0, 7); + flag(quincunx_sampling_flag); + u(6, content_interpretation_type, 0, 2); + flag(spatial_flipping_flag); + flag(frame0_flipped_flag); + flag(field_views_flag); + flag(current_frame_is_frame0_flag); + flag(frame0_self_contained_flag); + flag(frame1_self_contained_flag); + if (!current->quincunx_sampling_flag && current->frame_packing_arrangement_type != 5) { + ub(4, frame0_grid_position_x); + ub(4, frame0_grid_position_y); + ub(4, frame1_grid_position_x); + ub(4, frame1_grid_position_y); + } + fixed(8, frame_packing_arrangement_reserved_byte, 0); + ue(frame_packing_arrangement_repetition_period, 0, 16384); + } + flag(frame_packing_arrangement_extension_flag); + + return 0; +} + SEI_FUNC(sei_display_orientation, (CodedBitstreamContext *ctx, RWContext *rw, H264RawSEIDisplayOrientation *current, SEIMessageState *sei)) diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_h266.h b/src/ExtLib/ffmpeg/libavcodec/cbs_h266.h index 21b9a4196c..d24a8e9313 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_h266.h +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_h266.h @@ -464,9 +464,9 @@ typedef struct H266RawSPS { uint8_t sps_virtual_boundaries_enabled_flag; uint8_t sps_virtual_boundaries_present_flag; uint8_t sps_num_ver_virtual_boundaries; - uint16_t sps_virtual_boundary_pos_x_minus1[3]; + uint16_t sps_virtual_boundary_pos_x_minus1[VVC_MAX_VBS]; uint8_t sps_num_hor_virtual_boundaries; - uint16_t sps_virtual_boundary_pos_y_minus1[3]; + uint16_t sps_virtual_boundary_pos_y_minus1[VVC_MAX_VBS]; uint8_t sps_timing_hrd_params_present_flag; uint8_t sps_sublayer_cpb_params_present_flag; @@ -703,9 +703,9 @@ typedef struct H266RawPictureHeader { uint8_t ph_virtual_boundaries_present_flag; uint8_t ph_num_ver_virtual_boundaries; - uint16_t ph_virtual_boundary_pos_x_minus1[3]; + uint16_t ph_virtual_boundary_pos_x_minus1[VVC_MAX_VBS]; uint8_t ph_num_hor_virtual_boundaries; - uint16_t ph_virtual_boundary_pos_y_minus1[3]; + uint16_t ph_virtual_boundary_pos_y_minus1[VVC_MAX_VBS]; uint8_t ph_pic_output_flag; H266RefPicLists ph_ref_pic_lists; @@ -848,16 +848,6 @@ typedef struct H266RawSlice { int data_bit_start; } H266RawSlice; -typedef struct H266RawSEIDecodedPictureHash { - uint8_t dph_sei_hash_type; - uint8_t dph_sei_single_component_flag; - uint8_t dph_sei_picture_md5[3][16]; - uint16_t dph_sei_picture_crc[3]; - uint32_t dph_sei_picture_checksum[3]; - - uint8_t dph_sei_reserved_zero_7bits; -} H266RawSEIDecodedPictureHash; - typedef struct H266RawSEI { H266RawNALUnitHeader nal_unit_header; SEIRawMessageList message_list; diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c b/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c index 53c4b60b0d..9c37996947 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_h266_syntax_template.c @@ -1562,13 +1562,13 @@ static int FUNC(sps)(CodedBitstreamContext *ctx, RWContext *rw, flag(sps_virtual_boundaries_present_flag); if (current->sps_virtual_boundaries_present_flag) { ue(sps_num_ver_virtual_boundaries, - 0, current->sps_pic_width_max_in_luma_samples <= 8 ? 0 : 3); + 0, current->sps_pic_width_max_in_luma_samples <= 8 ? 0 : VVC_MAX_VBS); for (i = 0; i < current->sps_num_ver_virtual_boundaries; i++) ues(sps_virtual_boundary_pos_x_minus1[i], 0, (current->sps_pic_width_max_in_luma_samples + 7) / 8 - 2, 1, i); ue(sps_num_hor_virtual_boundaries, - 0, current->sps_pic_height_max_in_luma_samples <= 8 ? 0 : 3); + 0, current->sps_pic_height_max_in_luma_samples <= 8 ? 0 : VVC_MAX_VBS); for (i = 0; i < current->sps_num_hor_virtual_boundaries; i++) ues(sps_virtual_boundary_pos_y_minus1[i], 0, (current->sps_pic_height_max_in_luma_samples + 7) / @@ -2714,13 +2714,13 @@ static int FUNC(picture_header) (CodedBitstreamContext *ctx, RWContext *rw, flag(ph_virtual_boundaries_present_flag); if (current->ph_virtual_boundaries_present_flag) { ue(ph_num_ver_virtual_boundaries, - 0, pps->pps_pic_width_in_luma_samples <= 8 ? 0 : 3); + 0, pps->pps_pic_width_in_luma_samples <= 8 ? 0 : VVC_MAX_VBS); for (i = 0; i < current->ph_num_ver_virtual_boundaries; i++) { ues(ph_virtual_boundary_pos_x_minus1[i], 0, (pps->pps_pic_width_in_luma_samples + 7) / 8 - 2, 1, i); } ue(ph_num_hor_virtual_boundaries, - 0, pps->pps_pic_height_in_luma_samples <= 8 ? 0 : 3); + 0, pps->pps_pic_height_in_luma_samples <= 8 ? 0 : VVC_MAX_VBS); for (i = 0; i < current->ph_num_hor_virtual_boundaries; i++) { ues(ph_virtual_boundary_pos_y_minus1[i], 0, (pps->pps_pic_height_in_luma_samples + 7) / 8 - 2, 1, i); @@ -3442,34 +3442,6 @@ static int FUNC(slice_header) (CodedBitstreamContext *ctx, RWContext *rw, return 0; } -SEI_FUNC(sei_decoded_picture_hash, (CodedBitstreamContext *ctx, - RWContext *rw, - H266RawSEIDecodedPictureHash *current, - SEIMessageState *unused)) -{ - int err, c_idx, i; - - HEADER("Decoded Picture Hash"); - - u(8, dph_sei_hash_type, 0, 2); - flag(dph_sei_single_component_flag); - ub(7, dph_sei_reserved_zero_7bits); - - for (c_idx = 0; c_idx < (current->dph_sei_single_component_flag ? 1 : 3); - c_idx++) { - if (current->dph_sei_hash_type == 0) { - for (i = 0; i < 16; i++) - us(8, dph_sei_picture_md5[c_idx][i], 0x00, 0xff, 2, c_idx, i); - } else if (current->dph_sei_hash_type == 1) { - us(16, dph_sei_picture_crc[c_idx], 0x0000, 0xffff, 1, c_idx); - } else if (current->dph_sei_hash_type == 2) { - us(32, dph_sei_picture_checksum[c_idx], 0x00000000, 0xffffffff, 1, - c_idx); - } - } - return 0; -} - static int FUNC(sei) (CodedBitstreamContext *ctx, RWContext *rw, H266RawSEI *current, int prefix) { diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_sei.h b/src/ExtLib/ffmpeg/libavcodec/cbs_sei.h index ec7cdb62f0..15ef3415ab 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_sei.h +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_sei.h @@ -43,6 +43,36 @@ typedef struct SEIRawUserDataUnregistered { size_t data_length; } SEIRawUserDataUnregistered; +typedef struct SEIRawFramePackingArrangement { + uint32_t fp_arrangement_id; + uint8_t fp_arrangement_cancel_flag; + uint8_t fp_arrangement_type; + uint8_t fp_quincunx_sampling_flag; + uint8_t fp_content_interpretation_type; + uint8_t fp_spatial_flipping_flag; + uint8_t fp_frame0_flipped_flag; + uint8_t fp_field_views_flag; + uint8_t fp_current_frame_is_frame0_flag; + uint8_t fp_frame0_self_contained_flag; + uint8_t fp_frame1_self_contained_flag; + uint8_t fp_frame0_grid_position_x; + uint8_t fp_frame0_grid_position_y; + uint8_t fp_frame1_grid_position_x; + uint8_t fp_frame1_grid_position_y; + uint8_t fp_arrangement_persistence_flag; + uint8_t fp_upsampled_aspect_ratio_flag; +} SEIRawFramePackingArrangement; + +typedef struct SEIRawDecodedPictureHash { + uint8_t dph_sei_hash_type; + uint8_t dph_sei_single_component_flag; + uint8_t dph_sei_picture_md5[3][16]; + uint16_t dph_sei_picture_crc[3]; + uint32_t dph_sei_picture_checksum[3]; + + uint8_t dph_sei_reserved_zero_7bits; +} SEIRawDecodedPictureHash; + typedef struct SEIRawMasteringDisplayColourVolume { uint16_t display_primaries_x[3]; uint16_t display_primaries_y[3]; diff --git a/src/ExtLib/ffmpeg/libavcodec/cbs_sei_syntax_template.c b/src/ExtLib/ffmpeg/libavcodec/cbs_sei_syntax_template.c index 81448ef3f2..0205bb47aa 100644 --- a/src/ExtLib/ffmpeg/libavcodec/cbs_sei_syntax_template.c +++ b/src/ExtLib/ffmpeg/libavcodec/cbs_sei_syntax_template.c @@ -94,6 +94,68 @@ SEI_FUNC(user_data_unregistered, (CodedBitstreamContext *ctx, RWContext *rw, return 0; } +SEI_FUNC(frame_packing_arrangement, (CodedBitstreamContext *ctx, RWContext *rw, + SEIRawFramePackingArrangement *current, + SEIMessageState *unused)) +{ + int err; + + HEADER("Frame Packing Arrangement"); + + ue(fp_arrangement_id, 0, MAX_UINT_BITS(31)); + flag(fp_arrangement_cancel_flag); + if (!current->fp_arrangement_cancel_flag) { + u(7, fp_arrangement_type, 3, 5); + flag(fp_quincunx_sampling_flag); + u(6, fp_content_interpretation_type, 0, 2); + flag(fp_spatial_flipping_flag); + flag(fp_frame0_flipped_flag); + flag(fp_field_views_flag); + flag(fp_current_frame_is_frame0_flag); + flag(fp_frame0_self_contained_flag); + flag(fp_frame1_self_contained_flag); + if (!current->fp_quincunx_sampling_flag && current->fp_arrangement_type != 5) { + ub(4, fp_frame0_grid_position_x); + ub(4, fp_frame0_grid_position_y); + ub(4, fp_frame1_grid_position_x); + ub(4, fp_frame1_grid_position_y); + } + fixed(8, fp_arrangement_reserved_byte, 0); + flag(fp_arrangement_persistence_flag); + } + flag(fp_upsampled_aspect_ratio_flag); + + return 0; +} + +SEI_FUNC(decoded_picture_hash, (CodedBitstreamContext *ctx, + RWContext *rw, + SEIRawDecodedPictureHash *current, + SEIMessageState *unused)) +{ + int err, c_idx, i; + + HEADER("Decoded Picture Hash"); + + u(8, dph_sei_hash_type, 0, 2); + flag(dph_sei_single_component_flag); + ub(7, dph_sei_reserved_zero_7bits); + + for (c_idx = 0; c_idx < (current->dph_sei_single_component_flag ? 1 : 3); + c_idx++) { + if (current->dph_sei_hash_type == 0) { + for (i = 0; i < 16; i++) + us(8, dph_sei_picture_md5[c_idx][i], 0x00, 0xff, 2, c_idx, i); + } else if (current->dph_sei_hash_type == 1) { + us(16, dph_sei_picture_crc[c_idx], 0x0000, 0xffff, 1, c_idx); + } else if (current->dph_sei_hash_type == 2) { + us(32, dph_sei_picture_checksum[c_idx], 0x00000000, 0xffffffff, 1, + c_idx); + } + } + return 0; +} + SEI_FUNC(mastering_display_colour_volume, (CodedBitstreamContext *ctx, RWContext *rw, SEIRawMasteringDisplayColourVolume *current, diff --git a/src/ExtLib/ffmpeg/libavcodec/dovi_rpudec.c b/src/ExtLib/ffmpeg/libavcodec/dovi_rpudec.c index 8cafdcf5e6..c025800206 100644 --- a/src/ExtLib/ffmpeg/libavcodec/dovi_rpudec.c +++ b/src/ExtLib/ffmpeg/libavcodec/dovi_rpudec.c @@ -420,7 +420,7 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size, if ((hdr->rpu_format & 0x700) == 0) { int bl_bit_depth_minus8 = get_ue_golomb_31(gb); - int el_bit_depth_minus8 = get_ue_golomb_31(gb); + int el_bit_depth_minus8 = get_ue_golomb_long(gb); int vdr_bit_depth_minus8 = get_ue_golomb_31(gb); int reserved_zero_3bits; /* ext_mapping_idc is in the upper 8 bits of el_bit_depth_minus8 */ diff --git a/src/ExtLib/ffmpeg/libavcodec/mjpegdec.c b/src/ExtLib/ffmpeg/libavcodec/mjpegdec.c index 1481a7f285..7daec649bc 100644 --- a/src/ExtLib/ffmpeg/libavcodec/mjpegdec.c +++ b/src/ExtLib/ffmpeg/libavcodec/mjpegdec.c @@ -843,9 +843,8 @@ static int decode_block(MJpegDecodeContext *s, int16_t *block, int component, return AVERROR_INVALIDDATA; } val = val * (unsigned)quant_matrix[0] + s->last_dc[component]; - val = av_clip_int16(val); s->last_dc[component] = val; - block[0] = val; + block[0] = av_clip_int16(val); /* AC coefs */ i = 0; {OPEN_READER(re, &s->gb); diff --git a/src/ExtLib/ffmpeg/libavcodec/mpeg12dec.c b/src/ExtLib/ffmpeg/libavcodec/mpeg12dec.c index 33383a5913..74946aeaab 100644 --- a/src/ExtLib/ffmpeg/libavcodec/mpeg12dec.c +++ b/src/ExtLib/ffmpeg/libavcodec/mpeg12dec.c @@ -73,7 +73,7 @@ typedef struct Mpeg1Context { MpegEncContext mpeg_enc_ctx; int repeat_field; /* true if we must repeat the field */ AVPanScan pan_scan; /* some temporary storage for the panscan */ - AVStereo3D stereo3d; + enum AVStereo3DType stereo3d_type; int has_stereo3d; AVBufferRef *a53_buf_ref; enum Mpeg2ClosedCaptionsFormat cc_format; @@ -1349,7 +1349,7 @@ static int mpeg_field_start(Mpeg1Context *s1, const uint8_t *buf, int buf_size) if (!stereo) return AVERROR(ENOMEM); - *stereo = s1->stereo3d; + stereo->type = s1->stereo3d_type; s1->has_stereo3d = 0; } @@ -2148,16 +2148,16 @@ static void mpeg_decode_user_data(AVCodecContext *avctx, switch (S3D_video_format_type) { case 0x03: - s1->stereo3d.type = AV_STEREO3D_SIDEBYSIDE; + s1->stereo3d_type = AV_STEREO3D_SIDEBYSIDE; break; case 0x04: - s1->stereo3d.type = AV_STEREO3D_TOPBOTTOM; + s1->stereo3d_type = AV_STEREO3D_TOPBOTTOM; break; case 0x08: - s1->stereo3d.type = AV_STEREO3D_2D; + s1->stereo3d_type = AV_STEREO3D_2D; break; case 0x23: - s1->stereo3d.type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX; + s1->stereo3d_type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX; break; } } diff --git a/src/ExtLib/ffmpeg/libavcodec/sbr.h b/src/ExtLib/ffmpeg/libavcodec/sbr.h index bc9ffab127..2bee1dce41 100644 --- a/src/ExtLib/ffmpeg/libavcodec/sbr.h +++ b/src/ExtLib/ffmpeg/libavcodec/sbr.h @@ -68,9 +68,9 @@ typedef struct SBRData { unsigned bs_frame_class; unsigned bs_add_harmonic_flag; AAC_SIGNE bs_num_env; - uint8_t bs_freq_res[7]; + uint8_t bs_freq_res[9]; AAC_SIGNE bs_num_noise; - uint8_t bs_df_env[5]; + uint8_t bs_df_env[9]; uint8_t bs_df_noise[2]; uint8_t bs_invf_mode[2][5]; uint8_t bs_add_harmonic[48]; @@ -95,21 +95,24 @@ typedef struct SBRData { DECLARE_ALIGNED(16, INTFLOAT, Y)[2][38][64][2]; DECLARE_ALIGNED(16, AAC_FLOAT, g_temp)[42][48]; AAC_FLOAT q_temp[42][48]; - uint8_t s_indexmapped[8][48]; + uint8_t s_indexmapped[9][48]; ///Envelope scalefactors - uint8_t env_facs_q[6][48]; - AAC_FLOAT env_facs[6][48]; + uint8_t env_facs_q[9][48]; + AAC_FLOAT env_facs[9][48]; ///Noise scalefactors uint8_t noise_facs_q[3][5]; AAC_FLOAT noise_facs[3][5]; ///Envelope time borders - uint8_t t_env[8]; + uint8_t t_env[9]; ///Envelope time border of the last envelope of the previous frame uint8_t t_env_num_env_old; ///Noise time borders uint8_t t_q[3]; unsigned f_indexnoise; unsigned f_indexsine; + //inter_tes (USAC) + uint8_t temp_shape[6]; + uint8_t temp_shape_mode[6]; /** @} */ } SBRData; @@ -142,9 +145,12 @@ struct SpectralBandReplication { int start; int ready_for_dequant; int id_aac; + int usac; + int inter_tes; // USAC-only int reset; SpectrumParameters spectrum_params; int bs_amp_res_header; + int bs_sbr_preprocessing; // USAC-only /** * @name Variables associated with bs_header_extra_2 * @{ @@ -196,18 +202,18 @@ struct SpectralBandReplication { ///First coefficient used to filter the subband signals DECLARE_ALIGNED(16, INTFLOAT, alpha1)[64][2]; ///Dequantized envelope scalefactors, remapped - AAC_FLOAT e_origmapped[7][48]; + AAC_FLOAT e_origmapped[8][48]; ///Dequantized noise scalefactors, remapped - AAC_FLOAT q_mapped[7][48]; + AAC_FLOAT q_mapped[8][48]; ///Sinusoidal presence, remapped - uint8_t s_mapped[7][48]; + uint8_t s_mapped[8][48]; ///Estimated envelope - AAC_FLOAT e_curr[7][48]; + AAC_FLOAT e_curr[8][48]; ///Amplitude adjusted noise scalefactors - AAC_FLOAT q_m[7][48]; + AAC_FLOAT q_m[8][48]; ///Sinusoidal levels - AAC_FLOAT s_m[7][48]; - AAC_FLOAT gain[7][48]; + AAC_FLOAT s_m[8][48]; + AAC_FLOAT gain[8][48]; DECLARE_ALIGNED(32, INTFLOAT, qmf_filter_scratch)[5][64]; AVTXContext *mdct_ana; av_tx_fn mdct_ana_fn; diff --git a/src/ExtLib/ffmpeg/libavcodec/version.h b/src/ExtLib/ffmpeg/libavcodec/version.h index 37c4c39451..230d5fa13e 100644 --- a/src/ExtLib/ffmpeg/libavcodec/version.h +++ b/src/ExtLib/ffmpeg/libavcodec/version.h @@ -29,7 +29,7 @@ #include "version_major.h" -#define LIBAVCODEC_VERSION_MINOR 8 +#define LIBAVCODEC_VERSION_MINOR 9 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc.h b/src/ExtLib/ffmpeg/libavcodec/vvc.h index c4cec1eb8f..92639779c1 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc.h +++ b/src/ExtLib/ffmpeg/libavcodec/vvc.h @@ -151,6 +151,9 @@ enum { // get near that, though, so set a lower limit here with the maximum // possible value for 8K video (at most 135 32x32 Ctb rows). VVC_MAX_ENTRY_POINTS = VVC_MAX_TILE_COLUMNS * 135, + + // {sps, ph}_num_{ver, hor}_virtual_boundaries should in [0, 3] + VVC_MAX_VBS = 3, }; #endif /* AVCODEC_VVC_H */ diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c b/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c index ce79f14288..bd0be2d821 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.c @@ -1444,20 +1444,25 @@ static void merge_data_block(VVCLocalContext *lc) } } -static void merge_data_ibc(VVCLocalContext *lc) +static int merge_data_ibc(VVCLocalContext *lc) { const VVCFrameContext* fc = lc->fc; const VVCSPS* sps = fc->ps.sps; MotionInfo *mi = &lc->cu->pu.mi; int merge_idx = 0; + int ret; mi->pred_flag = PF_IBC; if (sps->max_num_ibc_merge_cand > 1) merge_idx = ff_vvc_merge_idx(lc); - ff_vvc_luma_mv_merge_ibc(lc, merge_idx, &mi->mv[L0][0]); + ret = ff_vvc_luma_mv_merge_ibc(lc, merge_idx, &mi->mv[L0][0]); + if (ret) + return ret; ff_vvc_store_mv(lc, mi); + + return 0; } static int hls_merge_data(VVCLocalContext *lc) @@ -1466,11 +1471,14 @@ static int hls_merge_data(VVCLocalContext *lc) const VVCPH *ph = &fc->ps.ph; const CodingUnit *cu = lc->cu; PredictionUnit *pu = &lc->cu->pu; + int ret; pu->merge_gpm_flag = 0; pu->mi.num_sb_x = pu->mi.num_sb_y = 1; if (cu->pred_mode == MODE_IBC) { - merge_data_ibc(lc); + ret = merge_data_ibc(lc); + if (ret) + return ret; } else { if (ph->max_num_subblock_merge_cand > 0 && cu->cb_width >= 8 && cu->cb_height >= 8) pu->merge_subblock_flag = ff_vvc_merge_subblock_flag(lc); @@ -1596,6 +1604,7 @@ static int mvp_data_ibc(VVCLocalContext *lc) int mvp_l0_flag = 0; int amvr_shift = 4; Mv *mv = &mi->mv[L0][0]; + int ret; mi->pred_flag = PF_IBC; mi->num_sb_x = 1; @@ -1607,7 +1616,9 @@ static int mvp_data_ibc(VVCLocalContext *lc) if (sps->r->sps_amvr_enabled_flag && (mv->x || mv->y)) amvr_shift = ff_vvc_amvr_shift(lc, pu->inter_affine_flag, cu->pred_mode, 1); - ff_vvc_mvp_ibc(lc, mvp_l0_flag, amvr_shift, mv); + ret = ff_vvc_mvp_ibc(lc, mvp_l0_flag, amvr_shift, mv); + if (ret) + return ret; ff_vvc_store_mv(lc, mi); return 0; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.h b/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.h index a987328d81..d5c3e8d96f 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.h +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/ctu.h @@ -461,10 +461,15 @@ typedef struct ALFParams { uint8_t ctb_filt_set_idx_y; ///< AlfCtbFiltSetIdxY uint8_t alf_ctb_filter_alt_idx[2]; ///< alf_ctb_filter_alt_idx[] uint8_t ctb_cc_idc[2]; ///< alf_ctb_cc_cb_idc, alf_ctb_cc_cr_idc - - uint8_t applied[3]; } ALFParams; +typedef struct VVCRect { + int l; // left + int t; // top + int r; // right + int b; // bottom +} VVCRect; + /** * parse a CTU * @param lc local context for CTU diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c index 9a50ce2222..c9f25696b4 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.c @@ -191,14 +191,12 @@ static void bs_tl_init(TabList *l, VVCFrameContext *fc) tl_init(l, 1, changed); - for (int i = 0; i < VVC_MAX_SAMPLE_ARRAYS; i++) { - TL_ADD(horizontal_bs[i], bs_count); - TL_ADD(vertical_bs[i], bs_count); + for (int i = 0; i < 2; i++) { + for (int j = 0; j < VVC_MAX_SAMPLE_ARRAYS; j++) + TL_ADD(bs[i][j], bs_count); + TL_ADD(max_len_p[i], bs_count); + TL_ADD(max_len_q[i], bs_count); } - TL_ADD(horizontal_q, bs_count); - TL_ADD(horizontal_p, bs_count); - TL_ADD(vertical_p, bs_count); - TL_ADD(vertical_q, bs_count); } static void pixel_buffer_nz_tl_init(TabList *l, VVCFrameContext *fc) @@ -1028,7 +1026,7 @@ static av_cold int vvc_decode_init(AVCodecContext *avctx) static AVOnce init_static_once = AV_ONCE_INIT; const int cpu_count = av_cpu_count(); const int delayed = FFMIN(cpu_count, VVC_MAX_DELAYED_FRAMES); - const int thread_count = avctx->thread_count ? avctx->thread_count : delayed; + int thread_count = avctx->thread_count ? avctx->thread_count : delayed; int ret; s->avctx = avctx; @@ -1055,6 +1053,8 @@ static av_cold int vvc_decode_init(AVCodecContext *avctx) return ret; } + if (thread_count == 1) + thread_count = 0; s->executor = ff_vvc_executor_alloc(s, thread_count); if (!s->executor) return AVERROR(ENOMEM); diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h index 1e0b76f283..a8492f1398 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/dec.h @@ -178,12 +178,9 @@ typedef struct VVCFrameContext { uint8_t *tb_height[2]; uint8_t *pcmf[2]; - uint8_t *horizontal_bs[VVC_MAX_SAMPLE_ARRAYS]; - uint8_t *vertical_bs[VVC_MAX_SAMPLE_ARRAYS]; - uint8_t *horizontal_p; ///< horizontal maxFilterLengthPs for luma - uint8_t *horizontal_q; ///< horizontal maxFilterLengthQs for luma - uint8_t *vertical_p; ///< vertical maxFilterLengthPs for luma - uint8_t *vertical_q; ///< vertical maxFilterLengthQs for luma + uint8_t *bs[2][VVC_MAX_SAMPLE_ARRAYS]; ///< horizontal, vertical boundary filtering strength + uint8_t *max_len_p[2]; ///< horizontal, vertical maxFilterLengthPs for luma + uint8_t *max_len_q[2]; ///< horizontal, vertical maxFilterLengthQs for luma uint8_t *sao_pixel_buffer_h[VVC_MAX_SAMPLE_ARRAYS]; uint8_t *sao_pixel_buffer_v[VVC_MAX_SAMPLE_ARRAYS]; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/filter.c b/src/ExtLib/ffmpeg/libavcodec/vvc/filter.c index 7844d34eac..7ffcb29f47 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/filter.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/filter.c @@ -20,6 +20,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "libavutil/frame.h" +#include "libavutil/imgutils.h" #include "ctu.h" #include "data.h" @@ -34,6 +35,10 @@ #define DEFAULT_INTRA_TC_OFFSET 2 +#define POS(c_idx, x, y) \ + &fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * fc->frame->linesize[c_idx] + \ + (((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)] + //Table 43 Derivation of threshold variables beta' and tc' from input Q static const uint16_t tctable[66] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -51,6 +56,32 @@ static const uint8_t betatable[64] = { 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, }; +// One vertical and one horizontal virtual boundary in a CTU at most. The CTU will be divided into 4 subblocks. +#define MAX_VBBS 4 + +static int get_virtual_boundary(const VVCFrameContext *fc, const int ctu_pos, const int vertical) +{ + const VVCSPS *sps = fc->ps.sps; + const VVCPH *ph = &fc->ps.ph; + const uint16_t *vbs = vertical ? ph->vb_pos_x : ph->vb_pos_y; + const uint8_t nb_vbs = vertical ? ph->num_ver_vbs : ph->num_hor_vbs; + const int pos = ctu_pos << sps->ctb_log2_size_y; + + if (sps->r->sps_virtual_boundaries_enabled_flag) { + for (int i = 0; i < nb_vbs; i++) { + const int o = vbs[i] - pos; + if (o >= 0 && o < sps->ctb_size_y) + return vbs[i]; + } + } + return 0; +} + +static int is_virtual_boundary(const VVCFrameContext *fc, const int pos, const int vertical) +{ + return get_virtual_boundary(fc, pos >> fc->ps.sps->ctb_log2_size_y, vertical) == pos; +} + static int get_qPc(const VVCFrameContext *fc, const int x0, const int y0, const int chroma) { const int x = x0 >> MIN_TU_LOG2; @@ -135,7 +166,7 @@ static void sao_copy_ctb_to_hv(VVCLocalContext *lc, const int rx, const int ry, const int ctb_size_v = ctb_size_y >> fc->ps.sps->vshift[c_idx]; const int width = FFMIN(ctb_size_h, (fc->ps.pps->width >> fc->ps.sps->hshift[c_idx]) - x); const int height = FFMIN(ctb_size_v, (fc->ps.pps->height >> fc->ps.sps->vshift[c_idx]) - y); - const uint8_t *src = &fc->frame->data[c_idx][y * src_stride + (x << fc->ps.sps->pixel_shift)]; + const uint8_t *src = POS(c_idx, x0, y0); copy_ctb_to_hv(fc, src, src_stride, x, y, width, height, c_idx, rx, ry, top); } } @@ -151,154 +182,192 @@ void ff_vvc_sao_copy_ctb_to_hv(VVCLocalContext *lc, const int rx, const int ry, sao_copy_ctb_to_hv(lc, rx, ry, 0); } -void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y) +static int sao_can_cross_slices(const VVCFrameContext *fc, const int rx, const int ry, const int dx, const int dy) +{ + const uint8_t lfase = fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag; + + return lfase || CTB(fc->tab.slice_idx, rx, ry) == CTB(fc->tab.slice_idx, rx + dx, ry + dy); +} + +static void sao_get_edges(uint8_t vert_edge[2], uint8_t horiz_edge[2], uint8_t diag_edge[4], int *restore, + const VVCLocalContext *lc, const int edges[4], const int rx, const int ry) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const H266RawSPS *rsps = sps->r; + const VVCPPS *pps = fc->ps.pps; + const int subpic_idx = lc->sc->sh.r->curr_subpic_idx; + const uint8_t lfase = fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag; + const uint8_t no_tile_filter = pps->r->num_tiles_in_pic > 1 && !pps->r->pps_loop_filter_across_tiles_enabled_flag; + const uint8_t no_subpic_filter = rsps->sps_num_subpics_minus1 && !rsps->sps_loop_filter_across_subpic_enabled_flag[subpic_idx]; + uint8_t lf_edge[] = { 0, 0, 0, 0 }; + + *restore = no_subpic_filter || no_tile_filter || !lfase || rsps->sps_virtual_boundaries_enabled_flag; + + if (!*restore) + return; + + if (!edges[LEFT]) { + lf_edge[LEFT] = no_tile_filter && pps->ctb_to_col_bd[rx] == rx; + lf_edge[LEFT] |= no_subpic_filter && rsps->sps_subpic_ctu_top_left_x[subpic_idx] == rx; + lf_edge[LEFT] |= is_virtual_boundary(fc, rx << sps->ctb_log2_size_y, 1); + vert_edge[0] = !sao_can_cross_slices(fc, rx, ry, -1, 0) || lf_edge[LEFT]; + } + if (!edges[RIGHT]) { + lf_edge[RIGHT] = no_tile_filter && pps->ctb_to_col_bd[rx] != pps->ctb_to_col_bd[rx + 1]; + lf_edge[RIGHT] |= no_subpic_filter && rsps->sps_subpic_ctu_top_left_x[subpic_idx] + rsps->sps_subpic_width_minus1[subpic_idx] == rx; + lf_edge[RIGHT] |= is_virtual_boundary(fc, (rx + 1) << sps->ctb_log2_size_y, 1); + vert_edge[1] = !sao_can_cross_slices(fc, rx, ry, 1, 0) || lf_edge[RIGHT]; + } + if (!edges[TOP]) { + lf_edge[TOP] = no_tile_filter && pps->ctb_to_row_bd[ry] == ry; + lf_edge[TOP] |= no_subpic_filter && rsps->sps_subpic_ctu_top_left_y[subpic_idx] == ry; + lf_edge[TOP] |= is_virtual_boundary(fc, ry << sps->ctb_log2_size_y, 0); + horiz_edge[0] = !sao_can_cross_slices(fc, rx, ry, 0, -1) || lf_edge[TOP]; + } + if (!edges[BOTTOM]) { + lf_edge[BOTTOM] = no_tile_filter && pps->ctb_to_row_bd[ry] != pps->ctb_to_row_bd[ry + 1]; + lf_edge[BOTTOM] |= no_subpic_filter && rsps->sps_subpic_ctu_top_left_y[subpic_idx] + rsps->sps_subpic_height_minus1[subpic_idx] == ry; + lf_edge[BOTTOM] |= is_virtual_boundary(fc, (ry + 1) << sps->ctb_log2_size_y, 0); + horiz_edge[1] = !sao_can_cross_slices(fc, rx, ry, 0, 1) || lf_edge[BOTTOM]; + } + + if (!edges[LEFT] && !edges[TOP]) + diag_edge[0] = !sao_can_cross_slices(fc, rx, ry, -1, -1) || lf_edge[LEFT] || lf_edge[TOP]; + + if (!edges[TOP] && !edges[RIGHT]) + diag_edge[1] = !sao_can_cross_slices(fc, rx, ry, 1, -1) || lf_edge[RIGHT] || lf_edge[TOP]; + + if (!edges[RIGHT] && !edges[BOTTOM]) + diag_edge[2] = !sao_can_cross_slices(fc, rx, ry, 1, 1) || lf_edge[RIGHT] || lf_edge[BOTTOM]; + + if (!edges[LEFT] && !edges[BOTTOM]) + diag_edge[3] = !sao_can_cross_slices(fc, rx, ry, -1, 1) || lf_edge[LEFT] || lf_edge[BOTTOM]; +} + +static void sao_copy_hor(uint8_t *dst, const ptrdiff_t dst_stride, + const uint8_t *src, const ptrdiff_t src_stride, const int width, const int edges[4], const int ps) +{ + const int left = 1 - edges[LEFT]; + const int right = 1 - edges[RIGHT]; + int pos = 0; + + src -= left << ps; + dst -= left << ps; + + if (left) { + copy_pixel(dst, src, ps); + pos += (1 << ps); + } + memcpy(dst + pos, src + pos, width << ps); + if (right) { + pos += width << ps; + copy_pixel(dst + pos, src + pos, ps); + } +} + +static void sao_extends_edges(uint8_t *dst, const ptrdiff_t dst_stride, + const uint8_t *src, const ptrdiff_t src_stride, const int width, const int height, + const VVCFrameContext *fc, const int x0, const int y0, const int rx, const int ry, const int edges[4], const int c_idx) +{ + const uint8_t *sao_h = fc->tab.sao_pixel_buffer_h[c_idx]; + const uint8_t *sao_v = fc->tab.sao_pixel_buffer_v[c_idx]; + const int x = x0 >> fc->ps.sps->hshift[c_idx]; + const int y = y0 >> fc->ps.sps->vshift[c_idx]; + const int w = fc->ps.pps->width >> fc->ps.sps->hshift[c_idx]; + const int h = fc->ps.pps->height >> fc->ps.sps->vshift[c_idx]; + const int ps = fc->ps.sps->pixel_shift; + + if (!edges[TOP]) + sao_copy_hor(dst - dst_stride, dst_stride, sao_h + (((2 * ry - 1) * w + x) << ps), src_stride, width, edges, ps); + + if (!edges[BOTTOM]) + sao_copy_hor(dst + height * dst_stride, dst_stride, sao_h + (((2 * ry + 2) * w + x) << ps), src_stride, width, edges, ps); + + if (!edges[LEFT]) + copy_vert(dst - (1 << ps), sao_v + (((2 * rx - 1) * h + y) << ps), ps, height, dst_stride, 1 << ps); + + if (!edges[RIGHT]) + copy_vert(dst + (width << ps), sao_v + (((2 * rx + 2) * h + y) << ps), ps, height, dst_stride, 1 << ps); + + copy_ctb(dst, src, width << ps, height, dst_stride, src_stride); +} + +static void sao_restore_vb(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src, ptrdiff_t src_stride, + const int width, const int height, const int vb_pos, const int ps, const int vertical) +{ + int w = 2; + int h = (vertical ? height : width); + int dx = vb_pos - 1; + int dy = 0; + + if (!vertical) { + FFSWAP(int, w, h); + FFSWAP(int, dx, dy); + } + dst += dy * dst_stride +(dx << ps); + src += dy * src_stride +(dx << ps); + + av_image_copy_plane(dst, dst_stride, src, src_stride, w << ps, h); +} + +void ff_vvc_sao_filter(VVCLocalContext *lc, int x0, int y0) { VVCFrameContext *fc = lc->fc; - const int ctb_size_y = fc->ps.sps->ctb_size_y; - static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8 }; - int c_idx; - const int rx = x >> fc->ps.sps->ctb_log2_size_y; - const int ry = y >> fc->ps.sps->ctb_log2_size_y; - int edges[4] = { !rx, !ry, rx == fc->ps.pps->ctb_width - 1, ry == fc->ps.pps->ctb_height - 1 }; + const VVCSPS *sps = fc->ps.sps; + const int rx = x0 >> sps->ctb_log2_size_y; + const int ry = y0 >> sps->ctb_log2_size_y; + const int edges[4] = { !rx, !ry, rx == fc->ps.pps->ctb_width - 1, ry == fc->ps.pps->ctb_height - 1 }; const SAOParams *sao = &CTB(fc->tab.sao, rx, ry); // flags indicating unfilterable edges - uint8_t vert_edge[] = { 0, 0 }; - uint8_t horiz_edge[] = { 0, 0 }; - uint8_t diag_edge[] = { 0, 0, 0, 0 }; - uint8_t tile_edge[] = { 0, 0, 0, 0 }; - uint8_t subpic_edge[] = { 0, 0, 0, 0 }; - const int subpic_idx = lc->sc->sh.r->curr_subpic_idx; - const uint8_t lfase = fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag; - const uint8_t no_tile_filter = fc->ps.pps->r->num_tiles_in_pic > 1 && - !fc->ps.pps->r->pps_loop_filter_across_tiles_enabled_flag; - const uint8_t no_subpic_filter = fc->ps.sps->r->sps_num_subpics_minus1 && - !fc->ps.sps->r->sps_loop_filter_across_subpic_enabled_flag[subpic_idx]; - const uint8_t restore = no_subpic_filter || no_tile_filter || !lfase; - - if (restore) { - if (!edges[LEFT]) { - tile_edge[LEFT] = no_tile_filter && fc->ps.pps->ctb_to_col_bd[rx] == rx; - subpic_edge[LEFT] = no_subpic_filter && fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] == rx; - vert_edge[0] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx - 1, ry)) || tile_edge[LEFT] || subpic_edge[LEFT]; - } - if (!edges[RIGHT]) { - tile_edge[RIGHT] = no_tile_filter && fc->ps.pps->ctb_to_col_bd[rx] != fc->ps.pps->ctb_to_col_bd[rx + 1]; - subpic_edge[RIGHT] = no_subpic_filter && - fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] + fc->ps.sps->r->sps_subpic_width_minus1[subpic_idx] == rx; - vert_edge[1] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx + 1, ry)) || tile_edge[RIGHT] || subpic_edge[RIGHT]; - } - if (!edges[TOP]) { - tile_edge[TOP] = no_tile_filter && fc->ps.pps->ctb_to_row_bd[ry] == ry; - subpic_edge[TOP] = no_subpic_filter && fc->ps.sps->r->sps_subpic_ctu_top_left_y[subpic_idx] == ry; - horiz_edge[0] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx, ry - 1)) || tile_edge[TOP] || subpic_edge[TOP]; - } - if (!edges[BOTTOM]) { - tile_edge[BOTTOM] = no_tile_filter && fc->ps.pps->ctb_to_row_bd[ry] != fc->ps.pps->ctb_to_row_bd[ry + 1]; - subpic_edge[BOTTOM] = no_subpic_filter && - fc->ps.sps->r->sps_subpic_ctu_top_left_y[subpic_idx] + fc->ps.sps->r->sps_subpic_height_minus1[subpic_idx] == ry; - horiz_edge[1] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx, ry + 1)) || tile_edge[BOTTOM] || subpic_edge[BOTTOM]; - } - if (!edges[LEFT] && !edges[TOP]) { - diag_edge[0] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx - 1, ry - 1)) || - tile_edge[LEFT] || tile_edge[TOP] || subpic_edge[LEFT] || subpic_edge[TOP]; - } - if (!edges[TOP] && !edges[RIGHT]) { - diag_edge[1] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx + 1, ry - 1)) || - tile_edge[RIGHT] || tile_edge[TOP] || subpic_edge[TOP] || subpic_edge[RIGHT]; - } - if (!edges[RIGHT] && !edges[BOTTOM]) { - diag_edge[2] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx + 1, ry + 1)) || - tile_edge[RIGHT] || tile_edge[BOTTOM] || subpic_edge[RIGHT] || subpic_edge[BOTTOM]; - } - if (!edges[LEFT] && !edges[BOTTOM]) { - diag_edge[3] = (!lfase && CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx - 1, ry + 1)) || - tile_edge[LEFT] || tile_edge[BOTTOM] || subpic_edge[LEFT] || subpic_edge[BOTTOM]; - } + uint8_t vert_edge[] = { 0, 0 }; + uint8_t horiz_edge[] = { 0, 0 }; + uint8_t diag_edge[] = { 0, 0, 0, 0 }; + int restore, vb_x = 0, vb_y = 0;; + + if (sps->r->sps_virtual_boundaries_enabled_flag) { + vb_x = get_virtual_boundary(fc, rx, 1); + vb_y = get_virtual_boundary(fc, ry, 0); } - for (c_idx = 0; c_idx < (fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1); c_idx++) { - int x0 = x >> fc->ps.sps->hshift[c_idx]; - int y0 = y >> fc->ps.sps->vshift[c_idx]; - ptrdiff_t src_stride = fc->frame->linesize[c_idx]; - int ctb_size_h = ctb_size_y >> fc->ps.sps->hshift[c_idx]; - int ctb_size_v = ctb_size_y >> fc->ps.sps->vshift[c_idx]; - int width = FFMIN(ctb_size_h, (fc->ps.pps->width >> fc->ps.sps->hshift[c_idx]) - x0); - int height = FFMIN(ctb_size_v, (fc->ps.pps->height >> fc->ps.sps->vshift[c_idx]) - y0); - int tab = sao_tab[(FFALIGN(width, 8) >> 3) - 1]; - uint8_t *src = &fc->frame->data[c_idx][y0 * src_stride + (x0 << fc->ps.sps->pixel_shift)]; - ptrdiff_t dst_stride; - uint8_t *dst; + sao_get_edges(vert_edge, horiz_edge, diag_edge, &restore, lc, edges, rx, ry); + + for (int c_idx = 0; c_idx < (sps->r->sps_chroma_format_idc ? 3 : 1); c_idx++) { + static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8 }; + const ptrdiff_t src_stride = fc->frame->linesize[c_idx]; + uint8_t *src = POS(c_idx, x0, y0); + const int hs = sps->hshift[c_idx]; + const int vs = sps->vshift[c_idx]; + const int ps = sps->pixel_shift; + const int width = FFMIN(sps->ctb_size_y, fc->ps.pps->width - x0) >> hs; + const int height = FFMIN(sps->ctb_size_y, fc->ps.pps->height - y0) >> vs; + const int tab = sao_tab[(FFALIGN(width, 8) >> 3) - 1]; + const int sao_eo_class = sao->eo_class[c_idx]; switch (sao->type_idx[c_idx]) { - case SAO_BAND: - fc->vvcdsp.sao.band_filter[tab](src, src, src_stride, src_stride, - sao->offset_val[c_idx], sao->band_position[c_idx], width, height); - break; - case SAO_EDGE: - { - const int w = fc->ps.pps->width >> fc->ps.sps->hshift[c_idx]; - const int h = fc->ps.pps->height >> fc->ps.sps->vshift[c_idx]; - const int sh = fc->ps.sps->pixel_shift; - - dst_stride = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE; - dst = lc->sao_buffer + dst_stride + AV_INPUT_BUFFER_PADDING_SIZE; - - if (!edges[TOP]) { - const int left = 1 - edges[LEFT]; - const int right = 1 - edges[RIGHT]; - const uint8_t *src1; - uint8_t *dst1; - int pos = 0; - - dst1 = dst - dst_stride - (left << sh); - src1 = fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * ry - 1) * w + x0 - left) << sh); - if (left) { - copy_pixel(dst1, src1, sh); - pos += (1 << sh); - } - memcpy(dst1 + pos, src1 + pos, width << sh); - if (right) { - pos += width << sh; - copy_pixel(dst1 + pos, src1 + pos, sh); - } + case SAO_BAND: + fc->vvcdsp.sao.band_filter[tab](src, src, src_stride, src_stride, + sao->offset_val[c_idx], sao->band_position[c_idx], width, height); + break; + case SAO_EDGE: + { + const ptrdiff_t dst_stride = 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE; + uint8_t *dst = lc->sao_buffer + dst_stride + AV_INPUT_BUFFER_PADDING_SIZE; + + sao_extends_edges(dst, dst_stride, src, src_stride, width, height, fc, x0, y0, rx, ry, edges, c_idx); + + fc->vvcdsp.sao.edge_filter[tab](src, dst, src_stride, sao->offset_val[c_idx], + sao->eo_class[c_idx], width, height); + fc->vvcdsp.sao.edge_restore[restore](src, dst, src_stride, dst_stride, + sao, edges, width, height, c_idx, vert_edge, horiz_edge, diag_edge); + + if (vb_x > x0 && sao_eo_class != SAO_EO_VERT) + sao_restore_vb(src, src_stride, dst, dst_stride, width, height, (vb_x - x0) >> hs, ps, 1); + if (vb_y > y0 && sao_eo_class != SAO_EO_HORIZ) + sao_restore_vb(src, src_stride, dst, dst_stride, width, height, (vb_y - y0) >> vs, ps, 0); + + break; } - if (!edges[BOTTOM]) { - const int left = 1 - edges[LEFT]; - const int right = 1 - edges[RIGHT]; - const uint8_t *src1; - uint8_t *dst1; - int pos = 0; - - dst1 = dst + height * dst_stride - (left << sh); - src1 = fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * ry + 2) * w + x0 - left) << sh); - if (left) { - copy_pixel(dst1, src1, sh); - pos += (1 << sh); - } - memcpy(dst1 + pos, src1 + pos, width << sh); - if (right) { - pos += width << sh; - copy_pixel(dst1 + pos, src1 + pos, sh); - } - } - if (!edges[LEFT]) { - copy_vert(dst - (1 << sh), - fc->tab.sao_pixel_buffer_v[c_idx] + (((2 * rx - 1) * h + y0) << sh), - sh, height, dst_stride, 1 << sh); - } - if (!edges[RIGHT]) { - copy_vert(dst + (width << sh), - fc->tab.sao_pixel_buffer_v[c_idx] + (((2 * rx + 2) * h + y0) << sh), - sh, height, dst_stride, 1 << sh); - } - - copy_ctb(dst, src, width << sh, height, dst_stride, src_stride); - fc->vvcdsp.sao.edge_filter[tab](src, dst, src_stride, sao->offset_val[c_idx], - sao->eo_class[c_idx], width, height); - fc->vvcdsp.sao.edge_restore[restore](src, dst, src_stride, dst_stride, - sao, edges, width, height, c_idx, vert_edge, horiz_edge, diag_edge); - break; - } } } } @@ -406,30 +475,41 @@ static void derive_max_filter_length_luma(const VVCFrameContext *fc, const int q *max_len_p = FFMIN(5, *max_len_p); } -static void vvc_deblock_subblock_bs_vertical(const VVCLocalContext *lc, - const int cb_x, const int cb_y, const int x0, const int y0, const int width, const int height) +static void vvc_deblock_subblock_bs(const VVCLocalContext *lc, + const int cb, int x0, int y0, int width, int height, const int vertical) { const VVCFrameContext *fc = lc->fc; const MvField *tab_mvf = fc->tab.mvf; const RefPicList *rpl = lc->sc->rpl; - const int min_pu_width = fc->ps.pps->min_pu_width; + int stridea = fc->ps.pps->min_pu_width; + int strideb = 1; const int log2_min_pu_size = MIN_PU_LOG2; + if (!vertical) { + FFSWAP(int, x0, y0); + FFSWAP(int, width, height); + FFSWAP(int, stridea, strideb); + } + // bs for TU internal vertical PU boundaries - for (int j = 0; j < height; j += 4) { - const int y_pu = (y0 + j) >> log2_min_pu_size; - - for (int i = 8 - ((x0 - cb_x) % 8); i < width; i += 8) { - const int xp_pu = (x0 + i - 1) >> log2_min_pu_size; - const int xq_pu = (x0 + i) >> log2_min_pu_size; - const MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; - const MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; - const int x = x0 + i; - const int y = y0 + j; - const int bs = boundary_strength(lc, curr, left, rpl); + for (int i = 8 - ((x0 - cb) % 8); i < width; i += 8) { + const int is_vb = is_virtual_boundary(fc, x0 + i, vertical); + const int xp_pu = (x0 + i - 1) >> log2_min_pu_size; + const int xq_pu = (x0 + i) >> log2_min_pu_size; + + for (int j = 0; j < height; j += 4) { + const int y_pu = (y0 + j) >> log2_min_pu_size; + const MvField *mvf_p = &tab_mvf[y_pu * stridea + xp_pu * strideb]; + const MvField *mvf_q = &tab_mvf[y_pu * stridea + xq_pu * strideb]; + const int bs = is_vb ? 0 : boundary_strength(lc, mvf_q, mvf_p, rpl); + int x = x0 + i; + int y = y0 + j; uint8_t max_len_p = 0, max_len_q = 0; - TAB_BS(fc->tab.vertical_bs[LUMA], x, y) = bs; + if (!vertical) + FFSWAP(int, x, y); + + TAB_BS(fc->tab.bs[vertical][LUMA], x, y) = bs; if (i == 4 || i == width - 4) max_len_p = max_len_q = 1; @@ -438,48 +518,8 @@ static void vvc_deblock_subblock_bs_vertical(const VVCLocalContext *lc, else max_len_p = max_len_q = 3; - TAB_MAX_LEN(fc->tab.vertical_p, x, y) = max_len_p; - TAB_MAX_LEN(fc->tab.vertical_q, x, y) = max_len_q; - } - } -} - -static void vvc_deblock_subblock_bs_horizontal(const VVCLocalContext *lc, - const int cb_x, const int cb_y, const int x0, const int y0, const int width, const int height) -{ - const VVCFrameContext *fc = lc->fc; - const MvField* tab_mvf = fc->tab.mvf; - const RefPicList* rpl = lc->sc->rpl; - const int min_pu_width = fc->ps.pps->min_pu_width; - const int log2_min_pu_size = MIN_PU_LOG2; - - // bs for TU internal horizontal PU boundaries - for (int j = 8 - ((y0 - cb_y) % 8); j < height; j += 8) { - int yp_pu = (y0 + j - 1) >> log2_min_pu_size; - int yq_pu = (y0 + j) >> log2_min_pu_size; - - for (int i = 0; i < width; i += 4) { - const int x_pu = (x0 + i) >> log2_min_pu_size; - const MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; - const MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; - const int x = x0 + i; - const int y = y0 + j; - const int bs = boundary_strength(lc, curr, top, rpl); - uint8_t max_len_p = 0, max_len_q = 0; - - TAB_BS(fc->tab.horizontal_bs[LUMA], x, y) = bs; - - //fixme: - //edgeTbFlags[ x − sbW ][ y ] is equal to 1 - //edgeTbFlags[ x + sbW ][ y ] is equal to 1 - if (j == 4 || j == height - 4) - max_len_p = max_len_q = 1; - else if (j == 8 || j == height - 8) - max_len_p = max_len_q = 2; - else - max_len_p = max_len_q = 3; - TAB_MAX_LEN(fc->tab.horizontal_p, x, y) = max_len_p; - TAB_MAX_LEN(fc->tab.horizontal_q, x, y) = max_len_q; + TAB_MAX_LEN(fc->tab.max_len_p[vertical], x, y) = max_len_p; + TAB_MAX_LEN(fc->tab.max_len_q[vertical], x, y) = max_len_q; } } } @@ -565,142 +605,78 @@ static int deblock_is_boundary(const VVCLocalContext *lc, const int boundary, return boundary; } -static void vvc_deblock_bs_luma_vertical(const VVCLocalContext *lc, - const int x0, const int y0, const int width, const int height, const int rs) +static void vvc_deblock_bs_luma(const VVCLocalContext *lc, + const int x0, const int y0, const int width, const int height, const int rs, const int vertical) { const VVCFrameContext *fc = lc->fc; const MvField *tab_mvf = fc->tab.mvf; + const int mask = LUMA_GRID - 1; const int log2_min_pu_size = MIN_PU_LOG2; const int min_pu_width = fc->ps.pps->min_pu_width; const int min_cb_log2 = fc->ps.sps->min_cb_log2_size_y; const int min_cb_width = fc->ps.pps->min_cb_width; - const int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width + - (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; - int boundary_left; - int has_vertical_sb = 0; - + const int pos = vertical ? x0 : y0; const int off_q = (y0 >> min_cb_log2) * min_cb_width + (x0 >> min_cb_log2); - const int cb_x = fc->tab.cb_pos_x[LUMA][off_q]; - const int cb_y = fc->tab.cb_pos_y[LUMA][off_q]; - const int cb_width = fc->tab.cb_width[LUMA][off_q]; - const int off_x = cb_x - x0; - - if (!is_intra) { - if (fc->tab.msf[off_q] || fc->tab.iaf[off_q]) - has_vertical_sb = cb_width > 8; - } - - // bs for vertical TU boundaries - boundary_left = deblock_is_boundary(lc, x0 > 0 && !(x0 & 3), x0, rs, 1); - - if (boundary_left) { - const RefPicList *rpl_left = - (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ? ff_vvc_get_ref_list(fc, fc->ref, x0 - 1, y0) : lc->sc->rpl; - for (int i = 0; i < height; i += 4) { - uint8_t max_len_p, max_len_q; - const int bs = deblock_bs(lc, x0 - 1, y0 + i, x0, y0 + i, rpl_left, 0, off_x, has_vertical_sb); - - TAB_BS(fc->tab.vertical_bs[LUMA], x0, (y0 + i)) = bs; - - derive_max_filter_length_luma(fc, x0, y0 + i, is_intra, has_vertical_sb, 1, &max_len_p, &max_len_q); - TAB_MAX_LEN(fc->tab.vertical_p, x0, y0 + i) = max_len_p; - TAB_MAX_LEN(fc->tab.vertical_q, x0, y0 + i) = max_len_q; - } - } - - if (!is_intra) { - if (fc->tab.msf[off_q] || fc->tab.iaf[off_q]) - vvc_deblock_subblock_bs_vertical(lc, cb_x, cb_y, x0, y0, width, height); - } -} - -static void vvc_deblock_bs_luma_horizontal(const VVCLocalContext *lc, - const int x0, const int y0, const int width, const int height, const int rs) -{ - const VVCFrameContext *fc = lc->fc; - const MvField *tab_mvf = fc->tab.mvf; - const int log2_min_pu_size = MIN_PU_LOG2; - const int min_pu_width = fc->ps.pps->min_pu_width; - const int min_cb_log2 = fc->ps.sps->min_cb_log2_size_y; - const int min_cb_width = fc->ps.pps->min_cb_width; - const int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width + - (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; - int boundary_upper; - int has_horizontal_sb = 0; - - const int off_q = (y0 >> min_cb_log2) * min_cb_width + (x0 >> min_cb_log2); - const int cb_x = fc->tab.cb_pos_x[LUMA][off_q]; - const int cb_y = fc->tab.cb_pos_y[LUMA][off_q]; - const int cb_height = fc->tab.cb_height[LUMA][off_q]; - const int off_y = y0 - cb_y; - - if (!is_intra) { - if (fc->tab.msf[off_q] || fc->tab.iaf[off_q]) - has_horizontal_sb = cb_height > 8; - } - - boundary_upper = deblock_is_boundary(lc, y0 > 0 && !(y0 & 3), y0, rs, 0); - - if (boundary_upper) { - const RefPicList *rpl_top = - (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ? ff_vvc_get_ref_list(fc, fc->ref, x0, y0 - 1) : lc->sc->rpl; - - for (int i = 0; i < width; i += 4) { + const int cb = (vertical ? fc->tab.cb_pos_x : fc->tab.cb_pos_y )[LUMA][off_q]; + const int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width + + (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; + + if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) { + const int is_vb = is_virtual_boundary(fc, pos, vertical); + const int size = vertical ? height : width; + const int off = cb - pos; + const int cb_size = (vertical ? fc->tab.cb_width : fc->tab.cb_height)[LUMA][off_q]; + const int has_sb = !is_intra && (fc->tab.msf[off_q] || fc->tab.iaf[off_q]) && cb_size > 8; + const int flag = vertical ? BOUNDARY_LEFT_SLICE : BOUNDARY_UPPER_SLICE; + const RefPicList *rpl_p = + (lc->boundary_flags & flag) ? ff_vvc_get_ref_list(fc, fc->ref, x0 - vertical, y0 - !vertical) : lc->sc->rpl; + + for (int i = 0; i < size; i += 4) { + const int x = x0 + i * !vertical; + const int y = y0 + i * vertical; uint8_t max_len_p, max_len_q; - const int bs = deblock_bs(lc, x0 + i, y0 - 1, x0 + i, y0, rpl_top, 0, off_y, has_horizontal_sb); + const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - !vertical, x, y, rpl_p, LUMA, off, has_sb); - TAB_BS(fc->tab.horizontal_bs[LUMA], x0 + i, y0) = bs; + TAB_BS(fc->tab.bs[vertical][LUMA], x, y) = bs; - derive_max_filter_length_luma(fc, x0 + i, y0, is_intra, has_horizontal_sb, 0, &max_len_p, &max_len_q); - TAB_MAX_LEN(fc->tab.horizontal_p, x0 + i, y0) = max_len_p; - TAB_MAX_LEN(fc->tab.horizontal_q, x0 + i, y0) = max_len_q; + derive_max_filter_length_luma(fc, x, y, is_intra, has_sb, vertical, &max_len_p, &max_len_q); + TAB_MAX_LEN(fc->tab.max_len_p[vertical], x, y) = max_len_p; + TAB_MAX_LEN(fc->tab.max_len_q[vertical], x, y) = max_len_q; } } if (!is_intra) { if (fc->tab.msf[off_q] || fc->tab.iaf[off_q]) - vvc_deblock_subblock_bs_horizontal(lc, cb_x, cb_y, x0, y0, width, height); + vvc_deblock_subblock_bs(lc, cb, x0, y0, width, height, vertical); } } -static void vvc_deblock_bs_chroma_vertical(const VVCLocalContext *lc, - const int x0, const int y0, const int width, const int height, const int rs) +static void vvc_deblock_bs_chroma(const VVCLocalContext *lc, + const int x0, const int y0, const int width, const int height, const int rs, const int vertical) { const VVCFrameContext *fc = lc->fc; - const int boundary_left = deblock_is_boundary(lc, - x0 > 0 && !(x0 & ((CHROMA_GRID << fc->ps.sps->hshift[CHROMA]) - 1)), x0, rs, 1); + const int shift = (vertical ? fc->ps.sps->hshift : fc->ps.sps->vshift)[CHROMA]; + const int mask = (CHROMA_GRID << shift) - 1; + const int pos = vertical ? x0 : y0; - if (boundary_left) { - for (int i = 0; i < height; i += 2) { - for (int c_idx = CB; c_idx <= CR; c_idx++) { - const int bs = deblock_bs(lc, x0 - 1, y0 + i, x0, y0 + i, NULL, c_idx, 0, 0); + if (deblock_is_boundary(lc, pos > 0 && !(pos & mask), pos, rs, vertical)) { + const int is_vb = is_virtual_boundary(fc, pos, vertical); + const int size = vertical ? height : width; - TAB_BS(fc->tab.vertical_bs[c_idx], x0, (y0 + i)) = bs; - } - } - } -} + for (int c_idx = CB; c_idx <= CR; c_idx++) { + for (int i = 0; i < size; i += 2) { + const int x = x0 + i * !vertical; + const int y = y0 + i * vertical; + const int bs = is_vb ? 0 : deblock_bs(lc, x - vertical, y - !vertical, x, y, NULL, c_idx, 0, 0); -static void vvc_deblock_bs_chroma_horizontal(const VVCLocalContext *lc, - const int x0, const int y0, const int width, const int height, const int rs) -{ - const VVCFrameContext *fc = lc->fc; - const int boundary_upper = deblock_is_boundary(lc, - y0 > 0 && !(y0 & ((CHROMA_GRID << fc->ps.sps->vshift[CHROMA]) - 1)), y0, rs, 0); - - if (boundary_upper) { - for (int i = 0; i < width; i += 2) { - for (int c_idx = CB; c_idx <= CR; c_idx++) { - const int bs = deblock_bs(lc, x0 + i, y0 - 1, x0 + i, y0, NULL, c_idx, 0, 0); - - TAB_BS(fc->tab.horizontal_bs[c_idx], x0 + i, y0) = bs; + TAB_BS(fc->tab.bs[vertical][c_idx], x, y) = bs; } } } } typedef void (*deblock_bs_fn)(const VVCLocalContext *lc, const int x0, const int y0, - const int width, const int height, const int rs); + const int width, const int height, const int rs, const int vertical); static void vvc_deblock_bs(const VVCLocalContext *lc, const int x0, const int y0, const int rs, const int vertical) { @@ -710,9 +686,8 @@ static void vvc_deblock_bs(const VVCLocalContext *lc, const int x0, const int y0 const int ctb_size = sps->ctb_size_y; const int x_end = FFMIN(x0 + ctb_size, pps->width) >> MIN_TU_LOG2; const int y_end = FFMIN(y0 + ctb_size, pps->height) >> MIN_TU_LOG2; - deblock_bs_fn deblock_bs[2][2] = { - { vvc_deblock_bs_luma_horizontal, vvc_deblock_bs_chroma_horizontal }, - { vvc_deblock_bs_luma_vertical, vvc_deblock_bs_chroma_vertical } + deblock_bs_fn deblock_bs[] = { + vvc_deblock_bs_luma, vvc_deblock_bs_chroma }; for (int is_chroma = 0; is_chroma <= 1; is_chroma++) { @@ -722,8 +697,8 @@ static void vvc_deblock_bs(const VVCLocalContext *lc, const int x0, const int y0 for (int x = x0 >> MIN_TU_LOG2; x < x_end; x++) { const int off = y * fc->ps.pps->min_tu_width + x; if ((fc->tab.tb_pos_x0[is_chroma][off] >> MIN_TU_LOG2) == x && (fc->tab.tb_pos_y0[is_chroma][off] >> MIN_TU_LOG2) == y) { - deblock_bs[vertical][is_chroma](lc, x << MIN_TU_LOG2, y << MIN_TU_LOG2, - fc->tab.tb_width[is_chroma][off] << hs, fc->tab.tb_height[is_chroma][off] << vs, rs); + deblock_bs[is_chroma](lc, x << MIN_TU_LOG2, y << MIN_TU_LOG2, + fc->tab.tb_width[is_chroma][off] << hs, fc->tab.tb_height[is_chroma][off] << vs, rs, vertical); } } } @@ -734,10 +709,8 @@ static void vvc_deblock_bs(const VVCLocalContext *lc, const int x0, const int y0 static void max_filter_length_luma(const VVCFrameContext *fc, const int qx, const int qy, const int vertical, uint8_t *max_len_p, uint8_t *max_len_q) { - const uint8_t *tab_len_p = vertical ? fc->tab.vertical_p : fc->tab.horizontal_p; - const uint8_t *tab_len_q = vertical ? fc->tab.vertical_q : fc->tab.horizontal_q; - *max_len_p = TAB_MAX_LEN(tab_len_p, qx, qy); - *max_len_q = TAB_MAX_LEN(tab_len_q, qx, qy); + *max_len_p = TAB_MAX_LEN(fc->tab.max_len_p[vertical], qx, qy); + *max_len_q = TAB_MAX_LEN(fc->tab.max_len_q[vertical], qx, qy); } //part of 8.8.3.3 Derivation process of transform block boundary @@ -807,144 +780,79 @@ static int get_qp(const VVCFrameContext *fc, const uint8_t *src, const int x, co return get_qp_c(fc, x, y, c_idx, vertical); } -void ff_vvc_deblock_vertical(const VVCLocalContext *lc, const int x0, const int y0, const int rs) +static void vvc_deblock(const VVCLocalContext *lc, int x0, int y0, const int rs, const int vertical) { - VVCFrameContext *fc = lc->fc; - const VVCSPS *sps = fc->ps.sps; - const int c_end = sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; - uint8_t *src; - int x, y, qp; + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int c_end = sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; + const int ctb_size = fc->ps.sps->ctb_size_y; + const DBParams *params = fc->tab.deblock + rs; + int x_end = FFMIN(x0 + ctb_size, fc->ps.pps->width); + int y_end = FFMIN(y0 + ctb_size, fc->ps.pps->height); //not use this yet, may needed by plt. - const uint8_t no_p[4] = { 0 }; - const uint8_t no_q[4] = { 0 } ; + const uint8_t no_p[4] = { 0 }; + const uint8_t no_q[4] = { 0 } ; - const int ctb_log2_size_y = fc->ps.sps->ctb_log2_size_y; - int x_end, y_end; - const int ctb_size = 1 << ctb_log2_size_y; - const DBParams *params = fc->tab.deblock + rs; - - vvc_deblock_bs(lc, x0, y0, rs, 1); + vvc_deblock_bs(lc, x0, y0, rs, vertical); - x_end = x0 + ctb_size; - if (x_end > fc->ps.pps->width) - x_end = fc->ps.pps->width; - y_end = y0 + ctb_size; - if (y_end > fc->ps.pps->height) - y_end = fc->ps.pps->height; + if (!vertical) { + FFSWAP(int, x_end, y_end); + FFSWAP(int, x0, y0); + } for (int c_idx = 0; c_idx < c_end; c_idx++) { - const int hs = sps->hshift[c_idx]; - const int vs = sps->vshift[c_idx]; + const int hs = (vertical ? sps->hshift : sps->vshift)[c_idx]; + const int vs = (vertical ? sps->vshift : sps->hshift)[c_idx]; const int grid = c_idx ? (CHROMA_GRID << hs) : LUMA_GRID; const int tc_offset = params->tc_offset[c_idx]; const int beta_offset = params->beta_offset[c_idx]; + const int src_stride = fc->frame->linesize[c_idx]; - for (y = y0; y < y_end; y += (DEBLOCK_STEP << vs)) { - for (x = x0 ? x0 : grid; x < x_end; x += grid) { - int32_t bs[4], beta[4], tc[4], all_zero_bs = 1; + for (int y = y0; y < y_end; y += (DEBLOCK_STEP << vs)) { + for (int x = x0 ? x0 : grid; x < x_end; x += grid) { + const uint8_t horizontal_ctu_edge = !vertical && !(x % ctb_size); + int32_t bs[4], beta[4], tc[4] = { 0 }, all_zero_bs = 1; uint8_t max_len_p[4], max_len_q[4]; for (int i = 0; i < DEBLOCK_STEP >> (2 - vs); i++) { - const int dy = i << 2; - bs[i] = (y + dy < y_end) ? TAB_BS(fc->tab.vertical_bs[c_idx], x, y + dy) : 0; - if (bs[i]) { - src = &fc->frame->data[c_idx][((y + dy) >> vs) * fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)]; - qp = get_qp(fc, src, x, y + dy, c_idx, 1); + int tx = x; + int ty = y + (i << 2); + const int end = ty >= y_end; - beta[i] = betatable[av_clip(qp + beta_offset, 0, MAX_QP)]; + if (!vertical) + FFSWAP(int, tx, ty); - max_filter_length(fc, x, y + dy, c_idx, 1, 0, bs[i], &max_len_p[i], &max_len_q[i]); + bs[i] = end ? 0 : TAB_BS(fc->tab.bs[vertical][c_idx], tx, ty); + if (bs[i]) { + const int qp = get_qp(fc, POS(c_idx, tx, ty), tx, ty, c_idx, vertical); + beta[i] = betatable[av_clip(qp + beta_offset, 0, MAX_QP)]; + tc[i] = TC_CALC(qp, bs[i]) ; + max_filter_length(fc, tx, ty, c_idx, vertical, horizontal_ctu_edge, bs[i], &max_len_p[i], &max_len_q[i]); all_zero_bs = 0; } - tc[i] = bs[i] ? TC_CALC(qp, bs[i]) : 0; } if (!all_zero_bs) { - src = &fc->frame->data[c_idx][(y >> vs) * fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)]; - if (!c_idx) { - fc->vvcdsp.lf.filter_luma[1](src, fc->frame->linesize[c_idx], - beta, tc, no_p, no_q, max_len_p, max_len_q, 0); - } else { - fc->vvcdsp.lf.filter_chroma[1](src, fc->frame->linesize[c_idx], - beta, tc, no_p, no_q, max_len_p, max_len_q, vs); - } + uint8_t *src = vertical ? POS(c_idx, x, y) : POS(c_idx, y, x); + if (!c_idx) + fc->vvcdsp.lf.filter_luma[vertical](src, src_stride, beta, tc, no_p, no_q, max_len_p, max_len_q, horizontal_ctu_edge); + else + fc->vvcdsp.lf.filter_chroma[vertical](src, src_stride, beta, tc, no_p, no_q, max_len_p, max_len_q, vs); } } } } } -void ff_vvc_deblock_horizontal(const VVCLocalContext *lc, const int x0, const int y0, const int rs) +void ff_vvc_deblock_vertical(const VVCLocalContext *lc, const int x0, const int y0, const int rs) { - VVCFrameContext *fc = lc->fc; - const VVCSPS *sps = fc->ps.sps; - const int c_end = fc->ps.sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; - uint8_t* src; - int x, y, qp; - - //not use this yet, may needed by plt. - const uint8_t no_p[4] = { 0 }; - const uint8_t no_q[4] = { 0 } ; - - const int ctb_log2_size_y = fc->ps.sps->ctb_log2_size_y; - int x_end, y_end; - const int ctb_size = 1 << ctb_log2_size_y; - const DBParams *params = fc->tab.deblock + rs; - - vvc_deblock_bs(lc, x0, y0, rs, 0); - - x_end = x0 + ctb_size; - if (x_end > fc->ps.pps->width) - x_end = fc->ps.pps->width; - y_end = y0 + ctb_size; - if (y_end > fc->ps.pps->height) - y_end = fc->ps.pps->height; - - for (int c_idx = 0; c_idx < c_end; c_idx++) { - const int hs = sps->hshift[c_idx]; - const int vs = sps->vshift[c_idx]; - const int grid = c_idx ? (CHROMA_GRID << vs) : LUMA_GRID; - const int beta_offset = params->beta_offset[c_idx]; - const int tc_offset = params->tc_offset[c_idx]; - - for (y = y0; y < y_end; y += grid) { - const uint8_t horizontal_ctu_edge = !(y % fc->ps.sps->ctb_size_y); - if (!y) - continue; - - for (x = x0 ? x0: 0; x < x_end; x += (DEBLOCK_STEP << hs)) { - int32_t bs[4], beta[4], tc[4], all_zero_bs = 1; - uint8_t max_len_p[4], max_len_q[4]; - - for (int i = 0; i < DEBLOCK_STEP >> (2 - hs); i++) { - const int dx = i << 2; - - bs[i] = (x + dx < x_end) ? TAB_BS(fc->tab.horizontal_bs[c_idx], x + dx, y) : 0; - if (bs[i]) { - src = &fc->frame->data[c_idx][(y >> vs) * fc->frame->linesize[c_idx] + (((x + dx)>> hs) << fc->ps.sps->pixel_shift)]; - qp = get_qp(fc, src, x + dx, y, c_idx, 0); - - beta[i] = betatable[av_clip(qp + beta_offset, 0, MAX_QP)]; + vvc_deblock(lc, x0, y0, rs, 1); +} - max_filter_length(fc, x + dx, y, c_idx, 0, horizontal_ctu_edge, bs[i], &max_len_p[i], &max_len_q[i]); - all_zero_bs = 0; - } - tc[i] = bs[i] ? TC_CALC(qp, bs[i]) : 0; - } - if (!all_zero_bs) { - src = &fc->frame->data[c_idx][(y >> vs) * fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)]; - if (!c_idx) { - fc->vvcdsp.lf.filter_luma[0](src, fc->frame->linesize[c_idx], - beta, tc, no_p, no_q, max_len_p, max_len_q, horizontal_ctu_edge); - } else { - fc->vvcdsp.lf.filter_chroma[0](src, fc->frame->linesize[c_idx], - beta, tc, no_p, no_q, max_len_p, max_len_q, hs); - } - } - } - } - } +void ff_vvc_deblock_horizontal(const VVCLocalContext *lc, const int x0, const int y0, const int rs) +{ + vvc_deblock(lc, x0, y0, rs, 0); } static void alf_copy_border(uint8_t *dst, const uint8_t *src, @@ -1085,7 +993,7 @@ static void alf_prepare_buffer(VVCFrameContext *fc, uint8_t *_dst, const uint8_t #define ALF_MAX_FILTER_SIZE (ALF_MAX_BLOCKS_IN_CTU * ALF_NUM_COEFF_LUMA) static void alf_get_coeff_and_clip(VVCLocalContext *lc, int16_t *coeff, int16_t *clip, - const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, ALFParams *alf) + const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, const ALFParams *alf) { const VVCFrameContext *fc = lc->fc; const H266RawSliceHeader *rsh = lc->sc->sh.r; @@ -1116,7 +1024,7 @@ static void alf_get_coeff_and_clip(VVCLocalContext *lc, int16_t *coeff, int16_t static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src, const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int x0, const int y0, - const int width, const int height, const int _vb_pos, ALFParams *alf) + const int width, const int height, const int _vb_pos, const ALFParams *alf) { const VVCFrameContext *fc = lc->fc; int vb_pos = _vb_pos - y0; @@ -1140,7 +1048,7 @@ static int alf_clip_from_idx(const VVCFrameContext *fc, const int idx) static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src, const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int c_idx, - const int width, const int height, const int vb_pos, ALFParams *alf) + const int width, const int height, const int vb_pos, const ALFParams *alf) { VVCFrameContext *fc = lc->fc; const H266RawSliceHeader *rsh = lc->sc->sh.r; @@ -1157,7 +1065,7 @@ static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t * static void alf_filter_cc(VVCLocalContext *lc, uint8_t *dst, const uint8_t *luma, const ptrdiff_t dst_stride, const ptrdiff_t luma_stride, const int c_idx, - const int width, const int height, const int hs, const int vs, const int vb_pos, ALFParams *alf) + const int width, const int height, const int hs, const int vs, const int vb_pos, const ALFParams *alf) { const VVCFrameContext *fc = lc->fc; const H266RawSliceHeader *rsh = lc->sc->sh.r; @@ -1178,7 +1086,6 @@ void ff_vvc_alf_copy_ctu_to_hv(VVCLocalContext* lc, const int x0, const int y0) const int rx = x0 >> fc->ps.sps->ctb_log2_size_y; const int ry = y0 >> fc->ps.sps->ctb_log2_size_y; const int ctb_size_y = fc->ps.sps->ctb_size_y; - const int ps = fc->ps.sps->pixel_shift; const int c_end = fc->ps.sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; for (int c_idx = 0; c_idx < c_end; c_idx++) { @@ -1190,85 +1097,143 @@ void ff_vvc_alf_copy_ctu_to_hv(VVCLocalContext* lc, const int x0, const int y0) const int height = FFMIN(fc->ps.pps->height - y0, ctb_size_y) >> vs; const int src_stride = fc->frame->linesize[c_idx]; - uint8_t* src = &fc->frame->data[c_idx][y * src_stride + (x << ps)]; + uint8_t *src = POS(c_idx, x0, y0); alf_copy_ctb_to_hv(fc, src, src_stride, x, y, width, height, rx, ry, c_idx); } } -void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0) +static void alf_get_edges(const VVCLocalContext *lc, int edges[MAX_EDGES], const int rx, const int ry) { - VVCFrameContext *fc = lc->fc; - const VVCSPS *sps = fc->ps.sps; - const VVCPPS *pps = fc->ps.pps; - const int rx = x0 >> fc->ps.sps->ctb_log2_size_y; - const int ry = y0 >> fc->ps.sps->ctb_log2_size_y; - const int ctb_size_y = fc->ps.sps->ctb_size_y; - const int ps = fc->ps.sps->pixel_shift; - const int padded_stride = EDGE_EMU_BUFFER_STRIDE << ps; - const int padded_offset = padded_stride * ALF_PADDING_SIZE + (ALF_PADDING_SIZE << ps); - const int c_end = fc->ps.sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; - const int subpic_idx = lc->sc->sh.r->curr_subpic_idx; - ALFParams *alf = &CTB(fc->tab.alf, rx, ry); - int edges[MAX_EDGES] = { rx == 0, ry == 0, rx == pps->ctb_width - 1, ry == pps->ctb_height - 1 }; + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const VVCPPS *pps = fc->ps.pps; + const int subpic_idx = lc->sc->sh.r->curr_subpic_idx; + + // we can't use |= instead of || in this function; |= is not a shortcut operator if (!pps->r->pps_loop_filter_across_tiles_enabled_flag) { - edges[LEFT] = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_TILE); - edges[TOP] = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_TILE); - edges[RIGHT] = edges[RIGHT] || pps->ctb_to_col_bd[rx] != pps->ctb_to_col_bd[rx + 1]; + edges[LEFT] = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_TILE); + edges[TOP] = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_TILE); + edges[RIGHT] = edges[RIGHT] || pps->ctb_to_col_bd[rx] != pps->ctb_to_col_bd[rx + 1]; edges[BOTTOM] = edges[BOTTOM] || pps->ctb_to_row_bd[ry] != pps->ctb_to_row_bd[ry + 1]; } if (!pps->r->pps_loop_filter_across_slices_enabled_flag) { - edges[LEFT] = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_SLICE); - edges[TOP] = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_SLICE); - edges[RIGHT] = edges[RIGHT] || CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx + 1, ry); + edges[LEFT] = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_SLICE); + edges[TOP] = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_SLICE); + edges[RIGHT] = edges[RIGHT] || CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx + 1, ry); edges[BOTTOM] = edges[BOTTOM] || CTB(fc->tab.slice_idx, rx, ry) != CTB(fc->tab.slice_idx, rx, ry + 1); } if (!sps->r->sps_loop_filter_across_subpic_enabled_flag[subpic_idx]) { - edges[LEFT] = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_SUBPIC); - edges[TOP] = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_SUBPIC); - edges[RIGHT] = edges[RIGHT] || fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] + fc->ps.sps->r->sps_subpic_width_minus1[subpic_idx] == rx; + edges[LEFT] = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_SUBPIC); + edges[TOP] = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_SUBPIC); + edges[RIGHT] = edges[RIGHT] || fc->ps.sps->r->sps_subpic_ctu_top_left_x[subpic_idx] + fc->ps.sps->r->sps_subpic_width_minus1[subpic_idx] == rx; edges[BOTTOM] = edges[BOTTOM] || fc->ps.sps->r->sps_subpic_ctu_top_left_y[subpic_idx] + fc->ps.sps->r->sps_subpic_height_minus1[subpic_idx] == ry; } - for (int c_idx = 0; c_idx < c_end; c_idx++) { - const int hs = fc->ps.sps->hshift[c_idx]; - const int vs = fc->ps.sps->vshift[c_idx]; - const int ctb_size_h = ctb_size_y >> hs; - const int ctb_size_v = ctb_size_y >> vs; - const int x = x0 >> hs; - const int y = y0 >> vs; - const int pic_width = fc->ps.pps->width >> hs; - const int pic_height = fc->ps.pps->height >> vs; - const int width = FFMIN(pic_width - x, ctb_size_h); - const int height = FFMIN(pic_height - y, ctb_size_v); - const int src_stride = fc->frame->linesize[c_idx]; - uint8_t *src = &fc->frame->data[c_idx][y * src_stride + (x << ps)]; - uint8_t *padded; + if (sps->r->sps_virtual_boundaries_enabled_flag) { + edges[LEFT] = edges[LEFT] || is_virtual_boundary(fc, rx << sps->ctb_log2_size_y, 1); + edges[TOP] = edges[TOP] || is_virtual_boundary(fc, ry << sps->ctb_log2_size_y, 0); + edges[RIGHT] = edges[RIGHT] || is_virtual_boundary(fc, (rx + 1) << sps->ctb_log2_size_y, 1); + edges[BOTTOM] = edges[BOTTOM] || is_virtual_boundary(fc, (ry + 1) << sps->ctb_log2_size_y, 0); + } +} - if (alf->ctb_flag[c_idx] || (!c_idx && (alf->ctb_cc_idc[0] || alf->ctb_cc_idc[1]))) { - padded = (c_idx ? lc->alf_buffer_chroma : lc->alf_buffer_luma) + padded_offset; - alf_prepare_buffer(fc, padded, src, x, y, rx, ry, width, height, - padded_stride, src_stride, c_idx, edges); - } - if (alf->ctb_flag[c_idx]) { - if (!c_idx) { - alf_filter_luma(lc, src, padded, src_stride, padded_stride, x, y, - width, height, y + ctb_size_v - ALF_VB_POS_ABOVE_LUMA, alf); - } else { - alf_filter_chroma(lc, src, padded, src_stride, padded_stride, c_idx, - width, height, ctb_size_v - ALF_VB_POS_ABOVE_CHROMA, alf); - } +static void alf_init_subblock(VVCRect *sb, int sb_edges[MAX_EDGES], const VVCRect *b, const int edges[MAX_EDGES]) +{ + *sb = *b; + memcpy(sb_edges, edges, sizeof(int) * MAX_EDGES); +} + +static void alf_get_subblock(VVCRect *sb, int edges[MAX_EDGES], const int bx, const int by, const int vb_pos[2], const int has_vb[2]) +{ + int *pos[] = { &sb->l, &sb->t, &sb->r, &sb->b }; + + for (int vertical = 0; vertical <= 1; vertical++) { + if (has_vb[vertical]) { + const int c = vertical ? (bx ? LEFT : RIGHT) : (by ? TOP : BOTTOM); + *pos[c] = vb_pos[vertical]; + edges[c] = 1; } - if (c_idx && alf->ctb_cc_idc[c_idx - 1]) { - padded = lc->alf_buffer_luma + padded_offset; - alf_filter_cc(lc, src, padded, src_stride, padded_stride, c_idx, - width, height, hs, vs, (ctb_size_v << vs) - ALF_VB_POS_ABOVE_LUMA, alf); + } +} + +static void alf_get_subblocks(const VVCLocalContext *lc, VVCRect sbs[MAX_VBBS], int sb_edges[MAX_VBBS][MAX_EDGES], int *nb_sbs, + const int x0, const int y0, const int rx, const int ry) +{ + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const VVCPPS *pps = fc->ps.pps; + const int ctu_size_y = sps->ctb_size_y; + const int vb_pos[] = { get_virtual_boundary(fc, ry, 0), get_virtual_boundary(fc, rx, 1) }; + const int has_vb[] = { vb_pos[0] > y0, vb_pos[1] > x0 }; + const VVCRect b = { x0, y0, FFMIN(x0 + ctu_size_y, pps->width), FFMIN(y0 + ctu_size_y, pps->height) }; + int edges[MAX_EDGES] = { !rx, !ry, rx == pps->ctb_width - 1, ry == pps->ctb_height - 1 }; + int i = 0; + + alf_get_edges(lc, edges, rx, ry); + + for (int by = 0; by <= has_vb[0]; by++) { + for (int bx = 0; bx <= has_vb[1]; bx++, i++) { + alf_init_subblock(sbs + i, sb_edges[i], &b, edges); + alf_get_subblock(sbs + i, sb_edges[i], bx, by, vb_pos, has_vb); } + } + *nb_sbs = i; +} - alf->applied[c_idx] = 1; +void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0) +{ + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int rx = x0 >> sps->ctb_log2_size_y; + const int ry = y0 >> sps->ctb_log2_size_y; + const int ps = sps->pixel_shift; + const int padded_stride = EDGE_EMU_BUFFER_STRIDE << ps; + const int padded_offset = padded_stride * ALF_PADDING_SIZE + (ALF_PADDING_SIZE << ps); + const int c_end = sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1; + const int ctu_end = y0 + sps->ctb_size_y; + const ALFParams *alf = &CTB(fc->tab.alf, rx, ry); + int sb_edges[MAX_VBBS][MAX_EDGES], nb_sbs; + VVCRect sbs[MAX_VBBS]; + + alf_get_subblocks(lc, sbs, sb_edges, &nb_sbs, x0, y0, rx, ry); + + for (int i = 0; i < nb_sbs; i++) { + const VVCRect *sb = sbs + i; + for (int c_idx = 0; c_idx < c_end; c_idx++) { + const int hs = fc->ps.sps->hshift[c_idx]; + const int vs = fc->ps.sps->vshift[c_idx]; + const int x = sb->l >> hs; + const int y = sb->t >> vs; + const int width = (sb->r - sb->l) >> hs; + const int height = (sb->b - sb->t) >> vs; + const int src_stride = fc->frame->linesize[c_idx]; + uint8_t *src = POS(c_idx, sb->l, sb->t); + uint8_t *padded; + + if (alf->ctb_flag[c_idx] || (!c_idx && (alf->ctb_cc_idc[0] || alf->ctb_cc_idc[1]))) { + padded = (c_idx ? lc->alf_buffer_chroma : lc->alf_buffer_luma) + padded_offset; + alf_prepare_buffer(fc, padded, src, x, y, rx, ry, width, height, + padded_stride, src_stride, c_idx, sb_edges[i]); + } + if (alf->ctb_flag[c_idx]) { + if (!c_idx) { + alf_filter_luma(lc, src, padded, src_stride, padded_stride, x, y, + width, height, ctu_end - ALF_VB_POS_ABOVE_LUMA, alf); + } else { + alf_filter_chroma(lc, src, padded, src_stride, padded_stride, c_idx, + width, height, ((ctu_end - sb->t) >> vs) - ALF_VB_POS_ABOVE_CHROMA, alf); + } + } + if (c_idx && alf->ctb_cc_idc[c_idx - 1]) { + padded = lc->alf_buffer_luma + padded_offset; + alf_filter_cc(lc, src, padded, src_stride, padded_stride, c_idx, + width, height, hs, vs, ctu_end - sb->t - ALF_VB_POS_ABOVE_LUMA, alf); + } + } } } @@ -1280,7 +1245,7 @@ void ff_vvc_lmcs_filter(const VVCLocalContext *lc, const int x, const int y) const int ctb_size = fc->ps.sps->ctb_size_y; const int width = FFMIN(fc->ps.pps->width - x, ctb_size); const int height = FFMIN(fc->ps.pps->height - y, ctb_size); - uint8_t *data = fc->frame->data[LUMA] + y * fc->frame->linesize[LUMA] + (x << fc->ps.sps->pixel_shift); + uint8_t *data = POS(LUMA, x, y); if (sc->sh.r->sh_lmcs_used_flag) fc->vvcdsp.lmcs.filter(data, fc->frame->linesize[LUMA], width, height, &fc->ps.lmcs.inv_lut); } diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/inter.c b/src/ExtLib/ffmpeg/libavcodec/vvc/inter.c index 344a0a8c13..9578fd8de4 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/inter.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/inter.c @@ -30,13 +30,6 @@ #define PROF_TEMP_OFFSET (MAX_PB_SIZE + 32) static const int bcw_w_lut[] = {4, 5, 3, 10, -2}; -typedef struct VVCRect { - int l; // left - int t; // top - int r; // right - int b; // bottom -} VVCRect; - static void subpic_get_rect(VVCRect *r, const VVCFrame *src_frame, const int subpic_idx, const int is_chroma) { const VVCSPS *sps = src_frame->sps; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c b/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c index 42564b3e6f..1788a7150b 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.c @@ -1695,17 +1695,34 @@ static void ibc_merge_candidates(VVCLocalContext *lc, const int merge_idx, Mv *m memset(mv, 0, sizeof(*mv)); } -void ff_vvc_mvp_ibc(VVCLocalContext *lc, const int mvp_l0_flag, const int amvr_shift, Mv *mv) +static int ibc_check_mv(VVCLocalContext *lc, Mv *mv) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = lc->fc->ps.sps; + const CodingUnit *cu = lc->cu; + const Mv *bv = &cu->pu.mi.mv[L0][0]; + + if (sps->ctb_size_y < ((cu->y0 + (bv->y >> 4)) & (sps->ctb_size_y - 1)) + cu->cb_height) { + av_log(fc->log_ctx, AV_LOG_ERROR, "IBC region spans multiple CTBs.\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +int ff_vvc_mvp_ibc(VVCLocalContext *lc, const int mvp_l0_flag, const int amvr_shift, Mv *mv) { LOCAL_ALIGNED_8(Mv, mvp, [1]); ibc_merge_candidates(lc, mvp_l0_flag, mvp); ibc_add_mvp(mv, mvp, amvr_shift); + return ibc_check_mv(lc, mv); } -void ff_vvc_luma_mv_merge_ibc(VVCLocalContext *lc, const int merge_idx, Mv *mv) +int ff_vvc_luma_mv_merge_ibc(VVCLocalContext *lc, const int merge_idx, Mv *mv) { ibc_merge_candidates(lc, merge_idx, mv); + return ibc_check_mv(lc, mv); } static int affine_mvp_constructed_cp(NeighbourContext *ctx, diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.h b/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.h index 3f0c8b08e9..b2242b2a4d 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.h +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/mvs.h @@ -30,9 +30,9 @@ void ff_vvc_clip_mv(Mv *mv); void ff_vvc_mv_scale(Mv *dst, const Mv *src, int td, int tb); void ff_vvc_luma_mv_merge_mode(VVCLocalContext *lc, int merge_idx, int ciip_flag, MvField *mv); void ff_vvc_luma_mv_merge_gpm(VVCLocalContext *lc, const int merge_gpm_idx[2], MvField *mv); -void ff_vvc_luma_mv_merge_ibc(VVCLocalContext *lc, int merge_idx, Mv *mv); +int ff_vvc_luma_mv_merge_ibc(VVCLocalContext *lc, int merge_idx, Mv *mv); void ff_vvc_mvp(VVCLocalContext *lc, const int *mvp_lx_flag, const int amvr_shift, MotionInfo *mi); -void ff_vvc_mvp_ibc(VVCLocalContext *lc, int mvp_l0_flag, int amvr_shift, Mv *mv); +int ff_vvc_mvp_ibc(VVCLocalContext *lc, int mvp_l0_flag, int amvr_shift, Mv *mv); void ff_vvc_sb_mv_merge_mode(VVCLocalContext *lc, int merge_subblock_idx, PredictionUnit *pu); void ff_vvc_affine_mvp(VVCLocalContext *lc, const int *mvp_lx_flag, const int amvr_shift, MotionInfo* mi); void ff_vvc_store_sb_mvs(const VVCLocalContext *lc, PredictionUnit *pu); diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c b/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c index 92368eafc2..58496c9fba 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/ps.c @@ -796,8 +796,49 @@ static int ph_max_num_subblock_merge_cand(const H266RawSPS *sps, const H266RawPi return sps->sps_sbtmvp_enabled_flag && ph->ph_temporal_mvp_enabled_flag; } +static int ph_vb_pos(uint16_t *vbs, uint8_t *num_vbs, const uint16_t *pos_minus_1, const uint8_t num_pos, uint16_t max, const int ctb_size_y) +{ + max = FF_CEIL_RSHIFT(max, 3) - 2; + for (int i = 0; i < num_pos; i++) { + if (pos_minus_1[i] > max) + return AVERROR_INVALIDDATA; + + vbs[i] = (pos_minus_1[i] + 1) << 3; + + // The distance between any two vertical virtual boundaries shall be greater than or equal to CtbSizeY luma samples + if (i && vbs[i] < vbs[i - 1] + ctb_size_y) + return AVERROR_INVALIDDATA; + } + *num_vbs = num_pos; + + return 0; +} + +#define VBF(f) (sps->sps_virtual_boundaries_present_flag ? sps->sps_##f : ph->r->ph_##f) +#define VBFS(c, d) VBF(virtual_boundary_pos_##c##_minus1), VBF(num_##d##_virtual_boundaries) + +static int ph_vb(VVCPH *ph, const H266RawSPS *sps, const H266RawPPS *pps) +{ + const int ctb_size_y = 1 << (sps->sps_log2_ctu_size_minus5 + 5); + int ret; + + if (!sps->sps_virtual_boundaries_enabled_flag) + return 0; + + ret = ph_vb_pos(ph->vb_pos_x, &ph->num_ver_vbs, VBFS(x, ver), pps->pps_pic_width_in_luma_samples, ctb_size_y); + if (ret < 0) + return ret; + + ret = ph_vb_pos(ph->vb_pos_y, &ph->num_hor_vbs, VBFS(y, hor), pps->pps_pic_height_in_luma_samples, ctb_size_y); + if (ret < 0) + return ret; + + return 0; +} + static int ph_derive(VVCPH *ph, const H266RawSPS *sps, const H266RawPPS *pps, const int poc_tid0, const int is_clvss) { + int ret; ph->max_num_subblock_merge_cand = ph_max_num_subblock_merge_cand(sps, ph->r); ph->poc = ph_compute_poc(ph->r, sps, poc_tid0, is_clvss); @@ -805,6 +846,10 @@ static int ph_derive(VVCPH *ph, const H266RawSPS *sps, const H266RawPPS *pps, co if (pps->pps_wp_info_in_ph_flag) pred_weight_table(&ph->pwt, &ph->r->ph_pred_weight_table); + ret = ph_vb(ph, sps, pps); + if (ret < 0) + return ret; + return 0; } diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/ps.h b/src/ExtLib/ffmpeg/libavcodec/vvc/ps.h index 6656a06320..9203e2c57f 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/ps.h +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/ps.h @@ -151,6 +151,12 @@ typedef struct VVCPH { //derived values uint32_t max_num_subblock_merge_cand; ///< MaxNumSubblockMergeCand int32_t poc; ///< PicOrderCntVal + + uint8_t num_ver_vbs; ///< NumVerVirtualBoundaries + uint16_t vb_pos_x[VVC_MAX_VBS]; ///< VirtualBoundaryPosX + uint8_t num_hor_vbs; ///< NumHorVirtualBoundaries + uint16_t vb_pos_y[VVC_MAX_VBS]; ///< VirtualBoundaryPosY + PredWeightTable pwt; } VVCPH; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/refs.c b/src/ExtLib/ffmpeg/libavcodec/vvc/refs.c index 8b7ba639a3..26a5b0b34c 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/refs.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/refs.c @@ -191,7 +191,7 @@ int ff_vvc_set_new_ref(VVCContext *s, VVCFrameContext *fc, AVFrame **frame) fc->ref = ref; if (s->no_output_before_recovery_flag && (IS_RASL(s) || !GDR_IS_RECOVERED(s))) - ref->flags = 0; + ref->flags = VVC_FRAME_FLAG_SHORT_REF; else if (ph->r->ph_pic_output_flag) ref->flags = VVC_FRAME_FLAG_OUTPUT; diff --git a/src/ExtLib/ffmpeg/libavcodec/vvc/thread.c b/src/ExtLib/ffmpeg/libavcodec/vvc/thread.c index 8777d380bf..0cacb1f51c 100644 --- a/src/ExtLib/ffmpeg/libavcodec/vvc/thread.c +++ b/src/ExtLib/ffmpeg/libavcodec/vvc/thread.c @@ -443,8 +443,11 @@ static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t) { VVCFrameContext *fc = lc->fc; const CTU *ctu = fc->tab.ctus + t->rs; + int ret; - ff_vvc_predict_inter(lc, t->rs); + ret = ff_vvc_predict_inter(lc, t->rs); + if (ret < 0) + return ret; if (ctu->has_dmvr) report_frame_progress(fc, t->ry, VVC_PROGRESS_MV); @@ -454,9 +457,7 @@ static int run_inter(VVCContext *s, VVCLocalContext *lc, VVCTask *t) static int run_recon(VVCContext *s, VVCLocalContext *lc, VVCTask *t) { - ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry); - - return 0; + return ff_vvc_reconstruct(lc, t->rs, t->rx, t->ry); } static int run_lmcs(VVCContext *s, VVCLocalContext *lc, VVCTask *t) diff --git a/src/ExtLib/ffmpeg/libavcodec/x86/vvc/vvc_alf.asm b/src/ExtLib/ffmpeg/libavcodec/x86/vvc/vvc_alf.asm index b35dd9b0e9..f69a69f05f 100644 --- a/src/ExtLib/ffmpeg/libavcodec/x86/vvc/vvc_alf.asm +++ b/src/ExtLib/ffmpeg/libavcodec/x86/vvc/vvc_alf.asm @@ -324,18 +324,69 @@ SECTION .text %endif %endmacro -; STORE_PIXELS(dst, src) -%macro STORE_PIXELS 2 +; STORE_PIXELS_W16(dst, src) +%macro STORE_PIXELS_W16 2 %if ps == 2 - movu %1, m%2 + movu [%1], m%2 %else + movu [%1], xm%2 + %endif +%endmacro + +%macro STORE_PIXELS_W8 2 + %if ps == 2 + movu [%1], xm%2 + %else + movq [%1], xm%2 + %endif +%endmacro + +; STORE_PIXELS_W4(dst, src, offset) +%macro STORE_PIXELS_W4 3 + %if ps == 2 + movq [%1 + %3 * ps], xm%2 + %else + movd [%1 + %3], xm%2 + %endif +%endmacro + +%macro STORE_PIXELS_W8LE 3 + cmp %3, 8 + jl .w4 + STORE_PIXELS_W8 %1, %2 + cmp %3, 12 + %if ps == 2 + vpermq m%2, m%2, q0302 + %else + vpermq m%2, m%2, q0101 + %endif + jl .end + STORE_PIXELS_W4 %1, %2, 8 + jmp .end +.w4: + STORE_PIXELS_W4 %1, %2, 0 +.end: +%endmacro + +; STORE_PIXELS(dst, src, width) +%macro STORE_PIXELS 3 + %if ps == 1 packuswb m%2, m%2 vpermq m%2, m%2, 0x8 - movu %1, xm%2 + %endif + + %ifidn %3, 16 + STORE_PIXELS_W16 %1, %2 + %else + %if LUMA + STORE_PIXELS_W8 %1, %2 + %else + STORE_PIXELS_W8LE %1, %2, %3 + %endif %endif %endmacro -%macro FILTER_16x4 0 +%macro FILTER_16x4 1 %if LUMA push clipq push strideq @@ -362,7 +413,7 @@ SECTION .text ; clip to pixel CLIPW m0, m14, m15 - STORE_PIXELS [dstq], 0 + STORE_PIXELS dstq, 0, %1 lea srcq, [srcq + src_strideq] lea dstq, [dstq + dst_strideq] @@ -399,7 +450,7 @@ SECTION .text ; const uint8_t *src, ptrdiff_t src_stride, const ptrdiff_t width, cosnt ptr_diff_t height, ; const int16_t *filter, const int16_t *clip, ptrdiff_t stride, ptrdiff_t vb_pos, ptrdiff_t pixel_max); ; ****************************** -cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \ +cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x30, dst, dst_stride, src, src_stride, width, height, filter, clip, stride, vb_pos, pixel_max, \ offset, x, s5, s6 %define ps (%1 / 8) ; pixel size movd xm15, pixel_maxd @@ -409,18 +460,32 @@ cglobal vvc_alf_filter_%2_%1bpc, 11, 15, 16, 0-0x28, dst, dst_stride, src, src_s .loop: push srcq push dstq + push widthq xor xq, xq .loop_w: + cmp widthq, 16 + jl .loop_w_end + LOAD_PARAMS - FILTER_16x4 + FILTER_16x4 16 add srcq, 16 * ps add dstq, 16 * ps add xq, 16 - cmp xq, widthq - jl .loop_w + sub widthq, 16 + jmp .loop_w + +.loop_w_end: + cmp widthq, 0 + je .w_end + + LOAD_PARAMS + FILTER_16x4 widthq + +.w_end: + pop widthq pop dstq pop srcq lea srcq, [srcq + 4 * src_strideq] diff --git a/src/ExtLib/ffmpeg/libavutil/executor.c b/src/ExtLib/ffmpeg/libavutil/executor.c index 26691fe157..fb20104b58 100644 --- a/src/ExtLib/ffmpeg/libavutil/executor.c +++ b/src/ExtLib/ffmpeg/libavutil/executor.c @@ -82,9 +82,11 @@ static int run_one_task(AVExecutor *e, void *lc) /* nothing */; if (*prev) { AVTask *t = remove_task(prev, *prev); - ff_mutex_unlock(&e->lock); + if (e->thread_count > 0) + ff_mutex_unlock(&e->lock); cb->run(t, lc, cb->user_data); - ff_mutex_lock(&e->lock); + if (e->thread_count > 0) + ff_mutex_lock(&e->lock); return 1; } return 0; @@ -146,14 +148,17 @@ AVExecutor* av_executor_alloc(const AVTaskCallbacks *cb, int thread_count) return NULL; e->cb = *cb; - e->local_contexts = av_calloc(thread_count, e->cb.local_context_size); + e->local_contexts = av_calloc(FFMAX(thread_count, 1), e->cb.local_context_size); if (!e->local_contexts) goto free_executor; - e->threads = av_calloc(thread_count, sizeof(*e->threads)); + e->threads = av_calloc(FFMAX(thread_count, 1), sizeof(*e->threads)); if (!e->threads) goto free_executor; + if (!thread_count) + return e; + has_lock = !ff_mutex_init(&e->lock, NULL); has_cond = !ff_cond_init(&e->cond, NULL); @@ -175,9 +180,12 @@ AVExecutor* av_executor_alloc(const AVTaskCallbacks *cb, int thread_count) void av_executor_free(AVExecutor **executor) { + int thread_count; + if (!executor || !*executor) return; - executor_free(*executor, 1, 1); + thread_count = (*executor)->thread_count; + executor_free(*executor, thread_count, thread_count); *executor = NULL; } @@ -195,9 +203,9 @@ void av_executor_execute(AVExecutor *e, AVTask *t) ff_cond_signal(&e->cond); ff_mutex_unlock(&e->lock); -#if !HAVE_THREADS - // We are running in a single-threaded environment, so we must handle all tasks ourselves - while (run_one_task(e, e->local_contexts)) - /* nothing */; -#endif + if (!e->thread_count || !HAVE_THREADS) { + // We are running in a single-threaded environment, so we must handle all tasks ourselves + while (run_one_task(e, e->local_contexts)) + /* nothing */; + } } diff --git a/src/ExtLib/ffmpeg/libavutil/executor.h b/src/ExtLib/ffmpeg/libavutil/executor.h index c602bcb613..0eb21c10c8 100644 --- a/src/ExtLib/ffmpeg/libavutil/executor.h +++ b/src/ExtLib/ffmpeg/libavutil/executor.h @@ -46,7 +46,7 @@ typedef struct AVTaskCallbacks { /** * Alloc executor * @param callbacks callback structure for executor - * @param thread_count worker thread number + * @param thread_count worker thread number, 0 for run on caller's thread directly * @return return the executor */ AVExecutor* av_executor_alloc(const AVTaskCallbacks *callbacks, int thread_count); diff --git a/src/ExtLib/ffmpeg/libavutil/stereo3d.c b/src/ExtLib/ffmpeg/libavutil/stereo3d.c index b10423dc48..d5728178a8 100644 --- a/src/ExtLib/ffmpeg/libavutil/stereo3d.c +++ b/src/ExtLib/ffmpeg/libavutil/stereo3d.c @@ -29,9 +29,15 @@ static void get_defaults(AVStereo3D *stereo) { stereo->horizontal_disparity_adjustment = (AVRational) { 0, 1 }; + stereo->horizontal_field_of_view = (AVRational) { 0, 1 }; } AVStereo3D *av_stereo3d_alloc(void) +{ + return av_stereo3d_alloc_size(NULL); +} + +AVStereo3D *av_stereo3d_alloc_size(size_t *size) { AVStereo3D *stereo = av_mallocz(sizeof(AVStereo3D)); if (!stereo) @@ -39,6 +45,9 @@ AVStereo3D *av_stereo3d_alloc(void) get_defaults(stereo); + if (size) + *size = sizeof(*stereo); + return stereo; } @@ -65,12 +74,14 @@ static const char * const stereo3d_type_names[] = { [AV_STEREO3D_SIDEBYSIDE_QUINCUNX] = "side by side (quincunx subsampling)", [AV_STEREO3D_LINES] = "interleaved lines", [AV_STEREO3D_COLUMNS] = "interleaved columns", + [AV_STEREO3D_UNSPEC] = "unspecified", }; static const char * const stereo3d_view_names[] = { [AV_STEREO3D_VIEW_PACKED] = "packed", [AV_STEREO3D_VIEW_LEFT] = "left", [AV_STEREO3D_VIEW_RIGHT] = "right", + [AV_STEREO3D_VIEW_UNSPEC] = "unspecified", }; static const char * const stereo3d_primary_eye_names[] = { diff --git a/src/ExtLib/ffmpeg/libavutil/stereo3d.h b/src/ExtLib/ffmpeg/libavutil/stereo3d.h index 00a5c3900e..c0a4ab3f2d 100644 --- a/src/ExtLib/ffmpeg/libavutil/stereo3d.h +++ b/src/ExtLib/ffmpeg/libavutil/stereo3d.h @@ -136,6 +136,11 @@ enum AVStereo3DType { * @endcode */ AV_STEREO3D_COLUMNS, + + /** + * Video is stereoscopic but the packing is unspecified. + */ + AV_STEREO3D_UNSPEC, }; /** @@ -156,6 +161,11 @@ enum AVStereo3DView { * Frame contains only the right view. */ AV_STEREO3D_VIEW_RIGHT, + + /** + * Content is unspecified. + */ + AV_STEREO3D_VIEW_UNSPEC, }; /** @@ -224,9 +234,9 @@ typedef struct AVStereo3D { AVRational horizontal_disparity_adjustment; /** - * Horizontal field of view in thousanths of a degree. Zero if unset. + * Horizontal field of view, in degrees. Zero if unset. */ - uint32_t horizontal_field_of_view; + AVRational horizontal_field_of_view; } AVStereo3D; /** @@ -237,6 +247,14 @@ typedef struct AVStereo3D { */ AVStereo3D *av_stereo3d_alloc(void); +/** + * Allocate an AVStereo3D structure and set its fields to default values. + * The resulting struct can be freed using av_freep(). + * + * @return An AVStereo3D filled with default values or NULL on failure. + */ +AVStereo3D *av_stereo3d_alloc_size(size_t *size); + /** * Allocate a complete AVFrameSideData and add it to the frame. * diff --git a/src/ExtLib/ffmpeg/libavutil/version.h b/src/ExtLib/ffmpeg/libavutil/version.h index 78e6431642..a8962734e7 100644 --- a/src/ExtLib/ffmpeg/libavutil/version.h +++ b/src/ExtLib/ffmpeg/libavutil/version.h @@ -79,7 +79,7 @@ */ #define LIBAVUTIL_VERSION_MAJOR 59 -#define LIBAVUTIL_VERSION_MINOR 25 +#define LIBAVUTIL_VERSION_MINOR 27 #define LIBAVUTIL_VERSION_MICRO 100 #define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \ diff --git a/src/ExtLib/ffmpeg/libswscale/output.c b/src/ExtLib/ffmpeg/libswscale/output.c index b234f9c6b9..0e6181b3e0 100644 --- a/src/ExtLib/ffmpeg/libswscale/output.c +++ b/src/ExtLib/ffmpeg/libswscale/output.c @@ -1221,8 +1221,8 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, Y2 += (1 << 13) - (1 << 29); if (hasAlpha) { - A1 = abuf0[i * 2 ] << 11; - A2 = abuf0[i * 2 + 1] << 11; + A1 = abuf0[i * 2 ] * (1 << 11); + A2 = abuf0[i * 2 + 1] * (1 << 11); A1 += 1 << 13; A2 += 1 << 13; @@ -1267,8 +1267,8 @@ yuv2rgba64_1_c_template(SwsContext *c, const int32_t *buf0, Y2 += (1 << 13) - (1 << 29); if (hasAlpha) { - A1 = abuf0[i * 2 ] << 11; - A2 = abuf0[i * 2 + 1] << 11; + A1 = abuf0[i * 2 ] * (1 << 11); + A2 = abuf0[i * 2 + 1] * (1 << 11); A1 += 1 << 13; A2 += 1 << 13; @@ -1439,7 +1439,7 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, Y += (1 << 13) - (1 << 29); if (hasAlpha) { - A = abuf0[i] << 11; + A = abuf0[i] * (1 << 11); A += 1 << 13; } @@ -1472,7 +1472,7 @@ yuv2rgba64_full_1_c_template(SwsContext *c, const int32_t *buf0, Y += (1 << 13) - (1 << 29); if (hasAlpha) { - A = abuf0[i] << 11; + A = abuf0[i] * (1 << 11); A += 1 << 13; } @@ -1925,9 +1925,9 @@ static av_always_inline void yuv2rgb_write_full(SwsContext *c, Y -= c->yuv2rgb_y_offset; Y *= c->yuv2rgb_y_coeff; Y += 1 << 21; - R = (unsigned)Y + V*c->yuv2rgb_v2r_coeff; - G = (unsigned)Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; - B = (unsigned)Y + U*c->yuv2rgb_u2b_coeff; + R = (unsigned)Y + V*(unsigned)c->yuv2rgb_v2r_coeff; + G = (unsigned)Y + V*(unsigned)c->yuv2rgb_v2g_coeff + U*(unsigned)c->yuv2rgb_u2g_coeff; + B = (unsigned)Y + U*(unsigned)c->yuv2rgb_u2b_coeff; if ((R | G | B) & 0xC0000000) { R = av_clip_uintp2(R, 30); G = av_clip_uintp2(G, 30); diff --git a/src/ExtLib/ffmpeg/libswscale/yuv2rgb.c b/src/ExtLib/ffmpeg/libswscale/yuv2rgb.c index c1d6236f37..977eb3a7dd 100644 --- a/src/ExtLib/ffmpeg/libswscale/yuv2rgb.c +++ b/src/ExtLib/ffmpeg/libswscale/yuv2rgb.c @@ -65,20 +65,20 @@ const int *sws_getCoefficients(int colorspace) return ff_yuv2rgb_coeffs[colorspace]; } -#define LOADCHROMA(i) \ +#define LOADCHROMA(pu, pv, i) \ U = pu[i]; \ V = pv[i]; \ r = (void *)c->table_rV[V+YUVRGB_TABLE_HEADROOM]; \ g = (void *)(c->table_gU[U+YUVRGB_TABLE_HEADROOM] + c->table_gV[V+YUVRGB_TABLE_HEADROOM]); \ b = (void *)c->table_bU[U+YUVRGB_TABLE_HEADROOM]; -#define PUTRGB(dst, src, i) \ +#define PUTRGB(dst, src, asrc, i, abase) \ Y = src[2 * i]; \ dst[2 * i] = r[Y] + g[Y] + b[Y]; \ Y = src[2 * i + 1]; \ dst[2 * i + 1] = r[Y] + g[Y] + b[Y]; -#define PUTRGB24(dst, src, i) \ +#define PUTRGB24(dst, src, asrc, i, abase) \ Y = src[2 * i]; \ dst[6 * i + 0] = r[Y]; \ dst[6 * i + 1] = g[Y]; \ @@ -88,7 +88,7 @@ const int *sws_getCoefficients(int colorspace) dst[6 * i + 4] = g[Y]; \ dst[6 * i + 5] = b[Y]; -#define PUTBGR24(dst, src, i) \ +#define PUTBGR24(dst, src, asrc, i, abase) \ Y = src[2 * i]; \ dst[6 * i + 0] = b[Y]; \ dst[6 * i + 1] = g[Y]; \ @@ -98,13 +98,13 @@ const int *sws_getCoefficients(int colorspace) dst[6 * i + 4] = g[Y]; \ dst[6 * i + 5] = r[Y]; -#define PUTRGBA(dst, ysrc, asrc, i, s) \ +#define PUTRGBA(dst, ysrc, asrc, i, abase) \ Y = ysrc[2 * i]; \ - dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << s); \ + dst[2 * i] = r[Y] + g[Y] + b[Y] + (asrc[2 * i] << abase); \ Y = ysrc[2 * i + 1]; \ - dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << s); + dst[2 * i + 1] = r[Y] + g[Y] + b[Y] + (asrc[2 * i + 1] << abase); -#define PUTRGB48(dst, src, i) \ +#define PUTRGB48(dst, src, asrc, i, abase) \ Y = src[ 2 * i]; \ dst[12 * i + 0] = dst[12 * i + 1] = r[Y]; \ dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \ @@ -114,7 +114,7 @@ const int *sws_getCoefficients(int colorspace) dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \ dst[12 * i + 10] = dst[12 * i + 11] = b[Y]; -#define PUTBGR48(dst, src, i) \ +#define PUTBGR48(dst, src, asrc, i, abase) \ Y = src[2 * i]; \ dst[12 * i + 0] = dst[12 * i + 1] = b[Y]; \ dst[12 * i + 2] = dst[12 * i + 3] = g[Y]; \ @@ -124,17 +124,13 @@ const int *sws_getCoefficients(int colorspace) dst[12 * i + 8] = dst[12 * i + 9] = g[Y]; \ dst[12 * i + 10] = dst[12 * i + 11] = r[Y]; -#define YUV2RGBFUNC(func_name, dst_type, alpha) \ +#define YUV2RGBFUNC(func_name, dst_type, alpha, yuv422) \ static int func_name(SwsContext *c, const uint8_t *src[], \ int srcStride[], int srcSliceY, int srcSliceH, \ uint8_t *dst[], int dstStride[]) \ { \ int y; \ \ - if (!alpha && c->srcFormat == AV_PIX_FMT_YUV422P) { \ - srcStride[1] *= 2; \ - srcStride[2] *= 2; \ - } \ for (y = 0; y < srcSliceH; y += 2) { \ int yd = y + srcSliceY; \ dst_type *dst_1 = \ @@ -144,10 +140,15 @@ const int *sws_getCoefficients(int colorspace) dst_type av_unused *r, *g, *b; \ const uint8_t *py_1 = src[0] + y * srcStride[0]; \ const uint8_t *py_2 = py_1 + srcStride[0]; \ - const uint8_t av_unused *pu = src[1] + (y >> 1) * srcStride[1]; \ - const uint8_t av_unused *pv = src[2] + (y >> 1) * srcStride[2]; \ + const uint8_t av_unused *pu_1 = src[1] + (y >> !yuv422) * srcStride[1]; \ + const uint8_t av_unused *pv_1 = src[2] + (y >> !yuv422) * srcStride[2]; \ + const uint8_t av_unused *pu_2, *pv_2; \ const uint8_t av_unused *pa_1, *pa_2; \ unsigned int h_size = c->dstW >> 3; \ + if (yuv422) { \ + pu_2 = pu_1 + srcStride[1]; \ + pv_2 = pv_1 + srcStride[2]; \ + } \ if (alpha) { \ pa_1 = src[3] + y * srcStride[3]; \ pa_2 = pa_1 + srcStride[3]; \ @@ -155,11 +156,19 @@ const int *sws_getCoefficients(int colorspace) while (h_size--) { \ int av_unused U, V, Y; \ -#define ENDYUV2RGBLINE(dst_delta, ss) \ - pu += 4 >> ss; \ - pv += 4 >> ss; \ +#define ENDYUV2RGBLINE(dst_delta, ss, alpha, yuv422) \ + pu_1 += 4 >> ss; \ + pv_1 += 4 >> ss; \ + if (yuv422) { \ + pu_2 += 4 >> ss; \ + pv_2 += 4 >> ss; \ + } \ py_1 += 8 >> ss; \ py_2 += 8 >> ss; \ + if (alpha) { \ + pa_1 += 8 >> ss; \ + pa_2 += 8 >> ss; \ + } \ dst_1 += dst_delta >> ss; \ dst_2 += dst_delta >> ss; \ } \ @@ -172,240 +181,173 @@ const int *sws_getCoefficients(int colorspace) return srcSliceH; \ } -#define CLOSEYUV2RGBFUNC(dst_delta) \ - ENDYUV2RGBLINE(dst_delta, 0) \ +#define YUV420FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \ + YUV2RGBFUNC(func_name, dst_type, alpha, 0) \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, pa_1, 0, abase); \ + PUTFUNC(dst_2, py_2, pa_2, 0, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_2, py_2, pa_2, 1, abase); \ + PUTFUNC(dst_1, py_1, pa_1, 1, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 2); \ + PUTFUNC(dst_1, py_1, pa_1, 2, abase); \ + PUTFUNC(dst_2, py_2, pa_2, 2, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 3); \ + PUTFUNC(dst_2, py_2, pa_2, 3, abase); \ + PUTFUNC(dst_1, py_1, pa_1, 3, abase); \ + ENDYUV2RGBLINE(dst_delta, 0, alpha, 0) \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, pa_1, 0, abase); \ + PUTFUNC(dst_2, py_2, pa_2, 0, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_2, py_2, pa_2, 1, abase); \ + PUTFUNC(dst_1, py_1, pa_1, 1, abase); \ + ENDYUV2RGBLINE(dst_delta, 1, alpha, 0) \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, pa_1, 0, abase); \ + PUTFUNC(dst_2, py_2, pa_2, 0, abase); \ ENDYUV2RGBFUNC() -YUV2RGBFUNC(yuv2rgb_c_48, uint8_t, 0) - LOADCHROMA(0); - PUTRGB48(dst_1, py_1, 0); - PUTRGB48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB48(dst_2, py_2, 1); - PUTRGB48(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTRGB48(dst_1, py_1, 2); - PUTRGB48(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTRGB48(dst_2, py_2, 3); - PUTRGB48(dst_1, py_1, 3); -ENDYUV2RGBLINE(48, 0) - LOADCHROMA(0); - PUTRGB48(dst_1, py_1, 0); - PUTRGB48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB48(dst_2, py_2, 1); - PUTRGB48(dst_1, py_1, 1); -ENDYUV2RGBLINE(48, 1) - LOADCHROMA(0); - PUTRGB48(dst_1, py_1, 0); - PUTRGB48(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_bgr48, uint8_t, 0) - LOADCHROMA(0); - PUTBGR48(dst_1, py_1, 0); - PUTBGR48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR48(dst_2, py_2, 1); - PUTBGR48(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTBGR48(dst_1, py_1, 2); - PUTBGR48(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTBGR48(dst_2, py_2, 3); - PUTBGR48(dst_1, py_1, 3); -ENDYUV2RGBLINE(48, 0) - LOADCHROMA(0); - PUTBGR48(dst_1, py_1, 0); - PUTBGR48(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR48(dst_2, py_2, 1); - PUTBGR48(dst_1, py_1, 1); -ENDYUV2RGBLINE(48, 1) - LOADCHROMA(0); - PUTBGR48(dst_1, py_1, 0); - PUTBGR48(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_32, uint32_t, 0) - LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB(dst_2, py_2, 1); - PUTRGB(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTRGB(dst_1, py_1, 2); - PUTRGB(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTRGB(dst_2, py_2, 3); - PUTRGB(dst_1, py_1, 3); -ENDYUV2RGBLINE(8, 0) - LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB(dst_2, py_2, 1); - PUTRGB(dst_1, py_1, 1); -ENDYUV2RGBLINE(8, 1) - LOADCHROMA(0); - PUTRGB(dst_1, py_1, 0); - PUTRGB(dst_2, py_2, 0); -ENDYUV2RGBFUNC() - -#if HAVE_BIGENDIAN -YUV2RGBFUNC(yuva2argb_c, uint32_t, 1) -#else -YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1) -#endif - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 24); - PUTRGBA(dst_2, py_2, pa_2, 0, 24); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 24); - PUTRGBA(dst_1, py_1, pa_1, 1, 24); - - LOADCHROMA(2); - PUTRGBA(dst_1, py_1, pa_1, 2, 24); - PUTRGBA(dst_2, py_2, pa_2, 2, 24); - - LOADCHROMA(3); - PUTRGBA(dst_2, py_2, pa_2, 3, 24); - PUTRGBA(dst_1, py_1, pa_1, 3, 24); - pa_1 += 8; - pa_2 += 8; -ENDYUV2RGBLINE(8, 0) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 24); - PUTRGBA(dst_2, py_2, pa_2, 0, 24); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 24); - PUTRGBA(dst_1, py_1, pa_1, 1, 24); - pa_1 += 4; - pa_2 += 4; -ENDYUV2RGBLINE(8, 1) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 24); - PUTRGBA(dst_2, py_2, pa_2, 0, 24); -ENDYUV2RGBFUNC() - -#if HAVE_BIGENDIAN -YUV2RGBFUNC(yuva2rgba_c, uint32_t, 1) -#else -YUV2RGBFUNC(yuva2argb_c, uint32_t, 1) -#endif - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 0); - PUTRGBA(dst_2, py_2, pa_2, 0, 0); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 0); - PUTRGBA(dst_1, py_1, pa_1, 1, 0); - - LOADCHROMA(2); - PUTRGBA(dst_1, py_1, pa_1, 2, 0); - PUTRGBA(dst_2, py_2, pa_2, 2, 0); - - LOADCHROMA(3); - PUTRGBA(dst_2, py_2, pa_2, 3, 0); - PUTRGBA(dst_1, py_1, pa_1, 3, 0); - pa_1 += 8; - pa_2 += 8; -ENDYUV2RGBLINE(8, 0) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 0); - PUTRGBA(dst_2, py_2, pa_2, 0, 0); - - LOADCHROMA(1); - PUTRGBA(dst_2, py_2, pa_2, 1, 0); - PUTRGBA(dst_1, py_1, pa_1, 1, 0); - pa_1 += 4; - pa_2 += 4; -ENDYUV2RGBLINE(8, 1) - LOADCHROMA(0); - PUTRGBA(dst_1, py_1, pa_1, 0, 0); - PUTRGBA(dst_2, py_2, pa_2, 0, 0); -ENDYUV2RGBFUNC() +#define YUV422FUNC(func_name, dst_type, alpha, abase, PUTFUNC, dst_delta) \ + YUV2RGBFUNC(func_name, dst_type, alpha, 1) \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, pa_1, 0, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 0); \ + PUTFUNC(dst_2, py_2, pa_2, 0, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 1); \ + PUTFUNC(dst_2, py_2, pa_2, 1, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_1, py_1, pa_1, 1, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 2); \ + PUTFUNC(dst_1, py_1, pa_1, 2, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 2); \ + PUTFUNC(dst_2, py_2, pa_2, 2, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 3); \ + PUTFUNC(dst_2, py_2, pa_2, 3, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 3); \ + PUTFUNC(dst_1, py_1, pa_1, 3, abase); \ + ENDYUV2RGBLINE(dst_delta, 0, alpha, 1) \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, pa_1, 0, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 0); \ + PUTFUNC(dst_2, py_2, pa_2, 0, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 1); \ + PUTFUNC(dst_2, py_2, pa_2, 1, abase); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_1, py_1, pa_1, 1, abase); \ + ENDYUV2RGBLINE(dst_delta, 1, alpha, 1) \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, pa_1, 0, abase); \ + \ + LOADCHROMA(pu_2, pv_2, 0); \ + PUTFUNC(dst_2, py_2, pa_2, 0, abase); \ + ENDYUV2RGBFUNC() -YUV2RGBFUNC(yuv2rgb_c_24_rgb, uint8_t, 0) - LOADCHROMA(0); - PUTRGB24(dst_1, py_1, 0); - PUTRGB24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB24(dst_2, py_2, 1); - PUTRGB24(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTRGB24(dst_1, py_1, 2); - PUTRGB24(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTRGB24(dst_2, py_2, 3); - PUTRGB24(dst_1, py_1, 3); -ENDYUV2RGBLINE(24, 0) - LOADCHROMA(0); - PUTRGB24(dst_1, py_1, 0); - PUTRGB24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTRGB24(dst_2, py_2, 1); - PUTRGB24(dst_1, py_1, 1); -ENDYUV2RGBLINE(24, 1) - LOADCHROMA(0); - PUTRGB24(dst_1, py_1, 0); - PUTRGB24(dst_2, py_2, 0); -ENDYUV2RGBFUNC() +#define YUV420FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \ + YUV2RGBFUNC(func_name, dst_type, 0, 0) \ + LOADDITHER \ + \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, 0, 0); \ + PUTFUNC(dst_2, py_2, 0, 0 + 8); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_2, py_2, 1, 2 + 8); \ + PUTFUNC(dst_1, py_1, 1, 2); \ + \ + LOADCHROMA(pu_1, pv_1, 2); \ + PUTFUNC(dst_1, py_1, 2, 4); \ + PUTFUNC(dst_2, py_2, 2, 4 + 8); \ + \ + LOADCHROMA(pu_1, pv_1, 3); \ + PUTFUNC(dst_2, py_2, 3, 6 + 8); \ + PUTFUNC(dst_1, py_1, 3, 6); \ + ENDYUV2RGBLINE(dst_delta, 0, 0, 0) \ + LOADDITHER \ + \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, 0, 0); \ + PUTFUNC(dst_2, py_2, 0, 0 + 8); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_2, py_2, 1, 2 + 8); \ + PUTFUNC(dst_1, py_1, 1, 2); \ + ENDYUV2RGBLINE(dst_delta, 1, 0, 0) \ + LOADDITHER \ + \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, 0, 0); \ + PUTFUNC(dst_2, py_2, 0, 0 + 8); \ + ENDYUV2RGBFUNC() -// only trivial mods from yuv2rgb_c_24_rgb -YUV2RGBFUNC(yuv2rgb_c_24_bgr, uint8_t, 0) - LOADCHROMA(0); - PUTBGR24(dst_1, py_1, 0); - PUTBGR24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR24(dst_2, py_2, 1); - PUTBGR24(dst_1, py_1, 1); - - LOADCHROMA(2); - PUTBGR24(dst_1, py_1, 2); - PUTBGR24(dst_2, py_2, 2); - - LOADCHROMA(3); - PUTBGR24(dst_2, py_2, 3); - PUTBGR24(dst_1, py_1, 3); -ENDYUV2RGBLINE(24, 0) - LOADCHROMA(0); - PUTBGR24(dst_1, py_1, 0); - PUTBGR24(dst_2, py_2, 0); - - LOADCHROMA(1); - PUTBGR24(dst_2, py_2, 1); - PUTBGR24(dst_1, py_1, 1); -ENDYUV2RGBLINE(24, 1) - LOADCHROMA(0); - PUTBGR24(dst_1, py_1, 0); - PUTBGR24(dst_2, py_2, 0); -ENDYUV2RGBFUNC() +#define YUV422FUNC_DITHER(func_name, dst_type, LOADDITHER, PUTFUNC, dst_delta) \ + YUV2RGBFUNC(func_name, dst_type, 0, 1) \ + LOADDITHER \ + \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, 0, 0); \ + \ + LOADCHROMA(pu_2, pv_2, 0); \ + PUTFUNC(dst_2, py_2, 0, 0 + 8); \ + \ + LOADCHROMA(pu_2, pv_2, 1); \ + PUTFUNC(dst_2, py_2, 1, 2 + 8); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_1, py_1, 1, 2); \ + \ + LOADCHROMA(pu_1, pv_1, 2); \ + PUTFUNC(dst_1, py_1, 2, 4); \ + \ + LOADCHROMA(pu_2, pv_2, 2); \ + PUTFUNC(dst_2, py_2, 2, 4 + 8); \ + \ + LOADCHROMA(pu_2, pv_2, 3); \ + PUTFUNC(dst_2, py_2, 3, 6 + 8); \ + \ + LOADCHROMA(pu_1, pv_1, 3); \ + PUTFUNC(dst_1, py_1, 3, 6); \ + ENDYUV2RGBLINE(dst_delta, 0, 0, 1) \ + LOADDITHER \ + \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, 0, 0); \ + \ + LOADCHROMA(pu_2, pv_2, 0); \ + PUTFUNC(dst_2, py_2, 0, 0 + 8); \ + \ + LOADCHROMA(pu_2, pv_2, 1); \ + PUTFUNC(dst_2, py_2, 1, 2 + 8); \ + \ + LOADCHROMA(pu_1, pv_1, 1); \ + PUTFUNC(dst_1, py_1, 1, 2); \ + ENDYUV2RGBLINE(dst_delta, 1, 0, 1) \ + LOADDITHER \ + \ + LOADCHROMA(pu_1, pv_1, 0); \ + PUTFUNC(dst_1, py_1, 0, 0); \ + \ + LOADCHROMA(pu_2, pv_2, 0); \ + PUTFUNC(dst_2, py_2, 0, 0 + 8); \ + ENDYUV2RGBFUNC() -YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) - const uint8_t *d16 = ff_dither_2x2_8[y & 1]; - const uint8_t *e16 = ff_dither_2x2_4[y & 1]; +#define LOADDITHER16 \ + const uint8_t *d16 = ff_dither_2x2_8[y & 1]; \ + const uint8_t *e16 = ff_dither_2x2_4[y & 1]; \ const uint8_t *f16 = ff_dither_2x2_8[(y & 1)^1]; #define PUTRGB16(dst, src, i, o) \ @@ -417,25 +359,9 @@ YUV2RGBFUNC(yuv2rgb_c_16_ordered_dither, uint16_t, 0) dst[2 * i + 1] = r[Y + d16[1 + o]] + \ g[Y + e16[1 + o]] + \ b[Y + f16[1 + o]]; - LOADCHROMA(0); - PUTRGB16(dst_1, py_1, 0, 0); - PUTRGB16(dst_2, py_2, 0, 0 + 8); - LOADCHROMA(1); - PUTRGB16(dst_2, py_2, 1, 2 + 8); - PUTRGB16(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB16(dst_1, py_1, 2, 4); - PUTRGB16(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB16(dst_2, py_2, 3, 6 + 8); - PUTRGB16(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) - const uint8_t *d16 = ff_dither_2x2_8[y & 1]; +#define LOADDITHER15 \ + const uint8_t *d16 = ff_dither_2x2_8[y & 1]; \ const uint8_t *e16 = ff_dither_2x2_8[(y & 1)^1]; #define PUTRGB15(dst, src, i, o) \ @@ -447,25 +373,8 @@ YUV2RGBFUNC(yuv2rgb_c_15_ordered_dither, uint16_t, 0) dst[2 * i + 1] = r[Y + d16[1 + o]] + \ g[Y + d16[0 + o]] + \ b[Y + e16[1 + o]]; - LOADCHROMA(0); - PUTRGB15(dst_1, py_1, 0, 0); - PUTRGB15(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB15(dst_2, py_2, 1, 2 + 8); - PUTRGB15(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB15(dst_1, py_1, 2, 4); - PUTRGB15(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB15(dst_2, py_2, 3, 6 + 8); - PUTRGB15(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0) +#define LOADDITHER12 \ const uint8_t *d16 = ff_dither_4x4_16[y & 3]; #define PUTRGB12(dst, src, i, o) \ @@ -478,26 +387,8 @@ YUV2RGBFUNC(yuv2rgb_c_12_ordered_dither, uint16_t, 0) g[Y + d16[1 + o]] + \ b[Y + d16[1 + o]]; - LOADCHROMA(0); - PUTRGB12(dst_1, py_1, 0, 0); - PUTRGB12(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB12(dst_2, py_2, 1, 2 + 8); - PUTRGB12(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB12(dst_1, py_1, 2, 4); - PUTRGB12(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB12(dst_2, py_2, 3, 6 + 8); - PUTRGB12(dst_1, py_1, 3, 6); -CLOSEYUV2RGBFUNC(8) - -// r, g, b, dst_1, dst_2 -YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) - const uint8_t *d32 = ff_dither_8x8_32[yd & 7]; +#define LOADDITHER8 \ + const uint8_t *d32 = ff_dither_8x8_32[yd & 7]; \ const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; #define PUTRGB8(dst, src, i, o) \ @@ -510,46 +401,9 @@ YUV2RGBFUNC(yuv2rgb_c_8_ordered_dither, uint8_t, 0) g[Y + d32[1 + o]] + \ b[Y + d64[1 + o]]; - LOADCHROMA(0); - PUTRGB8(dst_1, py_1, 0, 0); - PUTRGB8(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB8(dst_2, py_2, 1, 2 + 8); - PUTRGB8(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB8(dst_1, py_1, 2, 4); - PUTRGB8(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB8(dst_2, py_2, 3, 6 + 8); - PUTRGB8(dst_1, py_1, 3, 6); - -ENDYUV2RGBLINE(8, 0) - const uint8_t *d32 = ff_dither_8x8_32[yd & 7]; - const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; - LOADCHROMA(0); - PUTRGB8(dst_1, py_1, 0, 0); - PUTRGB8(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB8(dst_2, py_2, 1, 2 + 8); - PUTRGB8(dst_1, py_1, 1, 2); - -ENDYUV2RGBLINE(8, 1) - const uint8_t *d32 = ff_dither_8x8_32[yd & 7]; - const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; - LOADCHROMA(0); - PUTRGB8(dst_1, py_1, 0, 0); - PUTRGB8(dst_2, py_2, 0, 0 + 8); - -ENDYUV2RGBFUNC() - - -YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) - const uint8_t * d64 = ff_dither_8x8_73[yd & 7]; - const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; +#define LOADDITHER4D \ + const uint8_t * d64 = ff_dither_8x8_73[yd & 7]; \ + const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; \ int acc; #define PUTRGB4D(dst, src, i, o) \ @@ -563,45 +417,8 @@ YUV2RGBFUNC(yuv2rgb_c_4_ordered_dither, uint8_t, 0) b[Y + d128[1 + o]]) << 4; \ dst[i] = acc; - LOADCHROMA(0); - PUTRGB4D(dst_1, py_1, 0, 0); - PUTRGB4D(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB4D(dst_2, py_2, 1, 2 + 8); - PUTRGB4D(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB4D(dst_1, py_1, 2, 4); - PUTRGB4D(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB4D(dst_2, py_2, 3, 6 + 8); - PUTRGB4D(dst_1, py_1, 3, 6); - -ENDYUV2RGBLINE(4, 0) - const uint8_t * d64 = ff_dither_8x8_73[yd & 7]; - const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; - int acc; - LOADCHROMA(0); - PUTRGB4D(dst_1, py_1, 0, 0); - PUTRGB4D(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB4D(dst_2, py_2, 1, 2 + 8); - PUTRGB4D(dst_1, py_1, 1, 2); - -ENDYUV2RGBLINE(4, 1) - const uint8_t * d64 = ff_dither_8x8_73[yd & 7]; - const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; - int acc; - LOADCHROMA(0); - PUTRGB4D(dst_1, py_1, 0, 0); - PUTRGB4D(dst_2, py_2, 0, 0 + 8); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) - const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; +#define LOADDITHER4DB \ + const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; \ const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; #define PUTRGB4DB(dst, src, i, o) \ @@ -614,40 +431,7 @@ YUV2RGBFUNC(yuv2rgb_c_4b_ordered_dither, uint8_t, 0) g[Y + d64[1 + o]] + \ b[Y + d128[1 + o]]; - LOADCHROMA(0); - PUTRGB4DB(dst_1, py_1, 0, 0); - PUTRGB4DB(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB4DB(dst_2, py_2, 1, 2 + 8); - PUTRGB4DB(dst_1, py_1, 1, 2); - - LOADCHROMA(2); - PUTRGB4DB(dst_1, py_1, 2, 4); - PUTRGB4DB(dst_2, py_2, 2, 4 + 8); - - LOADCHROMA(3); - PUTRGB4DB(dst_2, py_2, 3, 6 + 8); - PUTRGB4DB(dst_1, py_1, 3, 6); -ENDYUV2RGBLINE(8, 0) - const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; - const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; - LOADCHROMA(0); - PUTRGB4DB(dst_1, py_1, 0, 0); - PUTRGB4DB(dst_2, py_2, 0, 0 + 8); - - LOADCHROMA(1); - PUTRGB4DB(dst_2, py_2, 1, 2 + 8); - PUTRGB4DB(dst_1, py_1, 1, 2); -ENDYUV2RGBLINE(8, 1) - const uint8_t *d64 = ff_dither_8x8_73[yd & 7]; - const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; - LOADCHROMA(0); - PUTRGB4DB(dst_1, py_1, 0, 0); - PUTRGB4DB(dst_2, py_2, 0, 0 + 8); -ENDYUV2RGBFUNC() - -YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) +YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0, 0) const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; char out_1 = 0, out_2 = 0; g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; @@ -672,7 +456,82 @@ YUV2RGBFUNC(yuv2rgb_c_1_ordered_dither, uint8_t, 0) dst_1[0] = out_1; dst_2[0] = out_2; -CLOSEYUV2RGBFUNC(1) + + py_1 += 8; + py_2 += 8; + dst_1 += 1; + dst_2 += 1; + } + if (c->dstW & 7) { + int av_unused Y, U, V; + int pixels_left = c->dstW & 7; + const uint8_t *d128 = ff_dither_8x8_220[yd & 7]; + char out_1 = 0, out_2 = 0; + g = c->table_gU[128 + YUVRGB_TABLE_HEADROOM] + c->table_gV[128 + YUVRGB_TABLE_HEADROOM]; + +#define PUTRGB1_OR00(out, src, i, o) \ + if (pixels_left) { \ + PUTRGB1(out, src, i, o) \ + pixels_left--; \ + } else { \ + out <<= 2; \ + } + + PUTRGB1_OR00(out_1, py_1, 0, 0); + PUTRGB1_OR00(out_2, py_2, 0, 0 + 8); + + PUTRGB1_OR00(out_2, py_2, 1, 2 + 8); + PUTRGB1_OR00(out_1, py_1, 1, 2); + + PUTRGB1_OR00(out_1, py_1, 2, 4); + PUTRGB1_OR00(out_2, py_2, 2, 4 + 8); + + PUTRGB1_OR00(out_2, py_2, 3, 6 + 8); + PUTRGB1_OR00(out_1, py_1, 3, 6); + + dst_1[0] = out_1; + dst_2[0] = out_2; +ENDYUV2RGBFUNC() + +// YUV420 +YUV420FUNC(yuv2rgb_c_48, uint8_t, 0, 0, PUTRGB48, 48) +YUV420FUNC(yuv2rgb_c_bgr48, uint8_t, 0, 0, PUTBGR48, 48) +YUV420FUNC(yuv2rgb_c_32, uint32_t, 0, 0, PUTRGB, 8) +#if HAVE_BIGENDIAN +YUV420FUNC(yuva2argb_c, uint32_t, 1, 24, PUTRGBA, 8) +YUV420FUNC(yuva2rgba_c, uint32_t, 1, 0, PUTRGBA, 8) +#else +YUV420FUNC(yuva2rgba_c, uint32_t, 1, 24, PUTRGBA, 8) +YUV420FUNC(yuva2argb_c, uint32_t, 1, 0, PUTRGBA, 8) +#endif +YUV420FUNC(yuv2rgb_c_24_rgb, uint8_t, 0, 0, PUTRGB24, 24) +YUV420FUNC(yuv2rgb_c_24_bgr, uint8_t, 0, 0, PUTBGR24, 24) +YUV420FUNC_DITHER(yuv2rgb_c_16_ordered_dither, uint16_t, LOADDITHER16, PUTRGB16, 8) +YUV420FUNC_DITHER(yuv2rgb_c_15_ordered_dither, uint16_t, LOADDITHER15, PUTRGB15, 8) +YUV420FUNC_DITHER(yuv2rgb_c_12_ordered_dither, uint16_t, LOADDITHER12, PUTRGB12, 8) +YUV420FUNC_DITHER(yuv2rgb_c_8_ordered_dither, uint8_t, LOADDITHER8, PUTRGB8, 8) +YUV420FUNC_DITHER(yuv2rgb_c_4_ordered_dither, uint8_t, LOADDITHER4D, PUTRGB4D, 4) +YUV420FUNC_DITHER(yuv2rgb_c_4b_ordered_dither, uint8_t, LOADDITHER4DB, PUTRGB4DB, 8) + +// YUV422 +YUV422FUNC(yuv422p_rgb48_c, uint8_t, 0, 0, PUTRGB48, 48) +YUV422FUNC(yuv422p_bgr48_c, uint8_t, 0, 0, PUTBGR48, 48) +YUV422FUNC(yuv422p_rgb32_c, uint32_t, 0, 0, PUTRGB, 8) +#if HAVE_BIGENDIAN +YUV422FUNC(yuva422p_argb_c, uint32_t, 1, 24, PUTRGBA, 8) +YUV422FUNC(yuva422p_rgba_c, uint32_t, 1, 0, PUTRGBA, 8) +#else +YUV422FUNC(yuva422p_rgba_c, uint32_t, 1, 24, PUTRGBA, 8) +YUV422FUNC(yuva422p_argb_c, uint32_t, 1, 0, PUTRGBA, 8) +#endif +YUV422FUNC(yuv422p_rgb24_c, uint8_t, 0, 0, PUTRGB24, 24) +YUV422FUNC(yuv422p_bgr24_c, uint8_t, 0, 0, PUTBGR24, 24) +YUV422FUNC_DITHER(yuv422p_bgr16, uint16_t, LOADDITHER16, PUTRGB16, 8) +YUV422FUNC_DITHER(yuv422p_bgr15, uint16_t, LOADDITHER15, PUTRGB15, 8) +YUV422FUNC_DITHER(yuv422p_bgr12, uint16_t, LOADDITHER12, PUTRGB12, 8) +YUV422FUNC_DITHER(yuv422p_bgr8, uint8_t, LOADDITHER8, PUTRGB8, 8) +YUV422FUNC_DITHER(yuv422p_bgr4, uint8_t, LOADDITHER4D, PUTRGB4D, 4) +YUV422FUNC_DITHER(yuv422p_bgr4_byte, uint8_t, LOADDITHER4DB, PUTRGB4DB, 8) SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) { @@ -693,44 +552,86 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c) "No accelerated colorspace conversion found from %s to %s.\n", av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat)); - switch (c->dstFormat) { - case AV_PIX_FMT_BGR48BE: - case AV_PIX_FMT_BGR48LE: - return yuv2rgb_c_bgr48; - case AV_PIX_FMT_RGB48BE: - case AV_PIX_FMT_RGB48LE: - return yuv2rgb_c_48; - case AV_PIX_FMT_ARGB: - case AV_PIX_FMT_ABGR: - if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) - return yuva2argb_c; - case AV_PIX_FMT_RGBA: - case AV_PIX_FMT_BGRA: - return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32; - case AV_PIX_FMT_RGB24: - return yuv2rgb_c_24_rgb; - case AV_PIX_FMT_BGR24: - return yuv2rgb_c_24_bgr; - case AV_PIX_FMT_RGB565: - case AV_PIX_FMT_BGR565: - return yuv2rgb_c_16_ordered_dither; - case AV_PIX_FMT_RGB555: - case AV_PIX_FMT_BGR555: - return yuv2rgb_c_15_ordered_dither; - case AV_PIX_FMT_RGB444: - case AV_PIX_FMT_BGR444: - return yuv2rgb_c_12_ordered_dither; - case AV_PIX_FMT_RGB8: - case AV_PIX_FMT_BGR8: - return yuv2rgb_c_8_ordered_dither; - case AV_PIX_FMT_RGB4: - case AV_PIX_FMT_BGR4: - return yuv2rgb_c_4_ordered_dither; - case AV_PIX_FMT_RGB4_BYTE: - case AV_PIX_FMT_BGR4_BYTE: - return yuv2rgb_c_4b_ordered_dither; - case AV_PIX_FMT_MONOBLACK: - return yuv2rgb_c_1_ordered_dither; + if (c->srcFormat == AV_PIX_FMT_YUV422P) { + switch (c->dstFormat) { + case AV_PIX_FMT_BGR48BE: + case AV_PIX_FMT_BGR48LE: + return yuv422p_bgr48_c; + case AV_PIX_FMT_RGB48BE: + case AV_PIX_FMT_RGB48LE: + return yuv422p_rgb48_c; + case AV_PIX_FMT_ARGB: + case AV_PIX_FMT_ABGR: + if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) + return yuva422p_argb_c; + case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_BGRA: + return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva422p_rgba_c : yuv422p_rgb32_c; + case AV_PIX_FMT_RGB24: + return yuv422p_rgb24_c; + case AV_PIX_FMT_BGR24: + return yuv422p_bgr24_c; + case AV_PIX_FMT_RGB565: + case AV_PIX_FMT_BGR565: + return yuv422p_bgr16; + case AV_PIX_FMT_RGB555: + case AV_PIX_FMT_BGR555: + return yuv422p_bgr15; + case AV_PIX_FMT_RGB444: + case AV_PIX_FMT_BGR444: + return yuv422p_bgr12; + case AV_PIX_FMT_RGB8: + case AV_PIX_FMT_BGR8: + return yuv422p_bgr8; + case AV_PIX_FMT_RGB4: + case AV_PIX_FMT_BGR4: + return yuv422p_bgr4; + case AV_PIX_FMT_RGB4_BYTE: + case AV_PIX_FMT_BGR4_BYTE: + return yuv422p_bgr4_byte; + case AV_PIX_FMT_MONOBLACK: + return yuv2rgb_c_1_ordered_dither; + } + } else { + switch (c->dstFormat) { + case AV_PIX_FMT_BGR48BE: + case AV_PIX_FMT_BGR48LE: + return yuv2rgb_c_bgr48; + case AV_PIX_FMT_RGB48BE: + case AV_PIX_FMT_RGB48LE: + return yuv2rgb_c_48; + case AV_PIX_FMT_ARGB: + case AV_PIX_FMT_ABGR: + if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) + return yuva2argb_c; + case AV_PIX_FMT_RGBA: + case AV_PIX_FMT_BGRA: + return (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) ? yuva2rgba_c : yuv2rgb_c_32; + case AV_PIX_FMT_RGB24: + return yuv2rgb_c_24_rgb; + case AV_PIX_FMT_BGR24: + return yuv2rgb_c_24_bgr; + case AV_PIX_FMT_RGB565: + case AV_PIX_FMT_BGR565: + return yuv2rgb_c_16_ordered_dither; + case AV_PIX_FMT_RGB555: + case AV_PIX_FMT_BGR555: + return yuv2rgb_c_15_ordered_dither; + case AV_PIX_FMT_RGB444: + case AV_PIX_FMT_BGR444: + return yuv2rgb_c_12_ordered_dither; + case AV_PIX_FMT_RGB8: + case AV_PIX_FMT_BGR8: + return yuv2rgb_c_8_ordered_dither; + case AV_PIX_FMT_RGB4: + case AV_PIX_FMT_BGR4: + return yuv2rgb_c_4_ordered_dither; + case AV_PIX_FMT_RGB4_BYTE: + case AV_PIX_FMT_BGR4_BYTE: + return yuv2rgb_c_4b_ordered_dither; + case AV_PIX_FMT_MONOBLACK: + return yuv2rgb_c_1_ordered_dither; + } } return NULL; }