diff --git a/src/core/codestream/ojph_codeblock_fun.cpp b/src/core/codestream/ojph_codeblock_fun.cpp index 08d8d73..565744d 100644 --- a/src/core/codestream/ojph_codeblock_fun.cpp +++ b/src/core/codestream/ojph_codeblock_fun.cpp @@ -158,6 +158,8 @@ namespace ojph { tx_from_cb64 = NULL; } encode_cb64 = ojph_encode_codeblock64; + bool result = initialize_block_encoder_tables(); + assert(result); ojph_unused(result); #ifndef OJPH_DISABLE_SIMD diff --git a/src/core/coding/ojph_block_decoder32.cpp b/src/core/coding/ojph_block_decoder32.cpp index f54c77e..daf2312 100644 --- a/src/core/coding/ojph_block_decoder32.cpp +++ b/src/core/coding/ojph_block_decoder32.cpp @@ -578,7 +578,7 @@ namespace ojph { /** @brief State structure for reading and unstuffing of forward-growing * bitstreams; these are: MagSgn and SPP bitstreams */ - struct frwd_struct { + struct frwd_struct32 { const ui8* data; //! static inline - void frwd_read(frwd_struct *msp) + void frwd_read(frwd_struct32 *msp) { assert(msp->bits <= 32); // assert that there is a space for 32 bits @@ -653,17 +653,17 @@ namespace ojph { } //************************************************************************/ - /** @brief Initialize frwd_struct struct and reads some bytes + /** @brief Initialize frwd_struct32 struct and reads some bytes * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct32 * @param [in] data is a pointer to the start of data * @param [in] size is the number of byte in the bitstream */ template static inline - void frwd_init(frwd_struct *msp, const ui8* data, int size) + void frwd_init(frwd_struct32 *msp, const ui8* data, int size) { msp->data = data; msp->tmp = 0; @@ -689,13 +689,13 @@ namespace ojph { } //************************************************************************/ - /** @brief Consume num_bits bits from the bitstream of frwd_struct + /** @brief Consume num_bits bits from the bitstream of frwd_struct32 * - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct32 * @param [in] num_bits is the number of bit to consume */ static inline - void frwd_advance(frwd_struct *msp, ui32 num_bits) + void frwd_advance(frwd_struct32 *msp, ui32 num_bits) { assert(num_bits <= msp->bits); msp->tmp >>= num_bits; // consume num_bits @@ -703,15 +703,15 @@ namespace ojph { } //************************************************************************/ - /** @brief Fetches 32 bits from the frwd_struct bitstream + /** @brief Fetches 32 bits from the frwd_struct32 bitstream * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct32 */ template static inline - ui32 frwd_fetch(frwd_struct *msp) + ui32 frwd_fetch(frwd_struct32 *msp) { if (msp->bits < 32) { @@ -1099,7 +1099,7 @@ namespace ojph { const int v_n_size = 512 + 4; ui32 v_n_scratch[v_n_size] = {0}; // 2+ kB - frwd_struct magsgn; + frwd_struct32 magsgn; frwd_init<0xFF>(&magsgn, coded_data, lcup - scup); ui16 *sp = scratch; @@ -1368,7 +1368,7 @@ namespace ojph { // We add an extra 8 entries, just in case we need more ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes - frwd_struct sigprop; + frwd_struct32 sigprop; frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2); for (ui32 y = 0; y < height; y += 4) diff --git a/src/core/coding/ojph_block_decoder64.cpp b/src/core/coding/ojph_block_decoder64.cpp index 8801735..bce5b9e 100644 --- a/src/core/coding/ojph_block_decoder64.cpp +++ b/src/core/coding/ojph_block_decoder64.cpp @@ -530,7 +530,7 @@ namespace ojph { /** @brief State structure for reading and unstuffing of forward-growing * bitstreams; these are: MagSgn and SPP bitstreams */ - struct frwd_struct { + struct frwd_struct64 { const ui8* data; //! static inline - void frwd_read(frwd_struct *msp) + void frwd_read(frwd_struct64 *msp) { assert(msp->bits <= 32); // assert that there is a space for 32 bits @@ -617,12 +617,12 @@ namespace ojph { * MSB of the next byte is set 0 and must be ignored during decoding. * * @tparam X is the value fed in when the bitstream is exhausted - * @param [in] msp is a pointer to frwd_struct structure + * @param [in] msp is a pointer to frwd_struct64 structure * */ template static inline - void frwd_read8(frwd_struct *msp) + void frwd_read8(frwd_struct64 *msp) { ui8 val = X; if (msp->size > 0) { @@ -640,17 +640,17 @@ namespace ojph { } //************************************************************************/ - /** @brief Initialize frwd_struct struct and reads some bytes + /** @brief Initialize frwd_struct64 struct and reads some bytes * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct64 * @param [in] data is a pointer to the start of data * @param [in] size is the number of byte in the bitstream */ template static inline - void frwd_init(frwd_struct *msp, const ui8* data, int size) + void frwd_init(frwd_struct64 *msp, const ui8* data, int size) { msp->data = data; msp->tmp = 0; @@ -676,17 +676,17 @@ namespace ojph { } //************************************************************************/ - /** @brief Initialize frwd_struct struct and reads some bytes + /** @brief Initialize frwd_struct64 struct and reads some bytes * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct64 * @param [in] data is a pointer to the start of data * @param [in] size is the number of byte in the bitstream */ template static inline - void frwd_init8(frwd_struct *msp, const ui8* data, int size) + void frwd_init8(frwd_struct64 *msp, const ui8* data, int size) { msp->data = data; msp->tmp = 0; @@ -697,13 +697,13 @@ namespace ojph { } //************************************************************************/ - /** @brief Consume num_bits bits from the bitstream of frwd_struct + /** @brief Consume num_bits bits from the bitstream of frwd_struct64 * - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct64 * @param [in] num_bits is the number of bit to consume */ static inline - void frwd_advance(frwd_struct *msp, ui32 num_bits) + void frwd_advance(frwd_struct64 *msp, ui32 num_bits) { assert(num_bits <= msp->bits); msp->tmp >>= num_bits; // consume num_bits @@ -711,15 +711,15 @@ namespace ojph { } //************************************************************************/ - /** @brief Fetches 32 bits from the frwd_struct bitstream + /** @brief Fetches 32 bits from the frwd_struct64 bitstream * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct64 */ template static inline - ui32 frwd_fetch(frwd_struct *msp) + ui32 frwd_fetch(frwd_struct64 *msp) { if (msp->bits < 32) { @@ -731,15 +731,15 @@ namespace ojph { } //************************************************************************/ - /** @brief Fetches up to 64 bits from the frwd_struct bitstream + /** @brief Fetches up to 64 bits from the frwd_struct64 bitstream * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct64 */ template static inline - ui64 frwd_fetch64(frwd_struct *msp) + ui64 frwd_fetch64(frwd_struct64 *msp) { while (msp->bits <= 56) frwd_read8(msp); @@ -1147,7 +1147,7 @@ namespace ojph { const int v_n_size = 512 + 4; ui64 v_n_scratch[v_n_size] = {0}; // 4+ kB - frwd_struct magsgn; + frwd_struct64 magsgn; frwd_init8<0xFF>(&magsgn, coded_data, lcup - scup); const ui16 *sp = scratch; @@ -1415,7 +1415,7 @@ namespace ojph { // We add an extra 8 entries, just in case we need more ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes - frwd_struct sigprop; + frwd_struct64 sigprop; frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2); for (ui32 y = 0; y < height; y += 4) diff --git a/src/core/coding/ojph_block_decoder_avx2.cpp b/src/core/coding/ojph_block_decoder_avx2.cpp index 156ba1a..ee47d03 100644 --- a/src/core/coding/ojph_block_decoder_avx2.cpp +++ b/src/core/coding/ojph_block_decoder_avx2.cpp @@ -582,7 +582,7 @@ namespace ojph { /** @brief State structure for reading and unstuffing of forward-growing * bitstreams; these are: MagSgn and SPP bitstreams */ - struct frwd_struct { + struct frwd_struct_avx2 { const ui8* data; //! static inline - void frwd_read(frwd_struct *msp) + void frwd_read(frwd_struct_avx2 *msp) { assert(msp->bits <= 128); @@ -689,17 +689,17 @@ namespace ojph { } //************************************************************************/ - /** @brief Initialize frwd_struct struct and reads some bytes + /** @brief Initialize frwd_struct_avx2 struct and reads some bytes * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct_avx2 * @param [in] data is a pointer to the start of data * @param [in] size is the number of byte in the bitstream */ template static inline - void frwd_init(frwd_struct *msp, const ui8* data, int size) + void frwd_init(frwd_struct_avx2 *msp, const ui8* data, int size) { msp->data = data; _mm_storeu_si128((__m128i *)msp->tmp, _mm_setzero_si128()); @@ -714,13 +714,13 @@ namespace ojph { } //************************************************************************/ - /** @brief Consume num_bits bits from the bitstream of frwd_struct + /** @brief Consume num_bits bits from the bitstream of frwd_struct_avx2 * - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct_avx2 * @param [in] num_bits is the number of bit to consume */ static inline - void frwd_advance(frwd_struct *msp, ui32 num_bits) + void frwd_advance(frwd_struct_avx2 *msp, ui32 num_bits) { assert(num_bits > 0 && num_bits <= msp->bits && num_bits < 128); msp->bits -= num_bits; @@ -752,15 +752,15 @@ namespace ojph { } //************************************************************************/ - /** @brief Fetches 32 bits from the frwd_struct bitstream + /** @brief Fetches 32 bits from the frwd_struct_avx2 bitstream * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct_avx2 */ template static inline - __m128i frwd_fetch(frwd_struct *msp) + __m128i frwd_fetch(frwd_struct_avx2 *msp) { if (msp->bits <= 128) { @@ -782,7 +782,7 @@ namespace ojph { * @param vn used for handling E values (stores v_n values) * @return __m256i decoded two quads */ - static inline __m256i decode_two_quad32_avx2(__m256i inf_u_q, __m256i U_q, frwd_struct* magsgn, ui32 p, __m128i& vn) { + static inline __m256i decode_two_quad32_avx2(__m256i inf_u_q, __m256i U_q, frwd_struct_avx2* magsgn, ui32 p, __m128i& vn) { __m256i row = _mm256_setzero_si256(); // we keeps e_k, e_1, and rho in w2 @@ -896,7 +896,7 @@ namespace ojph { * @return __m128i decoded quad */ - static inline __m256i decode_four_quad16(const __m128i inf_u_q, __m128i U_q, frwd_struct* magsgn, ui32 p, __m128i& vn) { + static inline __m256i decode_four_quad16(const __m128i inf_u_q, __m128i U_q, frwd_struct_avx2* magsgn, ui32 p, __m128i& vn) { __m256i w0; // workers __m256i insig; // lanes hold FF's if samples are insignificant @@ -1435,7 +1435,7 @@ namespace ojph { const int v_n_size = 512 + 16; ui32 v_n_scratch[2 * v_n_size] = {0}; // 4+ kB - frwd_struct magsgn; + frwd_struct_avx2 magsgn; frwd_init<0xFF>(&magsgn, coded_data, lcup - scup); const __m256i avx_mmsbp2 = _mm256_set1_epi32((int)mmsbp2); @@ -1551,7 +1551,7 @@ namespace ojph { ui16 v_n_scratch[v_n_size] = {0}; // 1+ kB ui32 v_n_scratch_32[v_n_size] = {0}; // 2+ kB - frwd_struct magsgn; + frwd_struct_avx2 magsgn; frwd_init<0xFF>(&magsgn, coded_data, lcup - scup); { @@ -1728,7 +1728,7 @@ namespace ojph { // We add an extra 8 entries, just in case we need more ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes - frwd_struct sigprop; + frwd_struct_avx2 sigprop; frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2); for (ui32 y = 0; y < height; y += 4) diff --git a/src/core/coding/ojph_block_decoder_ssse3.cpp b/src/core/coding/ojph_block_decoder_ssse3.cpp index 9fa5800..93efc12 100644 --- a/src/core/coding/ojph_block_decoder_ssse3.cpp +++ b/src/core/coding/ojph_block_decoder_ssse3.cpp @@ -579,7 +579,7 @@ namespace ojph { /** @brief State structure for reading and unstuffing of forward-growing * bitstreams; these are: MagSgn and SPP bitstreams */ - struct frwd_struct { + struct frwd_struct_ssse3 { const ui8* data; //! static inline - void frwd_read(frwd_struct *msp) + void frwd_read(frwd_struct_ssse3 *msp) { assert(msp->bits <= 128); @@ -686,17 +686,17 @@ namespace ojph { } //************************************************************************/ - /** @brief Initialize frwd_struct struct and reads some bytes + /** @brief Initialize frwd_struct_ssse3 struct and reads some bytes * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct_ssse3 * @param [in] data is a pointer to the start of data * @param [in] size is the number of byte in the bitstream */ template static inline - void frwd_init(frwd_struct *msp, const ui8* data, int size) + void frwd_init(frwd_struct_ssse3 *msp, const ui8* data, int size) { msp->data = data; _mm_storeu_si128((__m128i *)msp->tmp, _mm_setzero_si128()); @@ -711,13 +711,13 @@ namespace ojph { } //************************************************************************/ - /** @brief Consume num_bits bits from the bitstream of frwd_struct + /** @brief Consume num_bits bits from the bitstream of frwd_struct_ssse3 * - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct_ssse3 * @param [in] num_bits is the number of bit to consume */ static inline - void frwd_advance(frwd_struct *msp, ui32 num_bits) + void frwd_advance(frwd_struct_ssse3 *msp, ui32 num_bits) { assert(num_bits > 0 && num_bits <= msp->bits && num_bits < 128); msp->bits -= num_bits; @@ -749,15 +749,15 @@ namespace ojph { } //************************************************************************/ - /** @brief Fetches 32 bits from the frwd_struct bitstream + /** @brief Fetches 32 bits from the frwd_struct_ssse3 bitstream * * @tparam X is the value fed in when the bitstream is exhausted. * See frwd_read regarding the template - * @param [in] msp is a pointer to frwd_struct + * @param [in] msp is a pointer to frwd_struct_ssse3 */ template static inline - __m128i frwd_fetch(frwd_struct *msp) + __m128i frwd_fetch(frwd_struct_ssse3 *msp) { if (msp->bits <= 128) { @@ -784,7 +784,7 @@ namespace ojph { template static inline __m128i decode_one_quad32(const __m128i inf_u_q, __m128i U_q, - frwd_struct* magsgn, ui32 p, __m128i& vn) + frwd_struct_ssse3* magsgn, ui32 p, __m128i& vn) { __m128i w0; // workers __m128i insig; // lanes hold FF's if samples are insignificant @@ -894,7 +894,7 @@ namespace ojph { */ static inline __m128i decode_two_quad16(const __m128i inf_u_q, __m128i U_q, - frwd_struct* magsgn, ui32 p, __m128i& vn) + frwd_struct_ssse3* magsgn, ui32 p, __m128i& vn) { __m128i w0; // workers __m128i insig; // lanes hold FF's if samples are insignificant @@ -1389,7 +1389,7 @@ namespace ojph { const int v_n_size = 512 + 8; ui32 v_n_scratch[2 * v_n_size] = {0}; // 4+ kB - frwd_struct magsgn; + frwd_struct_ssse3 magsgn; frwd_init<0xFF>(&magsgn, coded_data, lcup - scup); { @@ -1540,7 +1540,7 @@ namespace ojph { const int v_n_size = 512 + 8; ui16 v_n_scratch[2 * v_n_size] = {0}; // 2+ kB - frwd_struct magsgn; + frwd_struct_ssse3 magsgn; frwd_init<0xFF>(&magsgn, coded_data, lcup - scup); { @@ -1753,7 +1753,7 @@ namespace ojph { // We add an extra 8 entries, just in case we need more ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes - frwd_struct sigprop; + frwd_struct_ssse3 sigprop; frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2); for (ui32 y = 0; y < height; y += 4) diff --git a/src/core/coding/ojph_block_encoder.cpp b/src/core/coding/ojph_block_encoder.cpp index ffc9e8d..f9c8d89 100644 --- a/src/core/coding/ojph_block_encoder.cpp +++ b/src/core/coding/ojph_block_encoder.cpp @@ -254,8 +254,18 @@ namespace ojph { } ///////////////////////////////////////////////////////////////////////// - static bool vlc_tables_initialized = vlc_init_tables(); - static bool uvlc_tables_initialized = uvlc_init_tables(); + static bool tables_initialized = false; + + ///////////////////////////////////////////////////////////////////////// + bool initialize_block_encoder_tables() { + if (!tables_initialized) { + memset(vlc_tbl0, 0, 2048 * sizeof(ui16)); + memset(vlc_tbl1, 0, 2048 * sizeof(ui16)); + tables_initialized = vlc_init_tables(); + tables_initialized = tables_initialized && uvlc_init_tables(); + } + return tables_initialized; + } ///////////////////////////////////////////////////////////////////////// // diff --git a/src/core/coding/ojph_block_encoder.h b/src/core/coding/ojph_block_encoder.h index 72b3c0d..c0af892 100644 --- a/src/core/coding/ojph_block_encoder.h +++ b/src/core/coding/ojph_block_encoder.h @@ -79,6 +79,7 @@ namespace ojph { ojph::mem_elastic_allocator *elastic, ojph::coded_lists *& coded); + bool initialize_block_encoder_tables(); bool initialize_block_encoder_tables_avx2(); bool initialize_block_encoder_tables_avx512(); } diff --git a/src/core/coding/ojph_block_encoder_avx2.cpp b/src/core/coding/ojph_block_encoder_avx2.cpp index 7624272..d9a76c2 100644 --- a/src/core/coding/ojph_block_encoder_avx2.cpp +++ b/src/core/coding/ojph_block_encoder_avx2.cpp @@ -305,7 +305,7 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////// - struct vlc_struct { + struct vlc_struct_avx2 { //storage ui8* buf; //pointer to data buffer ui32 pos; //position of next writing within buf @@ -318,7 +318,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// static inline void - vlc_init(vlc_struct* vlcp, ui32 buffer_size, ui8* data) + vlc_init(vlc_struct_avx2* vlcp, ui32 buffer_size, ui8* data) { vlcp->buf = data + buffer_size - 1; //points to last byte vlcp->pos = 1; //locations will be all -pos @@ -332,7 +332,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// static inline void - vlc_encode(vlc_struct* vlcp, ui32 cwd, int cwd_len) + vlc_encode(vlc_struct_avx2* vlcp, ui32 cwd, int cwd_len) { vlcp->tmp |= (ui64)cwd << vlcp->used_bits; vlcp->used_bits += cwd_len; @@ -372,7 +372,7 @@ namespace ojph { // ////////////////////////////////////////////////////////////////////////// static inline void - terminate_mel_vlc(mel_struct* melp, vlc_struct* vlcp) + terminate_mel_vlc(mel_struct* melp, vlc_struct_avx2* vlcp) { if (melp->run > 0) mel_emit_bit(melp, 1); @@ -890,7 +890,7 @@ static void proc_mel_encode2(mel_struct *melp, __m256i &cq_vec, using fn_proc_mel_encode = void (*)(mel_struct *, __m256i &, __m256i &, __m256i, ui32, const __m256i); -static void proc_vlc_encode1(vlc_struct *vlcp, ui32 *tuple, +static void proc_vlc_encode1(vlc_struct_avx2 *vlcp, ui32 *tuple, ui32 *u_q, ui32 ignore) { ui32 i_max = 8 - (ignore / 2); @@ -958,7 +958,7 @@ static void proc_vlc_encode1(vlc_struct *vlcp, ui32 *tuple, } } -static void proc_vlc_encode2(vlc_struct *vlcp, ui32 *tuple, +static void proc_vlc_encode2(vlc_struct_avx2 *vlcp, ui32 *tuple, ui32 *u_q, ui32 ignore) { ui32 i_max = 8 - (ignore / 2); @@ -994,7 +994,7 @@ static void proc_vlc_encode2(vlc_struct *vlcp, ui32 *tuple, } } -using fn_proc_vlc_encode = void (*)(vlc_struct *, ui32 *, ui32 *, ui32); +using fn_proc_vlc_encode = void (*)(vlc_struct_avx2 *, ui32 *, ui32 *, ui32); void ojph_encode_codeblock_avx2(ui32* buf, ui32 missing_msbs, ui32 num_passes, ui32 _width, ui32 height, @@ -1018,7 +1018,7 @@ void ojph_encode_codeblock_avx2(ui32* buf, ui32 missing_msbs, mel_struct mel; mel_init(&mel, mel_size, mel_buf); - vlc_struct vlc; + vlc_struct_avx2 vlc; vlc_init(&vlc, vlc_size, vlc_buf); ms_struct ms; ms_init(&ms, ms_size, ms_buf); diff --git a/src/core/coding/ojph_block_encoder_avx512.cpp b/src/core/coding/ojph_block_encoder_avx512.cpp index b35373a..5bcd433 100644 --- a/src/core/coding/ojph_block_encoder_avx512.cpp +++ b/src/core/coding/ojph_block_encoder_avx512.cpp @@ -305,7 +305,7 @@ namespace ojph { ///////////////////////////////////////////////////////////////////////// // ///////////////////////////////////////////////////////////////////////// - struct vlc_struct { + struct vlc_struct_avx512 { //storage ui8* buf; //pointer to data buffer ui32 pos; //position of next writing within buf @@ -318,7 +318,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// static inline void - vlc_init(vlc_struct* vlcp, ui32 buffer_size, ui8* data) + vlc_init(vlc_struct_avx512* vlcp, ui32 buffer_size, ui8* data) { vlcp->buf = data + buffer_size - 1; //points to last byte vlcp->pos = 1; //locations will be all -pos @@ -332,7 +332,7 @@ namespace ojph { ////////////////////////////////////////////////////////////////////////// static inline void - vlc_encode(vlc_struct* vlcp, ui32 cwd, int cwd_len) + vlc_encode(vlc_struct_avx512* vlcp, ui32 cwd, int cwd_len) { vlcp->tmp |= (ui64)cwd << vlcp->used_bits; vlcp->used_bits += cwd_len; @@ -372,7 +372,7 @@ namespace ojph { // ////////////////////////////////////////////////////////////////////////// static inline void - terminate_mel_vlc(mel_struct* melp, vlc_struct* vlcp) + terminate_mel_vlc(mel_struct* melp, vlc_struct_avx512* vlcp) { if (melp->run > 0) mel_emit_bit(melp, 1); @@ -898,7 +898,7 @@ static void proc_mel_encode2(mel_struct *melp, __m512i &cq_vec, using fn_proc_mel_encode = void (*)(mel_struct *, __m512i &, __m512i &, __m512i, ui32, const __m512i); -static void proc_vlc_encode1(vlc_struct *vlcp, ui32 *tuple, +static void proc_vlc_encode1(vlc_struct_avx512 *vlcp, ui32 *tuple, ui32 *u_q, ui32 ignore) { ui32 i_max = 16 - (ignore / 2); @@ -966,7 +966,7 @@ static void proc_vlc_encode1(vlc_struct *vlcp, ui32 *tuple, } } -static void proc_vlc_encode2(vlc_struct *vlcp, ui32 *tuple, +static void proc_vlc_encode2(vlc_struct_avx512 *vlcp, ui32 *tuple, ui32 *u_q, ui32 ignore) { ui32 i_max = 16 - (ignore / 2); @@ -1002,7 +1002,7 @@ static void proc_vlc_encode2(vlc_struct *vlcp, ui32 *tuple, } } -using fn_proc_vlc_encode = void (*)(vlc_struct *, ui32 *, ui32 *, ui32); +using fn_proc_vlc_encode = void (*)(vlc_struct_avx512 *, ui32 *, ui32 *, ui32); void ojph_encode_codeblock_avx512(ui32* buf, ui32 missing_msbs, ui32 num_passes, ui32 _width, ui32 height, @@ -1026,7 +1026,7 @@ void ojph_encode_codeblock_avx512(ui32* buf, ui32 missing_msbs, mel_struct mel; mel_init(&mel, mel_size, mel_buf); - vlc_struct vlc; + vlc_struct_avx512 vlc; vlc_init(&vlc, vlc_size, vlc_buf); ms_struct ms; ms_init(&ms, ms_size, ms_buf); diff --git a/src/core/common/ojph_version.h b/src/core/common/ojph_version.h index 00faf75..44b59d4 100644 --- a/src/core/common/ojph_version.h +++ b/src/core/common/ojph_version.h @@ -35,4 +35,4 @@ #define OPENJPH_VERSION_MAJOR 0 #define OPENJPH_VERSION_MINOR 18 -#define OPENJPH_VERSION_PATCH 0 +#define OPENJPH_VERSION_PATCH 1 diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8cc1d72..547f1c5 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -17,9 +17,8 @@ FetchContent_Declare( jp2k_test_codestreams URL https://github.com/aous72/jp2k_test_codestreams/archive/refs/heads/main.zip SOURCE_DIR jp2k_test_codestreams/ - CONFIGURE_COMMAND "" ) -FetchContent_Populate(jp2k_test_codestreams) +FetchContent_MakeAvailable(jp2k_test_codestreams) # create the mse_pae executable include(mse_pae.cmake)