Skip to content

Commit

Permalink
Merge pull request #160 from aous72/addressing_lto
Browse files Browse the repository at this point in the history
This addresses the illegal instruction issue that occurs when the -flto flag (link-time optimization) is used, as detailed in this thread:
https://bugzilla.redhat.com/show_bug.cgi?id=2307795
  • Loading branch information
aous72 authored Dec 5, 2024
2 parents 811f3a2 + c262935 commit 4c1cad5
Show file tree
Hide file tree
Showing 11 changed files with 103 additions and 91 deletions.
2 changes: 2 additions & 0 deletions src/core/codestream/ojph_codeblock_fun.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ namespace ojph {
tx_from_cb64 = NULL;
}
encode_cb64 = ojph_encode_codeblock64;
bool result = initialize_block_encoder_tables();
assert(result); ojph_unused(result);

#ifndef OJPH_DISABLE_SIMD

Expand Down
28 changes: 14 additions & 14 deletions src/core/coding/ojph_block_decoder32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ namespace ojph {
/** @brief State structure for reading and unstuffing of forward-growing
* bitstreams; these are: MagSgn and SPP bitstreams
*/
struct frwd_struct {
struct frwd_struct32 {
const ui8* data; //!<pointer to bitstream
ui64 tmp; //!<temporary buffer of read data
ui32 bits; //!<number of bits stored in tmp
Expand All @@ -601,12 +601,12 @@ namespace ojph {
* Reading can go beyond the end of buffer by up to 3 bytes.
*
* @tparam X is the value fed in when the bitstream is exhausted
* @param [in] msp is a pointer to frwd_struct structure
* @param [in] msp is a pointer to frwd_struct32 structure
*
*/
template<int X>
static inline
void frwd_read(frwd_struct *msp)
void frwd_read(frwd_struct32 *msp)
{
assert(msp->bits <= 32); // assert that there is a space for 32 bits

Expand Down Expand Up @@ -653,17 +653,17 @@ namespace ojph {
}

//************************************************************************/
/** @brief Initialize frwd_struct struct and reads some bytes
/** @brief Initialize frwd_struct32 struct and reads some bytes
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct32
* @param [in] data is a pointer to the start of data
* @param [in] size is the number of bytes in the bitstream
*/
template<int X>
static inline
void frwd_init(frwd_struct *msp, const ui8* data, int size)
void frwd_init(frwd_struct32 *msp, const ui8* data, int size)
{
msp->data = data;
msp->tmp = 0;
Expand All @@ -689,29 +689,29 @@ namespace ojph {
}

//************************************************************************/
/** @brief Consume num_bits bits from the bitstream of frwd_struct
/** @brief Consume num_bits bits from the bitstream of frwd_struct32
*
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct32
* @param [in] num_bits is the number of bits to consume
*/
static inline
void frwd_advance(frwd_struct *msp, ui32 num_bits)
void frwd_advance(frwd_struct32 *msp, ui32 num_bits)
{
assert(num_bits <= msp->bits);
msp->tmp >>= num_bits; // consume num_bits
msp->bits -= num_bits;
}

//************************************************************************/
/** @brief Fetches 32 bits from the frwd_struct bitstream
/** @brief Fetches 32 bits from the frwd_struct32 bitstream
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct32
*/
template<int X>
static inline
ui32 frwd_fetch(frwd_struct *msp)
ui32 frwd_fetch(frwd_struct32 *msp)
{
if (msp->bits < 32)
{
Expand Down Expand Up @@ -1099,7 +1099,7 @@ namespace ojph {
const int v_n_size = 512 + 4;
ui32 v_n_scratch[v_n_size] = {0}; // 2+ kB

frwd_struct magsgn;
frwd_struct32 magsgn;
frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);

ui16 *sp = scratch;
Expand Down Expand Up @@ -1368,7 +1368,7 @@ namespace ojph {
// We add an extra 8 entries, just in case we need more
ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes

frwd_struct sigprop;
frwd_struct32 sigprop;
frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);

for (ui32 y = 0; y < height; y += 4)
Expand Down
44 changes: 22 additions & 22 deletions src/core/coding/ojph_block_decoder64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ namespace ojph {
/** @brief State structure for reading and unstuffing of forward-growing
* bitstreams; these are: MagSgn and SPP bitstreams
*/
struct frwd_struct {
struct frwd_struct64 {
const ui8* data; //!<pointer to bitstream
ui64 tmp; //!<temporary buffer of read data
ui32 bits; //!<number of bits stored in tmp
Expand All @@ -553,12 +553,12 @@ namespace ojph {
* Reading can go beyond the end of buffer by up to 3 bytes.
*
* @tparam X is the value fed in when the bitstream is exhausted
* @param [in] msp is a pointer to frwd_struct structure
* @param [in] msp is a pointer to frwd_struct64 structure
*
*/
template<int X>
static inline
void frwd_read(frwd_struct *msp)
void frwd_read(frwd_struct64 *msp)
{
assert(msp->bits <= 32); // assert that there is a space for 32 bits

Expand Down Expand Up @@ -617,12 +617,12 @@ namespace ojph {
* MSB of the next byte is set 0 and must be ignored during decoding.
*
* @tparam X is the value fed in when the bitstream is exhausted
* @param [in] msp is a pointer to frwd_struct structure
* @param [in] msp is a pointer to frwd_struct64 structure
*
*/
template<ui8 X>
static inline
void frwd_read8(frwd_struct *msp)
void frwd_read8(frwd_struct64 *msp)
{
ui8 val = X;
if (msp->size > 0) {
Expand All @@ -640,17 +640,17 @@ namespace ojph {
}

//************************************************************************/
/** @brief Initialize frwd_struct struct and reads some bytes
/** @brief Initialize frwd_struct64 struct and reads some bytes
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct64
* @param [in] data is a pointer to the start of data
* @param [in] size is the number of bytes in the bitstream
*/
template<int X>
static inline
void frwd_init(frwd_struct *msp, const ui8* data, int size)
void frwd_init(frwd_struct64 *msp, const ui8* data, int size)
{
msp->data = data;
msp->tmp = 0;
Expand All @@ -676,17 +676,17 @@ namespace ojph {
}

//************************************************************************/
/** @brief Initialize frwd_struct struct and reads some bytes
/** @brief Initialize frwd_struct64 struct and reads some bytes
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct64
* @param [in] data is a pointer to the start of data
* @param [in] size is the number of bytes in the bitstream
*/
template<ui8 X>
static inline
void frwd_init8(frwd_struct *msp, const ui8* data, int size)
void frwd_init8(frwd_struct64 *msp, const ui8* data, int size)
{
msp->data = data;
msp->tmp = 0;
Expand All @@ -697,29 +697,29 @@ namespace ojph {
}

//************************************************************************/
/** @brief Consume num_bits bits from the bitstream of frwd_struct
/** @brief Consume num_bits bits from the bitstream of frwd_struct64
*
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct64
* @param [in] num_bits is the number of bits to consume
*/
static inline
void frwd_advance(frwd_struct *msp, ui32 num_bits)
void frwd_advance(frwd_struct64 *msp, ui32 num_bits)
{
assert(num_bits <= msp->bits);
msp->tmp >>= num_bits; // consume num_bits
msp->bits -= num_bits;
}

//************************************************************************/
/** @brief Fetches 32 bits from the frwd_struct bitstream
/** @brief Fetches 32 bits from the frwd_struct64 bitstream
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct64
*/
template<int X>
static inline
ui32 frwd_fetch(frwd_struct *msp)
ui32 frwd_fetch(frwd_struct64 *msp)
{
if (msp->bits < 32)
{
Expand All @@ -731,15 +731,15 @@ namespace ojph {
}

//************************************************************************/
/** @brief Fetches up to 64 bits from the frwd_struct bitstream
/** @brief Fetches up to 64 bits from the frwd_struct64 bitstream
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct64
*/
template<ui8 X>
static inline
ui64 frwd_fetch64(frwd_struct *msp)
ui64 frwd_fetch64(frwd_struct64 *msp)
{
while (msp->bits <= 56)
frwd_read8<X>(msp);
Expand Down Expand Up @@ -1147,7 +1147,7 @@ namespace ojph {
const int v_n_size = 512 + 4;
ui64 v_n_scratch[v_n_size] = {0}; // 4+ kB

frwd_struct magsgn;
frwd_struct64 magsgn;
frwd_init8<0xFF>(&magsgn, coded_data, lcup - scup);

const ui16 *sp = scratch;
Expand Down Expand Up @@ -1415,7 +1415,7 @@ namespace ojph {
// We add an extra 8 entries, just in case we need more
ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes

frwd_struct sigprop;
frwd_struct64 sigprop;
frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);

for (ui32 y = 0; y < height; y += 4)
Expand Down
34 changes: 17 additions & 17 deletions src/core/coding/ojph_block_decoder_avx2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -582,7 +582,7 @@ namespace ojph {
/** @brief State structure for reading and unstuffing of forward-growing
* bitstreams; these are: MagSgn and SPP bitstreams
*/
struct frwd_struct {
struct frwd_struct_avx2 {
const ui8* data; //!<pointer to bitstream
ui8 tmp[48]; //!<temporary buffer of read data + 16 extra
ui32 bits; //!<number of bits stored in tmp
Expand All @@ -605,12 +605,12 @@ namespace ojph {
* Reading can go beyond the end of buffer by up to 16 bytes.
*
* @tparam X is the value fed in when the bitstream is exhausted
* @param [in] msp is a pointer to frwd_struct structure
* @param [in] msp is a pointer to frwd_struct_avx2 structure
*
*/
template<int X>
static inline
void frwd_read(frwd_struct *msp)
void frwd_read(frwd_struct_avx2 *msp)
{
assert(msp->bits <= 128);

Expand Down Expand Up @@ -689,17 +689,17 @@ namespace ojph {
}

//************************************************************************/
/** @brief Initialize frwd_struct struct and reads some bytes
/** @brief Initialize frwd_struct_avx2 struct and reads some bytes
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct_avx2
* @param [in] data is a pointer to the start of data
* @param [in] size is the number of bytes in the bitstream
*/
template<int X>
static inline
void frwd_init(frwd_struct *msp, const ui8* data, int size)
void frwd_init(frwd_struct_avx2 *msp, const ui8* data, int size)
{
msp->data = data;
_mm_storeu_si128((__m128i *)msp->tmp, _mm_setzero_si128());
Expand All @@ -714,13 +714,13 @@ namespace ojph {
}

//************************************************************************/
/** @brief Consume num_bits bits from the bitstream of frwd_struct
/** @brief Consume num_bits bits from the bitstream of frwd_struct_avx2
*
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct_avx2
* @param [in] num_bits is the number of bits to consume
*/
static inline
void frwd_advance(frwd_struct *msp, ui32 num_bits)
void frwd_advance(frwd_struct_avx2 *msp, ui32 num_bits)
{
assert(num_bits > 0 && num_bits <= msp->bits && num_bits < 128);
msp->bits -= num_bits;
Expand Down Expand Up @@ -752,15 +752,15 @@ namespace ojph {
}

//************************************************************************/
/** @brief Fetches 32 bits from the frwd_struct bitstream
/** @brief Fetches 32 bits from the frwd_struct_avx2 bitstream
*
* @tparam X is the value fed in when the bitstream is exhausted.
* See frwd_read regarding the template
* @param [in] msp is a pointer to frwd_struct
* @param [in] msp is a pointer to frwd_struct_avx2
*/
template<int X>
static inline
__m128i frwd_fetch(frwd_struct *msp)
__m128i frwd_fetch(frwd_struct_avx2 *msp)
{
if (msp->bits <= 128)
{
Expand All @@ -782,7 +782,7 @@ namespace ojph {
* @param vn used for handling E values (stores v_n values)
* @return __m256i decoded two quads
*/
static inline __m256i decode_two_quad32_avx2(__m256i inf_u_q, __m256i U_q, frwd_struct* magsgn, ui32 p, __m128i& vn) {
static inline __m256i decode_two_quad32_avx2(__m256i inf_u_q, __m256i U_q, frwd_struct_avx2* magsgn, ui32 p, __m128i& vn) {
__m256i row = _mm256_setzero_si256();

// we keep e_k, e_1, and rho in w2
Expand Down Expand Up @@ -896,7 +896,7 @@ namespace ojph {
* @return __m256i decoded four quads
*/

static inline __m256i decode_four_quad16(const __m128i inf_u_q, __m128i U_q, frwd_struct* magsgn, ui32 p, __m128i& vn) {
static inline __m256i decode_four_quad16(const __m128i inf_u_q, __m128i U_q, frwd_struct_avx2* magsgn, ui32 p, __m128i& vn) {

__m256i w0; // workers
__m256i insig; // lanes hold FF's if samples are insignificant
Expand Down Expand Up @@ -1435,7 +1435,7 @@ namespace ojph {
const int v_n_size = 512 + 16;
ui32 v_n_scratch[2 * v_n_size] = {0}; // 4+ kB

frwd_struct magsgn;
frwd_struct_avx2 magsgn;
frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);

const __m256i avx_mmsbp2 = _mm256_set1_epi32((int)mmsbp2);
Expand Down Expand Up @@ -1551,7 +1551,7 @@ namespace ojph {
ui16 v_n_scratch[v_n_size] = {0}; // 1+ kB
ui32 v_n_scratch_32[v_n_size] = {0}; // 2+ kB

frwd_struct magsgn;
frwd_struct_avx2 magsgn;
frwd_init<0xFF>(&magsgn, coded_data, lcup - scup);

{
Expand Down Expand Up @@ -1728,7 +1728,7 @@ namespace ojph {
// We add an extra 8 entries, just in case we need more
ui16 prev_row_sig[256 + 8] = {0}; // 528 Bytes

frwd_struct sigprop;
frwd_struct_avx2 sigprop;
frwd_init<0>(&sigprop, coded_data + lengths1, (int)lengths2);

for (ui32 y = 0; y < height; y += 4)
Expand Down
Loading

0 comments on commit 4c1cad5

Please sign in to comment.