Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stride access for DWT and some bug fixes and small improvements #171

Merged
merged 6 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Set the default behavior, in case people don't have core.autocrlf set.
* text=auto

# Explicitly declare text files you want to always be normalized and converted
# to native line endings on checkout.
*.c text
*.h text
*.cpp text
*.hpp text

# Declare files that will always have CRLF line endings on checkout.
*.sln text eol=crlf

# Denote all files that are truly binary and should not be modified.
*.pgx binary
*.j2k binary
*.ppm binary
6 changes: 6 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# [0.2.8] - 2024-11-12

* Fix incorrect packet parsing for RPCL, PCRL, CPRL
* Introduce stride access into DWT
* Change cmake configuration for MinGW environments

# [0.2.7] - 2024-06-13

* Refactor non-SIMD HT cleanup decoding
Expand Down
4 changes: 1 addition & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,7 @@ endif()

if(NOT EMSCRIPTEN)
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^[xX]86_64$|^[aA][mM][dD]64$") # x86_64
if(NOT MINGW)
option(ENABLE_AVX2 "Enable the use of Intel AVX2 intrinsics" ON)
endif()
option(ENABLE_AVX2 "Enable the use of Intel AVX2 intrinsics" ON)
if(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} /arch:AVX2 /EHsc /D \"_CRT_SECURE_NO_WARNINGS\"")
Expand Down
16 changes: 9 additions & 7 deletions source/apps/imgcmp/image_class.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class image {
bitDepth(0),
isSigned(false),
isBigendian(false),
data(nullptr) {};
data(nullptr){};
// destructor
~image() { delete[] data; }

Expand Down Expand Up @@ -110,9 +110,9 @@ class image {

// parsing PNM/PGX header
int read_pnmpgx(const char *name) {
constexpr char SP = ' ';
constexpr char LF = '\n';
constexpr char CR = 13;
constexpr char SP = ' ';
constexpr char LF = '\n';
[[maybe_unused]] constexpr char CR = 13;

FILE *fp = fopen(name, "rb");
if (fp == nullptr) {
Expand Down Expand Up @@ -223,7 +223,7 @@ class image {
}
}
// read numerical value
while (c != SP && c != LF && c != CR) {
while (c >= '0' && c <= '9') {
val *= 10;
val += c - '0';
c = fgetc(fp);
Expand Down Expand Up @@ -255,8 +255,10 @@ class image {
}
}
// eating trailing spaces/LF/CR or comments
c = fgetc(fp);
c = fgetc(fp);
int count = 0;
while (c == SP || c == LF) {
count++;
c = fgetc(fp);
if (c == '#') {
char *nouse = fgets(comment, sizeof(comment), fp);
Expand All @@ -267,7 +269,7 @@ class image {
c = fgetc(fp);
}
}
fseek(fp, -1, SEEK_CUR);
fseek(fp, -count - 1, SEEK_CUR);

const uint_fast8_t nbytes = static_cast<uint_fast8_t>((bitDepth + 7) / 8); // ceil bitDepth to byte
const size_t num_samples = this->width * this->height * num_components;
Expand Down
37 changes: 18 additions & 19 deletions source/core/coding/coding_units.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,8 @@ void j2k_codeblock::create_compressed_buffer(buf_chain *tile_buf, int32_t buf_li
*******************************************************************************/
j2k_precinct_subband::j2k_precinct_subband(uint8_t orientation, uint8_t M_b, uint8_t R_b,
uint8_t transformation, float stepsize, sprec_t *ibuf,
const element_siz &bp0, const element_siz &bp1,
const element_siz &p0, const element_siz &p1,
const element_siz &bp0, const element_siz &p0,
const element_siz &p1, const uint32_t band_stride,
const uint16_t &num_layers, const element_siz &codeblock_size,
const uint8_t &Cmodes)
: j2k_region(p0, p1),
Expand All @@ -290,7 +290,7 @@ j2k_precinct_subband::j2k_precinct_subband(uint8_t orientation, uint8_t M_b, uin
}

const uint32_t num_codeblocks = this->num_codeblock_x * this->num_codeblock_y;
const uint32_t band_stride = bp1.x - bp0.x;
// const uint32_t band_stride = stride;
if (num_codeblocks != 0) {
inclusion_info = new tagtree(this->num_codeblock_x, this->num_codeblock_y);
ZBP_info = new tagtree(this->num_codeblock_x, this->num_codeblock_y);
Expand Down Expand Up @@ -551,9 +551,7 @@ void j2k_precinct_subband::parse_packet_header(buf_chain *packet_header, uint16_
if (!(block->Cmodes & HT_MIXED)) {
// Must be the first HT Cleanup pass
if (segment_bytes < 2) {
printf(
"ERROR: Length information for a HT-codeblock is "
"invalid\n");
printf("ERROR: Length information for a HT-codeblock is invalid\n");
throw std::exception();
}
next_segment_passes = 2;
Expand Down Expand Up @@ -595,9 +593,7 @@ void j2k_precinct_subband::parse_packet_header(buf_chain *packet_header, uint16_
if (block->Cmodes & HT_MIXED) {
block->Cmodes &= static_cast<uint16_t>(~(HT_PHLD | HT));
} else {
printf(
"ERROR: Length information for a HT-codeblock is "
"invalid\n");
printf("ERROR: Length information for a HT-codeblock is invalid\n");
throw std::exception();
}
}
Expand Down Expand Up @@ -1028,7 +1024,7 @@ j2k_precinct::j2k_precinct(const uint8_t &r, const uint32_t &idx, const element_
ceil_int(pos1.y - yob[subband[i]->orientation], sr));
this->pband[i] = MAKE_UNIQUE<j2k_precinct_subband>(
subband[i]->orientation, subband[i]->M_b, subband[i]->R_b, subband[i]->transformation,
subband[i]->delta, subband[i]->i_samples, subband[i]->pos0, subband[i]->pos1, pbpos0, pbpos1,
subband[i]->delta, subband[i]->i_samples, subband[i]->pos0, pbpos0, pbpos1, subband[i]->stride,
num_layers, codeblock_size, Cmodes);
}
}
Expand Down Expand Up @@ -1059,8 +1055,9 @@ j2k_subband::j2k_subband(element_siz p0, element_siz p1, uint8_t orientation, ui
if (num_samples) {
if (orientation != BAND_LL) {
// If not the lowest resolution, buffers for subbands shall be created.
i_samples = static_cast<sprec_t *>(aligned_mem_alloc(sizeof(sprec_t) * num_samples, 32));
memset(i_samples, 0, sizeof(sprec_t) * num_samples);
i_samples =
static_cast<sprec_t *>(aligned_mem_alloc(sizeof(sprec_t) * this->stride * (pos1.y - pos0.y), 32));
memset(i_samples, 0, sizeof(sprec_t) * this->stride * (pos1.y - pos0.y));
} else {
i_samples = ibuf;
}
Expand Down Expand Up @@ -1096,10 +1093,12 @@ j2k_resolution::j2k_resolution(const uint8_t &r, const element_siz &p0, const el
i_samples = nullptr;
if (!is_empty) {
if (index == 0) {
i_samples = static_cast<sprec_t *>(aligned_mem_alloc(sizeof(sprec_t) * num_samples, 32));
i_samples =
static_cast<sprec_t *>(aligned_mem_alloc(sizeof(sprec_t) * this->stride * (pos1.y - pos0.y), 32));
memset(i_samples, 0, sizeof(sprec_t) * num_samples);
} else {
i_samples = static_cast<sprec_t *>(aligned_mem_alloc(sizeof(sprec_t) * num_samples, 32));
i_samples =
static_cast<sprec_t *>(aligned_mem_alloc(sizeof(sprec_t) * this->stride * (pos1.y - pos0.y), 32));
}
}
}
Expand Down Expand Up @@ -2504,7 +2503,7 @@ void j2k_tile::decode() {

// copy samples in resolution buffer to that in tile component buffer
uint32_t height = tc1.y - tc0.y;
uint32_t width = tc1.x - tc0.x;
uint32_t width = round_up(tc1.x - tc0.x, 32U);
uint32_t stride = round_up(width, 32U);
// size_t num_samples = static_cast<size_t>(tc1.x - tc0.x) * (tc1.y - tc0.y);
#if defined(OPENHTJ2K_ENABLE_ARM_NEON)
Expand Down Expand Up @@ -2664,7 +2663,7 @@ void j2k_tile::find_gcd_of_precinct_size(element_siz &out) {
for (uint8_t r = 0; r <= this->tcomp[c].get_dwt_levels(); r++) {
PP = this->tcomp[c].get_precinct_size(r);
PPx = (PPx > PP.x) ? static_cast<uint8_t>(PP.x) : PPx;
PPy = (PPy > PP.y) ? static_cast<uint8_t>(PP.y) : PPx;
PPy = (PPy > PP.y) ? static_cast<uint8_t>(PP.y) : PPy;
}
}
out.x = PPx;
Expand Down Expand Up @@ -3010,7 +3009,7 @@ uint8_t *j2k_tile::encode() {
#if defined(OPENHTJ2K_TRY_AVX2) && defined(__AVX2__)
for (uint32_t y = 0; y < height; ++y) {
int32_t *sp = src + y * stride;
sprec_t *dp = cr->i_samples + y * (bottom_right.x - top_left.x);
sprec_t *dp = cr->i_samples + y * stride;
uint32_t num_tc_samples = bottom_right.x - top_left.x;
for (; num_tc_samples >= 16; num_tc_samples -= 16) {
__m256i v0 = _mm256_load_si256((__m256i *)sp);
Expand All @@ -3027,7 +3026,7 @@ uint8_t *j2k_tile::encode() {
#elif defined(OPENHTJ2K_ENABLE_ARM_NEON)
for (uint32_t y = 0; y < height; ++y) {
int32_t *sp = src + y * stride;
sprec_t *dp = cr->i_samples + y * (bottom_right.x - top_left.x);
sprec_t *dp = cr->i_samples + y * stride;
uint32_t num_tc_samples = bottom_right.x - top_left.x;
for (; num_tc_samples >= 8; num_tc_samples -= 8) {
auto vsrc0 = vld1q_s32(sp);
Expand All @@ -3043,7 +3042,7 @@ uint8_t *j2k_tile::encode() {
#else
for (uint32_t y = 0; y < height; ++y) {
int32_t *sp = src + y * stride;
sprec_t *dp = cr->i_samples + y * (bottom_right.x - top_left.x);
sprec_t *dp = cr->i_samples + y * round_up(bottom_right.x - top_left.x, 32U);
uint32_t num_tc_samples = bottom_right.x - top_left.x;
for (; num_tc_samples > 0; --num_tc_samples) {
*dp++ = static_cast<sprec_t>(*sp++);
Expand Down
15 changes: 9 additions & 6 deletions source/core/coding/coding_units.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ class j2k_region {
element_siz pos0;
// bottom-right coordinate (exclusive) of a region in the reference grid
element_siz pos1;
// width for line buffer
uint32_t stride;

// return top-left coordinate (inclusive)
[[nodiscard]] element_siz get_pos0() const { return pos0; }
// return bottom-right coordinate (exclusive)
Expand All @@ -55,7 +58,7 @@ class j2k_region {
// set bottom-right coordinate (exclusive)
void set_pos1(element_siz in) { pos1 = in; }
j2k_region() = default;
j2k_region(element_siz p0, element_siz p1) : pos0(p0), pos1(p1) {}
j2k_region(element_siz p0, element_siz p1) : pos0(p0), pos1(p1), stride(round_up(pos1.x - pos0.x, 32U)) {}
};

/********************************************************************************
Expand Down Expand Up @@ -176,8 +179,8 @@ class j2k_precinct_subband : public j2k_region {
uint32_t num_codeblock_x;
uint32_t num_codeblock_y;
j2k_precinct_subband(uint8_t orientation, uint8_t M_b, uint8_t R_b, uint8_t transformation,
float stepsize, sprec_t *ibuf, const element_siz &bp0, const element_siz &bp1,
const element_siz &p0, const element_siz &p1, const uint16_t &num_layers,
float stepsize, sprec_t *ibuf, const element_siz &bp0, const element_siz &p0,
const element_siz &p1, const uint32_t stride, const uint16_t &num_layers,
const element_siz &codeblock_size, const uint8_t &Cmodes);
~j2k_precinct_subband() {
delete inclusion_info;
Expand Down Expand Up @@ -312,7 +315,7 @@ class j2k_resolution : public j2k_region {
void scale();
void destroy() {
aligned_mem_free(i_samples);
for (auto b = 0; b < num_bands; ++b) {
for (uint8_t b = 0; b < num_bands; ++b) {
if (subbands != nullptr) {
subbands[b]->destroy();
}
Expand Down Expand Up @@ -420,7 +423,7 @@ class j2k_tile_component : public j2k_tile_base {
void perform_dc_offset(uint8_t transformation, bool is_signed);

void destroy() {
for (auto r = 0; r < this->NL; ++r) {
for (uint8_t r = 0; r < this->NL; ++r) {
if (resolution != nullptr) {
auto p = resolution[r].get();
if (p != nullptr) resolution[r]->destroy();
Expand Down Expand Up @@ -491,7 +494,7 @@ class j2k_tile : public j2k_tile_base {
public:
j2k_tile();
void destroy() {
for (auto c = 0; c < this->num_components; ++c) {
for (uint16_t c = 0; c < this->num_components; ++c) {
tcomp[c].destroy();
}
}
Expand Down
Loading