Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Develop #29

Merged
merged 2 commits on
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions apps/main_enc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ int main(int argc, char *argv[]) {
return EXIT_FAILURE;
}
FILE *fp;
int fpos = read_pnm(fp, infile, width, height, nc);
size_t fpos = read_pnm(fp, infile, width, height, nc);
jpegenc::im_info inimg(fp, fpos, width, height, nc);

size_t duration = 0;
Expand All @@ -31,7 +31,7 @@ int main(int argc, char *argv[]) {
constexpr double warmuptime = 2000.0; // duration of warmup in milliseconds
constexpr double benchtime = 1000.0; // duration of benchmark in milliseconds
int iter = 0;
while (1) {
while (true) {
encoder.invoke();
iter++;
auto stop = std::chrono::high_resolution_clock::now() - start;
Expand Down
4 changes: 2 additions & 2 deletions include/jpegenc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ namespace jpegenc {

struct im_info {
FILE *data;
const int32_t pos;
const size_t pos;
const int32_t width;
const int32_t height;
const int32_t nc;
im_info(FILE *buf, int32_t fpos, int32_t w, int32_t h, int32_t c)
im_info(FILE *buf, size_t fpos, int32_t w, int32_t h, int32_t c)
: data(buf), pos(fpos), width(w), height(h), nc(c) {}
};

Expand Down
2 changes: 1 addition & 1 deletion lib/bitstream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ HWY_ATTR void trial(uint8_t *HWY_RESTRICT in, uint8_t *HWY_RESTRICT out) {
#if HWY_ONCE
namespace jpegenc_hwy {
HWY_EXPORT(trial);
void send_8_bytes(uint8_t *in, uint8_t *out) {
[[maybe_unused]] void send_8_bytes(uint8_t *in, uint8_t *out) {
HWY_DYNAMIC_DISPATCH(trial)
(in, out);
}
Expand Down
23 changes: 12 additions & 11 deletions lib/bitstream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#define USE_VECTOR 0

namespace jpegenc_hwy {
void send_8_bytes(uint8_t *in, uint8_t *out);
[[maybe_unused]] void send_8_bytes(uint8_t *in, uint8_t *out);
} // namespace jpegenc_hwy

class stream_buf {
Expand Down Expand Up @@ -54,15 +54,15 @@ class stream_buf {
if (pos + 8 > len) {
expand();
}
// // #if HWY_TARGET == HWY_NEON
// #if (HWY_TARGET | HWY_NEON_WITHOUT_AES) == HWY_NEON_WITHOUT_AES
// *(uint64_t *)cur_byte = __builtin_bswap64(val);
// #elif (HWY_TARGET | HWY_NEON) == HWY_NEON
// *(uint64_t *)cur_byte = __builtin_bswap64(val);
// #elif HWY_TARGET <= HWY_SSE2
// *(uint64_t *)cur_byte = __bswap_64(val);
// #endif
jpegenc_hwy::send_8_bytes((uint8_t *)&val, cur_byte);
// emits eight uint8_t values at once
#if (HWY_TARGET | HWY_NEON_WITHOUT_AES) == HWY_NEON_WITHOUT_AES
*(uint64_t *)cur_byte = __builtin_bswap64(val);
#elif (HWY_TARGET | HWY_NEON) == HWY_NEON
*(uint64_t *)cur_byte = __builtin_bswap64(val);
#elif HWY_TARGET <= HWY_SSE2
*(uint64_t *)cur_byte = __bswap_64(val);
#endif
// jpegenc_hwy::send_8_bytes((uint8_t *)&val, cur_byte);
cur_byte += 8;
pos += 8;
}
Expand Down Expand Up @@ -216,7 +216,8 @@ class bitstream {
flush();
put_word(RST[n]);
}
auto get_stream() {

[[maybe_unused]] auto get_stream() {
flush();
return &stream;
}
Expand Down
1 change: 0 additions & 1 deletion lib/block_coding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "block_coding.hpp"
#include "constants.hpp"
#include "dct.hpp"
#include "huffman_tables.hpp"
#include "quantization.hpp"
#include "ycctype.hpp"

Expand Down
39 changes: 25 additions & 14 deletions lib/block_coding_256.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,31 @@ auto row67 = TwoTablesLookupLanes(s16, v2, v3, SetTableIndices(s16, &indices[3
auto row23_1 = TwoTablesLookupLanes(s16, v2, v3, SetTableIndices(s16, &indices[4 * 16]));
auto row45_1 = TwoTablesLookupLanes(s16, v0, v1, SetTableIndices(s16, &indices[5 * 16]));

HWY_ALIGN int16_t m[32] = {
-1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1,
};
auto maskv1 = Load(s16, m);
auto maskv2 = Load(s16, m + 16);
row23 = IfThenElseZero(MaskFromVec(maskv1), row23);
row45 = IfThenElseZero(MaskFromVec(maskv2), row45);
row23_1 = IfThenZeroElse(MaskFromVec(maskv1), row23_1);
row45_1 = IfThenZeroElse(MaskFromVec(maskv2), row45_1);
row23 = Or(row23, row23_1);
row45 = Or(row45, row45_1);
row01 = InsertLane(row01, 10, ExtractLane(v2, 0));
row67 = InsertLane(row67, 5, ExtractLane(v1, 15));
// HWY_ALIGN int16_t m[32] = {
// -1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,
// -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1,
// };
// auto maskv1 = Load(s16, m);
// auto maskv2 = Load(s16, m + 16);
//
// row23 = IfThenElseZero(MaskFromVec(maskv1), row23);
// row45 = IfThenElseZero(MaskFromVec(maskv2), row45);
// row23_1 = IfThenZeroElse(MaskFromVec(maskv1), row23_1);
// row45_1 = IfThenZeroElse(MaskFromVec(maskv2), row45_1);

HWY_ALIGN uint8_t m1[8] = {0x07, 0xFF};
HWY_ALIGN uint8_t m2[8] = {0xFF, 0xE0};
auto maskv1 = LoadMaskBits(s16, m1);
auto maskv2 = LoadMaskBits(s16, m2);

row23 = IfThenElseZero(maskv1, row23);
row45 = IfThenElseZero(maskv2, row45);
row23_1 = IfThenZeroElse(maskv1, row23_1);
row45_1 = IfThenZeroElse(maskv2, row45_1);
row23 = Or(row23, row23_1);
row45 = Or(row45, row45_1);
row01 = InsertLane(row01, 10, ExtractLane(v2, 0));
row67 = InsertLane(row67, 5, ExtractLane(v1, 15));

/* DCT block is now in zig-zag order; start Huffman encoding process. */

Expand Down
2 changes: 0 additions & 2 deletions lib/color.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@

#include <hwy/highway.h>

#include <utility>

#include "color.hpp"
#include "ycctype.hpp"
#include "constants.hpp"
Expand Down
1 change: 0 additions & 1 deletion lib/jpegenc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include "block_coding.hpp"
#include "color.hpp"
#include "constants.hpp"
#include "dct.hpp"
#include "image_chunk.hpp"
#include "huffman_tables.hpp"
#include "jpgheaders.hpp"
Expand Down
1 change: 0 additions & 1 deletion lib/jpgheaders.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#include <algorithm>
#include <vector>

#include "bitstream.hpp"
Expand Down
3 changes: 0 additions & 3 deletions lib/quantization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
#include <hwy/highway.h>

#include <cmath>
#include <utility>
#include "ycctype.hpp"
#include "constants.hpp"
#include "quantization.hpp"

namespace jpegenc_hwy {
Expand Down
2 changes: 1 addition & 1 deletion lib/quantization.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ constexpr float qmatrix[2][64] = {
void create_scaled_qtable(int c, int QF, int16_t *qtable);
namespace jpegenc_hwy {
namespace HWY_NAMESPACE {
HWY_ATTR void quantize_core(int16_t *HWY_RESTRICT data, const int *HWY_RESTRICT qtable);
HWY_ATTR void quantize_core(int16_t *HWY_RESTRICT data, const int16_t *HWY_RESTRICT qtable);
} // namespace HWY_NAMESPACE
} // namespace jpegenc_hwy