Skip to content

Commit

Permalink
Merge pull request #23 from osamu620/mcu
Browse files Browse the repository at this point in the history
Change internal method of buffering and precision of quantization
  • Loading branch information
osamu620 authored Sep 29, 2023
2 parents 1a2b85d + 7b1eb6c commit fba7583
Show file tree
Hide file tree
Showing 18 changed files with 754 additions and 603 deletions.
23 changes: 17 additions & 6 deletions apps/main_enc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ int main(int argc, char *argv[]) {
if (parse_args(argc, argv, infile, outfile, QF, YCCtype, benchmark)) {
return EXIT_FAILURE;
}
uint8_t *imdata = read_pnm(infile, width, height, nc);
jpegenc::im_info inimg(imdata, width, height, nc);
FILE *fp;
int fpos = read_pnm(fp, infile, width, height, nc);
jpegenc::im_info inimg(fp, fpos, width, height, nc);

size_t duration = 0;
auto start = std::chrono::high_resolution_clock::now();
Expand All @@ -26,21 +27,31 @@ int main(int argc, char *argv[]) {
printf("Elapsed time for encoding: %7.3lf [ms]\n", static_cast<double>(duration) / 1000.0);
printf("Throughput: %7.3lf [MP/s]\n", (width * height) / static_cast<double>(duration));
} else {
constexpr double benchtime = 1000.0; // duration of benchmark in milliseconds
int iter = 0;
bool warmup = true;
constexpr double warmuptime = 2000.0; // duration of warmup in milliseconds
constexpr double benchtime = 1000.0; // duration of benchmark in milliseconds
int iter = 0;
while (1) {
encoder.invoke();
iter++;
auto stop = std::chrono::high_resolution_clock::now() - start;
duration = std::chrono::duration_cast<std::chrono::microseconds>(stop).count();
if ((static_cast<double>(duration) / 1000.0) >= benchtime) break;
if (warmup) {
if ((static_cast<double>(duration) / 1000.0) >= warmuptime) {
start = std::chrono::high_resolution_clock::now();
iter = 0;
warmup = false;
}
} else {
if ((static_cast<double>(duration) / 1000.0) >= benchtime) break;
}
}

double et = benchtime / (static_cast<double>(duration) / 1000.0);
printf("Frames rate: %7.3lf [fps]\n", iter * et / (benchtime / 1000.0));
printf("Throughput: %7.3lf [MP/s]\n", (width * height * iter * et) / (benchtime * 1000.0));
}
std::free(imdata);
fclose(fp);

const std::vector<uint8_t> codestream = encoder.get_codestream();
std::cout << "Codestream bytes = " << codestream.size() << std::endl;
Expand Down
2 changes: 1 addition & 1 deletion apps/parse_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ void print_help() {
std::cout << " -h print this help" << std::endl;
}

int parse_args(int argc, char *argv[], std::string &inname, std::string &outname, int &QF, int &YCCtype,
int parse_args(int argc, char **&argv, std::string &inname, std::string &outname, int &QF, int &YCCtype,
bool &benchmark) {
YCCtype = YCC::YUV420;
QF = 75;
Expand Down
59 changes: 32 additions & 27 deletions apps/pnm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
* shall be called in the user code.
* @author Osamu Watanabe
*/
unsigned char *read_pnm(const std::string &name, int &width, int &height, int &ncomp) {
FILE *fp = fopen(name.c_str(), "rb");
size_t read_pnm(FILE *&fp, const std::string &name, int &width, int &height, int &ncomp) {
fp = fopen(name.c_str(), "rb");
if (fp == nullptr) {
printf("File %s is not found.\n", name.c_str());
exit(EXIT_FAILURE);
Expand Down Expand Up @@ -48,6 +48,11 @@ unsigned char *read_pnm(const std::string &name, int &width, int &height, int &n
if (c == '2' || c == '3') {
isASCII = true;
}

if (isASCII) {
printf("ASCII ppm is not supported.\n");
exit(EXIT_FAILURE);
}
while (status < DONE) {
c = fgetc(fp);
// eat spaces, LF or CR, or comments
Expand Down Expand Up @@ -91,29 +96,29 @@ unsigned char *read_pnm(const std::string &name, int &width, int &height, int &n
printf("Maximum value greater than 255 is not supported\n");
exit(EXIT_FAILURE);
}

int numpixels = width * height * ncomp;
auto *buf = (unsigned char *)malloc(numpixels * sizeof(unsigned char));
if (buf == nullptr) {
printf("malloc() error\n");
exit(EXIT_FAILURE);
}

// read pixel values into buffer
if (!isASCII) {
fread(buf, sizeof(unsigned char), numpixels, fp);
} else {
for (int i = 0; i < numpixels; ++i) {
val = 0;
c = fgetc(fp);
while (c != ' ' && c != '\n' && c != EOF) {
val *= 10;
val += c - '0';
c = fgetc(fp);
}
buf[i] = val;
}
}
fclose(fp);
return buf;
return ftell(fp);
// int numpixels = width * height * ncomp;
// auto *buf_extended = (unsigned char *)malloc(numpixels * sizeof(unsigned char));
// if (buf_extended == nullptr) {
// printf("malloc() error\n");
// exit(EXIT_FAILURE);
// }
//
// // read pixel values into buffer
// if (!isASCII) {
// fread(buf_extended, sizeof(unsigned char), numpixels, fp);
// } else {
// for (int i = 0; i < numpixels; ++i) {
// val = 0;
// c = fgetc(fp);
// while (c != ' ' && c != '\n' && c != EOF) {
// val *= 10;
// val += c - '0';
// c = fgetc(fp);
// }
// buf_extended[i] = val;
// }
// }
// fclose(fp);
// return buf_extended;
}
6 changes: 4 additions & 2 deletions include/jpegenc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
namespace jpegenc {

struct im_info {
uint8_t *data;
FILE *data;
const int32_t pos;
const int32_t width;
const int32_t height;
const int32_t nc;
im_info(uint8_t *buf, int32_t w, int32_t h, int32_t c) : data(buf), width(w), height(h), nc(c) {}
im_info(FILE *buf, int32_t fpos, int32_t w, int32_t h, int32_t c)
: data(buf), pos(fpos), width(w), height(h), nc(c) {}
};

class jpeg_encoder {
Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ target_sources(jpegenc
quantization.cpp
block_coding.cpp
jpegenc.cpp
bitstream.cpp
)
36 changes: 36 additions & 0 deletions lib/bitstream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Generates code for every target that this compiler can support.
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "bitstream.cpp" // this file
#include <hwy/foreach_target.h> // must come before highway.h
#include <hwy/highway.h>

#include "bitstream.hpp"

namespace jpegenc_hwy {
namespace HWY_NAMESPACE {
namespace hn = hwy::HWY_NAMESPACE;

/**
 * @brief Copies 8 bytes from `in` to `out` in reversed order (out[i] = in[7 - i]).
 *
 * On non-scalar targets the copy is done with a Highway vector capped at
 * 8 uint8_t lanes; on HWY_SCALAR a plain byte loop is used instead.
 *
 * The unaligned LoadU/StoreU variants are used on purpose: the caller in
 * stream_buf::put_qword() passes a destination that is an arbitrary byte
 * offset inside the output buffer, while Highway's Load/Store require the
 * pointer to be aligned to the vector size.
 *
 * @param in  source; at least 8 readable bytes, must not overlap `out`
 * @param out destination; at least 8 writable bytes
 */
HWY_ATTR void trial(uint8_t *HWY_RESTRICT in, uint8_t *HWY_RESTRICT out) {
#if HWY_TARGET != HWY_SCALAR
  const HWY_CAPPED(uint8_t, 8) u8;
  const auto vin = LoadU(u8, in);
  StoreU(Reverse(u8, vin), u8, out);
#else
  // Scalar fallback: same reversed copy, one byte at a time.
  for (int i = 0; i < 8; ++i) {
    out[i] = in[7 - i];
  }
#endif
}
} // namespace HWY_NAMESPACE
} // namespace jpegenc_hwy

#if HWY_ONCE
namespace jpegenc_hwy {
HWY_EXPORT(trial);
// Non-template entry point used by the rest of the library: forwards both
// pointers to the `trial` implementation chosen through HWY_DYNAMIC_DISPATCH.
// `in` must provide 8 readable bytes and `out` 8 writable bytes.
void send_8_bytes(uint8_t *in, uint8_t *out) { return HWY_DYNAMIC_DISPATCH(trial)(in, out); }
} // namespace jpegenc_hwy
#endif
108 changes: 89 additions & 19 deletions lib/bitstream.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,84 @@

#include <cstdint>
#include <vector>
#include <memory>
#include <cstring>

#include "jpgmarkers.hpp"

// #define NAIVE
#define USE_VECTOR 0

namespace jpegenc_hwy {
// Writes the 8 bytes found at `in` to `out` in reversed order
// (implemented in bitstream.cpp).
void send_8_bytes(uint8_t *in, uint8_t *out);
}  // namespace jpegenc_hwy

/**
 * @brief Growable, heap-backed byte buffer used as the codestream sink.
 *
 * Bytes are appended at `cur_byte`; `pos` counts the bytes written so far.
 * When the buffer is full its capacity is doubled (see expand()).
 * `pos` and `cur_byte` are public and are kept consistent by every member
 * function: cur_byte == buffer start + pos.
 */
class stream_buf {
 private:
  std::unique_ptr<uint8_t[]> buf;  // owned storage
  size_t len;                      // capacity of `buf` in bytes

 public:
  size_t pos;         // number of bytes written since construction / last get_buf()
  uint8_t *cur_byte;  // next write position inside `buf`

  /// Creates an empty buffer with no storage; storage is allocated on first write.
  stream_buf() : buf(nullptr), len(0), pos(0), cur_byte(nullptr) {}

  /// Creates a buffer with an initial capacity of `size` bytes.
  explicit stream_buf(size_t size)
      : buf(std::make_unique<uint8_t[]>(size)), len(size), pos(0), cur_byte(buf.get()) {}

  /**
   * @brief Grows the buffer, preserving its current contents.
   *
   * Capacity is doubled; a buffer with zero capacity gets a small non-zero
   * capacity instead, because doubling zero would never make room.
   */
  inline void expand() {
    constexpr size_t min_capacity = 64;
    const size_t new_len = (len == 0) ? min_capacity : len + len;
    // Allocate before touching `buf`: if allocation throws, the old storage is
    // still owned by `buf` and nothing leaks.
    std::unique_ptr<uint8_t[]> new_buf = std::make_unique<uint8_t[]>(new_len);
    if (len != 0) {
      std::memcpy(new_buf.get(), buf.get(), len);
    }
    buf = std::move(new_buf);  // releases the old storage
    len = new_len;
    cur_byte = buf.get() + pos;
  }

  /// Appends a single byte, growing the buffer when it is full.
  inline void put_byte(uint8_t val) {
    if (pos == len) {
      expand();
    }
    *cur_byte++ = val;
    pos++;
  }

  /**
   * @brief Appends the 8 bytes of `val` via jpegenc_hwy::send_8_bytes().
   *
   * send_8_bytes() reverses the in-memory byte order of `val`, so on a
   * little-endian host the most significant byte is written first.
   */
  inline void put_qword(uint64_t val) {
    // Loop rather than a single check: one doubling is not enough when the
    // capacity is smaller than 8 bytes.
    while (pos + 8 > len) {
      expand();
    }
    jpegenc_hwy::send_8_bytes(reinterpret_cast<uint8_t *>(&val), cur_byte);
    cur_byte += 8;
    pos += 8;
  }

  /**
   * @brief Returns the start of the storage and rewinds the write position.
   *
   * After this call `pos` is 0, so callers that need the number of bytes
   * written must read `pos` before calling get_buf().
   */
  uint8_t *get_buf() {
    pos = 0;
    cur_byte = buf.get();
    return buf.get();
  }
};

class bitstream {
private:
int32_t bits;
uint64_t tmp;
#if USE_VECTOR != 0
std::vector<uint8_t> stream;
#else
stream_buf stream;
#endif

inline void emit_qword(uint64_t d) {
uint64_t val;
Expand All @@ -27,10 +95,14 @@ class bitstream {
}
}
} else {
#if USE_VECTOR != 0
for (int i = 56; i >= 0; i -= 8) {
val = d >> i;
put_byte(val);
}
#else
stream.put_qword(d);
#endif
}
}

Expand All @@ -53,7 +125,6 @@ class bitstream {
}

void flush() {
#if not defined(NAIVE)
int n = (bits + 8 - 1) / 8;
tmp <<= 8 * n - bits;
tmp |= ~(0xFFFFFFFFFFFFFFFFUL << (8 * n - bits));
Expand All @@ -69,34 +140,25 @@ class bitstream {
}
tmp = 0;
bits = 0;
#else
if (bits) {
// stuff bit = '1'
uint8_t stuff = 0xFFU >> bits;
tmp <<= (8 - bits);
tmp |= stuff;
put_byte(tmp);
if (tmp == 0xFF) {
// byte stuff
put_byte(0x00);
}
}
tmp = 0;
bits = 0;
#endif
}

public:
bitstream() : bits(0), tmp(0) {}
// bitstream() : bits(0), tmp(0) {}

#if USE_VECTOR != 0
explicit bitstream(size_t length) : bits(0), tmp(0) { stream.reserve(length); }
inline void put_byte(uint8_t d) { stream.push_back(d); }
#else
explicit bitstream(size_t length) : bits(0), tmp(0), stream(length) {}
inline void put_byte(uint8_t d) { stream.put_byte(d); }
#endif

inline void put_word(uint16_t d) {
put_byte(d >> 8);
put_byte(d & 0xFF);
}

inline void put_bits(uint32_t cwd, int32_t len) {
inline void put_bits(uint32_t cwd, const int32_t len) {
#ifndef NDEBUG
assert(len > 0);
#endif
Expand Down Expand Up @@ -148,6 +210,14 @@ class bitstream {
std::vector<uint8_t> finalize() {
flush();
put_word(EOI);
#if USE_VECTOR != 0
return std::move(stream);
#else
size_t size = stream.pos;
std::vector<uint8_t> out;
out.resize(size);
memcpy(out.data(), stream.get_buf(), size);
return out;
#endif
}
};
Loading

0 comments on commit fba7583

Please sign in to comment.