From d83b2791aedec02a2adcfca7dd3596cbf4aab53b Mon Sep 17 00:00:00 2001 From: Daniel Stien Date: Tue, 9 Jan 2024 19:49:00 +1100 Subject: [PATCH] Heuristics for determining format type. --- include/stunpack.h | 12 ++++++---- src/lib/stunpack.c | 35 ++++++++++++++++++++++++++++- src/lib/stunts.c | 51 +++++++++++++++++++++++++++++++++++-------- src/lib/stunts.h | 18 ++++++++++----- src/lib/stunts_huff.c | 13 +++++++++++ src/lib/stunts_huff.h | 1 + src/lib/stunts_rle.c | 14 +++++++++++- src/lib/stunts_rle.h | 1 + src/lib/util.h | 1 + 9 files changed, 126 insertions(+), 20 deletions(-) diff --git a/include/stunpack.h b/include/stunpack.h index c75b59f..4e20581 100644 --- a/include/stunpack.h +++ b/include/stunpack.h @@ -26,9 +26,10 @@ #define STPK_NAME "stunpack" #define STPK_BUGS "daniel@stien.org" -#define STPK_RET_OK 0 -#define STPK_RET_ERR 1 -#define STPK_RET_ERR_DATA_LEFT 2 +#define STPK_RET_OK 0 +#define STPK_RET_ERR 1 +#define STPK_RET_ERR_UNKNOWN_FMT 3 +#define STPK_RET_ERR_DATA_LEFT 10 typedef enum { // Automatic format detection when decompressing. @@ -38,7 +39,8 @@ typedef enum { // EA compression library by Frank Barchard used by 4-D Sports Driving for Amiga and 4-D Driving for PC98. STPK_FMT_BARCHARD, // Amiga RPck archiver format used for 3-d shapes in 4-D Sports Driving for Amiga.
- STPK_FMT_RPCK + STPK_FMT_RPCK, + STPK_FMT_UNKNOWN } stpk_FmtType; typedef enum { @@ -103,6 +105,8 @@ void stpk_deinit(stpk_Context *ctx); unsigned int stpk_decompress(stpk_Context *ctx); +stpk_FmtType stpk_getFmtType(stpk_Context *ctx); + const char *stpk_fmtStuntsVerStr(stpk_FmtStuntsVer version); #endif diff --git a/src/lib/stunpack.c b/src/lib/stunpack.c index 2ddbad6..a0cb160 100644 --- a/src/lib/stunpack.c +++ b/src/lib/stunpack.c @@ -62,7 +62,40 @@ void stpk_deinit(stpk_Context *ctx) unsigned int stpk_decompress(stpk_Context *ctx) { - return stunts_decompress(ctx); + switch (stpk_getFmtType(ctx)) { + case STPK_FMT_STUNTS: + return stunts_decompress(ctx); + default: + return STPK_RET_ERR_UNKNOWN_FMT; + } +} + +// Guess format type if user didn't specify format in context. The result is stored in ctx->format.type and returned; magic bytes are only inspected when the source is long enough to contain them. +stpk_FmtType stpk_getFmtType(stpk_Context *ctx) +{ + if (ctx->format.type == STPK_FMT_AUTO) { + // TODO: Check other header details, cleanup, move to rpck.c. + if (ctx->src.len >= 4 && ctx->src.data[0] == 'R' + && ctx->src.data[1] == 'P' + && ctx->src.data[2] == 'c' + && ctx->src.data[3] == 'k') { + ctx->format.type = STPK_FMT_RPCK; + } + // TODO: Check other header details, cleanup, move to barchard.c. + else if (ctx->src.len >= 2 && ctx->src.data[1] == 0xFB) { + ctx->format.type = STPK_FMT_BARCHARD; + } + else if (stunts_isValid(ctx)) { + ctx->format.type = STPK_FMT_STUNTS; + ctx->format.stunts.version = STPK_FMT_STUNTS_VER_AUTO; + ctx->format.stunts.maxPasses = 0; + } + else { + ctx->format.type = STPK_FMT_UNKNOWN; + } + } + + return ctx->format.type; } const char *stpk_fmtStuntsVerStr(stpk_FmtStuntsVer version) diff --git a/src/lib/stunts.c b/src/lib/stunts.c index 079900d..6df3291 100644 --- a/src/lib/stunts.c +++ b/src/lib/stunts.c @@ -23,7 +23,45 @@ #include "stunts.h" -int inline stunts_isRle(stpk_Buffer *buf); +// Stunts compression does not have any identifier bytes, so we check if the +// contents correspond to legal combinations of header values.
+int stunts_isValid(stpk_Context *ctx) +{ + // Returns 1 when the header looks like Stunts data, 0 otherwise. First check if the source length is within the limits of what the format supports. + if (ctx->src.len < STUNTS_SIZE_MIN || ctx->src.len > STUNTS_SIZE_MAX) { + return 0; + } + + unsigned int totalLength = stunts_peekLength(ctx->src.data, 1); + + // The 24-bit length directly follows the first header byte (pass count or type). Reject files whose total uncompressed length is below max(SIZE_MIN, source length - SIZE_MIN). + if (totalLength < UTIL_MAX(STUNTS_SIZE_MIN, ctx->src.len - STUNTS_SIZE_MIN)) { + return 0; + } + + // If the flag for multiple passes is set, a sane file will have + // - 2 passes + // - Total length longer than first pass' length + // - First pass' length greater than both SIZE_MIN and source length - SIZE_MIN + // - First pass has either a valid RLE or Huffman header + if (UTIL_GET_FLAG(ctx->src.data[0], STUNTS_PASSES_RECUR)) { + unsigned char passes = ctx->src.data[0] & STUNTS_PASSES_MASK; + unsigned int passLength = stunts_peekLength(ctx->src.data, 5); + + return passes == 2 + && totalLength > passLength + && passLength > UTIL_MAX(STUNTS_SIZE_MIN, ctx->src.len - STUNTS_SIZE_MIN) + && ( + stunts_rle_isValid(&ctx->src, 4) + || stunts_huff_isValid(&ctx->src, 4) + ); + } + // A single pass file simply has a valid RLE or Huffman header + else { + return stunts_rle_isValid(&ctx->src, 0) + || stunts_huff_isValid(&ctx->src, 0); + } +} // Decompress sub-files in source buffer.
unsigned int stunts_decompress(stpk_Context *ctx) @@ -40,7 +78,7 @@ unsigned int stunts_decompress(stpk_Context *ctx) passes &= STUNTS_PASSES_MASK; UTIL_VERBOSE1(" %-10s %d\n", "passes", passes); - stunts_getLength(&ctx->src, &finalLen); + finalLen = stunts_readLength(&ctx->src); UTIL_VERBOSE1(" %-10s %d\n", "finalLen", finalLen); UTIL_VERBOSE1(" %-8s %d\n", "srcLen", ctx->src.len); UTIL_VERBOSE1(" %-8s %.2f\n", "ratio", (float)finalLen / ctx->src.len); @@ -59,7 +97,7 @@ unsigned int stunts_decompress(stpk_Context *ctx) UTIL_VERBOSE1("\nPass %d/%d\n", i + 1, passes); type = ctx->src.data[ctx->src.offset++]; - stunts_getLength(&ctx->src, &ctx->dst.len); + ctx->dst.len = stunts_readLength(&ctx->src); UTIL_VERBOSE1(" %-10s %d\n", "dstLen", ctx->dst.len); if (util_allocDst(ctx)) { @@ -83,7 +121,7 @@ unsigned int stunts_decompress(stpk_Context *ctx) // Decompression had source data left, but it is the last pass. || (retval == STPK_RET_ERR_DATA_LEFT && (i == (passes - 1))) // There are more passes, but the next is not valid RLE. 
- || ((i < (passes - 1)) && !stunts_isRle(&ctx->dst)) + || ((i < (passes - 1)) && !stunts_rle_isValid(&ctx->dst, 0)) ) ) { UTIL_WARN("Huffman decompression with Stunts 1.1 bit stream format failed, retrying with Stunts 1.0 format.\n"); @@ -124,8 +162,3 @@ unsigned int stunts_decompress(stpk_Context *ctx) return 0; } - -int inline stunts_isRle(stpk_Buffer *buf) -{ - return buf->data[0] == STUNTS_TYPE_RLE && buf->data[7] == 0; -} diff --git a/src/lib/stunts.h b/src/lib/stunts.h index 591949b..9d5fcc8 100644 --- a/src/lib/stunts.h +++ b/src/lib/stunts.h @@ -22,21 +22,29 @@ #include -#define STUNTS_MAX_SIZE 0xFFFFFF +#define STUNTS_SIZE_MAX 0xFFFFFF +#define STUNTS_SIZE_MIN 0x10 #define STUNTS_PASSES_MASK 0x7F #define STUNTS_PASSES_RECUR 0x80 #define STUNTS_TYPE_RLE 0x01 #define STUNTS_TYPE_HUFF 0x02 +int stunts_isValid(stpk_Context *ctx); unsigned int stunts_decompress(stpk_Context *ctx); -// Read file length: WORD remainder + BYTE multiplier * 0x10000. -inline void stunts_getLength(stpk_Buffer *buf, unsigned int *len) +// Peek at the 24-bit little-endian data length stored at data[offset..offset + 2] without advancing anything. Declared static inline so every translation unit including this header gets its own definition. +static inline unsigned int stunts_peekLength(const unsigned char *data, unsigned int offset) { - *len = buf->data[buf->offset] | buf->data[buf->offset + 1] << 8; // Read remainder. - *len += 0x10000 * buf->data[buf->offset + 2]; // Add multiplier. + return data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16; +} + +// Read the 24-bit data length at the buffer's current offset, advance the offset by 3 and return the length. +static inline unsigned int stunts_readLength(stpk_Buffer *buf) +{ + unsigned int len = stunts_peekLength(buf->data, buf->offset); buf->offset += 3; + return len; } #endif diff --git a/src/lib/stunts_huff.c b/src/lib/stunts_huff.c index b831334..3f687ed 100644 --- a/src/lib/stunts_huff.c +++ b/src/lib/stunts_huff.c @@ -17,12 +17,25 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ +#include "stunts.h" #include "util.h" #include "stunts_huff.h" inline unsigned char stpk_getHuffByte(stpk_Context *ctx); +// Check if data at given offset is a likely Huffman header (returns 0 when the header bytes would lie outside the buffer): +// - Type is Huffman +// - Tree levels between 2 and 16 +// - No leaves at root node +int stunts_huff_isValid(stpk_Buffer *buf, unsigned int offset) +{ + return offset + 5 < buf->len && buf->data[offset + 0] == STUNTS_TYPE_HUFF + && (buf->data[offset + 4] & STUNTS_HUFF_LEVELS_MASK) >= 2 + && (buf->data[offset + 4] & STUNTS_HUFF_LEVELS_MASK) <= STUNTS_HUFF_LEVELS_MAX + && buf->data[offset + 5] == 0; // Leaves at root +} + // Decompress Huffman coded sub-file. unsigned int stunts_huff_decompress(stpk_Context *ctx) { diff --git a/src/lib/stunts_huff.h b/src/lib/stunts_huff.h index 32a4c36..45f14bd 100644 --- a/src/lib/stunts_huff.h +++ b/src/lib/stunts_huff.h @@ -32,6 +32,7 @@ #define STUNTS_HUFF_PREFIX_MSB (1 << (STUNTS_HUFF_PREFIX_WIDTH - 1)) #define STUNTS_HUFF_WIDTH_ESC 0x40 +int stunts_huff_isValid(stpk_Buffer *buf, unsigned int offset); unsigned int stunts_huff_decompress(stpk_Context *ctx); unsigned int stunts_huff_genOffsets(stpk_Context *ctx, unsigned int levels, const unsigned char *leafNodesPerLevel, short *codeOffsets, unsigned short *totalCodes); void stunts_huff_genPrefix(stpk_Context *ctx, unsigned int levels, const unsigned char *leafNodesPerLevel, const unsigned char *alphabet, unsigned char *symbols, unsigned char *widths); diff --git a/src/lib/stunts_rle.c b/src/lib/stunts_rle.c index 86b86ab..42ec5ca 100644 --- a/src/lib/stunts_rle.c +++ b/src/lib/stunts_rle.c @@ -24,13 +24,25 @@ inline unsigned int stunts_rle_repeatByte(stpk_Context *ctx, unsigned char cur, unsigned int rep); +// Check if data at given offset is a likely RLE header (returns 0 when the header bytes would lie outside the buffer): +// - Type is RLE +// - Reserved byte after length is 0x00 +// - Escape code length between 1 and 10 +int stunts_rle_isValid(stpk_Buffer *buf, unsigned int offset) +{ + return offset + 8 < buf->len && buf->data[offset + 0] == STUNTS_TYPE_RLE + && buf->data[offset + 7] == 0 // 
Reserved, always 0 + && (buf->data[offset + 8] & STUNTS_RLE_ESCLEN_MASK) >= 1 + && (buf->data[offset + 8] & STUNTS_RLE_ESCLEN_MASK) <= STUNTS_RLE_ESCLEN_MAX; +} + // Decompress run-length encoded sub-file. unsigned int stunts_rle_decompress(stpk_Context *ctx) { unsigned int srcLen, dstLen, i; unsigned char unk, escLen, esc[STUNTS_RLE_ESCLEN_MAX], escLookup[STUNTS_RLE_ESCLOOKUP_LEN]; - stunts_getLength(&ctx->src, &srcLen); + srcLen = stunts_readLength(&ctx->src); UTIL_VERBOSE1(" %-10s %d\n", "srcLen", srcLen); unk = ctx->src.data[ctx->src.offset++]; diff --git a/src/lib/stunts_rle.h b/src/lib/stunts_rle.h index b040458..19f9ef7 100644 --- a/src/lib/stunts_rle.h +++ b/src/lib/stunts_rle.h @@ -28,6 +28,7 @@ #define STUNTS_RLE_ESCLOOKUP_LEN 0x100 #define STUNTS_RLE_ESCSEQ_POS 0x01 +int stunts_rle_isValid(stpk_Buffer *buf, unsigned int offset); unsigned int stunts_rle_decompress(stpk_Context *ctx); unsigned int stunts_rle_decodeSeq(stpk_Context *ctx, unsigned char esc); unsigned int stunts_rle_decodeOne(stpk_Context *ctx, const unsigned char *escLookup); diff --git a/src/lib/util.h b/src/lib/util.h index 76cc3cc..1954db2 100644 --- a/src/lib/util.h +++ b/src/lib/util.h @@ -35,6 +35,7 @@ util_stringBits16(curWord), code, ## __VA_ARGS__) #define UTIL_GET_FLAG(data, mask) ((data & mask) == mask) +#define UTIL_MAX(X, Y) (((X) > (Y)) ? (X) : (Y)) #define UTIL_MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) int util_allocDst(stpk_Context *ctx);