Skip to content

Commit

Permalink
Heuristics for determining format type.
Browse files Browse the repository at this point in the history
  • Loading branch information
dstien committed Jan 9, 2024
1 parent 4e7c2fd commit d83b279
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 20 deletions.
12 changes: 8 additions & 4 deletions include/stunpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@
#define STPK_NAME "stunpack"
#define STPK_BUGS "[email protected]"

// Status codes returned by the library.
// Each code is defined exactly once; redefining a macro with a different
// value (STPK_RET_ERR_DATA_LEFT as both 2 and 10) is a constraint violation.
#define STPK_RET_OK 0
#define STPK_RET_ERR 1
// No decompressor is dispatched for the resolved format type.
#define STPK_RET_ERR_UNKNOWN_FMT 3
// Decompression finished with source data left over.
#define STPK_RET_ERR_DATA_LEFT 10

typedef enum {
// Automatic format detection when decompressing.
Expand All @@ -38,7 +39,8 @@ typedef enum {
// EA compression library by Frank Barchard used by 4-D Sports Driving for Amiga and 4-D Driving for PC98.
STPK_FMT_BARCHARD,
// Amiga RPck archiver format used for 3-d shapes in 4-D Sports Driving for Amiga.
STPK_FMT_RPCK
STPK_FMT_RPCK,
STPK_FMT_UNKNOWN
} stpk_FmtType;

typedef enum {
Expand Down Expand Up @@ -103,6 +105,8 @@ void stpk_deinit(stpk_Context *ctx);

unsigned int stpk_decompress(stpk_Context *ctx);

stpk_FmtType stpk_getFmtType(stpk_Context *ctx);

const char *stpk_fmtStuntsVerStr(stpk_FmtStuntsVer version);

#endif
35 changes: 34 additions & 1 deletion src/lib/stunpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,40 @@ void stpk_deinit(stpk_Context *ctx)

// Decompress the source buffer in ctx using the format type reported by
// stpk_getFmtType().
// Returns the status code of the dispatched decompressor, or
// STPK_RET_ERR_UNKNOWN_FMT when no decompressor is dispatched for the format.
unsigned int stpk_decompress(stpk_Context *ctx)
{
    // Dispatch on the resolved format. An unconditional call to
    // stunts_decompress() ahead of this switch would make it unreachable.
    switch (stpk_getFmtType(ctx)) {
    case STPK_FMT_STUNTS:
        return stunts_decompress(ctx);
    default:
        // Every other format type is reported as unknown from here.
        return STPK_RET_ERR_UNKNOWN_FMT;
    }
}

// Guess format type if user didn't specify format in context.
// When ctx->format.type is STPK_FMT_AUTO the guess is stored back into
// ctx->format.type (STPK_FMT_UNKNOWN if nothing matches); otherwise the
// type already present in the context is returned untouched.
stpk_FmtType stpk_getFmtType(stpk_Context *ctx)
{
    if (ctx->format.type != STPK_FMT_AUTO) {
        return ctx->format.type;
    }

    // Every signature check is guarded by the source length so that a short
    // buffer is never read past its end.
    // TODO: Check other header details, cleanup, move to rpck.c.
    if (ctx->src.len >= 4
        && ctx->src.data[0] == 'R'
        && ctx->src.data[1] == 'P'
        && ctx->src.data[2] == 'c'
        && ctx->src.data[3] == 'k') {
        ctx->format.type = STPK_FMT_RPCK;
    }
    // TODO: Check other header details, cleanup, move to barchard.c.
    else if (ctx->src.len >= 2 && ctx->src.data[1] == 0xFB) {
        ctx->format.type = STPK_FMT_BARCHARD;
    }
    // stunts_isValid() performs its own minimum length check.
    else if (stunts_isValid(ctx)) {
        ctx->format.type = STPK_FMT_STUNTS;
        ctx->format.stunts.version = STPK_FMT_STUNTS_VER_AUTO;
        ctx->format.stunts.maxPasses = 0;
    }
    else {
        ctx->format.type = STPK_FMT_UNKNOWN;
    }

    return ctx->format.type;
}

const char *stpk_fmtStuntsVerStr(stpk_FmtStuntsVer version)
Expand Down
51 changes: 42 additions & 9 deletions src/lib/stunts.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,45 @@

#include "stunts.h"

int inline stunts_isRle(stpk_Buffer *buf);
// Stunts compression does not have any identifier bytes, so we check if the
// contents correspond to legal combinations of header values.
// Returns non-zero when the source buffer looks like a Stunts file, 0 otherwise.
int stunts_isValid(stpk_Context *ctx)
{
    // Check if the source length is within the limits of what the format supports.
    // The lower bound also keeps every header byte inspected below in range.
    if (ctx->src.len < STUNTS_SIZE_MIN || ctx->src.len > STUNTS_SIZE_MAX) {
        return 0;
    }

    // The 24-bit length directly follows the first header byte at offset 0,
    // just as the first pass' length at offset 5 follows its type byte at
    // offset 4 below.
    unsigned int totalLength = stunts_peekLength(ctx->src.data, 1);

    // Reject a total uncompressed length smaller than
    // max(STUNTS_SIZE_MIN, source length - STUNTS_SIZE_MIN).
    if (totalLength < UTIL_MAX(STUNTS_SIZE_MIN, ctx->src.len - STUNTS_SIZE_MIN)) {
        return 0;
    }

    // If the flag for multiple passes is set, a sane file will have
    // - 2 passes
    // - Total length longer than first pass' length
    // - First pass' length greater than
    //   max(STUNTS_SIZE_MIN, source length - STUNTS_SIZE_MIN)
    // - First pass has either a valid RLE or Huffman header
    if (UTIL_GET_FLAG(ctx->src.data[0], STUNTS_PASSES_RECUR)) {
        unsigned char passes = ctx->src.data[0] & STUNTS_PASSES_MASK;
        unsigned int passLength = stunts_peekLength(ctx->src.data, 5);

        return passes == 2
            && totalLength > passLength
            && passLength > UTIL_MAX(STUNTS_SIZE_MIN, ctx->src.len - STUNTS_SIZE_MIN)
            && (
                stunts_rle_isValid(&ctx->src, 4)
                || stunts_huff_isValid(&ctx->src, 4)
            );
    }
    // A single pass file simply has a valid RLE or Huffman header.
    else {
        return stunts_rle_isValid(&ctx->src, 0)
            || stunts_huff_isValid(&ctx->src, 0);
    }
}

// Decompress sub-files in source buffer.
unsigned int stunts_decompress(stpk_Context *ctx)
Expand All @@ -40,7 +78,7 @@ unsigned int stunts_decompress(stpk_Context *ctx)
passes &= STUNTS_PASSES_MASK;
UTIL_VERBOSE1(" %-10s %d\n", "passes", passes);

stunts_getLength(&ctx->src, &finalLen);
finalLen = stunts_readLength(&ctx->src);
UTIL_VERBOSE1(" %-10s %d\n", "finalLen", finalLen);
UTIL_VERBOSE1(" %-8s %d\n", "srcLen", ctx->src.len);
UTIL_VERBOSE1(" %-8s %.2f\n", "ratio", (float)finalLen / ctx->src.len);
Expand All @@ -59,7 +97,7 @@ unsigned int stunts_decompress(stpk_Context *ctx)
UTIL_VERBOSE1("\nPass %d/%d\n", i + 1, passes);

type = ctx->src.data[ctx->src.offset++];
stunts_getLength(&ctx->src, &ctx->dst.len);
ctx->dst.len = stunts_readLength(&ctx->src);
UTIL_VERBOSE1(" %-10s %d\n", "dstLen", ctx->dst.len);

if (util_allocDst(ctx)) {
Expand All @@ -83,7 +121,7 @@ unsigned int stunts_decompress(stpk_Context *ctx)
// Decompression had source data left, but it is the last pass.
|| (retval == STPK_RET_ERR_DATA_LEFT && (i == (passes - 1)))
// There are more passes, but the next is not valid RLE.
|| ((i < (passes - 1)) && !stunts_isRle(&ctx->dst))
|| ((i < (passes - 1)) && !stunts_rle_isValid(&ctx->dst, 0))
)
) {
UTIL_WARN("Huffman decompression with Stunts 1.1 bit stream format failed, retrying with Stunts 1.0 format.\n");
Expand Down Expand Up @@ -124,8 +162,3 @@ unsigned int stunts_decompress(stpk_Context *ctx)

return 0;
}

// Report whether buf begins with what looks like an RLE sub-file header:
// the first byte is the RLE type and the byte at index 7 is zero.
int inline stunts_isRle(stpk_Buffer *buf)
{
    // Index 7 is only inspected once the type byte has matched.
    if (buf->data[0] != STUNTS_TYPE_RLE) {
        return 0;
    }

    return buf->data[7] == 0;
}
18 changes: 13 additions & 5 deletions src/lib/stunts.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,29 @@

#include <stunpack.h>

#define STUNTS_MAX_SIZE 0xFFFFFF
#define STUNTS_SIZE_MAX 0xFFFFFF
#define STUNTS_SIZE_MIN 0x10
#define STUNTS_PASSES_MASK 0x7F
#define STUNTS_PASSES_RECUR 0x80

#define STUNTS_TYPE_RLE 0x01
#define STUNTS_TYPE_HUFF 0x02

int stunts_isValid(stpk_Context *ctx);
unsigned int stunts_decompress(stpk_Context *ctx);

// Read file length: WORD remainder + BYTE multiplier * 0x10000.
inline void stunts_getLength(stpk_Buffer *buf, unsigned int *len)
// Peek at 24-bit data length.
inline unsigned int stunts_peekLength(unsigned char *data, unsigned int offset)
{
*len = buf->data[buf->offset] | buf->data[buf->offset + 1] << 8; // Read remainder.
*len += 0x10000 * buf->data[buf->offset + 2]; // Add multiplier.
return data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16;
}

// Consume a 24-bit length at the buffer's current offset: the offset is moved
// three bytes forward and the value that started at the old offset is returned.
inline unsigned int stunts_readLength(stpk_Buffer *buf)
{
    buf->offset += 3;
    // The length starts three bytes behind the already advanced offset.
    return stunts_peekLength(buf->data, buf->offset - 3);
}

#endif
13 changes: 13 additions & 0 deletions src/lib/stunts_huff.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,25 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

#include "stunts.h"
#include "util.h"

#include "stunts_huff.h"

inline unsigned char stpk_getHuffByte(stpk_Context *ctx);

// Check if data at given offset is a likely Huffman header:
// - Header bytes up to offset + 5 lie inside the buffer
// - Type is Huffman
// - Tree levels between 2 and STUNTS_HUFF_LEVELS_MAX
// - No leaves at root node
// Returns non-zero when all checks pass, 0 otherwise.
int stunts_huff_isValid(stpk_Buffer *buf, unsigned int offset)
{
    // The highest index inspected below is offset + 5; refuse buffers that
    // are too short instead of reading past their end.
    if (buf->len <= offset + 5) {
        return 0;
    }

    unsigned int levels = buf->data[offset + 4] & STUNTS_HUFF_LEVELS_MASK;

    return buf->data[offset + 0] == STUNTS_TYPE_HUFF
        && levels >= 2
        && levels <= STUNTS_HUFF_LEVELS_MAX
        && buf->data[offset + 5] == 0; // Leaves at root
}

// Decompress Huffman coded sub-file.
unsigned int stunts_huff_decompress(stpk_Context *ctx)
{
Expand Down
1 change: 1 addition & 0 deletions src/lib/stunts_huff.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#define STUNTS_HUFF_PREFIX_MSB (1 << (STUNTS_HUFF_PREFIX_WIDTH - 1))
#define STUNTS_HUFF_WIDTH_ESC 0x40

int stunts_huff_isValid(stpk_Buffer *buf, unsigned int offset);
unsigned int stunts_huff_decompress(stpk_Context *ctx);
unsigned int stunts_huff_genOffsets(stpk_Context *ctx, unsigned int levels, const unsigned char *leafNodesPerLevel, short *codeOffsets, unsigned short *totalCodes);
void stunts_huff_genPrefix(stpk_Context *ctx, unsigned int levels, const unsigned char *leafNodesPerLevel, const unsigned char *alphabet, unsigned char *symbols, unsigned char *widths);
Expand Down
14 changes: 13 additions & 1 deletion src/lib/stunts_rle.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,25 @@

inline unsigned int stunts_rle_repeatByte(stpk_Context *ctx, unsigned char cur, unsigned int rep);

// Check if data at given offset is a likely RLE header:
// - Header bytes up to offset + 8 lie inside the buffer
// - Type is RLE
// - Reserved byte at offset + 7 is 0x00
// - Escape code length between 1 and STUNTS_RLE_ESCLEN_MAX
// Returns non-zero when all checks pass, 0 otherwise.
int stunts_rle_isValid(stpk_Buffer *buf, unsigned int offset)
{
    // The highest index inspected below is offset + 8; refuse buffers that
    // are too short instead of reading past their end.
    if (buf->len <= offset + 8) {
        return 0;
    }

    unsigned int escLen = buf->data[offset + 8] & STUNTS_RLE_ESCLEN_MASK;

    return buf->data[offset + 0] == STUNTS_TYPE_RLE
        && buf->data[offset + 7] == 0 // Reserved, always 0
        && escLen >= 1
        && escLen <= STUNTS_RLE_ESCLEN_MAX;
}

// Decompress run-length encoded sub-file.
unsigned int stunts_rle_decompress(stpk_Context *ctx)
{
unsigned int srcLen, dstLen, i;
unsigned char unk, escLen, esc[STUNTS_RLE_ESCLEN_MAX], escLookup[STUNTS_RLE_ESCLOOKUP_LEN];

stunts_getLength(&ctx->src, &srcLen);
srcLen = stunts_readLength(&ctx->src);
UTIL_VERBOSE1(" %-10s %d\n", "srcLen", srcLen);

unk = ctx->src.data[ctx->src.offset++];
Expand Down
1 change: 1 addition & 0 deletions src/lib/stunts_rle.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#define STUNTS_RLE_ESCLOOKUP_LEN 0x100
#define STUNTS_RLE_ESCSEQ_POS 0x01

int stunts_rle_isValid(stpk_Buffer *buf, unsigned int offset);
unsigned int stunts_rle_decompress(stpk_Context *ctx);
unsigned int stunts_rle_decodeSeq(stpk_Context *ctx, unsigned char esc);
unsigned int stunts_rle_decodeOne(stpk_Context *ctx, const unsigned char *escLookup);
Expand Down
1 change: 1 addition & 0 deletions src/lib/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
util_stringBits16(curWord), code, ## __VA_ARGS__)

// Evaluates to 1 when every bit of mask is set in data, 0 otherwise.
// Both arguments are parenthesized so compound expressions such as
// (A | B) can be passed as the mask without precedence surprises.
#define UTIL_GET_FLAG(data, mask) (((data) & (mask)) == (mask))
// Larger / smaller of two values. Arguments are evaluated more than once,
// so do not pass expressions with side effects.
#define UTIL_MAX(X, Y) (((X) > (Y)) ? (X) : (Y))
#define UTIL_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))

int util_allocDst(stpk_Context *ctx);
Expand Down

0 comments on commit d83b279

Please sign in to comment.