Skip to content

Commit

Permalink
Optimize ADPCM (sosCODEC) decompression
Browse files Browse the repository at this point in the history
This commit optimizes ADPCM decompression in game by:

* Removing any unsupported modes that the game does not use.
* Using a table of possible values instead of computing them on each
  iteration.

This new table is built by employing a dynamic programming technique.
The possible values are bounded and the product of the dimensions is
small, hence this technique is quite effective in optimizing things
by precomputing the values.

Signed-off-by: Giuliano Belinassi <[email protected]>
  • Loading branch information
giulianobelinassi committed Feb 9, 2023
1 parent 51e3349 commit b66f3ff
Showing 1 changed file with 96 additions and 143 deletions.
239 changes: 96 additions & 143 deletions common/soscodec.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "soscomp.h"
#include <string.h>
#include <assert.h>

// index table for stepping into step table.
static const short wCODECIndexTab[16] = {-1, -1, -1, -1, 2, 4, 6, 8, -1, -1, -1, -1, 2, 4, 6, 8};
Expand Down Expand Up @@ -33,180 +34,132 @@ void sosCODECInitStream(_SOS_COMPRESS_INFO* stream)
stream->dwSampleIndex2 = 0;
}

//
// decompress data from a 4:1 ADPCM compressed file. the number of
// bytes decompressed is returned.
//
unsigned int sosCODECDecompressData(_SOS_COMPRESS_INFO* stream, unsigned int bytes)
/* Number of possible wIndex. Comes from the fact that:
*
* next_index = clamp(next_index, 0, 88);
*
* which means 0 <= index <= 88, hence 89 indexes.
*/
#define NUM_INDEXES 89

/* Number of possible nybbles. Comes from the fact that:
*
* next_nybble = wCodeBuf & 0xF
*
* which means 0 <= next_nybble <= 15, hence 16 possibilities.
*/
#define NUM_NYBBLES 16

/* Define a dynamic programming table mapping all possible indexes and nybbles
* into their next value. Pack things together into a struct so a cache miss
* will retrieve both next index and diff value.
*
* This table should consume ~11 KB (89 x 16 entries of 8 bytes each,
* assuming a 4-byte int and a 2-byte short plus padding), which is quite small.
*
*/
/* Lookup table indexed as SosDecompTable[index][nybble]. */
static struct
{
/* NOTE(review): the next four declarations look like locals removed from the
 * old decoder that the diff view interleaved here; no visible code accesses
 * them as table fields -- confirm against the repository. */
short current_nybble;
unsigned step;
int sample;
unsigned full_length;
/* Signed sample delta precomputed for this (index, nybble) pair. */
int diff;
/* Index to use for the following nybble, already clamped to 0..88. */
short index;
} SosDecompTable[NUM_INDEXES][NUM_NYBBLES];

full_length = bytes;
stream->dwSampleIndex = 0;
stream->dwSampleIndex2 = 0;

if (stream->wBitSize == 16) {
bytes /= 2;
}

char* src = stream->lpSource;
short* dst = (short*)(stream->lpDest);
/* Flag if above table was initialized. */
static bool SosDecompTableGenerated = false;

// Handle stereo.
if (stream->wChannels == 2) {
current_nybble = 0;
for (int i = bytes; i > 0; i -= 2) {
if ((stream->dwSampleIndex & 1) != 0) {
current_nybble = stream->wCodeBuf >> 4;
stream->wCode = current_nybble;
} else {
stream->wCodeBuf = *src;
// Stereo is interleaved so skip a byte for this channel.
src += 2;
current_nybble = stream->wCodeBuf & 0xF;
stream->wCode = current_nybble;
}

step = stream->wStep;
stream->dwDifference = step >> 3;
/* Generate decompression table for 16-bit mono samples. Precompute every
* possible value of dwDifference and wIndex based on every possible
* combination of index and nybble values. */
/*
 * Fills SosDecompTable: for every (index, nybble) pair it stores the signed
 * sample delta and the clamped next index.
 *
 * NOTE(review): this span comes from a diff view. The statements that use
 * `stream`, `current_nybble`, `sample` or `dst` appear to be removed lines of
 * the old decoder interleaved with the new function -- confirm against the
 * repository before relying on this text as compilable code.
 */
void sosCODECGenerateDecompressTable(void)
{
short index, nybble;
int diff;

if ((current_nybble & 4) != 0) {
stream->dwDifference += step;
}
/* Walk every index; the step size depends only on the index. */
for (index = 0; index < NUM_INDEXES; index++) {
short step = wCODECStepTab[index];
/* For that step, walk every possible 4-bit code. */
for (nybble = 0; nybble < NUM_NYBBLES; nybble++) {
/* step / 8 is always part of the delta. */
diff = step >> 3;

if ((current_nybble & 2) != 0) {
stream->dwDifference += step >> 1;
/* Bit 2 adds the full step. */
if ((nybble & 4) != 0) {
diff += step;
}

if ((current_nybble & 1) != 0) {
stream->dwDifference += step >> 2;
/* Bit 1 adds half the step. */
if ((nybble & 2) != 0) {
diff += step >> 1;
}

if ((current_nybble & 8) != 0) {
stream->dwDifference = -stream->dwDifference;
/* Bit 0 adds a quarter of the step. */
if ((nybble & 1) != 0) {
diff += step >> 2;
}

sample = clamp(stream->dwDifference + stream->dwPredicted, -32768, 32767);
stream->dwPredicted = sample;

if (stream->wBitSize == 16) {
*dst = sample;
// Stereo is interleaved so skip a sample for this channel.
dst += 2;
} else {
*dst++ = ((sample & 0xFF00) >> 8) ^ 0x80;
/* Bit 3 is the sign bit: negate the accumulated delta. */
if ((nybble & 8) != 0) {
diff = -diff;
}

stream->wIndex += wCODECIndexTab[stream->wCode & 0x7];
stream->wIndex = clamp(stream->wIndex, 0, 88);
++stream->dwSampleIndex;
stream->wStep = wCODECStepTab[stream->wIndex];
}

src = stream->lpSource + 1;
dst = (short*)(stream->lpDest + 1);
/* Only the low three bits pick the index adjustment; the result is clamped
 * to the 0..88 range covered by NUM_INDEXES. */
short next_index = index + wCODECIndexTab[nybble & 0x7];
next_index = clamp(next_index, 0, 88);

if (stream->wBitSize == 16) {
dst = (short*)(stream->lpDest) + 1;
/* Store the delta and the next index together in the same table entry. */
SosDecompTable[index][nybble].diff = diff;
SosDecompTable[index][nybble].index = next_index;
}
}
}

for (int i = bytes; i > 0; i -= 2) {
if ((stream->dwSampleIndex2 & 1) != 0) {
current_nybble = stream->wCodeBuf2 >> 4;
stream->wCode2 = current_nybble;
} else {
stream->wCodeBuf2 = *src;
// Stereo is interleaved so skip a byte for this channel.
src += 2;
current_nybble = stream->wCodeBuf2 & 0xF;
stream->wCode2 = current_nybble;
}

step = stream->wStep2;
stream->dwDifference2 = step >> 3;

if ((current_nybble & 4) != 0) {
stream->dwDifference2 += step;
}

if ((current_nybble & 2) != 0) {
stream->dwDifference2 += step >> 1;
}
//
// decompress data from a 4:1 ADPCM compressed file. the number of
// bytes decompressed is returned.
//
//
/*
 * Decodes ADPCM data from stream->lpSource into 16-bit samples at
 * stream->lpDest using SosDecompTable, then stores the final predictor and
 * index back into the stream. Returns the `bytes` value it was called with.
 *
 * NOTE(review): this span comes from a diff view. Statements touching
 * dwDifference/dwDifference2, wCode/wCode2, wStep/wStep2, wIndex2,
 * dwSampleIndex/dwSampleIndex2 and the `for (int i = bytes; ...)` loop appear
 * to be removed lines of the old decoder interleaved with the new function --
 * confirm against the repository.
 */
unsigned int sosCODECDecompressData(_SOS_COMPRESS_INFO* stream, unsigned int bytes)
{
/* Build the lookup table on the first call only.
 * NOTE(review): the flag is a plain static bool; no synchronisation is visible
 * here, so concurrent first calls may both run the generator -- confirm whether
 * that can happen. */
if (SosDecompTableGenerated == false) {
sosCODECGenerateDecompressTable();
SosDecompTableGenerated = true;
}

if ((current_nybble & 1) != 0) {
stream->dwDifference2 += step >> 2;
}
/* Other bit sizes and channel counts are rejected (in builds where assert is
 * active). */
assert(stream->wBitSize == 16 && "Only 16-bit samples are supported");
assert(stream->wChannels == 1 && "Only mono samples are supported");

if ((current_nybble & 8) != 0) {
stream->dwDifference2 = -stream->dwDifference2;
}
/* Remember the requested size for the return value, then turn `bytes` into a
 * count of source bytes: each loop iteration below reads one source byte and
 * writes two shorts (4 output bytes, assuming a 2-byte short). Integer
 * division drops any remainder. */
unsigned full_length = bytes;
bytes /= 4;

sample = clamp(stream->dwDifference2 + stream->dwPredicted2, -32768, 32767);
stream->dwPredicted2 = sample;
/* Quickly return if we are not going to write anything. */
if (bytes == 0) {
return full_length;
}

if (stream->wBitSize == 16) {
*dst = sample;
// Stereo is interleaved so skip a sample for this channel.
dst += 2;
} else {
*dst++ = ((sample & 0xFF00) >> 8) ^ 0x80;
}
/* Read the source as unsigned bytes so that `>> 4` yields 0..15. */
unsigned char* src = (unsigned char*)stream->lpSource;
short* dst = (short*)(stream->lpDest);
/* Work on local copies of the index and predictor; they are written back
 * after the loop. */
short index = stream->wIndex;

stream->wIndex2 += wCODECIndexTab[stream->wCode2 & 0x7];
stream->wIndex2 = clamp(stream->wIndex2, 0, 88);
++stream->dwSampleIndex2;
stream->wStep2 = wCODECStepTab[stream->wIndex2];
}
} else {
for (int i = bytes; i > 0; --i) {
if ((stream->dwSampleIndex & 1) != 0) {
current_nybble = stream->wCodeBuf >> 4;
stream->wCode = current_nybble;
} else {
stream->wCodeBuf = *src++;
current_nybble = stream->wCodeBuf & 0xF;
stream->wCode = current_nybble;
}
int sample = stream->dwPredicted;

step = stream->wStep;
stream->dwDifference = step >> 3;
/* One iteration per source byte: low nybble first, then high nybble. */
while (bytes-- > 0) {
unsigned char codebuf = *src++;

if ((current_nybble & 4) != 0) {
stream->dwDifference += step;
}
/* First step: case dwSampleIndex is even (unrolled). */
char current_nybble = codebuf & 0xF;

if ((current_nybble & 2) != 0) {
stream->dwDifference += step >> 1;
}
/* Apply the precomputed delta and clamp to the signed 16-bit range. */
sample += SosDecompTable[index][current_nybble].diff;
sample = clamp(sample, -32768, 32767);

if ((current_nybble & 1) != 0) {
stream->dwDifference += step >> 2;
}
*dst++ = sample;

if ((current_nybble & 8) != 0) {
stream->dwDifference = -stream->dwDifference;
}
/* Move to the precomputed next index before decoding the high nybble. */
index = SosDecompTable[index][current_nybble].index;

sample = clamp(stream->dwDifference + stream->dwPredicted, -32768, 32767);
stream->dwPredicted = sample;
/* Second step: case dwSampleIndex is odd (unrolled). */
current_nybble = codebuf >> 4;
sample += SosDecompTable[index][current_nybble].diff;
sample = clamp(sample, -32768, 32767);

if (stream->wBitSize == 16) {
*dst++ = sample;
} else {
*dst = ((sample & 0xFF00) >> 8) ^ 0x80;
dst = (short*)((char*)(dst) + 1);
}
*dst++ = sample;

stream->wIndex += wCODECIndexTab[stream->wCode & 0x7];
stream->wIndex = clamp(stream->wIndex, 0, 88);
++stream->dwSampleIndex;
stream->wStep = wCODECStepTab[stream->wIndex];
};
index = SosDecompTable[index][current_nybble].index;
}

/* Write back the important stuff from the loop back to the struct. */
stream->dwPredicted = sample;
stream->wIndex = index;

/* The caller's original byte count is returned, not the count rounded down
 * to a multiple of 4. */
return full_length;
}

Expand Down

0 comments on commit b66f3ff

Please sign in to comment.