Skip to content

Commit a2e2f73

Browse files
committed
Reduce allocations during image decoding.
Related to 7632a0a.

A caller may use IImageAllocator to amortize allocations for the decoded image buffer when decoding multiple images. However, several other transient allocations are made which create memory traffic. By reducing these transient allocations, we reduce the number of garbage collections required for a performance boost.

We achieve this via a few approaches:

- Use stackalloc to avoid allocating small arrays on the heap. As we are targeting netstandard 2.0 and not 2.1, we lack the ability to access the allocation safely via a Span. Instead, we must unsafely access the allocation via a pointer.
- Use Array.Empty to avoid allocating new, empty arrays for mipmaps.
- Use singletons for the IDecodeTarga classes, as they are stateless.
- In INTColor.ToF16, use out parameters to avoid allocating a small array.

This provides the following benchmark improvements.

---- Before

| Method | Payload | Mean | Error | StdDev | StdErr | Median | Gen0 | Allocated |
|------- |---------- |----------:|----------:|----------:|----------:|----------:|-------:|----------:|
| Pfim | 32bit.dds | 2.015 us | 0.0112 us | 0.0088 us | 0.0025 us | 2.016 us | 0.0381 | 496 B |
| Pfim | dxt1.dds | 38.347 us | 0.7373 us | 0.8491 us | 0.1899 us | 38.417 us | 2.8687 | 36480 B |
| Pfim | dxt3.dds | 43.061 us | 0.6037 us | 0.5647 us | 0.1458 us | 42.982 us | 2.8687 | 36480 B |
| Pfim | dxt5.dds | 48.005 us | 0.5501 us | 0.5146 us | 0.1329 us | 47.931 us | 2.8687 | 36520 B |

| Method | Payload | Mean | Error | StdDev | StdErr | Median | Gen0 | Allocated |
|------- |-------------- |---------:|----------:|----------:|----------:|---------:|-------:|----------:|
| Pfim | 24bit-rle.tga | 3.180 us | 0.0371 us | 0.0347 us | 0.0090 us | 3.183 us | 0.0191 | 272 B |
| Pfim | 32bit-rle.tga | 3.441 us | 0.0266 us | 0.0222 us | 0.0062 us | 3.436 us | 0.0191 | 272 B |
| Pfim | 32bit.tga | 2.012 us | 0.0107 us | 0.0100 us | 0.0026 us | 2.012 us | 0.0191 | 272 B |

---- After

| Method | Payload | Mean | Error | StdDev | StdErr | Median | Gen0 | Allocated |
|------- |---------- |----------:|----------:|----------:|----------:|----------:|-------:|----------:|
| Pfim | 32bit.dds | 2.034 us | 0.0242 us | 0.0226 us | 0.0058 us | 2.032 us | 0.0343 | 472 B |
| Pfim | dxt1.dds | 32.704 us | 0.6247 us | 0.6135 us | 0.1534 us | 32.393 us | - | 456 B |
| Pfim | dxt3.dds | 40.869 us | 0.1294 us | 0.1211 us | 0.0313 us | 40.860 us | - | 456 B |
| Pfim | dxt5.dds | 43.623 us | 0.2501 us | 0.2340 us | 0.0604 us | 43.594 us | - | 456 B |

| Method | Payload | Mean | Error | StdDev | StdErr | Median | Gen0 | Allocated |
|------- |-------------- |---------:|----------:|----------:|----------:|---------:|-------:|----------:|
| Pfim | 24bit-rle.tga | 2.965 us | 0.0502 us | 0.0469 us | 0.0121 us | 2.973 us | 0.0191 | 248 B |
| Pfim | 32bit-rle.tga | 3.332 us | 0.0531 us | 0.0443 us | 0.0123 us | 3.329 us | 0.0191 | 248 B |
| Pfim | 32bit.tga | 2.367 us | 0.0404 us | 0.0358 us | 0.0096 us | 2.358 us | 0.0191 | 248 B |
1 parent 5ba6ee1 commit a2e2f73

File tree

12 files changed

+46
-31
lines changed

12 files changed

+46
-31
lines changed

src/Pfim/dds/Bc5Dds.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@
22
{
33
public class Bc5Dds : CompressedDds
44
{
5-
private readonly byte[] _firstGradient = new byte[8];
6-
private readonly byte[] _secondGradient = new byte[8];
7-
85
public Bc5Dds(DdsHeader header, PfimConfig config) : base(header, config)
96
{
107
}
@@ -15,8 +12,11 @@ public Bc5Dds(DdsHeader header, PfimConfig config) : base(header, config)
1512
protected override byte DivSize => 4;
1613
protected override byte CompressedBytesPerBlock => 16;
1714

18-
protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
15+
protected override unsafe int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
1916
{
17+
byte* _firstGradient = stackalloc byte[8];
18+
byte* _secondGradient = stackalloc byte[8];
19+
2020
streamIndex = ExtractGradient(_firstGradient, stream, streamIndex);
2121
ulong firstCodes = stream[streamIndex++];
2222
firstCodes |= ((ulong)stream[streamIndex++] << 8);
@@ -50,7 +50,7 @@ protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint
5050
return streamIndex;
5151
}
5252

53-
internal static int ExtractGradient(byte[] gradient, byte[] stream, int bIndex)
53+
internal static unsafe int ExtractGradient(byte* gradient, byte[] stream, int bIndex)
5454
{
5555
byte endpoint0;
5656
byte endpoint1;

src/Pfim/dds/Bc6hBc7/INTColor.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@ private static int SIGN_EXTEND(int x, int nb)
6363
return ((((x) & (1 << ((nb) - 1))) != 0 ? ((~0) ^ ((1 << (nb)) - 1)) : 0) | (x));
6464
}
6565

66-
public void ToF16(ushort[] aF16, bool bSigned)
66+
public void ToF16(out ushort red, out ushort green, out ushort blue, bool bSigned)
6767
{
68-
aF16[0] = INT2F16(r, bSigned);
69-
aF16[1] = INT2F16(g, bSigned);
70-
aF16[2] = INT2F16(b, bSigned);
68+
red = INT2F16(r, bSigned);
69+
green = INT2F16(g, bSigned);
70+
blue = INT2F16(b, bSigned);
7171
}
7272

7373
private static ushort INT2F16(int input, bool bSigned)

src/Pfim/dds/Bc6hDds.cs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -421,13 +421,12 @@ protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint
421421
fc.g = FinishUnquantize((g1 * (Constants.BC67_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + Constants.BC67_WEIGHT_ROUND) >> Constants.BC67_WEIGHT_SHIFT, bSigned);
422422
fc.b = FinishUnquantize((b1 * (Constants.BC67_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + Constants.BC67_WEIGHT_ROUND) >> Constants.BC67_WEIGHT_SHIFT, bSigned);
423423

424-
ushort[] rgb = new ushort[3];
425-
fc.ToF16(rgb, bSigned);
424+
fc.ToF16(out ushort r, out ushort g, out ushort b, bSigned);
426425

427426
// Clamp 0..1, and convert to byte (we're losing high dynamic range)
428-
data[dataIndex++] = (byte)((Math.Max(0.0f, Math.Min(1.0f, ConvertHalfToFloat(rgb[2]))) * 255.0f) + 0.5f); // blue
429-
data[dataIndex++] = (byte)((Math.Max(0.0f, Math.Min(1.0f, ConvertHalfToFloat(rgb[1]))) * 255.0f) + 0.5f); // green
430-
data[dataIndex++] = (byte)((Math.Max(0.0f, Math.Min(1.0f, ConvertHalfToFloat(rgb[0]))) * 255.0f) + 0.5f); // red
427+
data[dataIndex++] = (byte)((Math.Max(0.0f, Math.Min(1.0f, ConvertHalfToFloat(b))) * 255.0f) + 0.5f); // blue
428+
data[dataIndex++] = (byte)((Math.Max(0.0f, Math.Min(1.0f, ConvertHalfToFloat(g))) * 255.0f) + 0.5f); // green
429+
data[dataIndex++] = (byte)((Math.Max(0.0f, Math.Min(1.0f, ConvertHalfToFloat(r))) * 255.0f) + 0.5f); // red
431430
data[dataIndex++] = 255;
432431

433432
// Is mult 4?

src/Pfim/dds/Bc7Dds.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ public Bc7Dds(DdsHeader header, PfimConfig config) : base(header, config)
6565
protected override byte DivSize => 4;
6666
protected override byte CompressedBytesPerBlock => 16;
6767

68-
protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
68+
protected override unsafe int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
6969
{
7070
// I would prefer to use Span, but not sure if I should reference System.Memory in this project
7171
// copy data instead
@@ -86,7 +86,7 @@ protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint
8686
byte uIndexPrec2 = ms_aInfo[uMode].uIndexPrec2;
8787
int i;
8888
uint uStartBit = uMode + 1u;
89-
int[] P = new int[6];
89+
int* P = stackalloc int[6];
9090
byte uShape = GetBits(ref uStartBit, ms_aInfo[uMode].uPartitionBits);
9191
Debug.Assert(uShape < Constants.BC7_MAX_SHAPES);
9292

src/Pfim/dds/CompressedDds.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ namespace Pfim
99
public abstract class CompressedDds : Dds
1010
{
1111
private bool _compressed;
12-
private MipMapOffset[] _mipMaps = new MipMapOffset[0];
12+
private MipMapOffset[] _mipMaps = Array.Empty<MipMapOffset>();
1313

1414
public override MipMapOffset[] MipMaps => _mipMaps;
1515

src/Pfim/dds/Dxt1Dds.cs

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public Dxt1Dds(DdsHeader header, PfimConfig config) : base(header, config)
1515
public override ImageFormat Format => ImageFormat.Rgba32;
1616
public override int BitsPerPixel => 8 * PIXEL_DEPTH;
1717

18-
protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
18+
protected override unsafe int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
1919
{
2020
// Colors are stored in a pair of 16 bits
2121
ushort color0 = stream[streamIndex++];
@@ -31,9 +31,21 @@ protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint
3131
// Used the two extracted colors to create two new colors that are
3232
// slightly different.
3333
(var i0, var i1) = (c0.As8BitA(), c1.As8BitA());
34-
Color8888[] colors = color0 > color1 ?
35-
new[] { i0, i1, c0.Lerp(c1, 1f / 3).As8BitA(), c0.Lerp(c1, 2f / 3).As8BitA() } :
36-
new[] { i0, i1, c0.Lerp(c1, 0.5f).As8BitA(), default };
34+
Color8888* colors = stackalloc Color8888[4];
35+
if (color0 > color1)
36+
{
37+
colors[0] = i0;
38+
colors[1] = i1;
39+
colors[2] = c0.Lerp(c1, 1f / 3).As8BitA();
40+
colors[3] = c0.Lerp(c1, 2f / 3).As8BitA();
41+
}
42+
else
43+
{
44+
colors[0] = i0;
45+
colors[1] = i1;
46+
colors[2] = c0.Lerp(c1, 0.5f).As8BitA();
47+
colors[3] = default;
48+
}
3749

3850
for (int i = 0; i < 4; i++)
3951
{

src/Pfim/dds/Dxt3Dds.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public Dxt3Dds(DdsHeader header, PfimConfig config) : base(header, config)
1515
{
1616
}
1717

18-
protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
18+
protected override unsafe int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
1919
{
2020
/*
2121
* Strategy for decompression:
@@ -42,7 +42,7 @@ protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint
4242
var c1 = ColorFloatRgb.FromRgb565(color1);
4343

4444
(var i0, var i1) = (c0.As8Bit(), c1.As8Bit());
45-
Color888[] colors = new[] { i0, i1, c0.Lerp(c1, 1f / 3).As8Bit(), c0.Lerp(c1, 2f / 3).As8Bit() };
45+
Color888* colors = stackalloc Color888[] { i0, i1, c0.Lerp(c1, 1f / 3).As8Bit(), c0.Lerp(c1, 2f / 3).As8Bit() };
4646

4747
for (int i = 0; i < 4; i++)
4848
{

src/Pfim/dds/Dxt5Dds.cs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ public class Dxt5Dds : CompressedDds
77
private const byte PIXEL_DEPTH = 4;
88
private const byte DIV_SIZE = 4;
99

10-
private readonly byte[] alpha = new byte[8];
11-
1210
public override int BitsPerPixel => 8 * PIXEL_DEPTH;
1311
public override ImageFormat Format => ImageFormat.Rgba32;
1412
protected override byte DivSize => DIV_SIZE;
@@ -20,8 +18,10 @@ public Dxt5Dds(DdsHeader header, PfimConfig config) : base(header, config)
2018

2119
protected override byte PixelDepthBytes => PIXEL_DEPTH;
2220

23-
protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
21+
protected override unsafe int Decode(byte[] stream, byte[] data, int streamIndex, uint dataIndex, uint stride)
2422
{
23+
byte* alpha = stackalloc byte[8];
24+
2525
streamIndex = Bc5Dds.ExtractGradient(alpha, stream, streamIndex);
2626

2727
ulong alphaCodes = stream[streamIndex++];
@@ -43,7 +43,7 @@ protected override int Decode(byte[] stream, byte[] data, int streamIndex, uint
4343
var c1 = ColorFloatRgb.FromRgb565(color1);
4444

4545
(var i0, var i1) = (c0.As8Bit(), c1.As8Bit());
46-
Color888[] colors = new[] { i0, i1, c0.Lerp(c1, 1f / 3).As8Bit(), c0.Lerp(c1, 2f / 3).As8Bit() };
46+
Color888* colors = stackalloc Color888[] { i0, i1, c0.Lerp(c1, 1f / 3).As8Bit(), c0.Lerp(c1, 2f / 3).As8Bit() };
4747

4848
for (int alphaShift = 0; alphaShift < 48; alphaShift += 12)
4949
{

src/Pfim/dds/UncompressedDds.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public class UncompressedDds : Dds
1212
private readonly uint? _bitsPerPixel;
1313
private readonly bool? _rgbSwapped;
1414
private ImageFormat _format;
15-
private MipMapOffset[] _mipMaps = new MipMapOffset[0];
15+
private MipMapOffset[] _mipMaps = Array.Empty<MipMapOffset>();
1616

1717

1818
internal UncompressedDds(DdsHeader header, PfimConfig config, uint bitsPerPixel, bool rgbSwapped) : base(header, config)

src/Pfim/targa/CompressedTarga.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ namespace Pfim
99
/// </summary>
1010
public class CompressedTarga : IDecodeTarga
1111
{
12+
internal static readonly CompressedTarga Instance = new CompressedTarga();
13+
1214
unsafe byte[] FastPass(byte[] data, ArraySegment<byte> arr, TargaHeader header, int stride, long arrPosition)
1315
{
1416
var dataLen = header.Height * stride;

0 commit comments

Comments
 (0)