From 2f7ffab0779213dd88093441471eea7c7c06e9d4 Mon Sep 17 00:00:00 2001 From: Brant Burnett Date: Sun, 24 Dec 2023 00:15:26 -0500 Subject: [PATCH] Eliminate most CharTable range checks Motivation ---------- Indexing into the CharTable is always in bounds when the index is a byte, so we can safely eliminate the range check in hot paths. Also, we can use RVA statics with ushort on newer versions of C#. Modifications ------------- Use ref and Unsafe.Add to index into the char table in DecompressAllTags, and make CharTable an RVA static. Results ------- BenchmarkDotNet v0.13.10, Windows 11 (10.0.22621.2861/22H2/2022Update/SunValley2) 12th Gen Intel Core i7-1270P, 1 CPU, 16 logical and 12 physical cores .NET SDK 8.0.100 [Host] : .NET 8.0.0 (8.0.23.53103), X64 RyuJIT AVX2 Job-IVITYO : .NET Framework 4.8.1 (4.8.9181.0), X64 RyuJIT VectorSize=256 Job-SVSNXE : .NET Framework 4.8.1 (4.8.9181.0), X64 RyuJIT VectorSize=256 Job-UZLHWZ : .NET 6.0.25 (6.0.2523.51912), X64 RyuJIT AVX2 Job-VTIJJN : .NET 6.0.25 (6.0.2523.51912), X64 RyuJIT AVX2 Job-BDCQZW : .NET 8.0.0 (8.0.23.53103), X64 RyuJIT AVX2 Job-WMAPZR : .NET 8.0.0 (8.0.23.53103), X64 RyuJIT AVX2 Job-KAZCEW : .NET 8.0.0 (8.0.23.53103), X64 RyuJIT AVX2 Job-QKBVXP : .NET 8.0.0 (8.0.23.53103), X64 RyuJIT AVX2 | Method | Runtime | BuildConfiguration | PGO | Mean | Error | StdDev | Ratio | Rank | |----------- |------------------- |------------------- |---- |---------:|---------:|---------:|------:|-----:| | Decompress | .NET Framework 4.8 | Previous | N | 14.61 us | 0.104 us | 0.092 us | 1.00 | 1 | | Decompress | .NET Framework 4.8 | Default | N | 14.68 us | 0.099 us | 0.093 us | 1.01 | 1 | | | | | | | | | | | | Decompress | .NET 6.0 | Previous | N | 12.90 us | 0.063 us | 0.049 us | 1.00 | 1 | | Decompress | .NET 6.0 | Default | N | 12.80 us | 0.114 us | 0.106 us | 0.99 | 1 | | | | | | | | | | | | Decompress | .NET 8.0 | Previous | N | 10.41 us | 0.089 us | 0.083 us | 1.00 | 1 | | Decompress | .NET 8.0 | Default | N | 10.29 us | 0.075 us | 
0.070 us | 0.99 | 1 | | | | | | | | | | | | Decompress | .NET 8.0 | Previous | Y | 10.50 us | 0.108 us | 0.101 us | 1.00 | 2 | | Decompress | .NET 8.0 | Default | Y | 10.28 us | 0.085 us | 0.075 us | 0.98 | 1 | --- .../Configuration/VersionComparisonConfig.cs | 20 +++++++++---------- .../Snappier.Benchmarks.csproj | 2 +- Snappier/Internal/Constants.cs | 10 ++++++---- Snappier/Internal/SnappyDecompressor.cs | 11 +++++----- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/Snappier.Benchmarks/Configuration/VersionComparisonConfig.cs b/Snappier.Benchmarks/Configuration/VersionComparisonConfig.cs index ab9dfde..89b6cf9 100644 --- a/Snappier.Benchmarks/Configuration/VersionComparisonConfig.cs +++ b/Snappier.Benchmarks/Configuration/VersionComparisonConfig.cs @@ -20,23 +20,23 @@ public VersionComparisonConfig(Job baseJob) var jobBefore48 = jobBefore.WithRuntime(ClrRuntime.Net48).AsBaseline(); var jobBefore60 = jobBefore.WithRuntime(CoreRuntime.Core60).AsBaseline(); - var jobBefore70 = jobBefore.WithRuntime(CoreRuntime.Core70).AsBaseline(); - var jobBefore70Pgo = jobBefore70.WithPgo(); + var jobBefore80 = jobBefore.WithRuntime(CoreRuntime.Core80).AsBaseline(); + var jobBefore80Pgo = jobBefore80.WithPgo(); var jobAfter48 = baseJob.WithRuntime(ClrRuntime.Net48); var jobAfter60 = baseJob.WithRuntime(CoreRuntime.Core60); - var jobAfter70 = baseJob.WithRuntime(CoreRuntime.Core70); - var jobAfter70Pgo = jobAfter70.WithPgo(); + var jobAfter80 = baseJob.WithRuntime(CoreRuntime.Core80); + var jobAfter80Pgo = jobAfter80.WithPgo(); AddJob(jobBefore48); AddJob(jobBefore60); - AddJob(jobBefore70); - AddJob(jobBefore70Pgo); + AddJob(jobBefore80); + AddJob(jobBefore80Pgo); AddJob(jobAfter48); AddJob(jobAfter60); - AddJob(jobAfter70); - AddJob(jobAfter70Pgo); + AddJob(jobAfter80); + AddJob(jobAfter80Pgo); WithOrderer(VersionComparisonOrderer.Default); @@ -53,8 +53,8 @@ public IEnumerable GetExecutionOrder(ImmutableArray p.Job.Environment.Runtime.MsBuildMoniker) .ThenBy(p => 
PgoColumn.IsPgo(p) ? 1 : 0) - .ThenBy(p => p.DisplayInfo) - .ThenBy(p => !p.Descriptor.Baseline); + .ThenBy(p => !p.Descriptor.Baseline) + .ThenBy(p => p.DisplayInfo); public IEnumerable GetSummaryOrder(ImmutableArray benchmarksCases, Summary summary) => diff --git a/Snappier.Benchmarks/Snappier.Benchmarks.csproj b/Snappier.Benchmarks/Snappier.Benchmarks.csproj index 9dc7308..4e1fb96 100644 --- a/Snappier.Benchmarks/Snappier.Benchmarks.csproj +++ b/Snappier.Benchmarks/Snappier.Benchmarks.csproj @@ -33,7 +33,7 @@ - + diff --git a/Snappier/Internal/Constants.cs b/Snappier/Internal/Constants.cs index 32263e3..3e62229 100644 --- a/Snappier/Internal/Constants.cs +++ b/Snappier/Internal/Constants.cs @@ -1,4 +1,6 @@ -namespace Snappier.Internal +using System; + +namespace Snappier.Internal { internal static class Constants { @@ -35,8 +37,8 @@ public enum ChunkType : byte /// (1) Extracting a byte is faster than a bit-field /// (2) It properly aligns copy offset so we do not need a <<8 /// - public static readonly ushort[] CharTable = - { + public static ReadOnlySpan CharTable => + [ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, @@ -69,6 +71,6 @@ public enum ChunkType : byte 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 - }; + ]; } } diff --git a/Snappier/Internal/SnappyDecompressor.cs b/Snappier/Internal/SnappyDecompressor.cs index eade2e4..a76e01a 100644 --- a/Snappier/Internal/SnappyDecompressor.cs +++ b/Snappier/Internal/SnappyDecompressor.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using System.IO; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; namespace Snappier.Internal { @@ -178,10 +179,10 @@ public static int ReadUncompressedLength(ReadOnlySpan 
input) internal void DecompressAllTags(ReadOnlySpan inputSpan) { - // Put Constants.CharTable on the stack to simplify lookups within the loops below. - // Slicing with length 256 here allows the JIT compiler to recognize the size is greater than - // the size of the byte we're indexing with and optimize out range checks. - ReadOnlySpan charTable = Constants.CharTable.AsSpan(0, 256); + // We only index into this array with a byte, and the list is 256 long, so it's safe to skip range checks. + // JIT doesn't seem to recognize this currently, so we'll use a ref and Unsafe.Add to avoid the checks. + Debug.Assert(Constants.CharTable.Length >= 256); + ref ushort charTable = ref MemoryMarshal.GetReference(Constants.CharTable); unchecked { @@ -323,7 +324,7 @@ internal void DecompressAllTags(ReadOnlySpan inputSpan) } else { - ushort entry = charTable[c]; + ushort entry = Unsafe.Add(ref charTable, c); // We don't use BitConverter to read because we might be reading past the end of the span // But we know that's safe because we'll be doing it in _scratch with extra data on the end.