From 70730f5ae74a1db866defff575096105a61150ae Mon Sep 17 00:00:00 2001 From: Pedram Rezaei Date: Tue, 26 Nov 2024 08:52:44 -0800 Subject: [PATCH] Performance optimization extension for `RepeatedField` (#19115) .NET 8 added support for `AddRange` using the `ReadOnlySpan` data structure. This can significantly improve the performance of `RepeatedField` when adding multiple elements and can avoid the potential need to create extra arrays when using spans. Here is the code that was added to .NET to support `AddRange` on `List`: https://github.com/dotnet/runtime/blob/85f6d2d9449fb438cd8a5ca41d4beb4c657594e9/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/CollectionExtensions.cs#L85-L103 In the current implementation of `RepeatedField`, the `AddRange` method only operates on `IEnumerable`. Unfortunately, this means that the callers using `Span` have to copy the content of the span to an array or to some other `IEnumerbale` before calling `AddRange`. This is an expensive operation and can be avoided by adding support for `AddRange` that takes a `ReadOnlySpan` directly. In this proposal, `AddRange` is added as an extension method instead of directly adding it to `RepeatedField` because we want to avoid ambiguity between `RepeatedField.AddRange(IEnumerbale)` and `RepeatedField.AddRange(Span)` for types such as `T[]`. This is the same exact approach taken by .NET runtime itself. Closes #19115 COPYBARA_INTEGRATE_REVIEW=https://github.com/protocolbuffers/protobuf/pull/19115 from prezaei:pedramr/repeated-field-extensions 8d0ac6a0c36a9cb919f7b89c7071ff281747291a PiperOrigin-RevId: 700357702 --- .../Collections/RepeatedFieldTest.cs | 38 +++++++++++++++++ .../Collections/RepeatedField.cs | 42 ++++++++++++++++--- .../RepeatedFieldExtensions.cs | 38 +++++++++++++++++ 3 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 csharp/src/Google.Protobuf/RepeatedFieldExtensions.cs diff --git a/csharp/src/Google.Protobuf.Test/Collections/RepeatedFieldTest.cs b/csharp/src/Google.Protobuf.Test/Collections/RepeatedFieldTest.cs index 6d0911ff13c3..49d2bd7d11a0 100644 --- a/csharp/src/Google.Protobuf.Test/Collections/RepeatedFieldTest.cs +++ b/csharp/src/Google.Protobuf.Test/Collections/RepeatedFieldTest.cs @@ -127,6 +127,44 @@ public void AddRange_Optimized_NullsProhibited_NullableValueType() Assert.Catch(() => list.AddRange(new List { 20, null })); } + [Test] + public void AddRange_ReadOnlySpanPath() + { + var list = new RepeatedField(); + list.AddRange(new[] { "foo", "bar" }.AsSpan()); + Assert.AreEqual(2, list.Count); + Assert.AreEqual("foo", list[0]); + Assert.AreEqual("bar", list[1]); + } + + [Test] + public void AddRange_ReadOnlySpan_NullsProhibited_ReferenceType() + { + // We don't just trust that a collection with a nullable element type doesn't contain nulls + var list = new RepeatedField(); + // It's okay for this to throw ArgumentNullException if necessary. + // It's not ideal, but not awful. + Assert.Catch(() => list.AddRange(new string[] { "foo", null }.AsSpan())); + } + + [Test] + public void AddRange_ReadOnlySpan_NullsProhibited_NullableValueType() + { + // We don't just trust that a collection with a nullable element type doesn't contain nulls + var list = new RepeatedField(); + // It's okay for this to throw ArgumentNullException if necessary. + // It's not ideal, but not awful. + Assert.Catch(() => list.AddRange(new int?[] { 20, null }.AsSpan())); + } + + [Test] + public void AddRange_ReadOnlySpan_AlreadyNotEmpty() + { + var list = new RepeatedField { 1, 2, 3 }; + list.AddRange(new int[] { 4, 5, 6 }.AsSpan()); + CollectionAssert.AreEqual(new[] { 1, 2, 3, 4, 5, 6 }, list); + } + [Test] public void AddRange_AlreadyNotEmpty() { diff --git a/csharp/src/Google.Protobuf/Collections/RepeatedField.cs b/csharp/src/Google.Protobuf/Collections/RepeatedField.cs index 8bf410aa85c7..a18f998294ee 100644 --- a/csharp/src/Google.Protobuf/Collections/RepeatedField.cs +++ b/csharp/src/Google.Protobuf/Collections/RepeatedField.cs @@ -355,7 +355,7 @@ public bool Remove(T item) if (index == -1) { return false; - } + } Array.Copy(array, index + 1, array, index, count - index - 1); count--; array[count] = default; @@ -429,6 +429,38 @@ public void AddRange(IEnumerable values) } } + /// + /// Adds the elements of the specified span to the end of the collection. + /// + /// The span whose elements should be added to the end of the collection. + [SecuritySafeCritical] + internal void AddRangeSpan(ReadOnlySpan source) + { + if (source.IsEmpty) + { + return; + } + + // For reference types and nullable value types, we need to check that there are no nulls + // present. (This isn't a thread-safe approach, but we don't advertise this is thread-safe.) + // We expect the JITter to optimize this test to true/false, so it's effectively conditional + // specialization. + if (default(T) == null) + { + for (int i = 0; i < source.Length; i++) + { + if (source[i] == null) + { + throw new ArgumentException("ReadOnlySpan contained null element", nameof(source)); + } + } + } + + EnsureSize(count + source.Length); + source.CopyTo(array.AsSpan(count)); + count += source.Length; + } + /// /// Adds all of the specified values into this collection. This method is present to /// allow repeated fields to be constructed from queries within collection initializers. @@ -463,7 +495,7 @@ public IEnumerator GetEnumerator() /// true if the specified is equal to this instance; otherwise, false. /// public override bool Equals(object obj) => Equals(obj as RepeatedField); - + /// /// Returns an enumerator that iterates through a collection. /// @@ -476,7 +508,7 @@ public IEnumerator GetEnumerator() /// Returns a hash code for this instance. /// /// - /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table. + /// A hash code for this instance, suitable for use in hashing algorithms and data structures like a hash table. /// public override int GetHashCode() { @@ -645,7 +677,7 @@ void IList.Remove(object value) Remove(t); } } - #endregion + #endregion private sealed class RepeatedFieldDebugView { @@ -660,4 +692,4 @@ public RepeatedFieldDebugView(RepeatedField list) public T[] Items => list.ToArray(); } } -} +} \ No newline at end of file diff --git a/csharp/src/Google.Protobuf/RepeatedFieldExtensions.cs b/csharp/src/Google.Protobuf/RepeatedFieldExtensions.cs new file mode 100644 index 000000000000..83ae21b35b99 --- /dev/null +++ b/csharp/src/Google.Protobuf/RepeatedFieldExtensions.cs @@ -0,0 +1,38 @@ +#region Copyright notice and license +// Protocol Buffers - Google's data interchange format +// Copyright 2015 Google Inc. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file or at +// https://developers.google.com/open-source/licenses/bsd +#endregion + +using System; +using System.Security; +using Google.Protobuf.Collections; + +// Note: The choice of the namespace here is important because it's rare that people will directly reference Google.Protobuf.Collections +// in their app as they're typically using collections created by generated code. We are keeping the extension method in the Google.Protobuf +// namespace so that it is discoverable by most users. + +namespace Google.Protobuf +{ + /// + /// A set of extension methods on + /// + public static class RepeatedFieldExtensions + { + // Note: This method is an extension method to avoid ambiguous overload conflict with existing AddRange(IEnumerable) when the source is an array. + /// Adds the elements of the specified span to the end of the . + /// The type of elements in the . + /// The list to which the elements should be added. + /// The span whose elements should be added to the end of the . + /// The is null. + [SecuritySafeCritical] + public static void AddRange(this RepeatedField repeatedField, ReadOnlySpan source) + { + ProtoPreconditions.CheckNotNull(repeatedField, nameof(repeatedField)); + repeatedField.AddRangeSpan(source); + } + } +} \ No newline at end of file