From b27b78f24d754ef9528d3a1a959014ece5d0b1c7 Mon Sep 17 00:00:00 2001 From: CeciliaAvila Date: Thu, 17 Oct 2024 14:11:33 -0300 Subject: [PATCH] Create deterministic GetHashCode method --- .../CosmosDBKeyEscape.cs | 34 ++++++++++++++++++- .../CosmosDBKeyEscapeTests.cs | 4 ++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/libraries/Microsoft.Bot.Builder.Azure/CosmosDBKeyEscape.cs b/libraries/Microsoft.Bot.Builder.Azure/CosmosDBKeyEscape.cs index e6e154d669..c191315941 100644 --- a/libraries/Microsoft.Bot.Builder.Azure/CosmosDBKeyEscape.cs +++ b/libraries/Microsoft.Bot.Builder.Azure/CosmosDBKeyEscape.cs @@ -117,11 +117,43 @@ private static string TruncateKeyIfNeeded(string key, bool truncateKeysForCompat if (key.Length > MaxKeyLength) { - var hash = key.GetHashCode().ToString("x", CultureInfo.InvariantCulture); + var hash = key.GetDeterministicHashCode().ToString("x", CultureInfo.InvariantCulture); key = key.Substring(0, MaxKeyLength - hash.Length) + hash; } return key; } + + /// <summary> + /// Creates a deterministic hash code by iterating through the string two characters at a time, + /// updating two separate hash values, and then combining them at the end. + /// This approach helps in reducing hash collisions and provides a consistent hash code for the same string across + /// different runs and environments. + /// </summary> + /// <param name="str">The string to calculate the hash on.</param> + /// <returns>The hash code.</returns> + private static int GetDeterministicHashCode(this string str) + { + unchecked + { + var hash1 = (5381 << 16) + 5381; //shifts 5381 left by 16 bits and adds 5381 to it + var hash2 = hash1; + for (var i = 0; i < str.Length; i += 2) + { + // ((hash1 << 5) + hash1) is equivalent to hash1 * 33, which is a common multiplier in hash functions. + // The character str[i] is then XORed with this value.
+ hash1 = ((hash1 << 5) + hash1) ^ str[i]; + if (i == str.Length - 1) + { + break; + } + + hash2 = ((hash2 << 5) + hash2) ^ str[i + 1]; + } + + // 1566083941 is a large odd multiplier (not a prime: 37 * 42326593) used to mix the two hash values together, which helps spread the resulting hash codes. + return hash1 + (hash2 * 1566083941); + } + } } } diff --git a/tests/Microsoft.Bot.Builder.Azure.Tests/CosmosDBKeyEscapeTests.cs b/tests/Microsoft.Bot.Builder.Azure.Tests/CosmosDBKeyEscapeTests.cs index 1f5ec6f3f2..f924bde1f8 100644 --- a/tests/Microsoft.Bot.Builder.Azure.Tests/CosmosDBKeyEscapeTests.cs +++ b/tests/Microsoft.Bot.Builder.Azure.Tests/CosmosDBKeyEscapeTests.cs @@ -2,6 +2,7 @@ // Licensed under the MIT License. using System; +using System.Reflection; using Xunit; namespace Microsoft.Bot.Builder.Azure.Tests @@ -40,7 +41,8 @@ public void Long_Key_Should_Be_Truncated() Assert.True(sanitizedKey.Length <= CosmosDbKeyEscape.MaxKeyLength, "Key too long"); // The resulting key should be: - var hash = tooLongKey.GetHashCode().ToString("x"); + var getHashMethod = typeof(CosmosDbKeyEscape).GetMethod("GetDeterministicHashCode", BindingFlags.NonPublic | BindingFlags.Static); + var hash = ((int)getHashMethod.Invoke(null, new object[] { tooLongKey })).ToString("x"); var correctKey = sanitizedKey.Substring(0, CosmosDbKeyEscape.MaxKeyLength - hash.Length) + hash; Assert.Equal(correctKey, sanitizedKey);