Skip to content

Commit

Permalink
Fix bug where chunking could use different hash than the client (#4684)
Browse files Browse the repository at this point in the history
* Use same hashtype as the dedupclient when generating the hashes

* Delete unneeded factory class variable.

---------
  • Loading branch information
b-barthel authored Feb 29, 2024
1 parent b5e92c4 commit c886dc3
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ public static class BlobStoreUtils
bool enableReporting = false)
{
// Create chunks and identifier
traceOutput(StringUtil.Loc("BuildingFileTree"));
var fileNodes = await GenerateHashes(itemPaths, cancellationToken);
traceOutput(StringUtil.Loc("BuildingFileTree"));
var fileNodes = await GenerateHashes(itemPaths, dedupClient.HashType, cancellationToken);
var rootNode = CreateNodeToUpload(fileNodes.Where(x => x.Success).Select(y => y.Node));

// If there are multiple paths to one DedupId (duplicate files)
Expand Down Expand Up @@ -111,7 +111,7 @@ private static Task StartReportingTask(Action<string> traceOutput, long totalByt
});
}

private static async Task<List<BlobFileInfo>> GenerateHashes(IReadOnlyList<string> filePaths, CancellationToken cancellationToken)
private static async Task<List<BlobFileInfo>> GenerateHashes(IReadOnlyList<string> filePaths, HashType hashType, CancellationToken cancellationToken)
{
var nodes = new BlobFileInfo[filePaths.Count];
var queue = NonSwallowingActionBlock.Create<int>(
Expand All @@ -120,7 +120,7 @@ private static async Task<List<BlobFileInfo>> GenerateHashes(IReadOnlyList<strin
var itemPath = filePaths[i];
try
{
var dedupNode = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, false, ChunkerHelper.DefaultChunkHashType, cancellationToken);
var dedupNode = await ChunkerHelper.CreateFromFileAsync(FileSystem.Instance, itemPath, false, hashType, cancellationToken);
nodes[i] = new BlobFileInfo
{
Path = itemPath,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ public class DedupManifestArtifactClientFactory : IDedupManifestArtifactClientFa
// At 192x it was around 16 seconds and 256x was no faster.
private const int DefaultDedupStoreClientMaxParallelism = 192;

private HashType? HashType { get; set; }

public static readonly DedupManifestArtifactClientFactory Instance = new();

private DedupManifestArtifactClientFactory()
Expand Down Expand Up @@ -152,17 +150,17 @@ private DedupManifestArtifactClientFactory()
IDedupStoreHttpClient dedupStoreHttpClient = GetDedupStoreHttpClient(connection, domainId, maxRetries, tracer, cancellationToken);

var telemetry = new BlobStoreClientTelemetry(tracer, dedupStoreHttpClient.BaseAddress);
this.HashType = clientSettings.GetClientHashType(context);
HashType hashType= clientSettings.GetClientHashType(context);

if (this.HashType == BuildXL.Cache.ContentStore.Hashing.HashType.Dedup1024K)
if (hashType == BuildXL.Cache.ContentStore.Hashing.HashType.Dedup1024K)
{
dedupStoreHttpClient.RecommendedChunkCountPerCall = 10; // This is to workaround IIS limit - https://learn.microsoft.com/en-us/iis/configuration/system.webserver/security/requestfiltering/requestlimits/
}
traceOutput($"Hashtype: {this.HashType.Value}");
traceOutput($"Hashtype: {hashType}");

dedupStoreHttpClient.SetRedirectTimeout(clientSettings.GetRedirectTimeout());

var dedupClient = new DedupStoreClientWithDataport(dedupStoreHttpClient, new DedupStoreClientContext(maxParallelism), this.HashType.Value);
var dedupClient = new DedupStoreClientWithDataport(dedupStoreHttpClient, new DedupStoreClientContext(maxParallelism), hashType);
return (new DedupManifestArtifactClient(telemetry, dedupClient, tracer), telemetry);
}

Expand Down Expand Up @@ -222,6 +220,7 @@ private static IDedupStoreHttpClient GetDedupStoreHttpClient(VssConnection conne
dedupStoreHttpClient.SetRedirectTimeout(redirectTimeoutSeconds);
var telemetry = new BlobStoreClientTelemetryTfs(tracer, dedupStoreHttpClient.BaseAddress, connection);
var client = new DedupStoreClient(dedupStoreHttpClient, maxParallelism);
traceOutput($" - Hash type: {client.HashType}");
return (client, telemetry);
}

Expand Down

0 comments on commit c886dc3

Please sign in to comment.