diff --git a/docs/path-templates.md b/docs/path-templates.md new file mode 100644 index 000000000..372bfa3c5 --- /dev/null +++ b/docs/path-templates.md @@ -0,0 +1,54 @@ +# PathTemplates + +The default path template for requests is `/{prefix}/{customer}/{space}/{assetPath}`, where: + +* `prefix` is the route path (e.g. `iiif-manifest`, `iiif-av`, `iiif-img`) and includes version. +* `customer` and `space` are self-explanatory +* `assetPath` is the asset identifier plus any specific elements for the current request - e.g. for image requests it will contain the full IIIF image request. + +By default the above format is reflected on info.json (from Thumbs and Orchestrator). + +To facilitate using proxy servers to receive alternative URLs that are then rewritten to standard DLCS URLs, overrides to the default rules can be specified. These are used when outputting any self-referencing URIs (e.g. info.json `id` element). + +> [!IMPORTANT] +> For the below to work the expectation is that the `x-forwarded-host` header is set in the proxy. + +``` +"PathRules": { + "Default": "/{prefix}/{customer}/{space}/{assetPath}", + "Overrides": { + "exclude-space.com": "/{prefix}/{customer}/extra/{assetPath}/", + "customer-specific.io": "/{prefix}/{assetPath}", + "i-have-ark.io": "/{prefix}/ark:{assetPath:3US}" + } +} +``` + +As a convenience you can specify `"PathRules:OverridesAsJson"` appSetting, for Orchestrator only, that includes a string-based config. This makes it easier to configure via environment variables etc. + +## Formatters + +`assetPath` supports formatting via a known formatting parameter, e.g. `{assetPath}` can be formatted with `{assetPath:FMT}`. + +Supported format parameter values are: + +* `3US` - replaces triple _U_nderscores with _S_lashes (e.g. assetPath `"foo___bar_baz"` -> `"foo/bar_baz"`). + +## Auth PathTemplates + +There is a similar config block available for authentication under the `"Auth"` key for Orchestrator. 
+ +For auth the path replacements are simpler: +* `customer` is the customer the auth service is for +* `behaviour` is the name of the auth service. + +``` +"Auth": { + "AuthPathRules": { + "Default": "/auth/{customer}/{behaviour}", + "Overrides": { + "exclude-space.com": "/auth/{behaviour}" + } + } +}, +``` \ No newline at end of file diff --git a/src/protagonist/DLCS.Core.Tests/DlcsPathHelpersTests.cs b/src/protagonist/DLCS.Core.Tests/DlcsPathHelpersTests.cs index 3207cdac4..ad1d55e95 100644 --- a/src/protagonist/DLCS.Core.Tests/DlcsPathHelpersTests.cs +++ b/src/protagonist/DLCS.Core.Tests/DlcsPathHelpersTests.cs @@ -1,4 +1,6 @@ -namespace DLCS.Core.Tests; +using System; + +namespace DLCS.Core.Tests; public class DlcsPathHelpersTests { @@ -70,4 +72,42 @@ public void GeneratePathFromTemplate_RemovesDoubleSlashes(string template, strin // Assert replaced.Should().Be(expected); }
+
+    // Specific example here is for ARK id https://en.wikipedia.org/wiki/Archival_Resource_Key#Structure
+    [Theory]
+    [InlineData("https://dlcs.digirati.io/{prefix}/{version}/{customer}/{space}/path/ark:{assetPath}",
+        "https://dlcs.digirati.io/images/first-space/path/ark:NAAN___Name")]
+    [InlineData("https://dlcs.digirati.io/{prefix}/{version}/{customer}/{space}/path/ark:{assetPath:3US}",
+        "https://dlcs.digirati.io/images/first-space/path/ark:NAAN/Name")]
+    [InlineData("https://dlcs.digirati.io/{prefix}/{assetPath}/path/ark:{assetPath:3US}",
+        "https://dlcs.digirati.io/images/NAAN___Name/path/ark:NAAN/Name")]
+    public void GeneratePathFromTemplate_AssetPath_ObeysFormattingInstruction(string template, string expected)
+    {
+        // Act
+        var replaced = DlcsPathHelpers.GeneratePathFromTemplate(template,
+            prefix: "images",
+            space: "first-space",
+            assetPath: "NAAN___Name");
+
+        // Assert
+        replaced.Should().Be(expected);
+    }
+
+    // An unrecognised format specifier must fail loudly rather than being silently ignored
+    [Fact]
+    public void GeneratePathFromTemplate_AssetPath_Throws_IfUnknownFormattingInstruction()
+    {
+        // Arrange
+        const string template = "https://dlcs.digirati.io/{prefix}/{version}/{customer}/{space}/path/ark:{assetPath:XY}";
+
+        // Act
+        Action action = () => DlcsPathHelpers.GeneratePathFromTemplate(template,
+            prefix: "images",
+            space: "first-space",
+            assetPath: "NAAN___Name");
+
+        // Assert
+        action.Should().Throw<ArgumentException>()
+            .WithMessage("'XY' is not a known assetPath format (Parameter 'format')");
+    }
 } \ No newline at end of file diff --git a/src/protagonist/DLCS.Core/DlcsPathHelpers.cs b/src/protagonist/DLCS.Core/DlcsPathHelpers.cs index e31b354dd..3d2bceb9e 100644 --- a/src/protagonist/DLCS.Core/DlcsPathHelpers.cs +++ b/src/protagonist/DLCS.Core/DlcsPathHelpers.cs @@ -1,4 +1,5 @@ using System.Text.RegularExpressions; +using DLCS.Core.Formats; using DLCS.Core.Types; namespace DLCS.Core; @@ -18,7 +19,7 @@ public static class DlcsPathHelpers /// Value to replace {version} with /// Value to replace {customer} with /// Value to replace {space} with - /// Value to replace {assetPath} with + /// Value to replace {assetPath} with, optionally formatted /// Template with string replacements made public static string GeneratePathFromTemplate( string template, @@ -33,8 +34,8 @@ public static string GeneratePathFromTemplate( .Replace("{version}", version ?? string.Empty) .Replace("{customer}", customer ?? string.Empty) .Replace("{space}", space ?? string.Empty) - .Replace("{assetPath}", assetPath ?? string.Empty), "/"); - + .ReplaceAssetPath(assetPath ?? string.Empty), "/"); + /// /// Replace known slugs in DLCS auth path template. 
/// 
diff --git a/src/protagonist/DLCS.Core/Formats/AssetPathFormat.cs b/src/protagonist/DLCS.Core/Formats/AssetPathFormat.cs
new file mode 100644
index 000000000..0a4954628
--- /dev/null
+++ b/src/protagonist/DLCS.Core/Formats/AssetPathFormat.cs
@@ -0,0 +1,61 @@
+using System;
+using System.Text.RegularExpressions;
+
+namespace DLCS.Core.Formats;
+
+/// <summary>
+/// Helper for replacing {assetPath} template tokens, applying any requested format
+/// </summary>
+internal static class AssetPathFormatter
+{
+    // Matches one {assetPath} or {assetPath:FMT} token. The format group cannot contain braces, so each token is
+    // matched individually and any other braces in the template are left untouched.
+    private static readonly Regex AssetPath =
+        new(@"\{assetPath(?::(?<format>[^{}]*))?\}", RegexOptions.Compiled);
+
+    /// <summary>
+    /// Replace every {assetPath} or {assetPath:FMT} token in template with assetPath, formatted per FMT if present
+    /// </summary>
+    /// <param name="template">Template that may contain assetPath tokens</param>
+    /// <param name="assetPath">Value to replace each token with</param>
+    /// <returns>Template with all assetPath tokens replaced</returns>
+    /// <exception cref="ArgumentException">FMT is not a known format and assetPath is not empty</exception>
+    public static string ReplaceAssetPath(this string template, string assetPath)
+        => AssetPath.Replace(template, token =>
+            AssetPathFormat.Instance.Format(token.Groups["format"].Value, assetPath, AssetPathFormat.Instance));
+}
+
+/// <summary>
+/// Custom formatter that applies assetPath format specifiers (e.g. "3US")
+/// </summary>
+internal class AssetPathFormat : IFormatProvider, ICustomFormatter
+{
+    public static AssetPathFormat Instance { get; } = new();
+
+    // Replace "___" with "/"
+    private const string UnderscoreToSlash = "3US";
+
+    // Only custom formatting is provided; any other requested format type is not handled here
+    public object? GetFormat(Type? formatType)
+        => formatType == typeof(ICustomFormatter) ? this : null;
+
+    /// <summary>
+    /// Format arg according to format. A null/empty format, or a null arg, yields arg's string value unchanged
+    /// </summary>
+    /// <exception cref="ArgumentException">format is not a known assetPath format</exception>
+    public string Format(string? format, object? arg, IFormatProvider? formatProvider)
+    {
+        if (string.IsNullOrEmpty(format) || arg == null) return arg?.ToString() ?? string.Empty;
+
+        var result = arg.ToString();
+        // An empty value has nothing to format, so it is returned before the format is validated
+        if (string.IsNullOrEmpty(result)) return string.Empty;
+
+        if (format == UnderscoreToSlash)
+        {
+            return result.Replace("___", "/");
+        }
+
+        throw new ArgumentException($"'{format}' is not a known assetPath format", nameof(format));
+    }
+}
diff --git a/src/protagonist/DLCS.Web/Response/ConfigDrivenAssetPathGenerator.cs b/src/protagonist/DLCS.Web/Response/ConfigDrivenAssetPathGenerator.cs index 3ccc49f30..97271cea8 100644 --- a/src/protagonist/DLCS.Web/Response/ConfigDrivenAssetPathGenerator.cs +++ b/src/protagonist/DLCS.Web/Response/ConfigDrivenAssetPathGenerator.cs @@ -12,7 +12,7 @@ namespace DLCS.Web.Response; /// /// This class uses to determine different URL patterns for different hostnames, /// this allows e.g. "id" values on manifests to use different URL structures than the default DLCS paths. -/// e.g. /images/{image}/ rather than default of /iiif-img/{cust}/{space}/{image} +/// e.g. /images/{assetPath}/ rather than default of /iiif-img/{cust}/{space}/{assetPath} /// public class ConfigDrivenAssetPathGenerator : IAssetPathGenerator { diff --git a/src/protagonist/Orchestrator/readme.md b/src/protagonist/Orchestrator/readme.md index f42453c7a..9c5c8f7df 100644 --- a/src/protagonist/Orchestrator/readme.md +++ b/src/protagonist/Orchestrator/readme.md @@ -96,51 +96,6 @@ E.g., the following shows IIPImage supports v2 only and Cantaloupe supports v2 + } ``` -### PathTemplates - -The default path template for requests is `/{prefix}/{customer}/{space}/{assetPath}`, where: - -* `prefix` is route path (e.g. `iiif-manifest`, `iiif-av`, `iiif-img`) and includes version. -* `customer` and `space` are self explanatory -* `assetPath` is the asset identifier plus any specific elements for the current request - e.g. for image requests it will contain the full IIIF image request. - -By default the above format is reflected on info.json and single-item manifests. 
- -To facilitate using proxy servers to receive alternative URLs that are then rewritten to standard DLCS URLs, overrides to the default rules can be specified. These are used when outputting any self-referencing URIs (e.g. info.json `id` element). - -> For the below to work the expectation is that the `x-forwarded-host` header is set in the proxy. - -``` -"PathRules": { - "Default": "/{prefix}/{customer}/{space}/{assetPath}", - "Overrides": { - "exclude-space.com": "/{prefix}/{customer}/extra/{assetPath}/", - "customer-specific.io": "/{prefix}/{assetPath}" - } -} -``` - -As an convenience you can specify "PathRules:OverridesAsJson" appSetting that includes a string-based config. This makes it easier to configure via environment variables etc - -#### Auth PathTemplates - -There is a similar config block availabe for authentication under the `"Auth"` key. - -For auth the path replacements are simpler: -* `customer` is the customer the auth service is for -* `behaviour` is the name of the auth service. - -``` -"Auth": { - "AuthPathRules": { - "Default": "/auth/{customer}/{behaviour}", - "Overrides": { - "exclude-space.com": "/auth/{behaviour}" - } - } -}, -``` - ### Versioned Requests `DefaultIIIFImageVersion` and `DefaultIIIFPresentationVersion` specify the default IIIF Image and Presentation API's supported.