From b17fa151f4af71c4e0eeadfd6828ec50e3b23cde Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Tue, 16 Apr 2024 15:21:41 +0200 Subject: [PATCH 1/3] gateway: set Content-Location for non-default response formats --- CHANGELOG.md | 1 + gateway/gateway.go | 7 +++- gateway/gateway_test.go | 74 ++++++++++++++++++++++++++++++++++++++++ gateway/handler.go | 75 +++++++++++++++++++++++++++++++---------- gateway/hostname.go | 3 ++ 5 files changed, 142 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 765bdd574..ca083b47f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The following emojis are used to highlight certain changes: * ✨ `gateway` has new backend possibilities: * `NewRemoteBlocksBackend` allows you to create a gateway backend that uses one or multiple other gateways as backend. These gateways must support RAW block requests (`application/vnd.ipld.raw`), as well as IPNS Record requests (`application/vnd.ipfs.ipns-record`). With this, we also introduced `NewCacheBlockStore`, `NewRemoteBlockstore` and `NewRemoteValueStore`. * `NewRemoteCarBackend` allows you to create a gateway backend that uses one or multiple Trustless Gateways as backend. These gateways must support CAR requests (`application/vnd.ipld.car`), as well as the extensions describe in [IPIP-402](https://specs.ipfs.tech/ipips/ipip-0402/). With this, we also introduced `NewCarBackend`, `NewRemoteCarFetcher` and `NewRetryCarFetcher`. +* `gateway` now sets the [`Content-Location`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Location) header for requests with non-default content format, as a result of content negotiation. This allows generic and misconfigured HTTP caches to store Deserialized, CAR and Block responses separately, under distinct cache keys. 
### Changed diff --git a/gateway/gateway.go b/gateway/gateway.go index be9501281..dccdaf792 100644 --- a/gateway/gateway.go +++ b/gateway/gateway.go @@ -405,6 +405,11 @@ const ( // [Subdomain Gateway]: https://specs.ipfs.tech/http-gateways/subdomain-gateway/ SubdomainHostnameKey RequestContextKey = "subdomain-hostname" - // ContentPathKey is the key for the original [http.Request] URL Path, as an [ipath.Path]. + // OriginalPathKey is the key for the original [http.Request] [url.URL.Path], + // as a string. This is the original path of the request, before [NewHostnameHandler]. + OriginalPathKey RequestContextKey = "original-path-key" + + // ContentPathKey is the key for the content [path.Path] of the current request. + // This already accounts for changes made by [NewHostnameHandler]. ContentPathKey RequestContextKey = "content-path" ) diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 289faad01..391447d02 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -417,6 +417,80 @@ func TestHeaders(t *testing.T) { testCORSPreflightRequest(t, "/", cid+".ipfs.subgw.example.com", "https://other.example.net", http.StatusOK) }) }) + + t.Run("Content-Location is set when possible", func(t *testing.T) { + backend, root := newMockBackend(t, "fixtures.car") + backend.namesys["/ipns/dnslink-gateway.com"] = newMockNamesysItem(path.FromCid(root), 0) + + ts := newTestServerWithConfig(t, backend, Config{ + NoDNSLink: false, + PublicGateways: map[string]*PublicGateway{ + "dnslink-gateway.com": { + Paths: []string{}, + NoDNSLink: false, + DeserializedResponses: true, + }, + "subdomain-gateway.com": { + Paths: []string{"/ipfs", "/ipns"}, + UseSubdomains: true, + NoDNSLink: true, + DeserializedResponses: true, + }, + }, + DeserializedResponses: true, + }) + + runTest := func(name, path, accept, host, expectedContentPath string) { + t.Run(name, func(t *testing.T) { + t.Parallel() + + req := mustNewRequest(t, http.MethodGet, ts.URL+path, nil) + + if 
accept != "" { + req.Header.Set("Accept", accept) + } + + if host != "" { + req.Host = host + } + + resp := mustDoWithoutRedirect(t, req) + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + require.Equal(t, http.StatusOK, resp.StatusCode, string(body)) + require.Equal(t, expectedContentPath, resp.Header.Get("Content-Location")) + }) + } + + contentPath := path.FromCid(root).String() + "/empty-dir/" + subdomainGatewayHost := root.String() + ".ipfs.subdomain-gateway.com" + dnslinkGatewayHost := "dnslink-gateway.com" + + runTest("Regular gateway with default format", contentPath, "", "", "") + runTest("Regular gateway with Accept: application/vnd.ipld.car has no Content-Location", contentPath, "application/vnd.ipld.car;version=1;order=dfs;dups=n", "", "") + runTest("Regular gateway with ?dag-scope=entity&format=car", contentPath+"?dag-scope=entity&format=car", "", "", "") + runTest("Regular gateway preserves query parameters", contentPath+"?a=b&c=d", dagCborResponseFormat, "", contentPath+"?a=b&c=d&format=dag-cbor") + runTest("Subdomain gateway with default format", "/empty-dir/", "", subdomainGatewayHost, "") + runTest("DNSLink gateway with default format", "/empty-dir/", "", dnslinkGatewayHost, "") + + for responseFormat, formatParam := range responseFormatToFormatParam { + if responseFormat == ipnsRecordResponseFormat { + continue + } + + runTest("Regular gateway with Accept: "+responseFormat, contentPath, responseFormat, "", contentPath+"?format="+formatParam) + runTest("Regular gateway with ?format="+formatParam, contentPath+"?format="+formatParam, "", "", "") + + runTest("Subdomain gateway with Accept: "+responseFormat, "/empty-dir/", responseFormat, subdomainGatewayHost, "/empty-dir/?format="+formatParam) + runTest("Subdomain gateway with ?format="+formatParam, "/empty-dir/?format="+formatParam, "", subdomainGatewayHost, "") + + runTest("DNSLink gateway with Accept: "+responseFormat, "/empty-dir/", responseFormat, 
dnslinkGatewayHost, "/empty-dir/?format="+formatParam) + runTest("DNSLink gateway with ?format="+formatParam, "/empty-dir/?format="+formatParam, "", dnslinkGatewayHost, "") + } + }) } func TestGoGetSupport(t *testing.T) { diff --git a/gateway/handler.go b/gateway/handler.go index 6963bdebf..d8aa82e5e 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -259,6 +259,8 @@ func (i *handler) getOrHeadHandler(w http.ResponseWriter, r *http.Request) { responseParams: formatParams, } + addContentLocation(r, w, rq) + // IPNS Record response format can be handled now, since (1) it needs the // non-resolved mutable path, and (2) has custom If-None-Match header handling // due to custom ETag. @@ -586,6 +588,27 @@ const ( ipnsRecordResponseFormat = "application/vnd.ipfs.ipns-record" ) +var ( + formatParamToResponseFormat = map[string]string{ + "raw": rawResponseFormat, + "car": carResponseFormat, + "tar": tarResponseFormat, + "json": jsonResponseFormat, + "cbor": cborResponseFormat, + "dag-json": dagJsonResponseFormat, + "dag-cbor": dagCborResponseFormat, + "ipns-record": ipnsRecordResponseFormat, + } + + responseFormatToFormatParam = map[string]string{} +) + +func init() { + for k, v := range formatParamToResponseFormat { + responseFormatToFormatParam[v] = k + } +} + // return explicit response format if specified in request as query parameter or via Accept HTTP header func customResponseFormat(r *http.Request) (mediaType string, params map[string]string, err error) { // First, inspect Accept header, as it may not only include content type, but also optional parameters. @@ -615,23 +638,8 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] // If no Accept header, translate query param to a content type, if present. 
if formatParam := r.URL.Query().Get("format"); formatParam != "" { - switch formatParam { - case "raw": - return rawResponseFormat, nil, nil - case "car": - return carResponseFormat, nil, nil - case "tar": - return tarResponseFormat, nil, nil - case "json": - return jsonResponseFormat, nil, nil - case "cbor": - return cborResponseFormat, nil, nil - case "dag-json": - return dagJsonResponseFormat, nil, nil - case "dag-cbor": - return dagCborResponseFormat, nil, nil - case "ipns-record": - return ipnsRecordResponseFormat, nil, nil + if responseFormat, ok := formatParamToResponseFormat[formatParam]; ok { + return responseFormat, nil, nil } } @@ -640,6 +648,39 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] return "", nil, nil } +// Add 'Content-Location' headers for non-default response formats. This allows +// correct caching of such format requests when the format is passed via the +// Accept header, for example. +func addContentLocation(r *http.Request, w http.ResponseWriter, rq *requestData) { + // Skip Content-Location if no explicit format was requested via the HTTP + // Accept header, or if it was requested via URL query parameter. + if rq.responseFormat == "" || r.URL.Query().Get("format") != "" { + return + } + + // Response format parameters, such as 'dups' and 'order' for CAR requests + // cannot be translated into the URL. Therefore, we cannot add a 'Content-Location' + // header. + if len(rq.responseParams) != 0 { + return + } + + param := responseFormatToFormatParam[rq.responseFormat] + path := r.URL.Path + if p, ok := r.Context().Value(OriginalPathKey).(string); ok { + path = p + } + + // Copy all existing query parameters. 
+ query := url.Values{} + for k, v := range r.URL.Query() { + query[k] = v + } + query.Set("format", param) + + w.Header().Set("Content-Location", path+"?"+query.Encode()) +} + // returns unquoted path with all special characters revealed as \u codes func debugStr(path string) string { q := fmt.Sprintf("%+q", path) diff --git a/gateway/hostname.go b/gateway/hostname.go index 665cf1663..ebdbe9273 100644 --- a/gateway/hostname.go +++ b/gateway/hostname.go @@ -28,6 +28,9 @@ func NewHostnameHandler(c Config, backend IPFSBackend, next http.Handler) http.H return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { defer panicHandler(w) + ctx := context.WithValue(r.Context(), OriginalPathKey, r.URL.Path) + r = r.WithContext(ctx) + // First check for protocol handler redirects. if handleProtocolHandlerRedirect(w, r, &c) { return From 08e3e40c3253c4fc8f805964fe1c7ef8f00e0e35 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 18 Apr 2024 00:38:28 +0200 Subject: [PATCH 2/3] gateway: add support for car-* query parameters (#604) Co-authored-by: Marcin Rataj --- CHANGELOG.md | 1 + gateway/gateway_test.go | 2 +- gateway/handler.go | 16 +++++++--------- gateway/handler_car.go | 24 +++++++++++++++++++++--- gateway/handler_car_test.go | 20 ++++++++++++-------- 5 files changed, 42 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca083b47f..1efccae44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The following emojis are used to highlight certain changes: * `NewRemoteBlocksBackend` allows you to create a gateway backend that uses one or multiple other gateways as backend. These gateways must support RAW block requests (`application/vnd.ipld.raw`), as well as IPNS Record requests (`application/vnd.ipfs.ipns-record`). With this, we also introduced `NewCacheBlockStore`, `NewRemoteBlockstore` and `NewRemoteValueStore`. 
* `NewRemoteCarBackend` allows you to create a gateway backend that uses one or multiple Trustless Gateways as backend. These gateways must support CAR requests (`application/vnd.ipld.car`), as well as the extensions describe in [IPIP-402](https://specs.ipfs.tech/ipips/ipip-0402/). With this, we also introduced `NewCarBackend`, `NewRemoteCarFetcher` and `NewRetryCarFetcher`. * `gateway` now sets the [`Content-Location`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Location) header for requests with non-default content format, as a result of content negotiation. This allows generic and misconfigured HTTP caches to store Deserialized, CAR and Block responses separately, under distinct cache keys. +* `gateway` now supports `car-dups`, `car-order` and `car-version` as query parameters in addition to the `application/vnd.ipld.car` parameters sent via `Accept` header. The parameters in the `Accept` header always have priority, but including them in the URL simplifies HTTP caching and allows their use in the `Content-Location` header on CAR responses to maximize interoperability with a wide array of HTTP caches. 
### Changed diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 391447d02..d59f0d66f 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -470,7 +470,7 @@ func TestHeaders(t *testing.T) { dnslinkGatewayHost := "dnslink-gateway.com" runTest("Regular gateway with default format", contentPath, "", "", "") - runTest("Regular gateway with Accept: application/vnd.ipld.car has no Content-Location", contentPath, "application/vnd.ipld.car;version=1;order=dfs;dups=n", "", "") + runTest("Regular gateway with Accept: application/vnd.ipld.car;version=1;order=dfs;dups=n sets correct Content-Location", contentPath, "application/vnd.ipld.car;version=1;order=dfs;dups=n", "", contentPath+"?car-dups=n&car-order=dfs&car-version=1&format=car") runTest("Regular gateway with ?dag-scope=entity&format=car", contentPath+"?dag-scope=entity&format=car", "", "", "") runTest("Regular gateway preserves query parameters", contentPath+"?a=b&c=d", dagCborResponseFormat, "", contentPath+"?a=b&c=d&format=dag-cbor") runTest("Subdomain gateway with default format", "/empty-dir/", "", subdomainGatewayHost, "") diff --git a/gateway/handler.go b/gateway/handler.go index d8aa82e5e..522af1eb5 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -658,14 +658,7 @@ func addContentLocation(r *http.Request, w http.ResponseWriter, rq *requestData) return } - // Response format parameters, such as 'dups' and 'order' for CAR requests - // cannot be translated into the URL. Therefore, we cannot add a 'Content-Location' - // header. 
- if len(rq.responseParams) != 0 { - return - } - - param := responseFormatToFormatParam[rq.responseFormat] + format := responseFormatToFormatParam[rq.responseFormat] path := r.URL.Path if p, ok := r.Context().Value(OriginalPathKey).(string); ok { path = p @@ -676,7 +669,12 @@ func addContentLocation(r *http.Request, w http.ResponseWriter, rq *requestData) for k, v := range r.URL.Query() { query[k] = v } - query.Set("format", param) + query.Set("format", format) + + // Set response params as query elements. + for k, v := range rq.responseParams { + query.Set(format+"-"+k, v) + } w.Header().Set("Content-Location", path+"?"+query.Encode()) } diff --git a/gateway/handler_car.go b/gateway/handler_car.go index e42c0fde2..9c63d2d16 100644 --- a/gateway/handler_car.go +++ b/gateway/handler_car.go @@ -23,6 +23,9 @@ import ( const ( carRangeBytesKey = "entity-bytes" carTerminalElementTypeKey = "dag-scope" + carVersionKey = "car-version" + carDuplicatesKey = "car-dups" + carOrderKey = "car-order" ) // serveCAR returns a CAR stream for specific DAG+selector @@ -144,8 +147,23 @@ func buildCarParams(r *http.Request, contentTypeParams map[string]string) (CarPa // application/vnd.ipld.car content type parameters from Accept header + // Get CAR version, duplicates and order from the query parameters and override + // with parameters from Accept header if they exist, since they have priority. 
+ versionStr := queryParams.Get(carVersionKey) + duplicatesStr := queryParams.Get(carDuplicatesKey) + orderStr := queryParams.Get(carOrderKey) + if v, ok := contentTypeParams["version"]; ok { + versionStr = v + } + if v, ok := contentTypeParams["order"]; ok { + orderStr = v + } + if v, ok := contentTypeParams["dups"]; ok { + duplicatesStr = v + } + // version of CAR format - switch contentTypeParams["version"] { + switch versionStr { case "": // noop, client does not care about version case "1": // noop, we support this default: @@ -153,7 +171,7 @@ func buildCarParams(r *http.Request, contentTypeParams map[string]string) (CarPa } // optional order from IPIP-412 - if order := DagOrder(contentTypeParams["order"]); order != DagOrderUnspecified { + if order := DagOrder(orderStr); order != DagOrderUnspecified { switch order { case DagOrderUnknown, DagOrderDFS: params.Order = order @@ -168,7 +186,7 @@ func buildCarParams(r *http.Request, contentTypeParams map[string]string) (CarPa } // optional dups from IPIP-412 - dups, err := NewDuplicateBlocksPolicy(contentTypeParams["dups"]) + dups, err := NewDuplicateBlocksPolicy(duplicatesStr) if err != nil { return CarParams{}, err } diff --git a/gateway/handler_car_test.go b/gateway/handler_car_test.go index da2d16255..a9c3667d2 100644 --- a/gateway/handler_car_test.go +++ b/gateway/handler_car_test.go @@ -2,6 +2,7 @@ package gateway import ( "net/http" + "net/url" "testing" "github.com/ipfs/boxo/path" @@ -81,19 +82,22 @@ func TestCarParams(t *testing.T) { // from the value read from Accept header tests := []struct { acceptHeader string + params url.Values expectedOrder DagOrder expectedDuplicates DuplicateBlocksPolicy }{ - {"application/vnd.ipld.car; order=dfs; dups=y", DagOrderDFS, DuplicateBlocksIncluded}, - {"application/vnd.ipld.car; order=unk; dups=n", DagOrderUnknown, DuplicateBlocksExcluded}, - {"application/vnd.ipld.car; order=unk", DagOrderUnknown, DuplicateBlocksExcluded}, - {"application/vnd.ipld.car; dups=y", 
DagOrderDFS, DuplicateBlocksIncluded}, - {"application/vnd.ipld.car; dups=n", DagOrderDFS, DuplicateBlocksExcluded}, - {"application/vnd.ipld.car", DagOrderDFS, DuplicateBlocksExcluded}, - {"application/vnd.ipld.car;version=1;order=dfs;dups=y", DagOrderDFS, DuplicateBlocksIncluded}, + {"application/vnd.ipld.car; order=dfs; dups=y", nil, DagOrderDFS, DuplicateBlocksIncluded}, + {"application/vnd.ipld.car; order=unk; dups=n", nil, DagOrderUnknown, DuplicateBlocksExcluded}, + {"application/vnd.ipld.car; order=unk", nil, DagOrderUnknown, DuplicateBlocksExcluded}, + {"application/vnd.ipld.car; dups=y", nil, DagOrderDFS, DuplicateBlocksIncluded}, + {"application/vnd.ipld.car; dups=n", nil, DagOrderDFS, DuplicateBlocksExcluded}, + {"application/vnd.ipld.car", nil, DagOrderDFS, DuplicateBlocksExcluded}, + {"application/vnd.ipld.car;version=1;order=dfs;dups=y", nil, DagOrderDFS, DuplicateBlocksIncluded}, + {"application/vnd.ipld.car;version=1;order=dfs;dups=y", url.Values{"car-order": []string{"unk"}}, DagOrderDFS, DuplicateBlocksIncluded}, + {"application/vnd.ipld.car;version=1;dups=y", url.Values{"car-order": []string{"unk"}}, DagOrderUnknown, DuplicateBlocksIncluded}, } for _, test := range tests { - r := mustNewRequest(t, http.MethodGet, "http://example.com/", nil) + r := mustNewRequest(t, http.MethodGet, "http://example.com/?"+test.params.Encode(), nil) r.Header.Set("Accept", test.acceptHeader) mediaType, formatParams, err := customResponseFormat(r) From f96974f1473ea19719369ee7600d3c5c2fb8cf22 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 18 Apr 2024 01:35:05 +0200 Subject: [PATCH 3/3] gateway: test Accept header overrides conflicting ?format --- gateway/gateway_test.go | 6 ++++-- gateway/handler.go | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index d59f0d66f..d48334b92 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -440,7 +440,7 @@ func 
TestHeaders(t *testing.T) { DeserializedResponses: true, }) - runTest := func(name, path, accept, host, expectedContentPath string) { + runTest := func(name, path, accept, host, expectedContentLocationHdr string) { t.Run(name, func(t *testing.T) { t.Parallel() @@ -461,7 +461,7 @@ func TestHeaders(t *testing.T) { require.NoError(t, err) require.Equal(t, http.StatusOK, resp.StatusCode, string(body)) - require.Equal(t, expectedContentPath, resp.Header.Get("Content-Location")) + require.Equal(t, expectedContentLocationHdr, resp.Header.Get("Content-Location")) }) } @@ -490,6 +490,8 @@ func TestHeaders(t *testing.T) { runTest("DNSLink gateway with Accept: "+responseFormat, "/empty-dir/", responseFormat, dnslinkGatewayHost, "/empty-dir/?format="+formatParam) runTest("DNSLink gateway with ?format="+formatParam, "/empty-dir/?format="+formatParam, "", dnslinkGatewayHost, "") } + + runTest("Accept: application/vnd.ipld.car overrides ?format=raw in Content-Location", contentPath+"?format=raw", "application/vnd.ipld.car", "", contentPath+"?format=car") }) } diff --git a/gateway/handler.go b/gateway/handler.go index 522af1eb5..4360d2163 100644 --- a/gateway/handler.go +++ b/gateway/handler.go @@ -652,13 +652,22 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string] // correct caching of such format requests when the format is passed via the // Accept header, for example. func addContentLocation(r *http.Request, w http.ResponseWriter, rq *requestData) { - // Skip Content-Location if no explicit format was requested via the HTTP - // Accept header, or if it was requested via URL query parameter. 
- if rq.responseFormat == "" || r.URL.Query().Get("format") != "" { + // Skip Content-Location if no explicit format was requested + // via Accept HTTP header or ?format URL param + if rq.responseFormat == "" { return } format := responseFormatToFormatParam[rq.responseFormat] + + // Skip Content-Location if there is no conflict between + // 'format' in URL and value in 'Accept' header. + // If both are present and don't match, we continue and generate + // Content-Location to ensure value from Accept overrides 'format' from URL. + if urlFormat := r.URL.Query().Get("format"); urlFormat != "" && urlFormat == format { + return + } + path := r.URL.Path if p, ok := r.Context().Value(OriginalPathKey).(string); ok { path = p