From d0cd066959169ccb85266aa9ecf3450cb8632dd3 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 6 Mar 2024 14:57:04 +0100 Subject: [PATCH 01/19] gateway: split shared backend functionality --- gateway/backend.go | 95 +++++++++++++++++ .../{blocks_backend.go => backend_blocks.go} | 100 +++--------------- 2 files changed, 107 insertions(+), 88 deletions(-) create mode 100644 gateway/backend.go rename gateway/{blocks_backend.go => backend_blocks.go} (91%) diff --git a/gateway/backend.go b/gateway/backend.go new file mode 100644 index 000000000..280791c0d --- /dev/null +++ b/gateway/backend.go @@ -0,0 +1,95 @@ +package gateway + +import ( + "context" + "errors" + "fmt" + "net/http" + "time" + + "github.com/ipfs/boxo/ipns" + "github.com/ipfs/boxo/namesys" + "github.com/ipfs/boxo/path" + "github.com/ipfs/go-cid" + routinghelpers "github.com/libp2p/go-libp2p-routing-helpers" + "github.com/libp2p/go-libp2p/core/routing" +) + +// baseBackend contains some common backend functionalities that are shared by +// different backend implementations. 
+type baseBackend struct { + routing routing.ValueStore + namesys namesys.NameSystem +} + +func newBaseBackend(vs routing.ValueStore, ns namesys.NameSystem) (baseBackend, error) { + if vs == nil { + vs = routinghelpers.Null{} + } + + if ns == nil { + dns, err := NewDNSResolver(nil, nil) + if err != nil { + return baseBackend{}, err + } + + ns, err = namesys.NewNameSystem(vs, namesys.WithDNSResolver(dns)) + if err != nil { + return baseBackend{}, err + } + } + + return baseBackend{ + routing: vs, + namesys: ns, + }, nil +} + +func (bb *baseBackend) ResolveMutable(ctx context.Context, p path.Path) (path.ImmutablePath, time.Duration, time.Time, error) { + switch p.Namespace() { + case path.IPNSNamespace: + res, err := namesys.Resolve(ctx, bb.namesys, p) + if err != nil { + return path.ImmutablePath{}, 0, time.Time{}, err + } + ip, err := path.NewImmutablePath(res.Path) + if err != nil { + return path.ImmutablePath{}, 0, time.Time{}, err + } + return ip, res.TTL, res.LastMod, nil + case path.IPFSNamespace: + ip, err := path.NewImmutablePath(p) + return ip, 0, time.Time{}, err + default: + return path.ImmutablePath{}, 0, time.Time{}, NewErrorStatusCode(fmt.Errorf("unsupported path namespace: %s", p.Namespace()), http.StatusNotImplemented) + } +} + +func (bb *baseBackend) GetIPNSRecord(ctx context.Context, c cid.Cid) ([]byte, error) { + if bb.routing == nil { + return nil, NewErrorStatusCode(errors.New("IPNS Record responses are not supported by this gateway"), http.StatusNotImplemented) + } + + name, err := ipns.NameFromCid(c) + if err != nil { + return nil, NewErrorStatusCode(err, http.StatusBadRequest) + } + + return bb.routing.GetValue(ctx, string(name.RoutingKey())) +} + +func (bb *baseBackend) GetDNSLinkRecord(ctx context.Context, hostname string) (path.Path, error) { + if bb.namesys != nil { + p, err := path.NewPath("/ipns/" + hostname) + if err != nil { + return nil, err + } + res, err := bb.namesys.Resolve(ctx, p, namesys.ResolveWithDepth(1)) + if err == 
namesys.ErrResolveRecursion { + err = nil + } + return res.Path, err + } + + return nil, NewErrorStatusCode(errors.New("not implemented"), http.StatusNotImplemented) +} diff --git a/gateway/blocks_backend.go b/gateway/backend_blocks.go similarity index 91% rename from gateway/blocks_backend.go rename to gateway/backend_blocks.go index d85c2846b..0cfe45a10 100644 --- a/gateway/blocks_backend.go +++ b/gateway/backend_blocks.go @@ -6,9 +6,7 @@ import ( "errors" "fmt" "io" - "net/http" "strings" - "time" "github.com/ipfs/boxo/blockservice" blockstore "github.com/ipfs/boxo/blockstore" @@ -18,7 +16,6 @@ import ( "github.com/ipfs/boxo/ipld/merkledag" ufile "github.com/ipfs/boxo/ipld/unixfs/file" uio "github.com/ipfs/boxo/ipld/unixfs/io" - "github.com/ipfs/boxo/ipns" "github.com/ipfs/boxo/namesys" "github.com/ipfs/boxo/path" "github.com/ipfs/boxo/path/resolver" @@ -38,7 +35,6 @@ import ( "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" - routinghelpers "github.com/libp2p/go-libp2p-routing-helpers" "github.com/libp2p/go-libp2p/core/routing" mc "github.com/multiformats/go-multicodec" @@ -51,14 +47,11 @@ import ( // BlocksBackend is an [IPFSBackend] implementation based on a [blockservice.BlockService]. type BlocksBackend struct { + baseBackend blockStore blockstore.Blockstore blockService blockservice.BlockService dagService format.DAGService resolver resolver.Resolver - - // Optional routing system to handle /ipns addresses. - namesys namesys.NameSystem - routing routing.ValueStore } var _ IPFSBackend = (*BlocksBackend)(nil) @@ -108,47 +101,28 @@ func NewBlocksBackend(blockService blockservice.BlockService, opts ...BlocksBack // Setup the DAG services, which use the CAR block store. dagService := merkledag.NewDAGService(blockService) - // Setup a name system so that we are able to resolve /ipns links. 
- var ( - ns namesys.NameSystem - vs routing.ValueStore - r resolver.Resolver - ) - - vs = compiledOptions.vs - if vs == nil { - vs = routinghelpers.Null{} - } - - ns = compiledOptions.ns - if ns == nil { - dns, err := NewDNSResolver(nil, nil) - if err != nil { - return nil, err - } - - ns, err = namesys.NewNameSystem(vs, namesys.WithDNSResolver(dns)) - if err != nil { - return nil, err - } - } - - r = compiledOptions.r + // Setup the [resolver.Resolver] if not provided. + r := compiledOptions.r if r == nil { - // Setup the UnixFS resolver. fetcherCfg := bsfetcher.NewFetcherConfig(blockService) fetcherCfg.PrototypeChooser = dagpb.AddSupportToChooser(bsfetcher.DefaultPrototypeChooser) fetcher := fetcherCfg.WithReifier(unixfsnode.Reify) r = resolver.NewBasicResolver(fetcher) } + // Setup the [baseBackend] which takes care of some shared functionality, such + // as resolving /ipns links. + baseBackend, err := newBaseBackend(compiledOptions.vs, compiledOptions.ns) + if err != nil { + return nil, err + } + return &BlocksBackend{ + baseBackend: baseBackend, blockStore: blockService.Blockstore(), blockService: blockService, dagService: dagService, resolver: r, - routing: vs, - namesys: ns, }, nil } @@ -630,55 +604,6 @@ func (bb *BlocksBackend) getPathRoots(ctx context.Context, contentPath path.Immu return pathRoots, lastPath, remainder, nil } -func (bb *BlocksBackend) ResolveMutable(ctx context.Context, p path.Path) (path.ImmutablePath, time.Duration, time.Time, error) { - switch p.Namespace() { - case path.IPNSNamespace: - res, err := namesys.Resolve(ctx, bb.namesys, p) - if err != nil { - return path.ImmutablePath{}, 0, time.Time{}, err - } - ip, err := path.NewImmutablePath(res.Path) - if err != nil { - return path.ImmutablePath{}, 0, time.Time{}, err - } - return ip, res.TTL, res.LastMod, nil - case path.IPFSNamespace: - ip, err := path.NewImmutablePath(p) - return ip, 0, time.Time{}, err - default: - return path.ImmutablePath{}, 0, time.Time{}, 
NewErrorStatusCode(fmt.Errorf("unsupported path namespace: %s", p.Namespace()), http.StatusNotImplemented) - } -} - -func (bb *BlocksBackend) GetIPNSRecord(ctx context.Context, c cid.Cid) ([]byte, error) { - if bb.routing == nil { - return nil, NewErrorStatusCode(errors.New("IPNS Record responses are not supported by this gateway"), http.StatusNotImplemented) - } - - name, err := ipns.NameFromCid(c) - if err != nil { - return nil, NewErrorStatusCode(err, http.StatusBadRequest) - } - - return bb.routing.GetValue(ctx, string(name.RoutingKey())) -} - -func (bb *BlocksBackend) GetDNSLinkRecord(ctx context.Context, hostname string) (path.Path, error) { - if bb.namesys != nil { - p, err := path.NewPath("/ipns/" + hostname) - if err != nil { - return nil, err - } - res, err := bb.namesys.Resolve(ctx, p, namesys.ResolveWithDepth(1)) - if err == namesys.ErrResolveRecursion { - err = nil - } - return res.Path, err - } - - return nil, NewErrorStatusCode(errors.New("not implemented"), http.StatusNotImplemented) -} - func (bb *BlocksBackend) IsCached(ctx context.Context, p path.Path) bool { rp, _, err := bb.resolvePath(ctx, p) if err != nil { @@ -711,11 +636,10 @@ func (bb *BlocksBackend) ResolvePath(ctx context.Context, path path.ImmutablePat func (bb *BlocksBackend) resolvePath(ctx context.Context, p path.Path) (path.ImmutablePath, []string, error) { var err error if p.Namespace() == path.IPNSNamespace { - res, err := namesys.Resolve(ctx, bb.namesys, p) + p, _, _, err = bb.baseBackend.ResolveMutable(ctx, p) if err != nil { return path.ImmutablePath{}, nil, err } - p = res.Path } if p.Namespace() != path.IPFSNamespace { From 1be37307f0e60dfb0f8d27839d2e63c58610fc6e Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Mon, 25 Mar 2024 17:22:29 +0100 Subject: [PATCH 02/19] wip: import --- examples/go.mod | 7 + examples/go.sum | 13 + gateway/backend_blocks.go | 16 +- gateway/backend_graph.go | 1288 +++++++++++++++++ gateway/backend_graph_files.go | 533 +++++++ 
gateway/backend_graph_test.go | 1207 +++++++++++++++ gateway/backend_graph_traversal.go | 323 +++++ gateway/backend_graph_utils.go | 132 ++ gateway/backend_graph_utils_test.go | 97 ++ ...h-multilayer-hamt-and-multiblock-files.car | Bin 0 -> 6045 bytes go.mod | 10 +- go.sum | 12 + 12 files changed, 3634 insertions(+), 4 deletions(-) create mode 100644 gateway/backend_graph.go create mode 100644 gateway/backend_graph_files.go create mode 100644 gateway/backend_graph_test.go create mode 100644 gateway/backend_graph_traversal.go create mode 100644 gateway/backend_graph_utils.go create mode 100644 gateway/backend_graph_utils_test.go create mode 100644 gateway/testdata/directory-with-multilayer-hamt-and-multiblock-files.car diff --git a/examples/go.mod b/examples/go.mod index 9290f9158..ebae8a7e4 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -60,7 +60,11 @@ require ( github.com/huin/goupnp v1.3.0 // indirect github.com/ipfs/bbloom v0.0.4 // indirect github.com/ipfs/go-bitfield v1.1.0 // indirect + github.com/ipfs/go-blockservice v0.5.0 // indirect + github.com/ipfs/go-ipfs-blockstore v1.3.0 // indirect github.com/ipfs/go-ipfs-delay v0.0.1 // indirect + github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect + github.com/ipfs/go-ipfs-exchange-interface v0.2.0 // indirect github.com/ipfs/go-ipfs-pq v0.0.3 // indirect github.com/ipfs/go-ipfs-redirects-file v0.1.1 // indirect github.com/ipfs/go-ipfs-util v0.0.3 // indirect @@ -69,9 +73,12 @@ require ( github.com/ipfs/go-ipld-legacy v0.2.1 // indirect github.com/ipfs/go-log v1.0.5 // indirect github.com/ipfs/go-log/v2 v2.5.1 // indirect + github.com/ipfs/go-merkledag v0.11.0 // indirect github.com/ipfs/go-metrics-interface v0.0.1 // indirect github.com/ipfs/go-peertaskqueue v0.8.1 // indirect github.com/ipfs/go-unixfsnode v1.9.0 // indirect + github.com/ipfs/go-verifcid v0.0.2 // indirect + github.com/ipld/go-car v0.6.2 // indirect github.com/ipld/go-codec-dagpb v1.6.0 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // 
indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect diff --git a/examples/go.sum b/examples/go.sum index 27414405f..50ed55608 100644 --- a/examples/go.sum +++ b/examples/go.sum @@ -136,6 +136,7 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20231229205709-960ae82b1e42 h1:dHLYa5D8/Ta0aLR2XcPsrkpAgGeFs6thhMcQK0oQ0n8= github.com/google/pprof v0.0.0-20231229205709-960ae82b1e42/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -167,13 +168,17 @@ github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= +github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= +github.com/ipfs/go-bitswap v0.11.0/go.mod h1:05aE8H3XOU+LXpTedeAS0OZpcO1WFsj5niYQH9a1Tmk= github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= github.com/ipfs/go-blockservice v0.5.0 h1:B2mwhhhVQl2ntW2EIpaWPwSCxSuqr5fFA93Ms4bYLEY= github.com/ipfs/go-blockservice v0.5.0/go.mod h1:W6brZ5k20AehbmERplmERn8o2Ni3ZZubvAxaIUeaT6w= +github.com/ipfs/go-cid v0.0.5/go.mod h1:plgt+Y5MnOey4vO4UlUazGqdbEXuFYitED67FexhXog= github.com/ipfs/go-cid v0.0.6/go.mod 
h1:6Ux9z5e+HpkQdckYoX1PG/6xqKspzlEIR5SDmgqgC/I= github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s= github.com/ipfs/go-cid v0.4.1/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk= +github.com/ipfs/go-datastore v0.5.0/go.mod h1:9zhEApYMTl17C8YDp7JmU7sQZi2/wqiYh73hakZ90Bk= github.com/ipfs/go-datastore v0.6.0 h1:JKyz+Gvz1QEZw0LsX1IBn+JFCJQH4SJVFtM4uWU0Myk= github.com/ipfs/go-datastore v0.6.0/go.mod h1:rt5M3nNbSO/8q1t4LNkLyUwRs8HupMeN/8O4Vn9YAT8= github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= @@ -184,6 +189,7 @@ github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IW github.com/ipfs/go-ipfs-blocksutil v0.0.1/go.mod h1:Yq4M86uIOmxmGPUHv/uI7uKqZNtLb449gwKqXjIsnRk= github.com/ipfs/go-ipfs-chunker v0.0.5 h1:ojCf7HV/m+uS2vhUGWcogIIxiO5ubl5O57Q7NapWLY8= github.com/ipfs/go-ipfs-chunker v0.0.5/go.mod h1:jhgdF8vxRHycr00k13FM8Y0E+6BoalYeobXmUyTreP8= +github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-delay v0.0.1/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-ds-help v1.1.0 h1:yLE2w9RAsl31LtfMt91tRZcrx+e61O5mDxFRR994w4Q= @@ -196,6 +202,8 @@ github.com/ipfs/go-ipfs-pq v0.0.3 h1:YpoHVJB+jzK15mr/xsWC574tyDLkezVrDNeaalQBsTE github.com/ipfs/go-ipfs-pq v0.0.3/go.mod h1:btNw5hsHBpRcSSgZtiNm/SLj5gYIZ18AKtv3kERkRb4= github.com/ipfs/go-ipfs-redirects-file v0.1.1 h1:Io++k0Vf/wK+tfnhEh63Yte1oQK5VGT2hIEYpD0Rzx8= github.com/ipfs/go-ipfs-redirects-file v0.1.1/go.mod h1:tAwRjCV0RjLTjH8DR/AU7VYvfQECg+lpUy2Mdzv7gyk= +github.com/ipfs/go-ipfs-routing v0.3.0 h1:9W/W3N+g+y4ZDeffSgqhgo7BsBSJwPMcyssET9OWevc= +github.com/ipfs/go-ipfs-routing v0.3.0/go.mod h1:dKqtTFIql7e1zYsEuWLyuOU+E0WJWW8JjbTPLParDWo= github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0= 
github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs= github.com/ipfs/go-ipld-cbor v0.1.0 h1:dx0nS0kILVivGhfWuB6dUpMa/LAwElHPw1yOGYopoYs= @@ -221,6 +229,8 @@ github.com/ipfs/go-unixfsnode v1.9.0 h1:ubEhQhr22sPAKO2DNsyVBW7YB/zA8Zkif25aBvz8 github.com/ipfs/go-unixfsnode v1.9.0/go.mod h1:HxRu9HYHOjK6HUqFBAi++7DVoWAHn0o4v/nZ/VA+0g8= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU= +github.com/ipld/go-car v0.6.2 h1:Hlnl3Awgnq8icK+ze3iRghk805lu8YNq3wlREDTF2qc= +github.com/ipld/go-car v0.6.2/go.mod h1:oEGXdwp6bmxJCZ+rARSkDliTeYnVzv3++eXajZ+Bmr8= github.com/ipld/go-car/v2 v2.13.1 h1:KnlrKvEPEzr5IZHKTXLAEub+tPrzeAFQVRlSQvuxBO4= github.com/ipld/go-car/v2 v2.13.1/go.mod h1:QkdjjFNGit2GIkpQ953KBwowuoukoM75nP/JI1iDJdo= github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= @@ -251,6 +261,7 @@ github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0= github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -337,6 +348,7 @@ github.com/multiformats/go-multiaddr-dns v0.3.1 h1:QgQgR+LQVt3NPTjbrLLpsaT2ufAA2 github.com/multiformats/go-multiaddr-dns v0.3.1/go.mod h1:G/245BRQ6FJGmryJCrOuTdB37AMA5AMOVuO6NY3JwTk= github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/eQsuaL3/CWe167E= github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod 
h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo= +github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= @@ -700,6 +712,7 @@ google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7 google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= diff --git a/gateway/backend_blocks.go b/gateway/backend_blocks.go index 0cfe45a10..5d71a51eb 100644 --- a/gateway/backend_blocks.go +++ b/gateway/backend_blocks.go @@ -37,6 +37,7 @@ import ( selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" "github.com/libp2p/go-libp2p/core/routing" mc "github.com/multiformats/go-multicodec" + "github.com/prometheus/client_golang/prometheus" // Ensure basic codecs are registered. 
_ "github.com/ipld/go-ipld-prime/codec/cbor" @@ -57,9 +58,10 @@ type BlocksBackend struct { var _ IPFSBackend = (*BlocksBackend)(nil) type blocksBackendOptions struct { - ns namesys.NameSystem - vs routing.ValueStore - r resolver.Resolver + ns namesys.NameSystem + vs routing.ValueStore + r resolver.Resolver + promRegistry prometheus.Registerer } // WithNameSystem sets the name system to use with the [BlocksBackend]. If not set @@ -88,6 +90,14 @@ func WithResolver(r resolver.Resolver) BlocksBackendOption { } } +// WithPrometheusRegistry sets the registry to use for metrics collection. +func WithPrometheusRegistry(reg prometheus.Registerer) BlocksBackendOption { + return func(opts *blocksBackendOptions) error { + opts.promRegistry = reg + return nil + } +} + type BlocksBackendOption func(options *blocksBackendOptions) error func NewBlocksBackend(blockService blockservice.BlockService, opts ...BlocksBackendOption) (*BlocksBackend, error) { diff --git a/gateway/backend_graph.go b/gateway/backend_graph.go new file mode 100644 index 000000000..dac3c2784 --- /dev/null +++ b/gateway/backend_graph.go @@ -0,0 +1,1288 @@ +package gateway + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "strconv" + "strings" + "time" + + "github.com/hashicorp/go-multierror" + "github.com/ipfs/boxo/files" + "github.com/ipfs/boxo/ipld/merkledag" + "github.com/ipfs/boxo/ipld/unixfs" + "github.com/ipfs/boxo/path" + ipfspath "github.com/ipfs/boxo/path" + "github.com/ipfs/boxo/path/resolver" + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + format "github.com/ipfs/go-ipld-format" + "github.com/ipfs/go-unixfsnode" + ufsData "github.com/ipfs/go-unixfsnode/data" + "github.com/ipfs/go-unixfsnode/hamt" + ufsiter "github.com/ipfs/go-unixfsnode/iter" + carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/storage" + dagpb "github.com/ipld/go-codec-dagpb" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/datamodel" + cidlink 
"github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/schema" + "github.com/ipld/go-ipld-prime/traversal" + "github.com/multiformats/go-multicodec" + "github.com/prometheus/client_golang/prometheus" +) + +const GetBlockTimeout = time.Second * 60 + +// type DataCallback = func(resource string, reader io.Reader) error +// TODO: Don't use a caboose type, perhaps ask them to use a type alias instead of a type +// type DataCallback = caboose.DataCallback +type DataCallback func(resource string, reader io.Reader) error + +// TODO: Don't use a caboose type +// type ErrPartialResponse = caboose.ErrPartialResponse + +// ErrPartialResponse can be returned from a DataCallback to indicate that some of the requested resource +// was successfully fetched, and that instead of retrying the full resource, that there are +// one or more more specific resources that should be fetched (via StillNeed) to complete the request. +type ErrPartialResponse struct { + error + StillNeed []string +} + +func (epr ErrPartialResponse) Error() string { + if epr.error != nil { + return fmt.Sprintf("partial response: %s", epr.error.Error()) + } + return "caboose received a partial response" +} + +var ErrFetcherUnexpectedEOF = fmt.Errorf("failed to fetch IPLD data") + +type CarFetcher interface { + Fetch(ctx context.Context, path string, cb DataCallback) error +} + +type GraphGateway struct { + baseBackend + fetcher CarFetcher + + pc traversal.LinkTargetNodePrototypeChooser + + metrics *GraphGatewayMetrics +} + +type GraphGatewayMetrics struct { + contextAlreadyCancelledMetric prometheus.Counter + carFetchAttemptMetric prometheus.Counter + carBlocksFetchedMetric prometheus.Counter + carParamsMetric *prometheus.CounterVec + + bytesRangeStartMetric prometheus.Histogram + bytesRangeSizeMetric prometheus.Histogram +} + +func NewGraphGatewayBackend(f CarFetcher, opts ...BlocksBackendOption) (*GraphGateway, error) { + var 
compiledOptions blocksBackendOptions + for _, o := range opts { + if err := o(&compiledOptions); err != nil { + return nil, err + } + } + + // Setup the [baseBackend] which takes care of some shared functionality, such + // as resolving /ipns links. + baseBackend, err := newBaseBackend(compiledOptions.vs, compiledOptions.ns) + if err != nil { + return nil, err + } + + var promReg prometheus.Registerer = prometheus.NewRegistry() + if compiledOptions.promRegistry != nil { + promReg = compiledOptions.promRegistry + } + + return &GraphGateway{ + baseBackend: baseBackend, + fetcher: f, + metrics: registerGraphGatewayMetrics(promReg), + pc: dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { + if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { + return tlnkNd.LinkTargetNodePrototype(), nil + } + return basicnode.Prototype.Any, nil + }), + }, nil +} + +func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGatewayMetrics { + // How many CAR Fetch attempts we had? Need this to calculate % of various graph request types. 
+ // We only count attempts here, because success/failure with/without retries are provided by caboose: + // - ipfs_caboose_fetch_duration_car_success_count + // - ipfs_caboose_fetch_duration_car_failure_count + // - ipfs_caboose_fetch_duration_car_peer_success_count + // - ipfs_caboose_fetch_duration_car_peer_failure_count + carFetchAttemptMetric := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "ipfs", + Subsystem: "gw_graph_backend", + Name: "car_fetch_attempts", + Help: "The number of times a CAR fetch was attempted by IPFSBackend.", + }) + registerer.MustRegister(carFetchAttemptMetric) + + contextAlreadyCancelledMetric := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "ipfs", + Subsystem: "gw_graph_backend", + Name: "car_fetch_context_already_cancelled", + Help: "The number of times context is already cancelled when a CAR fetch was attempted by IPFSBackend.", + }) + registerer.MustRegister(contextAlreadyCancelledMetric) + + // How many blocks were read via CARs? + // Need this as a baseline to reason about error ratio vs raw_block_recovery_attempts. 
+ carBlocksFetchedMetric := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "ipfs", + Subsystem: "gw_graph_backend", + Name: "car_blocks_fetched", + Help: "The number of blocks successfully read via CAR fetch.", + }) + registerer.MustRegister(carBlocksFetchedMetric) + + carParamsMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "ipfs", + Subsystem: "gw_graph_backend", + Name: "car_fetch_params", + Help: "How many times specific CAR parameter was used during CAR data fetch.", + }, []string{"dagScope", "entityRanges"}) // we use 'ranges' instead of 'bytes' here because we only count the number of ranges present + registerer.MustRegister(carParamsMetric) + + bytesRangeStartMetric := prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "ipfs", + Subsystem: "gw_graph_backend", + Name: "range_request_start", + Help: "Tracks where did the range request start.", + Buckets: prometheus.ExponentialBuckets(1024, 2, 24), // 1024 bytes to 8 GiB + }) + registerer.MustRegister(bytesRangeStartMetric) + + bytesRangeSizeMetric := prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "ipfs", + Subsystem: "gw_graph_backend", + Name: "range_request_size", + Help: "Tracks the size of range requests.", + Buckets: prometheus.ExponentialBuckets(256*1024, 2, 10), // From 256KiB to 100MiB + }) + registerer.MustRegister(bytesRangeSizeMetric) + + return &GraphGatewayMetrics{ + contextAlreadyCancelledMetric, + carFetchAttemptMetric, + carBlocksFetchedMetric, + carParamsMetric, + bytesRangeStartMetric, + bytesRangeSizeMetric, + } +} + +func (api *GraphGateway) fetchCAR(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { + urlWithoutHost := contentPathToCarUrl(path, params).String() + + api.metrics.carFetchAttemptMetric.Inc() + var ipldError error + fetchErr := api.fetcher.Fetch(ctx, urlWithoutHost, func(resource string, reader io.Reader) error { + return checkRetryableError(&ipldError, func() error { + return 
cb(resource, reader) + }) + }) + + if ipldError != nil { + fetchErr = ipldError + } else if fetchErr != nil { + fetchErr = GatewayError(fetchErr) + } + + return fetchErr +} + +// resolvePathWithRootsAndBlock takes a path and linksystem and returns the set of non-terminal cids, the terminal cid, the remainder, and the block corresponding to the terminal cid +func resolvePathWithRootsAndBlock(ctx context.Context, fpath ipfspath.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { + pathRootCids, terminalCid, remainder, terminalBlk, err := resolvePathToLastWithRoots(ctx, fpath, unixFSLsys) + if err != nil { + return nil, cid.Undef, nil, nil, err + } + + if terminalBlk == nil { + lctx := ipld.LinkContext{Ctx: ctx} + lnk := cidlink.Link{Cid: terminalCid} + blockData, err := unixFSLsys.LoadRaw(lctx, lnk) + if err != nil { + return nil, cid.Undef, nil, nil, err + } + terminalBlk, err = blocks.NewBlockWithCid(blockData, terminalCid) + if err != nil { + return nil, cid.Undef, nil, nil, err + } + } + + return pathRootCids, terminalCid, remainder, terminalBlk, err +} + +// resolvePathToLastWithRoots takes a path and linksystem and returns the set of non-terminal cids, the terminal cid, +// the remainder pathing, the last block loaded, and the last node loaded. 
+// +// Note: the block returned will be nil if the terminal element is a link or the path is just a CID +func resolvePathToLastWithRoots(ctx context.Context, fpath ipfspath.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { + c, p := fpath.RootCid(), fpath.Segments()[2:] + + if len(p) == 0 { + return nil, c, nil, nil, nil + } + + unixFSLsys.NodeReifier = unixfsnode.Reify + defer func() { unixFSLsys.NodeReifier = nil }() + + var cids []cid.Cid + cids = append(cids, c) + + pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { + if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { + return tlnkNd.LinkTargetNodePrototype(), nil + } + return basicnode.Prototype.Any, nil + }) + + loadNode := func(ctx context.Context, c cid.Cid) (blocks.Block, ipld.Node, error) { + lctx := ipld.LinkContext{Ctx: ctx} + rootLnk := cidlink.Link{Cid: c} + np, err := pc(rootLnk, lctx) + if err != nil { + return nil, nil, err + } + nd, blockData, err := unixFSLsys.LoadPlusRaw(lctx, rootLnk, np) + if err != nil { + return nil, nil, err + } + blk, err := blocks.NewBlockWithCid(blockData, c) + if err != nil { + return nil, nil, err + } + return blk, nd, nil + } + + nextBlk, nextNd, err := loadNode(ctx, c) + if err != nil { + return nil, cid.Undef, nil, nil, err + } + + depth := 0 + for i, elem := range p { + nextNd, err = nextNd.LookupBySegment(ipld.ParsePathSegment(elem)) + if err != nil { + return nil, cid.Undef, nil, nil, err + } + if nextNd.Kind() == ipld.Kind_Link { + depth = 0 + lnk, err := nextNd.AsLink() + if err != nil { + return nil, cid.Undef, nil, nil, err + } + cidLnk, ok := lnk.(cidlink.Link) + if !ok { + return nil, cid.Undef, nil, nil, fmt.Errorf("link is not a cidlink: %v", cidLnk) + } + cids = append(cids, cidLnk.Cid) + + if i < len(p)-1 { + nextBlk, nextNd, err = loadNode(ctx, cidLnk.Cid) + if err != nil { + return nil, cid.Undef, nil, nil, err + } + } + } else { + 
depth++ + } + } + + // if last node is not a link, just return it's cid, add path to remainder and return + if nextNd.Kind() != ipld.Kind_Link { + // return the cid and the remainder of the path + return cids[:len(cids)-1], cids[len(cids)-1], p[len(p)-depth:], nextBlk, nil + } + + return cids[:len(cids)-1], cids[len(cids)-1], nil, nil, nil +} + +func contentMetadataFromRootsAndRemainder(p ipfspath.Path, pathRoots []cid.Cid, terminalCid cid.Cid, remainder []string) (ContentPathMetadata, error) { + var rootCid cid.Cid + if len(pathRoots) > 0 { + rootCid = pathRoots[0] + } else { + rootCid = terminalCid + } + + p, err := path.Join(path.FromCid(rootCid), remainder...) + if err != nil { + return ContentPathMetadata{}, err + } + + imPath, err := path.NewImmutablePath(p) + if err != nil { + return ContentPathMetadata{}, err + } + + md := ContentPathMetadata{ + PathSegmentRoots: pathRoots, + LastSegmentRemainder: remainder, + LastSegment: imPath, + } + return md, nil +} + +var errNotUnixFS = fmt.Errorf("data was not unixfs") + +func (api *GraphGateway) Get(ctx context.Context, path path.ImmutablePath, byteRanges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { + rangeCount := len(byteRanges) + api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "entity", "entityRanges": strconv.Itoa(rangeCount)}).Inc() + + carParams := CarParams{Scope: DagScopeEntity} + + // fetch CAR with &bytes= to get minimal set of blocks for the request + // Note: majority of requests have 0 or max 1 ranges. if there are more ranges than one, + // that is a niche edge cache we don't prefetch as CAR and use fallback blockstore instead. 
+ if rangeCount > 0 { + r := byteRanges[0] + carParams.Range = &DagByteRange{ + From: int64(r.From), + } + + // TODO: move to boxo or to loadRequestIntoSharedBlockstoreAndBlocksGateway after we pass params in a humane way + api.metrics.bytesRangeStartMetric.Observe(float64(r.From)) + + if r.To != nil { + carParams.Range.To = r.To + + // TODO: move to boxo or to loadRequestIntoSharedBlockstoreAndBlocksGateway after we pass params in a humane way + api.metrics.bytesRangeSizeMetric.Observe(float64(*r.To) - float64(r.From) + 1) + } + } + + md, terminalElem, err := fetchWithPartialRetries(ctx, path, carParams, loadTerminalEntity, api.metrics, api.fetchCAR) + if err != nil { + return ContentPathMetadata{}, nil, err + } + + var resp *GetResponse + + switch typedTerminalElem := terminalElem.(type) { + case *GetResponse: + resp = typedTerminalElem + case *backpressuredFile: + resp = NewGetResponseFromReader(typedTerminalElem, typedTerminalElem.size) + case *backpressuredHAMTDirIterNoRecursion: + ch := make(chan unixfs.LinkResult) + go func() { + defer close(ch) + for typedTerminalElem.Next() { + l := typedTerminalElem.Link() + select { + case ch <- l: + case <-ctx.Done(): + return + } + } + if err := typedTerminalElem.Err(); err != nil { + select { + case ch <- unixfs.LinkResult{Err: err}: + case <-ctx.Done(): + return + } + } + }() + resp = NewGetResponseFromDirectoryListing(typedTerminalElem.dagSize, ch, nil) + default: + return ContentPathMetadata{}, nil, fmt.Errorf("invalid data type") + } + + return md, resp, nil + +} + +// loadTerminalEntity returns either a [*GetResponse], [*backpressuredFile], or [*backpressuredHAMTDirIterNoRecursion] +func loadTerminalEntity(ctx context.Context, c cid.Cid, blk blocks.Block, lsys *ipld.LinkSystem, params CarParams, getLsys lsysGetter) (interface{}, error) { + var err error + if lsys == nil { + lsys, err = getLsys(ctx, c, params) + if err != nil { + return nil, err + } + } + + lctx := ipld.LinkContext{Ctx: ctx} + + if c.Type() != 
uint64(multicodec.DagPb) { + var blockData []byte + + if blk != nil { + blockData = blk.RawData() + } else { + blockData, err = lsys.LoadRaw(lctx, cidlink.Link{Cid: c}) + if err != nil { + return nil, err + } + } + + f := files.NewBytesFile(blockData) + if params.Range != nil && params.Range.From != 0 { + if _, err := f.Seek(params.Range.From, io.SeekStart); err != nil { + return nil, err + } + } + + return NewGetResponseFromReader(f, int64(len(blockData))), nil + } + + blockData, pbn, ufsFieldData, fieldNum, err := loadUnixFSBase(ctx, c, blk, lsys) + if err != nil { + return nil, err + } + + switch fieldNum { + case ufsData.Data_Symlink: + if !ufsFieldData.FieldData().Exists() { + return nil, fmt.Errorf("invalid UnixFS symlink object") + } + lnkTarget := string(ufsFieldData.FieldData().Must().Bytes()) + f := NewGetResponseFromSymlink(files.NewLinkFile(lnkTarget, nil).(*files.Symlink), int64(len(lnkTarget))) + return f, nil + case ufsData.Data_Metadata: + return nil, fmt.Errorf("UnixFS Metadata unsupported") + case ufsData.Data_HAMTShard, ufsData.Data_Directory: + blk, err := blocks.NewBlockWithCid(blockData, c) + if err != nil { + return nil, fmt.Errorf("could not create block: %w", err) + } + dirRootNd, err := merkledag.ProtoNodeConverter(blk, pbn) + if err != nil { + return nil, fmt.Errorf("could not create dag-pb universal block from UnixFS directory root: %w", err) + } + pn, ok := dirRootNd.(*merkledag.ProtoNode) + if !ok { + return nil, fmt.Errorf("could not create dag-pb node from UnixFS directory root: %w", err) + } + + dirDagSize, err := pn.Size() + if err != nil { + return nil, fmt.Errorf("could not get cumulative size from dag-pb node: %w", err) + } + + switch fieldNum { + case ufsData.Data_Directory: + ch := make(chan unixfs.LinkResult, pbn.Links.Length()) + defer close(ch) + iter := pbn.Links.Iterator() + for !iter.Done() { + _, v := iter.Next() + c := v.Hash.Link().(cidlink.Link).Cid + var name string + var size int64 + if v.Name.Exists() { + name = 
v.Name.Must().String() + } + if v.Tsize.Exists() { + size = v.Tsize.Must().Int() + } + lnk := unixfs.LinkResult{Link: &format.Link{ + Name: name, + Size: uint64(size), + Cid: c, + }} + ch <- lnk + } + return NewGetResponseFromDirectoryListing(dirDagSize, ch, nil), nil + case ufsData.Data_HAMTShard: + dirNd, err := unixfsnode.Reify(lctx, pbn, lsys) + if err != nil { + return nil, fmt.Errorf("could not reify sharded directory: %w", err) + } + + d := &backpressuredHAMTDirIterNoRecursion{ + dagSize: dirDagSize, + linksItr: dirNd.MapIterator(), + dirCid: c, + lsys: lsys, + getLsys: getLsys, + ctx: ctx, + closed: make(chan error), + hasClosed: false, + } + return d, nil + default: + return nil, fmt.Errorf("not a basic or HAMT directory: should be unreachable") + } + case ufsData.Data_Raw, ufsData.Data_File: + nd, err := unixfsnode.Reify(lctx, pbn, lsys) + if err != nil { + return nil, err + } + + fnd, ok := nd.(datamodel.LargeBytesNode) + if !ok { + return nil, fmt.Errorf("could not process file since it did not present as large bytes") + } + f, err := fnd.AsLargeBytes() + if err != nil { + return nil, err + } + + fileSize, err := f.Seek(0, io.SeekEnd) + if err != nil { + return nil, fmt.Errorf("unable to get UnixFS file size: %w", err) + } + + from := int64(0) + var byteRange DagByteRange + if params.Range != nil { + from = params.Range.From + byteRange = *params.Range + } + _, err = f.Seek(from, io.SeekStart) + if err != nil { + return nil, fmt.Errorf("unable to get reset UnixFS file reader: %w", err) + } + + return &backpressuredFile{ctx: ctx, fileCid: c, byteRange: byteRange, size: fileSize, f: f, getLsys: getLsys, closed: make(chan error)}, nil + default: + return nil, fmt.Errorf("unknown UnixFS field type") + } +} + +type backpressuredHAMTDirIterNoRecursion struct { + dagSize uint64 + linksItr ipld.MapIterator + dirCid cid.Cid + + lsys *ipld.LinkSystem + getLsys lsysGetter + ctx context.Context + + curLnk unixfs.LinkResult + curProcessed int + + closed chan error + 
hasClosed bool + err error +} + +func (it *backpressuredHAMTDirIterNoRecursion) AwaitClose() <-chan error { + return it.closed +} + +func (it *backpressuredHAMTDirIterNoRecursion) Link() unixfs.LinkResult { + return it.curLnk +} + +func (it *backpressuredHAMTDirIterNoRecursion) Next() bool { + defer func() { + if it.linksItr.Done() || it.err != nil { + if !it.hasClosed { + it.hasClosed = true + close(it.closed) + } + } + }() + + if it.err != nil { + return false + } + + iter := it.linksItr + if iter.Done() { + return false + } + + /* + Since there is no way to make a graph request for part of a HAMT during errors we can either fill in the HAMT with + block requests, or we can re-request the HAMT and skip over the parts we already have. + + Here we choose the latter, however in the event of a re-request we request the entity rather than the entire DAG as + a compromise between more requests and over-fetching data. + */ + + var err error + for { + if it.ctx.Err() != nil { + it.err = it.ctx.Err() + return false + } + + retry, processedErr := isRetryableError(err) + if !retry { + it.err = processedErr + return false + } + + var nd ipld.Node + if err != nil { + var lsys *ipld.LinkSystem + lsys, err = it.getLsys(it.ctx, it.dirCid, CarParams{Scope: DagScopeEntity}) + if err != nil { + continue + } + + _, pbn, ufsFieldData, _, ufsBaseErr := loadUnixFSBase(it.ctx, it.dirCid, nil, lsys) + if ufsBaseErr != nil { + err = ufsBaseErr + continue + } + + nd, err = hamt.NewUnixFSHAMTShard(it.ctx, pbn, ufsFieldData, lsys) + if err != nil { + err = fmt.Errorf("could not reify sharded directory: %w", err) + continue + } + + iter = nd.MapIterator() + for i := 0; i < it.curProcessed; i++ { + _, _, err = iter.Next() + if err != nil { + continue + } + } + + it.linksItr = iter + } + + var k, v ipld.Node + k, v, err = iter.Next() + if err != nil { + retry, processedErr = isRetryableError(err) + if retry { + err = processedErr + continue + } + it.err = processedErr + return false + } + + var 
name string + name, err = k.AsString() + if err != nil { + it.err = err + return false + } + + var lnk ipld.Link + lnk, err = v.AsLink() + if err != nil { + it.err = err + return false + } + + cl, ok := lnk.(cidlink.Link) + if !ok { + it.err = fmt.Errorf("link not a cidlink") + return false + } + + c := cl.Cid + + pbLnk, ok := v.(*ufsiter.IterLink) + if !ok { + it.err = fmt.Errorf("HAMT value is not a dag-pb link") + return false + } + + cumulativeDagSize := uint64(0) + if pbLnk.Substrate.Tsize.Exists() { + cumulativeDagSize = uint64(pbLnk.Substrate.Tsize.Must().Int()) + } + + it.curLnk = unixfs.LinkResult{ + Link: &format.Link{ + Name: name, + Size: cumulativeDagSize, + Cid: c, + }, + } + it.curProcessed++ + break + } + + return true +} + +func (it *backpressuredHAMTDirIterNoRecursion) Err() error { + return it.err +} + +var _ AwaitCloser = (*backpressuredHAMTDirIterNoRecursion)(nil) + +func (api *GraphGateway) GetAll(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.Node, error) { + api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "all", "entityRanges": "0"}).Inc() + return fetchWithPartialRetries(ctx, path, CarParams{Scope: DagScopeAll}, loadTerminalUnixFSElementWithRecursiveDirectories, api.metrics, api.fetchCAR) +} + +type loadTerminalElement[T any] func(ctx context.Context, c cid.Cid, blk blocks.Block, lsys *ipld.LinkSystem, params CarParams, getLsys lsysGetter) (T, error) +type fetchCarFn = func(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error + +type terminalPathType[T any] struct { + resp T + err error + md ContentPathMetadata +} + +type nextReq struct { + c cid.Cid + params CarParams +} + +func fetchWithPartialRetries[T any](ctx context.Context, path path.ImmutablePath, initialParams CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics *GraphGatewayMetrics, fetchCAR fetchCarFn) (ContentPathMetadata, T, error) { + var zeroReturnType T + + terminalPathElementCh := 
make(chan terminalPathType[T], 1) + + go func() { + cctx, cancel := context.WithCancel(ctx) + defer cancel() + + hasSentAsyncData := false + var closeCh <-chan error + + sendRequest := make(chan nextReq, 1) + sendResponse := make(chan *ipld.LinkSystem, 1) + getLsys := func(ctx context.Context, c cid.Cid, params CarParams) (*ipld.LinkSystem, error) { + select { + case sendRequest <- nextReq{c: c, params: params}: + case <-ctx.Done(): + return nil, ctx.Err() + } + + select { + case lsys := <-sendResponse: + return lsys, nil + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) + p := path + params := initialParams + + err := fetchCAR(cctx, path, params, func(resource string, reader io.Reader) error { + gb, err := carToLinearBlockGetter(cctx, reader, metrics) + if err != nil { + return err + } + + lsys := getLinksystem(gb) + + if hasSentAsyncData { + _, _, _, _, err = resolvePathToLastWithRoots(cctx, p, lsys) + if err != nil { + return err + } + + select { + case sendResponse <- lsys: + case <-cctx.Done(): + return cctx.Err() + } + } else { + // First resolve the path since we always need to. 
+ pathRootCids, terminalCid, remainder, terminalBlk, err := resolvePathWithRootsAndBlock(cctx, p, lsys) + if err != nil { + return err + } + md, err := contentMetadataFromRootsAndRemainder(p, pathRootCids, terminalCid, remainder) + if err != nil { + return err + } + + if len(remainder) > 0 { + terminalPathElementCh <- terminalPathType[T]{err: errNotUnixFS} + return nil + } + + if hasSentAsyncData { + select { + case sendResponse <- lsys: + case <-ctx.Done(): + return ctx.Err() + } + } + + nd, err := resolveTerminalElementFn(cctx, terminalCid, terminalBlk, lsys, params, getLsys) + if err != nil { + return err + } + + ndAc, ok := any(nd).(AwaitCloser) + if !ok { + terminalPathElementCh <- terminalPathType[T]{ + resp: nd, + md: md, + } + return nil + } + + hasSentAsyncData = true + terminalPathElementCh <- terminalPathType[T]{ + resp: nd, + md: md, + } + + closeCh = ndAc.AwaitClose() + } + + select { + case closeErr := <-closeCh: + return closeErr + case req := <-sendRequest: + // set path and params for next iteration + p = ipfspath.FromCid(req.c) + // FIXME(hacdias) + imPath := p + if err != nil { + return err + } + params = req.params + remainderUrl := contentPathToCarUrl(imPath, params).String() + return ErrPartialResponse{StillNeed: []string{remainderUrl}} + case <-cctx.Done(): + return cctx.Err() + } + }) + + if !hasSentAsyncData && err != nil { + terminalPathElementCh <- terminalPathType[T]{err: err} + return + } + + if err != nil { + lsys := getLinksystem(func(ctx context.Context, cid cid.Cid) (blocks.Block, error) { + return nil, multierror.Append(ErrFetcherUnexpectedEOF, format.ErrNotFound{Cid: cid}) + }) + for { + select { + case <-closeCh: + return + case <-sendRequest: + case sendResponse <- lsys: + case <-cctx.Done(): + return + } + } + } + }() + + select { + case t := <-terminalPathElementCh: + if t.err != nil { + return ContentPathMetadata{}, zeroReturnType, t.err + } + return t.md, t.resp, nil + case <-ctx.Done(): + return ContentPathMetadata{}, 
zeroReturnType, ctx.Err() + } +} + +func (api *GraphGateway) GetBlock(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.File, error) { + api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", "entityRanges": "0"}).Inc() + // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) + p := path + + var md ContentPathMetadata + var f files.File + // TODO: if path is `/ipfs/cid`, we should use ?format=raw + err := api.fetchCAR(ctx, path, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { + gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + if err != nil { + return err + } + lsys := getLinksystem(gb) + + // First resolve the path since we always need to. + pathRoots, terminalCid, remainder, terminalBlk, err := resolvePathToLastWithRoots(ctx, p, lsys) + if err != nil { + return err + } + + var blockData []byte + if terminalBlk != nil { + blockData = terminalBlk.RawData() + } else { + lctx := ipld.LinkContext{Ctx: ctx} + lnk := cidlink.Link{Cid: terminalCid} + blockData, err = lsys.LoadRaw(lctx, lnk) + if err != nil { + return err + } + } + + md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, terminalCid, remainder) + if err != nil { + return err + } + + f = files.NewBytesFile(blockData) + return nil + }) + + if err != nil { + return ContentPathMetadata{}, nil, err + } + + return md, f, nil +} + +func (api *GraphGateway) Head(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { + api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "entity", "entityRanges": "1"}).Inc() + + // TODO: we probably want to move this either to boxo, or at least to loadRequestIntoSharedBlockstoreAndBlocksGateway + api.metrics.bytesRangeStartMetric.Observe(0) + api.metrics.bytesRangeSizeMetric.Observe(3071) + + // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) + p := path + + var md ContentPathMetadata + var n *HeadResponse + // TODO: fallback to 
dynamic fetches in case we haven't requested enough data + rangeTo := int64(3071) + err := api.fetchCAR(ctx, path, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: 0, To: &rangeTo}}, func(resource string, reader io.Reader) error { + gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + if err != nil { + return err + } + lsys := getLinksystem(gb) + + // First resolve the path since we always need to. + pathRoots, terminalCid, remainder, terminalBlk, err := resolvePathWithRootsAndBlock(ctx, p, lsys) + if err != nil { + return err + } + + md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, terminalCid, remainder) + if err != nil { + return err + } + + lctx := ipld.LinkContext{Ctx: ctx} + pathTerminalCidLink := cidlink.Link{Cid: terminalCid} + + // Load the block at the root of the terminal path element + dataBytes := terminalBlk.RawData() + + // It's not UnixFS if there is a remainder or it's not dag-pb + if len(remainder) > 0 || terminalCid.Type() != uint64(multicodec.DagPb) { + n = NewHeadResponseForFile(files.NewBytesFile(dataBytes), int64(len(dataBytes))) + return nil + } + + // Let's figure out if the terminal element is valid UnixFS and if so what kind + np, err := api.pc(pathTerminalCidLink, lctx) + if err != nil { + return err + } + + nodeDecoder, err := lsys.DecoderChooser(pathTerminalCidLink) + if err != nil { + return err + } + + nb := np.NewBuilder() + err = nodeDecoder(nb, bytes.NewReader(dataBytes)) + if err != nil { + return err + } + lastCidNode := nb.Build() + + if pbn, ok := lastCidNode.(dagpb.PBNode); !ok { + // This shouldn't be possible since we already checked for dag-pb usage + return fmt.Errorf("node was not go-codec-dagpb node") + } else if !pbn.FieldData().Exists() { + // If it's not valid UnixFS then just return the block bytes + n = NewHeadResponseForFile(files.NewBytesFile(dataBytes), int64(len(dataBytes))) + return nil + } else if unixfsFieldData, decodeErr := ufsData.DecodeUnixFSData(pbn.Data.Must().Bytes()); 
decodeErr != nil { + // If it's not valid UnixFS then just return the block bytes + n = NewHeadResponseForFile(files.NewBytesFile(dataBytes), int64(len(dataBytes))) + return nil + } else { + switch fieldNum := unixfsFieldData.FieldDataType().Int(); fieldNum { + case ufsData.Data_Directory, ufsData.Data_HAMTShard: + dirRootNd, err := merkledag.ProtoNodeConverter(terminalBlk, lastCidNode) + if err != nil { + return fmt.Errorf("could not create dag-pb universal block from UnixFS directory root: %w", err) + } + pn, ok := dirRootNd.(*merkledag.ProtoNode) + if !ok { + return fmt.Errorf("could not create dag-pb node from UnixFS directory root: %w", err) + } + + sz, err := pn.Size() + if err != nil { + return fmt.Errorf("could not get cumulative size from dag-pb node: %w", err) + } + + n = NewHeadResponseForDirectory(int64(sz)) + return nil + case ufsData.Data_Symlink: + fd := unixfsFieldData.FieldData() + if fd.Exists() { + n = NewHeadResponseForSymlink(int64(len(fd.Must().Bytes()))) + return nil + } + // If there is no target then it's invalid so just return the block + NewHeadResponseForFile(files.NewBytesFile(dataBytes), int64(len(dataBytes))) + return nil + case ufsData.Data_Metadata: + n = NewHeadResponseForFile(files.NewBytesFile(dataBytes), int64(len(dataBytes))) + return nil + case ufsData.Data_Raw, ufsData.Data_File: + ufsNode, err := unixfsnode.Reify(lctx, pbn, lsys) + if err != nil { + return err + } + fileNode, ok := ufsNode.(datamodel.LargeBytesNode) + if !ok { + return fmt.Errorf("data not a large bytes node despite being UnixFS bytes") + } + f, err := fileNode.AsLargeBytes() + if err != nil { + return err + } + + fileSize, err := f.Seek(0, io.SeekEnd) + if err != nil { + return fmt.Errorf("unable to get UnixFS file size: %w", err) + } + _, err = f.Seek(0, io.SeekStart) + if err != nil { + return fmt.Errorf("unable to get reset UnixFS file reader: %w", err) + } + + out, err := io.ReadAll(io.LimitReader(f, 3072)) + if errors.Is(err, io.EOF) { + n = 
NewHeadResponseForFile(files.NewBytesFile(out), fileSize) + return nil + } + return err + } + } + return nil + }) + + if err != nil { + return ContentPathMetadata{}, nil, err + } + + return md, n, nil +} + +func (api *GraphGateway) ResolvePath(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, error) { + api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", "entityRanges": "0"}).Inc() + + var md ContentPathMetadata + err := api.fetchCAR(ctx, path, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { + gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + if err != nil { + return err + } + lsys := getLinksystem(gb) + + // First resolve the path since we always need to. + // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) + p := path + pathRoots, terminalCid, remainder, _, err := resolvePathToLastWithRoots(ctx, p, lsys) + if err != nil { + return err + } + + md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, terminalCid, remainder) + + return err + }) + + if err != nil { + return ContentPathMetadata{}, err + } + + return md, nil +} + +func (api *GraphGateway) GetCAR(ctx context.Context, path path.ImmutablePath, params CarParams) (ContentPathMetadata, io.ReadCloser, error) { + numRanges := "0" + if params.Range != nil { + numRanges = "1" + } + api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": string(params.Scope), "entityRanges": numRanges}).Inc() + rootCid, err := getRootCid(path) + if err != nil { + return ContentPathMetadata{}, nil, err + } + // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) + p := path + + switch params.Order { + case DagOrderUnspecified, DagOrderUnknown, DagOrderDFS: + default: + return ContentPathMetadata{}, nil, fmt.Errorf("unsupported dag order %q", params.Order) + } + + r, w := io.Pipe() + go func() { + numBlocksSent := 0 + var cw storage.WritableCar + var blockBuffer []blocks.Block + err = api.fetchCAR(ctx, path, params, 
func(resource string, reader io.Reader) error { + numBlocksThisCall := 0 + gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + if err != nil { + return err + } + teeBlock := func(ctx context.Context, c cid.Cid) (blocks.Block, error) { + blk, err := gb(ctx, c) + if err != nil { + return nil, err + } + if numBlocksThisCall >= numBlocksSent { + if cw == nil { + blockBuffer = append(blockBuffer, blk) + } else { + err = cw.Put(ctx, blk.Cid().KeyString(), blk.RawData()) + if err != nil { + return nil, fmt.Errorf("error writing car block: %w", err) + } + } + numBlocksSent++ + } + numBlocksThisCall++ + return blk, nil + } + l := getLinksystem(teeBlock) + + // First resolve the path since we always need to. + _, terminalCid, remainder, terminalBlk, err := resolvePathWithRootsAndBlock(ctx, p, l) + if err != nil { + return err + } + if len(remainder) > 0 { + return nil + } + + if cw == nil { + cw, err = storage.NewWritable(w, []cid.Cid{terminalCid}, carv2.WriteAsCarV1(true), carv2.AllowDuplicatePuts(params.Duplicates.Bool())) + if err != nil { + // io.PipeWriter.CloseWithError always returns nil. 
+ _ = w.CloseWithError(err) + return nil + } + for _, blk := range blockBuffer { + err = cw.Put(ctx, blk.Cid().KeyString(), blk.RawData()) + if err != nil { + _ = w.CloseWithError(fmt.Errorf("error writing car block: %w", err)) + return nil + } + } + blockBuffer = nil + } + + err = walkGatewaySimpleSelector2(ctx, terminalBlk, params.Scope, params.Range, l) + if err != nil { + return err + } + return nil + }) + + _ = w.CloseWithError(err) + }() + + return ContentPathMetadata{ + // PathSegmentRoots: []cid.Cid{rootCid}, + PathSegmentRoots: nil, // FIXME(hacdias): originala bove + LastSegment: ipfspath.FromCid(rootCid), + ContentType: "", + }, r, nil +} + +func getRootCid(imPath path.ImmutablePath) (cid.Cid, error) { + imPathStr := imPath.String() + if !strings.HasPrefix(imPathStr, "/ipfs/") { + return cid.Undef, fmt.Errorf("path does not have /ipfs/ prefix") + } + + firstSegment, _, _ := strings.Cut(imPathStr[6:], "/") + rootCid, err := cid.Decode(firstSegment) + if err != nil { + return cid.Undef, err + } + + return rootCid, nil +} + +func (api *GraphGateway) IsCached(ctx context.Context, path path.Path) bool { + return false +} + +var _ IPFSBackend = (*GraphGateway)(nil) + +func checkRetryableError(e *error, fn func() error) error { + err := fn() + retry, processedErr := isRetryableError(err) + if retry { + return processedErr + } + *e = processedErr + return nil +} + +func isRetryableError(err error) (bool, error) { + if errors.Is(err, ErrFetcherUnexpectedEOF) { + return false, err + } + + if format.IsNotFound(err) { + return true, err + } + initialErr := err + + // Checks if err is of a type that does not implement the .Is interface and + // cannot be directly compared to. Therefore, errors.Is cannot be used. 
+ for { + _, ok := err.(*resolver.ErrNoLink) + if ok { + return false, err + } + + _, ok = err.(datamodel.ErrWrongKind) + if ok { + return false, err + } + + _, ok = err.(datamodel.ErrNotExists) + if ok { + return false, err + } + + errNoSuchField, ok := err.(schema.ErrNoSuchField) + if ok { + // Convert into a more general error type so the gateway code can know what this means + // TODO: Have either a more generally usable error type system for IPLD errors (e.g. a base type indicating that data cannot exist) + // or at least have one that is specific to the gateway consumer and part of the Backend contract instead of this being implicit + err = datamodel.ErrNotExists{Segment: errNoSuchField.Field} + return false, err + } + + err = errors.Unwrap(err) + if err == nil { + return true, initialErr + } + } +} diff --git a/gateway/backend_graph_files.go b/gateway/backend_graph_files.go new file mode 100644 index 000000000..740b3f2e2 --- /dev/null +++ b/gateway/backend_graph_files.go @@ -0,0 +1,533 @@ +package gateway + +import ( + "bytes" + "context" + "fmt" + "io" + + "github.com/ipfs/boxo/files" + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-unixfsnode" + ufsData "github.com/ipfs/go-unixfsnode/data" + "github.com/ipfs/go-unixfsnode/hamt" + dagpb "github.com/ipld/go-codec-dagpb" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/datamodel" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/schema" + "github.com/multiformats/go-multicodec" +) + +type AwaitCloser interface { + AwaitClose() <-chan error +} + +type backpressuredFile struct { + size int64 + f io.ReadSeeker + getLsys lsysGetter + + ctx context.Context + fileCid cid.Cid + byteRange DagByteRange + retErr error + + closed chan error +} + +func (b *backpressuredFile) AwaitClose() <-chan error { + return b.closed +} + +func (b *backpressuredFile) Close() error { + 
close(b.closed) + return nil +} + +func (b *backpressuredFile) Size() (int64, error) { + return b.size, nil +} + +func (b *backpressuredFile) Read(p []byte) (n int, err error) { + if b.retErr == nil { + n, err = b.f.Read(p) + if err == nil || err == io.EOF { + return n, err + } + + if n > 0 { + b.retErr = err + return n, nil + } + } else { + err = b.retErr + } + + from, seekErr := b.f.Seek(0, io.SeekCurrent) + if seekErr != nil { + // Return the seek error since by this point seeking failures like this should be impossible + return 0, seekErr + } + + // we had an error while reading so attempt to reset the underlying reader + for { + if b.ctx.Err() != nil { + return 0, b.ctx.Err() + } + + retry, processedErr := isRetryableError(err) + if !retry { + return 0, processedErr + } + + var nd files.Node + nd, err = loadTerminalUnixFSElementWithRecursiveDirectories(b.ctx, b.fileCid, nil, nil, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: from, To: b.byteRange.To}}, b.getLsys) + if err != nil { + continue + } + + f, ok := nd.(files.File) + if !ok { + return 0, fmt.Errorf("not a file, should be unreachable") + } + + b.f = f + break + } + + // now that we've reset the reader try reading again + return b.Read(p) +} + +func (b *backpressuredFile) Seek(offset int64, whence int) (int64, error) { + return b.f.Seek(offset, whence) +} + +var _ files.File = (*backpressuredFile)(nil) +var _ AwaitCloser = (*backpressuredFile)(nil) + +type singleUseDirectory struct { + dirIter files.DirIterator + closed chan error +} + +func (b *singleUseDirectory) AwaitClose() <-chan error { + return b.closed +} + +func (b *singleUseDirectory) Close() error { + close(b.closed) + return nil +} + +func (b *singleUseDirectory) Size() (int64, error) { + //TODO implement me + panic("implement me") +} + +func (b *singleUseDirectory) Entries() files.DirIterator { + return b.dirIter +} + +var _ files.Directory = (*singleUseDirectory)(nil) +var _ AwaitCloser = (*singleUseDirectory)(nil) + +type 
backpressuredFlatDirIter struct { + linksItr *dagpb.PBLinks__Itr + lsys *ipld.LinkSystem + getLsys lsysGetter + ctx context.Context + + curName string + curFile files.Node + + err error +} + +func (it *backpressuredFlatDirIter) Name() string { + return it.curName +} + +func (it *backpressuredFlatDirIter) Node() files.Node { + return it.curFile +} + +func (it *backpressuredFlatDirIter) Next() bool { + if it.err != nil { + return false + } + + iter := it.linksItr + if iter.Done() { + return false + } + + _, v := iter.Next() + c := v.Hash.Link().(cidlink.Link).Cid + var name string + if v.Name.Exists() { + name = v.Name.Must().String() + } + + var nd files.Node + var err error + params := CarParams{Scope: DagScopeAll} + for { + if it.ctx.Err() != nil { + it.err = it.ctx.Err() + return false + } + if err != nil { + it.lsys, err = it.getLsys(it.ctx, c, params) + continue + } + nd, err = loadTerminalUnixFSElementWithRecursiveDirectories(it.ctx, c, nil, it.lsys, params, it.getLsys) + if err != nil { + if ctxErr := it.ctx.Err(); ctxErr != nil { + continue + } + retry, processedErr := isRetryableError(err) + if retry { + err = processedErr + continue + } + it.err = processedErr + return false + } + break + } + + it.curName = name + it.curFile = nd + return true +} + +func (it *backpressuredFlatDirIter) Err() error { + return it.err +} + +var _ files.DirIterator = (*backpressuredFlatDirIter)(nil) + +type backpressuredHAMTDirIter struct { + linksItr ipld.MapIterator + dirCid cid.Cid + + lsys *ipld.LinkSystem + getLsys lsysGetter + ctx context.Context + + curName string + curFile files.Node + curProcessed int + + err error +} + +func (it *backpressuredHAMTDirIter) Name() string { + return it.curName +} + +func (it *backpressuredHAMTDirIter) Node() files.Node { + return it.curFile +} + +func (it *backpressuredHAMTDirIter) Next() bool { + if it.err != nil { + return false + } + + iter := it.linksItr + if iter.Done() { + return false + } + + /* + Since there is no way to make a 
graph request for part of a HAMT during errors we can either fill in the HAMT with + block requests, or we can re-request the HAMT and skip over the parts we already have. + + Here we choose the latter, however in the event of a re-request we request the entity rather than the entire DAG as + a compromise between more requests and over-fetching data. + */ + + var err error + for { + if it.ctx.Err() != nil { + it.err = it.ctx.Err() + return false + } + + retry, processedErr := isRetryableError(err) + if !retry { + it.err = processedErr + return false + } + + var nd ipld.Node + if err != nil { + var lsys *ipld.LinkSystem + lsys, err = it.getLsys(it.ctx, it.dirCid, CarParams{Scope: DagScopeEntity}) + if err != nil { + continue + } + + _, pbn, ufsFieldData, _, ufsBaseErr := loadUnixFSBase(it.ctx, it.dirCid, nil, lsys) + if ufsBaseErr != nil { + err = ufsBaseErr + continue + } + + nd, err = hamt.NewUnixFSHAMTShard(it.ctx, pbn, ufsFieldData, lsys) + if err != nil { + err = fmt.Errorf("could not reify sharded directory: %w", err) + continue + } + + iter = nd.MapIterator() + for i := 0; i < it.curProcessed; i++ { + _, _, err = iter.Next() + if err != nil { + continue + } + } + + it.linksItr = iter + } + + var k, v ipld.Node + k, v, err = iter.Next() + if err != nil { + retry, processedErr = isRetryableError(err) + if retry { + err = processedErr + continue + } + it.err = processedErr + return false + } + + var name string + name, err = k.AsString() + if err != nil { + it.err = err + return false + } + var lnk ipld.Link + lnk, err = v.AsLink() + if err != nil { + it.err = err + return false + } + + cl, ok := lnk.(cidlink.Link) + if !ok { + it.err = fmt.Errorf("link not a cidlink") + return false + } + + c := cl.Cid + params := CarParams{Scope: DagScopeAll} + var childNd files.Node + for { + if it.ctx.Err() != nil { + it.err = it.ctx.Err() + return false + } + + if err != nil { + retry, processedErr = isRetryableError(err) + if !retry { + it.err = processedErr + return false 
+				}
+
+				it.lsys, err = it.getLsys(it.ctx, c, params)
+				continue
+			}
+
+			childNd, err = loadTerminalUnixFSElementWithRecursiveDirectories(it.ctx, c, nil, it.lsys, params, it.getLsys)
+			if err != nil {
+				continue
+			}
+			break
+		}
+
+		it.curName = name
+		it.curFile = childNd
+		it.curProcessed++
+		break
+	}
+
+	return true
+}
+
+// Err returns the error recorded on the iterator, or nil if none was stored.
+func (it *backpressuredHAMTDirIter) Err() error {
+	return it.err
+}
+
+// Compile-time check that backpressuredHAMTDirIter satisfies files.DirIterator.
+var _ files.DirIterator = (*backpressuredHAMTDirIter)(nil)
+
+/*
+1. Run traversal to get the top-level response
+2. Response can do a callback for another response
+*/
+
+// lsysGetter returns a link system for reading the DAG rooted at c, requested
+// with the given CAR parameters.
+type lsysGetter = func(ctx context.Context, c cid.Cid, params CarParams) (*ipld.LinkSystem, error)
+
+// loadUnixFSBase loads the block for c and classifies it.
+//
+// The block bytes come from blk when it is non-nil, otherwise they are loaded
+// through lsys. The results depend on the codec of c:
+//
+//   - raw codec: the block bytes are returned and every other result is zero.
+//   - otherwise: the block is decoded and must be a dag-pb node whose Data
+//     field holds UnixFS data of a recognised type. The node, the decoded
+//     UnixFS data and the UnixFS type number are returned; the byte slice is nil.
+//
+// errNotUnixFS is returned when the block is not dag-pb, has no Data field,
+// its Data does not decode as UnixFS, or the UnixFS type is not recognised.
+// A nil lsys is reported as an error whenever the link system is needed.
+func loadUnixFSBase(ctx context.Context, c cid.Cid, blk blocks.Block, lsys *ipld.LinkSystem) ([]byte, dagpb.PBNode, ufsData.UnixFSData, int64, error) {
+	lctx := ipld.LinkContext{Ctx: ctx}
+	pathTerminalCidLink := cidlink.Link{Cid: c}
+
+	var blockData []byte
+	if blk != nil {
+		blockData = blk.RawData()
+	} else {
+		if lsys == nil {
+			return nil, nil, nil, 0, fmt.Errorf("cannot load block %s: no link system provided", c)
+		}
+		var err error
+		blockData, err = lsys.LoadRaw(lctx, pathTerminalCidLink)
+		if err != nil {
+			return nil, nil, nil, 0, err
+		}
+	}
+
+	// Raw blocks carry no structure, so there is nothing to decode.
+	if c.Type() == uint64(multicodec.Raw) {
+		return blockData, nil, nil, 0, nil
+	}
+
+	// Decoding needs the link system's decoder chooser.
+	if lsys == nil {
+		return nil, nil, nil, 0, fmt.Errorf("cannot decode block %s: no link system provided", c)
+	}
+
+	// decode the terminal block into a node
+	pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) {
+		if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok {
+			return tlnkNd.LinkTargetNodePrototype(), nil
+		}
+		return basicnode.Prototype.Any, nil
+	})
+
+	np, err := pc(pathTerminalCidLink, lctx)
+	if err != nil {
+		return nil, nil, nil, 0, err
+	}
+
+	decoder, err := lsys.DecoderChooser(pathTerminalCidLink)
+	if err != nil {
+		return nil, nil, nil, 0, err
+	}
+	nb := np.NewBuilder()
+	if err := decoder(nb, bytes.NewReader(blockData)); err != nil {
+		return nil, nil, nil, 0, err
+	}
+	lastCidNode := nb.Build()
+
+	pbn, ok := lastCidNode.(dagpb.PBNode)
+	if !ok {
+		// If it's not valid dag-pb then we're done
+		return nil, nil, nil, 0, errNotUnixFS
+	}
+	if !pbn.FieldData().Exists() {
+		// If it's not valid UnixFS then we're done
+		return nil, nil, nil, 0, errNotUnixFS
+	}
+
+	// The decode error is deliberately reduced to errNotUnixFS, as before.
+	unixfsFieldData, decodeErr := ufsData.DecodeUnixFSData(pbn.Data.Must().Bytes())
+	if decodeErr != nil {
+		return nil, nil, nil, 0, errNotUnixFS
+	}
+
+	switch fieldNum := unixfsFieldData.FieldDataType().Int(); fieldNum {
+	case ufsData.Data_Symlink, ufsData.Data_Metadata, ufsData.Data_Raw, ufsData.Data_File, ufsData.Data_Directory, ufsData.Data_HAMTShard:
+		return nil, pbn, unixfsFieldData, fieldNum, nil
+	default:
+		return nil, nil, nil, 0, errNotUnixFS
+	}
+}
+
+// loadTerminalUnixFSElementWithRecursiveDirectories turns the block for c into
+// a files.Node.
+//
+// When lsys is nil one is obtained from getLsys(ctx, c, params). The result
+// depends on what loadUnixFSBase reports:
+//
+//   - raw codec: a bytes file over the block data.
+//   - symlink: a link file pointing at the stored target.
+//   - directory / HAMT shard: a singleUseDirectory over a backpressured
+//     flat or HAMT iterator.
+//   - raw / file UnixFS types: a backpressuredFile whose reader has been
+//     positioned at params.Range.From (or 0 when no range is given).
+//   - metadata: an error, it is not supported.
+func loadTerminalUnixFSElementWithRecursiveDirectories(ctx context.Context, c cid.Cid, blk blocks.Block, lsys *ipld.LinkSystem, params CarParams, getLsys lsysGetter) (files.Node, error) {
+	var err error
+	if lsys == nil {
+		lsys, err = getLsys(ctx, c, params)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	lctx := ipld.LinkContext{Ctx: ctx}
+	blockData, pbn, ufsFieldData, fieldNum, err := loadUnixFSBase(ctx, c, blk, lsys)
+	if err != nil {
+		return nil, err
+	}
+
+	if c.Type() == uint64(multicodec.Raw) {
+		return files.NewBytesFile(blockData), nil
+	}
+
+	switch fieldNum {
+	case ufsData.Data_Symlink:
+		if !ufsFieldData.FieldData().Exists() {
+			return nil, fmt.Errorf("invalid UnixFS symlink object")
+		}
+		lnkTarget := string(ufsFieldData.FieldData().Must().Bytes())
+		return files.NewLinkFile(lnkTarget, nil), nil
+
+	case ufsData.Data_Metadata:
+		return nil, fmt.Errorf("UnixFS Metadata unsupported")
+
+	case ufsData.Data_Directory:
+		d := &singleUseDirectory{&backpressuredFlatDirIter{
+			ctx:      ctx,
+			linksItr: pbn.Links.Iterator(),
+			lsys:     lsys,
+			getLsys:  getLsys,
+		}, make(chan error)}
+		return d, nil
+
+	case ufsData.Data_HAMTShard:
+		dirNd, err := unixfsnode.Reify(lctx, pbn, lsys)
+		if err != nil {
+			return nil, fmt.Errorf("could not reify sharded directory: %w", err)
+		}
+
+		d := &singleUseDirectory{
+			&backpressuredHAMTDirIter{
+				linksItr: dirNd.MapIterator(),
+				dirCid:   c,
+				lsys:     lsys,
+				getLsys:  getLsys,
+				ctx:      ctx,
+			}, make(chan error),
+		}
+		return d, nil
+
+	case ufsData.Data_Raw, ufsData.Data_File:
+		nd, err := unixfsnode.Reify(lctx, pbn, lsys)
+		if err != nil {
+			return nil, err
+		}
+
+		fnd, ok := nd.(datamodel.LargeBytesNode)
+		if !ok {
+			return nil, fmt.Errorf("could not process file since it did not present as large bytes")
+		}
+		f, err := fnd.AsLargeBytes()
+		if err != nil {
+			return nil, err
+		}
+
+		// Seeking to the end is how the total file size is discovered.
+		fileSize, err := f.Seek(0, io.SeekEnd)
+		if err != nil {
+			return nil, fmt.Errorf("unable to get UnixFS file size: %w", err)
+		}
+
+		// NOTE(review): From is handed straight to Seek(io.SeekStart); confirm
+		// that negative (from-the-end) offsets cannot reach this point.
+		from := int64(0)
+		var byteRange DagByteRange
+		if params.Range != nil {
+			byteRange = *params.Range
+			from = params.Range.From
+		}
+		_, err = f.Seek(from, io.SeekStart)
+		if err != nil {
+			return nil, fmt.Errorf("unable to get reset UnixFS file reader: %w", err)
+		}
+
+		return &backpressuredFile{ctx: ctx, fileCid: c, byteRange: byteRange, size: fileSize, f: f, getLsys: getLsys, closed: make(chan error)}, nil
+
+	default:
+		return nil, fmt.Errorf("unknown UnixFS field type")
+	}
+}
diff --git a/gateway/backend_graph_test.go b/gateway/backend_graph_test.go
new file mode 100644
index 000000000..da015d83f
--- /dev/null
+++ b/gateway/backend_graph_test.go
@@ -0,0 +1,1207 @@
+package gateway
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	_ "embed"
+
+	"github.com/ipfs/boxo/blockservice"
+	"github.com/ipfs/boxo/exchange/offline"
+	"github.com/ipfs/boxo/files"
+	"github.com/ipfs/boxo/ipld/merkledag"
+	unixfile "github.com/ipfs/boxo/ipld/unixfs/file"
+	"github.com/ipfs/boxo/path"
+	"github.com/ipfs/go-cid"
+	carv2 "github.com/ipld/go-car/v2"
+	carbs "github.com/ipld/go-car/v2/blockstore"
+	"github.com/ipld/go-car/v2/storage"
+	"github.com/stretchr/testify/require"
+)
+
+//go:embed testdata/directory-with-multilayer-hamt-and-multiblock-files.car
+var dirWithMultiblockHAMTandFiles []byte
+
+// CIDs of blocks in the embedded CAR fixture, named after the role the tests
+// below give them. The numbered chunk constants follow the order in which the
+// tests send them after the corresponding file root.
+const (
+	fixtureRootDir   = "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi" // lists basicDir and hamtDir
+	fixtureBasicDir  = "bafybeifdv255wmsrh75vcsrtkcwyktvewgihegeeyhhj2ju4lzt4lqfoze"
+	fixtureHAMTDir   = "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm" // hamt root
+	fixtureHAMTNode1 = "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm" // inner hamt node
+	fixtureHAMTNode2 = "bafybeihjydob4eq5j4m43whjgf5cgftthc42kjno3g24sa3wcw7vonbmfy" // inner hamt node
+
+	fixtureExampleA = "bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa" // file root
+	fixtureA1       = "bafkreie5noke3mb7hqxukzcy73nl23k6lxszxi5w3dtmuwz62wnvkpsscm"
+	fixtureA2       = "bafkreih4ephajybraj6wnxsbwjwa77fukurtpl7oj7t7pfq545duhot7cq"
+	fixtureA3       = "bafkreigu7buvm3cfunb35766dn7tmqyh2um62zcio63en2btvxuybgcpue"
+	fixtureA4       = "bafkreicll3huefkc3qnrzeony7zcfo7cr3nbx64hnxrqzsixpceg332fhe"
+	fixtureA5       = "bafkreifst3pqztuvj57lycamoi7z34b4emf7gawxs74nwrc2c7jncmpaqm"
+
+	fixtureExampleB = "bafybeid3trcauvcp7fxaai23gkz3qexmlfxnnejgwm57hdvre472dafvha" // file root
+	fixtureB1       = "bafkreihgbi345degbcyxaf5b3boiyiaxhnuxdysvqmbdyaop2swmhh3s3m"
+	fixtureB2       = "bafkreiaugmh5gal5rgiems6gslcdt2ixfncahintrmcqvrgxqamwtlrmz4"
+	fixtureB3       = "bafkreiaxwwb7der2qvmteymgtlj7ww7w5vc44phdxfnexog3vnuwdkxuea"
+	fixtureB4       = "bafkreic5zyan5rk4ccfum4d4mu3h5eqsllbudlj4texlzj6xdgxvldzngi"
+
+	fixtureExampleC = "bafkreidqhbqn5htm5qejxpb3hps7dookudo3nncfn6al6niqibi5lq6fee"
+	fixtureExampleD = "bafkreih2grj7p2bo5yk2guqazxfjzapv6hpm3mwrinv6s3cyayd72ke5he"
+)
+
+// carStep is one scripted upstream exchange: the request the fake CAR server
+// expects next and the blocks it answers with.
+type carStep struct {
+	// uri is compared with request.URL.Path, or with request.RequestURI when
+	// matchQuery is set.
+	uri        string
+	matchQuery bool
+	// blocks are the CIDs streamed back, in order, through sendBlocks.
+	blocks []string
+}
+
+// newCarStepServer starts a test server that answers the n-th request with the
+// n-th entry of steps and counts requests in *requestNum.
+//
+// Failures are reported with t.Errorf plus an error response instead of
+// t.Fatal: the handler runs on a server goroutine, and FailNow may only be
+// called from the goroutine running the test.
+func newCarStepServer(ctx context.Context, t *testing.T, requestNum *int, steps []carStep) *httptest.Server {
+	t.Helper()
+	return httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
+		*requestNum++
+		if *requestNum > len(steps) {
+			t.Errorf("unsupported request number %d (%s)", *requestNum, request.RequestURI)
+			http.Error(writer, "unsupported request number", http.StatusInternalServerError)
+			return
+		}
+
+		step := steps[*requestNum-1]
+		got := request.URL.Path
+		if step.matchQuery {
+			got = request.RequestURI
+		}
+		if got != step.uri {
+			t.Errorf("request %d: expected URI %s, got %s", *requestNum, step.uri, request.RequestURI)
+			http.Error(writer, "unexpected URI", http.StatusInternalServerError)
+			return
+		}
+
+		if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, step.blocks); err != nil {
+			t.Errorf("request %d: %v", *requestNum, err)
+		}
+	}))
+}
+
+// openFixtureFile opens the UnixFS file rooted at cidStr straight from the
+// embedded CAR fixture, bypassing the backend under test.
+func openFixtureFile(ctx context.Context, t *testing.T, cidStr string) files.File {
+	t.Helper()
+
+	robs, err := carbs.NewReadOnly(bytes.NewReader(dirWithMultiblockHAMTandFiles), nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	dsrv := merkledag.NewDAGService(blockservice.New(robs, offline.Exchange(robs)))
+
+	nd, err := dsrv.Get(ctx, cid.MustParse(cidStr))
+	if err != nil {
+		t.Fatal(err)
+	}
+	fnd, err := unixfile.NewUnixfsFile(ctx, dsrv, nd)
+	if err != nil {
+		t.Fatal(err)
+	}
+	f, ok := fnd.(files.File)
+	if !ok {
+		t.Fatalf("fixture node %s is not a file", cidStr)
+	}
+	return f
+}
+
+// readFixtureFile returns the full contents of the fixture file rooted at cidStr.
+func readFixtureFile(ctx context.Context, t *testing.T, cidStr string) []byte {
+	t.Helper()
+	data, err := io.ReadAll(openFixtureFile(ctx, t, cidStr))
+	if err != nil {
+		t.Fatal(err)
+	}
+	return data
+}
+
+// requireNextEntryName advances dirIter and fails the test unless the next
+// entry exists and is called expectedName.
+func requireNextEntryName(t *testing.T, dirIter files.DirIterator, expectedName string) {
+	t.Helper()
+	if !dirIter.Next() {
+		// Err may be nil when the iterator is simply exhausted, so format it
+		// with %v rather than calling Error() on it.
+		t.Fatalf("expected entry %s, but iteration stopped (err: %v)", expectedName, dirIter.Err())
+	}
+	if expectedName != dirIter.Name() {
+		t.Fatalf("expected %s, got %s", expectedName, dirIter.Name())
+	}
+}
+
+// requireFileEqualsFixture fails the test unless receivedFile has the same
+// bytes as the fixture file rooted at expectedCidString.
+func requireFileEqualsFixture(ctx context.Context, t *testing.T, expectedCidString string, receivedFile files.File) {
+	t.Helper()
+
+	receivedFileData, err := io.ReadAll(receivedFile)
+	if err != nil {
+		t.Fatal(err)
+	}
+	expectedFileData := readFixtureFile(ctx, t, expectedCidString)
+	if !bytes.Equal(expectedFileData, receivedFileData) {
+		t.Fatalf("expected %s, got %s", string(expectedFileData), string(receivedFileData))
+	}
+}
+
+func TestTar(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	requestNum := 0
+	s := newCarStepServer(ctx, t, &requestNum, []carStep{
+		{
+			// Expect the full request, but return one that terminates in the middle of the HAMT
+			uri: "/ipfs/" + fixtureRootDir,
+			blocks: []string{
+				fixtureRootDir,
+				fixtureBasicDir,
+				fixtureExampleA, fixtureA1, fixtureA2, fixtureA3, fixtureA4, fixtureA5,
+				fixtureExampleB, fixtureB1, fixtureB2, fixtureB3, fixtureB4,
+				fixtureHAMTDir,
+			},
+		},
+		{
+			// Expect a request for the HAMT only and give it
+			// Note: this is an implementation detail, it could be in the future that we request less or more data
+			// (e.g. requesting the blocks to fill out the HAMT, or with spec changes asking for HAMT ranges, or asking for the HAMT and its children)
+			uri:    "/ipfs/" + fixtureHAMTDir,
+			blocks: []string{fixtureHAMTDir, fixtureHAMTNode1, fixtureHAMTNode2},
+		},
+		{
+			// Starting here expect requests for each file in the directory
+			uri:    "/ipfs/" + fixtureExampleB,
+			blocks: []string{fixtureExampleB, fixtureB1, fixtureB2, fixtureB3, fixtureB4},
+		},
+		{
+			// Expect a request for one of the directory items and give it
+			uri:    "/ipfs/" + fixtureExampleD,
+			blocks: []string{fixtureExampleD},
+		},
+		{
+			// Expect a request for one of the directory items and give it
+			uri:    "/ipfs/" + fixtureExampleC,
+			blocks: []string{fixtureExampleC},
+		},
+		{
+			// Expect a request for one of the directory items and give part of it
+			uri:    "/ipfs/" + fixtureExampleA,
+			blocks: []string{fixtureExampleA, fixtureA1, fixtureA2},
+		},
+		{
+			// Expect a partial request for one of the directory items and give it
+			uri:    "/ipfs/" + fixtureExampleA,
+			blocks: []string{fixtureExampleA, fixtureA3, fixtureA4, fixtureA5},
+		},
+	})
+	defer s.Close()
+
+	bs, err := newRemoteCarFetcher([]string{s.URL})
+	require.NoError(t, err)
+	backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	p := path.FromCid(cid.MustParse(fixtureRootDir))
+	_, nd, err := backend.GetAll(ctx, p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	rootDirIter := nd.(files.Directory).Entries()
+	requireNextEntryName(t, rootDirIter, "basicDir")
+
+	basicDirIter := rootDirIter.Node().(files.Directory).Entries()
+	requireNextEntryName(t, basicDirIter, "exampleA")
+	requireFileEqualsFixture(ctx, t, fixtureExampleA, basicDirIter.Node().(files.File))
+
+	requireNextEntryName(t, basicDirIter, "exampleB")
+	requireFileEqualsFixture(ctx, t, fixtureExampleB, basicDirIter.Node().(files.File))
+
+	requireNextEntryName(t, rootDirIter, "hamtDir")
+	hamtDirIter := rootDirIter.Node().(files.Directory).Entries()
+
+	requireNextEntryName(t, hamtDirIter, "exampleB")
+	requireFileEqualsFixture(ctx, t, fixtureExampleB, hamtDirIter.Node().(files.File))
+
+	requireNextEntryName(t, hamtDirIter, "exampleD-hamt-collide-exampleB-seed-364")
+	requireFileEqualsFixture(ctx, t, fixtureExampleD, hamtDirIter.Node().(files.File))
+
+	requireNextEntryName(t, hamtDirIter, "exampleC-hamt-collide-exampleA-seed-52")
+	requireFileEqualsFixture(ctx, t, fixtureExampleC, hamtDirIter.Node().(files.File))
+
+	requireNextEntryName(t, hamtDirIter, "exampleA")
+	requireFileEqualsFixture(ctx, t, fixtureExampleA, hamtDirIter.Node().(files.File))
+
+	if rootDirIter.Next() || basicDirIter.Next() || hamtDirIter.Next() {
+		t.Fatal("expected directories to be fully enumerated")
+	}
+}
+
+func TestTarAtEndOfPath(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	requestNum := 0
+	s := newCarStepServer(ctx, t, &requestNum, []carStep{
+		{
+			// Expect the full request, but return one that terminates in the middle of the path
+			uri:    "/ipfs/" + fixtureRootDir + "/hamtDir",
+			blocks: []string{fixtureRootDir},
+		},
+		{
+			// Expect the full request and give the path and the children from one of the HAMT nodes but not the other
+			// Note: this is an implementation detail, it could be in the future that we request less or more data
+			uri: "/ipfs/" + fixtureRootDir + "/hamtDir",
+			blocks: []string{
+				fixtureRootDir,
+				fixtureHAMTDir,
+				fixtureHAMTNode1,
+				fixtureExampleB, fixtureB1, fixtureB2, fixtureB3, fixtureB4,
+				fixtureExampleD,
+			},
+		},
+		{
+			// Expect a request for the HAMT only and give it
+			// Note: this is an implementation detail, it could be in the future that we request less or more data
+			// (e.g. requesting the blocks to fill out the HAMT, or with spec changes asking for HAMT ranges, or asking for the HAMT and its children)
+			uri:    "/ipfs/" + fixtureHAMTDir,
+			blocks: []string{fixtureHAMTDir, fixtureHAMTNode1, fixtureHAMTNode2},
+		},
+		{
+			// Expect a request for one of the directory items and give it
+			uri:    "/ipfs/" + fixtureExampleC,
+			blocks: []string{fixtureExampleC},
+		},
+		{
+			// Expect a request for the multiblock file in the directory and give some of it
+			uri:    "/ipfs/" + fixtureExampleA,
+			blocks: []string{fixtureExampleA, fixtureA1, fixtureA2, fixtureA3},
+		},
+		{
+			// Expect a request for the rest of the multiblock file in the directory and give it
+			uri:        "/ipfs/" + fixtureExampleA + "?format=car&dag-scope=entity&entity-bytes=768:*",
+			matchQuery: true,
+			blocks:     []string{fixtureExampleA, fixtureA4, fixtureA5},
+		},
+	})
+	defer s.Close()
+
+	bs, err := newRemoteCarFetcher([]string{s.URL})
+	require.NoError(t, err)
+	backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	p, err := path.Join(path.FromCid(cid.MustParse(fixtureRootDir)), "hamtDir")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	imPath, err := path.NewImmutablePath(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, nd, err := backend.GetAll(ctx, imPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	hamtDirIter := nd.(files.Directory).Entries()
+
+	requireNextEntryName(t, hamtDirIter, "exampleB")
+	requireFileEqualsFixture(ctx, t, fixtureExampleB, hamtDirIter.Node().(files.File))
+
+	requireNextEntryName(t, hamtDirIter, "exampleD-hamt-collide-exampleB-seed-364")
+	requireFileEqualsFixture(ctx, t, fixtureExampleD, hamtDirIter.Node().(files.File))
+
+	requireNextEntryName(t, hamtDirIter, "exampleC-hamt-collide-exampleA-seed-52")
+	requireFileEqualsFixture(ctx, t, fixtureExampleC, hamtDirIter.Node().(files.File))
+
+	requireNextEntryName(t, hamtDirIter, "exampleA")
+	requireFileEqualsFixture(ctx, t, fixtureExampleA, hamtDirIter.Node().(files.File))
+
+	if hamtDirIter.Next() {
+		t.Fatal("expected directories to be fully enumerated")
+	}
+}
+
+// sendBlocks streams the blocks named in cidStrList, in order, from carFixture
+// to writer as a CARv1 whose header lists the placeholder root "bafkqaaa".
+// Duplicate puts are allowed so a test may send the same block twice.
+func sendBlocks(ctx context.Context, carFixture []byte, writer io.Writer, cidStrList []string) error {
+	rd, err := storage.OpenReadable(bytes.NewReader(carFixture))
+	if err != nil {
+		return err
+	}
+
+	cw, err := storage.NewWritable(writer, []cid.Cid{cid.MustParse("bafkqaaa")}, carv2.WriteAsCarV1(true), carv2.AllowDuplicatePuts(true))
+	if err != nil {
+		return err
+	}
+
+	for _, s := range cidStrList {
+		c := cid.MustParse(s)
+		blockData, err := rd.Get(ctx, c.KeyString())
+		if err != nil {
+			return err
+		}
+
+		if err := cw.Put(ctx, c.KeyString(), blockData); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func TestGetFile(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	fullPath := "/ipfs/" + fixtureRootDir + "/hamtDir/exampleA"
+
+	requestNum := 0
+	s := newCarStepServer(ctx, t, &requestNum, []carStep{
+		{
+			// Expect the full request, but return one that terminates in the middle of the path
+			uri:    fullPath,
+			blocks: []string{fixtureRootDir},
+		},
+		{
+			// Expect the full request, but return one that terminates in the middle of the file
+			// Note: this is an implementation detail, it could be in the future that we request less data (e.g. partial path)
+			uri:    fullPath,
+			blocks: []string{fixtureRootDir, fixtureHAMTDir},
+		},
+		{
+			// Expect the full request and return the path and most of the file
+			// Note: this is an implementation detail, it could be in the future that we request less data (e.g. partial path and file range)
+			uri: fullPath,
+			blocks: []string{
+				fixtureRootDir,
+				fixtureHAMTDir,
+				fixtureHAMTNode2, // inner hamt
+				fixtureExampleA,  // file root
+				fixtureA1,        // file chunks start here
+				fixtureA2,
+			},
+		},
+		{
+			// Expect a request for the remainder of the file
+			// Note: this is an implementation detail, it could be that the requester really asks for more information
+			uri: "/ipfs/" + fixtureExampleA,
+			blocks: []string{
+				fixtureExampleA,
+				fixtureA3, // middle of the file starts here
+				fixtureA4,
+				fixtureA5,
+			},
+		},
+	})
+	defer s.Close()
+
+	bs, err := newRemoteCarFetcher([]string{s.URL})
+	require.NoError(t, err)
+	backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend))
+	defer trustedGatewayServer.Close()
+
+	resp, err := http.Get(trustedGatewayServer.URL + fullPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer resp.Body.Close()
+
+	data, err := io.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expectedFileData := readFixtureFile(ctx, t, fixtureExampleA)
+	if !bytes.Equal(data, expectedFileData) {
+		t.Fatalf("expected %s, got %s", string(expectedFileData), string(data))
+	}
+}
+
+func TestGetFileRangeRequest(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	filePath := "/ipfs/" + fixtureExampleA
+
+	requestNum := 0
+	s := newCarStepServer(ctx, t, &requestNum, []carStep{
+		{
+			// Expect the full request, but return one that terminates at the root block
+			uri:    filePath,
+			blocks: []string{fixtureExampleA},
+		},
+		{
+			// Expect the full request, and return the whole file which should be invalid
+			uri:    filePath,
+			blocks: []string{fixtureExampleA, fixtureA1, fixtureA2, fixtureA3, fixtureA4, fixtureA5},
+		},
+		{
+			// Expect the full request and return the first block
+			uri:    filePath,
+			blocks: []string{fixtureExampleA, fixtureA2},
+		},
+		{
+			// Expect a request for the remainder of the file
+			// Note: this is an implementation detail, it could be that the requester really asks for more information
+			uri:    filePath,
+			blocks: []string{fixtureExampleA, fixtureA3, fixtureA4},
+		},
+	})
+	defer s.Close()
+
+	bs, err := newRemoteCarFetcher([]string{s.URL})
+	require.NoError(t, err)
+	backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend))
+	defer trustedGatewayServer.Close()
+
+	req, err := http.NewRequestWithContext(ctx, "GET", trustedGatewayServer.URL+filePath, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	startIndex := 256
+	endIndex := 750
+	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", startIndex, endIndex))
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer resp.Body.Close()
+
+	data, err := io.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Read the same inclusive byte range directly from the fixture.
+	f := openFixtureFile(ctx, t, fixtureExampleA)
+	if _, err := f.Seek(int64(startIndex), io.SeekStart); err != nil {
+		t.Fatal(err)
+	}
+	expectedFileData, err := io.ReadAll(io.LimitReader(f, int64(endIndex)-int64(startIndex)+1))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !bytes.Equal(data, expectedFileData) {
+		t.Fatalf("expected %s, got %s", string(expectedFileData), string(data))
+	}
+
+	if requestNum != 4 {
+		t.Fatalf("expected exactly 4 requests, got %d", requestNum)
+	}
+}
+
+func TestGetFileWithBadBlockReturned(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	filePath := "/ipfs/" + fixtureExampleA
+
+	requestNum := 0
+	s := newCarStepServer(ctx, t, &requestNum, []carStep{
+		{
+			// Expect the full request, but return one that terminates at the root block
+			uri:    filePath,
+			blocks: []string{fixtureExampleA},
+		},
+		{
+			// Expect the full request, but return a totally unrelated block
+			uri:    filePath,
+			blocks: []string{fixtureRootDir}, // the root dir, not part of the requested file
+		},
+		{
+			// Expect the full request and return most of the file
+			// Note: this is an implementation detail, it could be in the future that we request less data (e.g. partial path and file range)
+			uri: filePath,
+			blocks: []string{
+				fixtureExampleA,
+				fixtureA1, // file chunks start here
+				fixtureA2,
+			},
+		},
+		{
+			// Expect a request for the remainder of the file
+			// Note: this is an implementation detail, it could be that the requester really asks for more information
+			uri: filePath,
+			blocks: []string{
+				fixtureExampleA,
+				fixtureA3, // middle of the file starts here
+				fixtureA4,
+				fixtureA5,
+			},
+		},
+	})
+	defer s.Close()
+
+	bs, err := newRemoteCarFetcher([]string{s.URL})
+	require.NoError(t, err)
+	backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend))
+	defer trustedGatewayServer.Close()
+
+	resp, err := http.Get(trustedGatewayServer.URL + filePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer resp.Body.Close()
+
+	data, err := io.ReadAll(resp.Body)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	expectedFileData := readFixtureFile(ctx, t, fixtureExampleA)
+	if !bytes.Equal(data, expectedFileData) {
+		t.Fatalf("expected %s, got %s", string(expectedFileData), string(data))
+	}
+}
+
+// func TestGetHAMTDirectory(t *testing.T) {
+// 	ctx, cancel := context.WithCancel(context.Background())
+// 	defer cancel()
+
+// 	requestNum := 0
+// 	s := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
+// 		requestNum++
+// 		switch requestNum {
+// 		case 1:
+// 			// Expect the full request, but return one that terminates in the middle of the path
+// 			expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/"
+// 			if request.URL.Path != expectedUri {
+// 				panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI))
+// 			}
+
+// 			if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{
+// 				"bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir
+// 			}); err != nil {
+// 				panic(err)
+// 			}
+// 		case 2:
+// 			// Expect the full request, but return one that terminates in the middle of the HAMT
+// 			// Note: this is an implementation detail, it could be in the future that we request less data (e.g.
partial path) +// expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/" +// if request.URL.Path != expectedUri { +// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) +// } + +// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ +// "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir +// "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root +// "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here +// }); err != nil { +// panic(err) +// } +// case 3: +// // Expect a request for a non-existent index.html file +// // Note: this is an implementation detail related to the directory request above +// // Note: the order of cases 3 and 4 here are implementation specific as well +// expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/index.html" +// if request.URL.Path != expectedUri { +// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) +// } + +// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ +// "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir +// "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root +// "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here +// }); err != nil { +// panic(err) +// } +// case 4: +// // Expect a request for the full HAMT and return it +// // Note: this is an implementation detail, it could be in the future that we request more or less data +// // (e.g. ask for the full path, ask for index.html first, make a spec change to allow asking for index.html with a fallback to the directory, etc.) 
+// expectedUri := "/ipfs/bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm" +// if request.URL.Path != expectedUri { +// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) +// } + +// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ +// "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root +// "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here +// "bafybeihjydob4eq5j4m43whjgf5cgftthc42kjno3g24sa3wcw7vonbmfy", +// }); err != nil { +// panic(err) +// } + +// default: +// t.Fatal("unsupported request number") +// } +// })) +// defer s.Close() + +// bs, err := newRemoteCarFetcher([]string{s.URL}) +// require.NoError(t, err) +// backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) +// if err != nil { +// t.Fatal(err) +// } + +// trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) +// defer trustedGatewayServer.Close() + +// resp, err := http.Get(trustedGatewayServer.URL + "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/") +// if err != nil { +// t.Fatal(err) +// } + +// data, err := io.ReadAll(resp.Body) +// if err != nil { +// t.Fatal(err) +// } + +// if strings.Count(string(data), ">exampleD-hamt-collide-exampleB-seed-364<") == 1 && +// strings.Count(string(data), ">exampleC-hamt-collide-exampleA-seed-52<") == 1 && +// strings.Count(string(data), ">exampleA<") == 1 && +// strings.Count(string(data), ">exampleB<") == 1 { +// return +// } +// t.Fatal("directory does not contain the expected links") +// } + +func TestGetCAR(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + requestNum := 0 + s := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + requestNum++ + switch requestNum { + case 1: + // Expect the full 
request, but return one that terminates in the middle of the path + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + }); err != nil { + panic(err) + } + case 2: + // Expect the full request, but return one that terminates in the middle of the HAMT + // Note: this is an implementation detail, it could be in the future that we request less data (e.g. partial path) + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root + }); err != nil { + panic(err) + } + + case 3: + // Expect the full request and return the full HAMT + // Note: this is an implementation detail, it could be in the future that we request less data (e.g. 
requesting the blocks to fill out the HAMT, or with spec changes asking for HAMT ranges) + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + "bafybeifdv255wmsrh75vcsrtkcwyktvewgihegeeyhhj2ju4lzt4lqfoze", // basicDir + "bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa", // exampleA + "bafkreie5noke3mb7hqxukzcy73nl23k6lxszxi5w3dtmuwz62wnvkpsscm", + "bafkreih4ephajybraj6wnxsbwjwa77fukurtpl7oj7t7pfq545duhot7cq", + "bafkreigu7buvm3cfunb35766dn7tmqyh2um62zcio63en2btvxuybgcpue", + "bafkreicll3huefkc3qnrzeony7zcfo7cr3nbx64hnxrqzsixpceg332fhe", + "bafkreifst3pqztuvj57lycamoi7z34b4emf7gawxs74nwrc2c7jncmpaqm", + "bafybeid3trcauvcp7fxaai23gkz3qexmlfxnnejgwm57hdvre472dafvha", // exampleB + "bafkreihgbi345degbcyxaf5b3boiyiaxhnuxdysvqmbdyaop2swmhh3s3m", + "bafkreiaugmh5gal5rgiems6gslcdt2ixfncahintrmcqvrgxqamwtlrmz4", + "bafkreiaxwwb7der2qvmteymgtlj7ww7w5vc44phdxfnexog3vnuwdkxuea", + "bafkreic5zyan5rk4ccfum4d4mu3h5eqsllbudlj4texlzj6xdgxvldzngi", + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamtDir + "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", + "bafybeid3trcauvcp7fxaai23gkz3qexmlfxnnejgwm57hdvre472dafvha", // exampleB + "bafkreihgbi345degbcyxaf5b3boiyiaxhnuxdysvqmbdyaop2swmhh3s3m", + "bafkreiaugmh5gal5rgiems6gslcdt2ixfncahintrmcqvrgxqamwtlrmz4", + "bafkreiaxwwb7der2qvmteymgtlj7ww7w5vc44phdxfnexog3vnuwdkxuea", + "bafkreic5zyan5rk4ccfum4d4mu3h5eqsllbudlj4texlzj6xdgxvldzngi", + "bafkreih2grj7p2bo5yk2guqazxfjzapv6hpm3mwrinv6s3cyayd72ke5he", // exampleD + "bafybeihjydob4eq5j4m43whjgf5cgftthc42kjno3g24sa3wcw7vonbmfy", + "bafkreidqhbqn5htm5qejxpb3hps7dookudo3nncfn6al6niqibi5lq6fee", // exampleC + 
"bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa", // exampleA + "bafkreie5noke3mb7hqxukzcy73nl23k6lxszxi5w3dtmuwz62wnvkpsscm", + "bafkreih4ephajybraj6wnxsbwjwa77fukurtpl7oj7t7pfq545duhot7cq", + "bafkreigu7buvm3cfunb35766dn7tmqyh2um62zcio63en2btvxuybgcpue", + "bafkreicll3huefkc3qnrzeony7zcfo7cr3nbx64hnxrqzsixpceg332fhe", + "bafkreifst3pqztuvj57lycamoi7z34b4emf7gawxs74nwrc2c7jncmpaqm", + }); err != nil { + panic(err) + } + + default: + t.Fatal("unsupported request number") + } + })) + defer s.Close() + + bs, err := newRemoteCarFetcher([]string{s.URL}) + require.NoError(t, err) + backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) + if err != nil { + t.Fatal(err) + } + + p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) + var carReader io.Reader + _, carReader, err = backend.GetCAR(ctx, p, CarParams{Scope: DagScopeAll}) + if err != nil { + t.Fatal(err) + } + + carBytes, err := io.ReadAll(carReader) + if err != nil { + t.Fatal(err) + } + carReader = bytes.NewReader(carBytes) + + blkReader, err := carv2.NewBlockReader(carReader) + if err != nil { + t.Fatal(err) + } + + responseCarBlock := []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + "bafybeifdv255wmsrh75vcsrtkcwyktvewgihegeeyhhj2ju4lzt4lqfoze", // basicDir + "bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa", // exampleA + "bafkreie5noke3mb7hqxukzcy73nl23k6lxszxi5w3dtmuwz62wnvkpsscm", + "bafkreih4ephajybraj6wnxsbwjwa77fukurtpl7oj7t7pfq545duhot7cq", + "bafkreigu7buvm3cfunb35766dn7tmqyh2um62zcio63en2btvxuybgcpue", + "bafkreicll3huefkc3qnrzeony7zcfo7cr3nbx64hnxrqzsixpceg332fhe", + "bafkreifst3pqztuvj57lycamoi7z34b4emf7gawxs74nwrc2c7jncmpaqm", + "bafybeid3trcauvcp7fxaai23gkz3qexmlfxnnejgwm57hdvre472dafvha", // exampleB + "bafkreihgbi345degbcyxaf5b3boiyiaxhnuxdysvqmbdyaop2swmhh3s3m", + "bafkreiaugmh5gal5rgiems6gslcdt2ixfncahintrmcqvrgxqamwtlrmz4", 
+ "bafkreiaxwwb7der2qvmteymgtlj7ww7w5vc44phdxfnexog3vnuwdkxuea", + "bafkreic5zyan5rk4ccfum4d4mu3h5eqsllbudlj4texlzj6xdgxvldzngi", + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamtDir + "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", + "bafkreih2grj7p2bo5yk2guqazxfjzapv6hpm3mwrinv6s3cyayd72ke5he", // exampleD + "bafybeihjydob4eq5j4m43whjgf5cgftthc42kjno3g24sa3wcw7vonbmfy", + "bafkreidqhbqn5htm5qejxpb3hps7dookudo3nncfn6al6niqibi5lq6fee", // exampleC + } + + for i := 0; i < len(responseCarBlock); i++ { + expectedCid := cid.MustParse(responseCarBlock[i]) + blk, err := blkReader.Next() + if err != nil { + t.Fatal(err) + } + if !blk.Cid().Equals(expectedCid) { + t.Fatalf("expected cid %s, got %s", expectedCid, blk.Cid()) + } + } + _, err = blkReader.Next() + if !errors.Is(err, io.EOF) { + t.Fatal("expected an EOF") + } +} + +func TestPassthroughErrors(t *testing.T) { + t.Run("PathTraversalError", func(t *testing.T) { + pathTraversalTest := func(t *testing.T, traversal func(ctx context.Context, p path.ImmutablePath, backend *GraphGateway) error) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + var requestNum int + s := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + requestNum++ + switch requestNum { + case 1: + // Expect the full request, but return one that terminates in the middle of the path + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + }); err != nil { + panic(err) + } + case 2: + // Expect the full request, but return one that terminates in the middle of the file + // Note: this is an implementation detail, it could be 
in the future that we request less data (e.g. partial path) + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root + }); err != nil { + panic(err) + } + default: + t.Fatal("unsupported request number") + } + })) + defer s.Close() + + bs, err := newRemoteCarFetcher([]string{s.URL}) + require.NoError(t, err) + + p, err := path.NewPath("/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA") + if err != nil { + t.Fatal(err) + } + + imPath, err := path.NewImmutablePath(p) + if err != nil { + t.Fatal(err) + } + + bogusErr := NewErrorStatusCode(fmt.Errorf("this is a test error"), 418) + + clientRequestNum := 0 + backend, err := NewGraphGatewayBackend(&retryFetcher{ + inner: &fetcherWrapper{fn: func(ctx context.Context, path string, cb DataCallback) error { + clientRequestNum++ + if clientRequestNum > 2 { + return bogusErr + } + return bs.(CarFetcher).Fetch(ctx, path, cb) + }}, + allowedRetries: 3, retriesRemaining: 3}) + if err != nil { + t.Fatal(err) + } + + err = traversal(ctx, imPath, backend) + parsedErr := &ErrorStatusCode{} + if errors.As(err, &parsedErr) { + if parsedErr.StatusCode == bogusErr.StatusCode { + return + } + } + t.Fatal("error did not pass through") + } + t.Run("Block", func(t *testing.T) { + pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphGateway) error { + _, _, err := backend.GetBlock(ctx, p) + return err + }) + }) + t.Run("File", func(t *testing.T) { + pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphGateway) error { + _, _, err := backend.Get(ctx, p) + 
return err + }) + }) + }) +} + +type fetcherWrapper struct { + fn func(ctx context.Context, path string, cb DataCallback) error +} + +func (w *fetcherWrapper) Fetch(ctx context.Context, path string, cb DataCallback) error { + return w.fn(ctx, path, cb) +} + +type retryFetcher struct { + inner CarFetcher + allowedRetries int + retriesRemaining int +} + +func (r *retryFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { + err := r.inner.Fetch(ctx, path, cb) + if err == nil { + return nil + } + + if r.retriesRemaining > 0 { + r.retriesRemaining-- + } else { + return fmt.Errorf("retry fetcher out of retries: %w", err) + } + + switch t := err.(type) { + case ErrPartialResponse: + if len(t.StillNeed) > 1 { + panic("only a single request at a time supported") + } + + // Mimicking the Caboose logic reset the number of retries for partials + r.retriesRemaining = r.allowedRetries + + return r.Fetch(ctx, t.StillNeed[0], cb) + default: + return r.Fetch(ctx, path, cb) + } +} + +var _ CarFetcher = (*retryFetcher)(nil) diff --git a/gateway/backend_graph_traversal.go b/gateway/backend_graph_traversal.go new file mode 100644 index 000000000..8f3d46232 --- /dev/null +++ b/gateway/backend_graph_traversal.go @@ -0,0 +1,323 @@ +package gateway + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "sync" + "time" + + bsfetcher "github.com/ipfs/boxo/fetcher/impl/blockservice" + "github.com/ipfs/boxo/verifcid" + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/ipfs/go-unixfsnode" + "github.com/ipfs/go-unixfsnode/data" + "github.com/ipld/go-car" + dagpb "github.com/ipld/go-codec-dagpb" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/linking" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/node/basicnode" + "github.com/ipld/go-ipld-prime/schema" + "github.com/ipld/go-ipld-prime/traversal" + 
"github.com/ipld/go-ipld-prime/traversal/selector" + selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" + "github.com/multiformats/go-multihash" +) + +type getBlock func(ctx context.Context, cid cid.Cid) (blocks.Block, error) + +// ErrInvalidResponse can be returned from a DataCallback to indicate that the data provided for the +// requested resource was explicitly 'incorrect' - that blocks not in the requested dag, or non-car-conforming +// data was returned. +type ErrInvalidResponse struct { + Message string +} + +func (e ErrInvalidResponse) Error() string { + return e.Message +} + +// var ErrNilBlock = caboose.ErrInvalidResponse{Message: "received a nil block with no error"} +var ErrNilBlock = ErrInvalidResponse{Message: "received a nil block with no error"} + +func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *GraphGatewayMetrics) (getBlock, error) { + cr, err := car.NewCarReaderWithOptions(reader, car.WithErrorOnEmptyRoots(false)) + if err != nil { + return nil, err + } + + cbCtx, cncl := context.WithCancel(ctx) + + type blockRead struct { + block blocks.Block + err error + } + + blkCh := make(chan blockRead, 1) + go func() { + defer cncl() + defer close(blkCh) + for { + blk, rdErr := cr.Next() + select { + case blkCh <- blockRead{blk, rdErr}: + if rdErr != nil { + cncl() + } + case <-cbCtx.Done(): + return + } + } + }() + + isFirstBlock := true + mx := sync.Mutex{} + + return func(ctx context.Context, c cid.Cid) (blocks.Block, error) { + mx.Lock() + defer mx.Unlock() + if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, c); err != nil { + return nil, err + } + + isId, bdata := extractIdentityMultihashCIDContents(c) + if isId { + return blocks.NewBlockWithCid(bdata, c) + } + + // initially set a higher timeout here so that if there's an initial timeout error we get it from the car reader. 
+ var t *time.Timer + if isFirstBlock { + t = time.NewTimer(GetBlockTimeout * 2) + } else { + t = time.NewTimer(GetBlockTimeout) + } + var blkRead blockRead + var ok bool + select { + case blkRead, ok = <-blkCh: + if !t.Stop() { + <-t.C + } + t.Reset(GetBlockTimeout) + case <-t.C: + return nil, ErrGatewayTimeout + } + if !ok || blkRead.err != nil { + if !ok || errors.Is(blkRead.err, io.EOF) { + return nil, io.ErrUnexpectedEOF + } + return nil, GatewayError(blkRead.err) + } + if blkRead.block != nil { + metrics.carBlocksFetchedMetric.Inc() + if !blkRead.block.Cid().Equals(c) { + return nil, ErrInvalidResponse{Message: fmt.Sprintf("received block with cid %s, expected %s", blkRead.block.Cid(), c)} + } + return blkRead.block, nil + } + return nil, ErrNilBlock + }, nil +} + +// extractIdentityMultihashCIDContents will check if a given CID has an identity multihash and if so return true and +// the bytes encoded in the digest, otherwise will return false. +// Taken from https://github.com/ipfs/boxo/blob/b96767cc0971ca279feb36e7844e527a774309ab/blockstore/idstore.go#L30 +func extractIdentityMultihashCIDContents(k cid.Cid) (bool, []byte) { + // Pre-check by calling Prefix(), this much faster than extracting the hash. 
+ if k.Prefix().MhType != multihash.IDENTITY { + return false, nil + } + + dmh, err := multihash.Decode(k.Hash()) + if err != nil || dmh.Code != multihash.IDENTITY { + return false, nil + } + return true, dmh.Digest +} + +func getLinksystem(fn getBlock) *ipld.LinkSystem { + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = func(linkContext linking.LinkContext, link datamodel.Link) (io.Reader, error) { + c := link.(cidlink.Link).Cid + blk, err := fn(linkContext.Ctx, c) + if err != nil { + return nil, err + } + return bytes.NewReader(blk.RawData()), nil + } + lsys.TrustedStorage = true + unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) + return &lsys +} + +// walkGatewaySimpleSelector2 walks the subgraph described by the path and terminal element parameters +func walkGatewaySimpleSelector2(ctx context.Context, terminalBlk blocks.Block, dagScope DagScope, entityRange *DagByteRange, lsys *ipld.LinkSystem) error { + lctx := ipld.LinkContext{Ctx: ctx} + var err error + + // If the scope is the block, we only need the root block of the last element of the path, which we have. + if dagScope == DagScopeBlock { + return nil + } + + // decode the terminal block into a node + pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { + if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { + return tlnkNd.LinkTargetNodePrototype(), nil + } + return basicnode.Prototype.Any, nil + }) + + pathTerminalCidLink := cidlink.Link{Cid: terminalBlk.Cid()} + np, err := pc(pathTerminalCidLink, lctx) + if err != nil { + return err + } + + decoder, err := lsys.DecoderChooser(pathTerminalCidLink) + if err != nil { + return err + } + nb := np.NewBuilder() + blockData := terminalBlk.RawData() + if err := decoder(nb, bytes.NewReader(blockData)); err != nil { + return err + } + lastCidNode := nb.Build() + + // TODO: Evaluate: + // Does it matter that we're ignoring the "remainder" portion of the traversal in GetCAR? 
+ // Does it matter that we're using a linksystem with the UnixFS reifier for dagscope=all? + + // If we're asking for everything then give it + if dagScope == DagScopeAll { + sel, err := selector.ParseSelector(selectorparse.CommonSelector_ExploreAllRecursively) + if err != nil { + return err + } + + progress := traversal.Progress{ + Cfg: &traversal.Config{ + Ctx: ctx, + LinkSystem: *lsys, + LinkTargetNodePrototypeChooser: bsfetcher.DefaultPrototypeChooser, + LinkVisitOnlyOnce: false, // Despite being safe for the "all" selector we do this walk anyway since this is how we will be receiving the blocks + }, + } + + if err := progress.WalkMatching(lastCidNode, sel, func(progress traversal.Progress, node datamodel.Node) error { + return nil + }); err != nil { + return err + } + return nil + } + + // From now on, dag-scope=entity! + // Since we need more of the graph load it to figure out what we have + // This includes determining if the terminal node is UnixFS or not + if pbn, ok := lastCidNode.(dagpb.PBNode); !ok { + // If it's not valid dag-pb then we're done + return nil + } else if !pbn.FieldData().Exists() { + // If it's not valid UnixFS then we're done + return nil + } else if unixfsFieldData, decodeErr := data.DecodeUnixFSData(pbn.Data.Must().Bytes()); decodeErr != nil { + // If it's not valid dag-pb and UnixFS then we're done + return nil + } else { + switch unixfsFieldData.FieldDataType().Int() { + case data.Data_Directory, data.Data_Symlink: + // These types are non-recursive so we're done + return nil + case data.Data_Raw, data.Data_Metadata: + // TODO: for now, we decided to return nil here. The different implementations are inconsistent + // and UnixFS is not properly specified: https://github.com/ipfs/specs/issues/316. + // - Is Data_Raw different from Data_File? + // - Data_Metadata is handled differently in boxo/ipld/unixfs and go-unixfsnode. 
+ return nil + case data.Data_HAMTShard: + // Return all elements in the map + _, err := lsys.KnownReifiers["unixfs-preload"](lctx, lastCidNode, lsys) + if err != nil { + return err + } + return nil + case data.Data_File: + nd, err := unixfsnode.Reify(lctx, lastCidNode, lsys) + if err != nil { + return err + } + + fnd, ok := nd.(datamodel.LargeBytesNode) + if !ok { + return fmt.Errorf("could not process file since it did not present as large bytes") + } + f, err := fnd.AsLargeBytes() + if err != nil { + return err + } + + // Get the entity range. If it's empty, assume the defaults (whole file). + effectiveRange := entityRange + if effectiveRange == nil { + effectiveRange = &DagByteRange{ + From: 0, + } + } + + from := effectiveRange.From + + // If we're starting to read based on the end of the file, find out where that is. + var fileLength int64 + foundFileLength := false + if effectiveRange.From < 0 { + fileLength, err = f.Seek(0, io.SeekEnd) + if err != nil { + return err + } + from = fileLength + effectiveRange.From + foundFileLength = true + } + + // If we're reading until the end of the file then do it + if effectiveRange.To == nil { + if _, err := f.Seek(from, io.SeekStart); err != nil { + return err + } + _, err = io.Copy(io.Discard, f) + return err + } + + to := *effectiveRange.To + if (*effectiveRange.To) < 0 && !foundFileLength { + fileLength, err = f.Seek(0, io.SeekEnd) + if err != nil { + return err + } + to = fileLength + *effectiveRange.To + foundFileLength = true + } + + numToRead := 1 + to - from + if numToRead < 0 { + return fmt.Errorf("tried to read less than zero bytes") + } + + if _, err := f.Seek(from, io.SeekStart); err != nil { + return err + } + _, err = io.CopyN(io.Discard, f, numToRead) + return err + default: + // Not a supported type, so we're done + return nil + } + } +} diff --git a/gateway/backend_graph_utils.go b/gateway/backend_graph_utils.go new file mode 100644 index 000000000..aac8df62b --- /dev/null +++ 
b/gateway/backend_graph_utils.go @@ -0,0 +1,132 @@ +package gateway + +import ( + "context" + "errors" + "fmt" + "io" + "math/rand" + "net/http" + "net/url" + "strconv" + "strings" + "time" + + "github.com/ipfs/boxo/path" +) + +// contentPathToCarUrl returns an URL that allows retrieval of specified resource +// from a trustless gateway that implements IPIP-402 +func contentPathToCarUrl(path path.ImmutablePath, params CarParams) *url.URL { + return &url.URL{ + Path: path.String(), + RawQuery: carParamsToString(params), + } +} + +// carParamsToString converts CarParams to URL parameters compatible with IPIP-402 +func carParamsToString(params CarParams) string { + paramsBuilder := strings.Builder{} + paramsBuilder.WriteString("format=car") // always send explicit format in URL, this makes debugging easier, even when Accept header was set + if params.Scope != "" { + paramsBuilder.WriteString("&dag-scope=") + paramsBuilder.WriteString(string(params.Scope)) + } + if params.Range != nil { + paramsBuilder.WriteString("&entity-bytes=") + paramsBuilder.WriteString(strconv.FormatInt(params.Range.From, 10)) + paramsBuilder.WriteString(":") + if params.Range.To != nil { + paramsBuilder.WriteString(strconv.FormatInt(*params.Range.To, 10)) + } else { + paramsBuilder.WriteString("*") + } + } + return paramsBuilder.String() +} + +// GatewayError translates underlying blockstore error into one that gateway code will return as HTTP 502 or 504 +// it also makes sure Retry-After hint from remote blockstore will be passed to HTTP client, if present. +func GatewayError(err error) error { + if errors.Is(err, &ErrorStatusCode{}) || + errors.Is(err, &ErrorRetryAfter{}) { + // already correct error + return err + } + + // All timeouts should produce 504 Gateway Timeout + if errors.Is(err, context.DeadlineExceeded) || + // errors.Is(err, caboose.ErrTimeout) || + // Unfortunately this is not an exported type so we have to check for the content. 
+ strings.Contains(err.Error(), "Client.Timeout exceeded") { + return fmt.Errorf("%w: %s", ErrGatewayTimeout, err.Error()) + } + + // (Saturn) errors that support the RetryAfter interface need to be converted + // to the correct gateway error, such that the HTTP header is set. + for v := err; v != nil; v = errors.Unwrap(v) { + if r, ok := v.(interface{ RetryAfter() time.Duration }); ok { + return NewErrorRetryAfter(err, r.RetryAfter()) + } + } + + // everything else returns 502 Bad Gateway + return fmt.Errorf("%w: %s", ErrBadGateway, err.Error()) +} + +type remoteCarFetcher struct { + httpClient *http.Client + gatewayURL []string + validate bool + rand *rand.Rand +} + +func newRemoteCarFetcher(gatewayURL []string) (CarFetcher, error) { + if len(gatewayURL) == 0 { + return nil, errors.New("missing gateway URLs to which to proxy") + } + + return &remoteCarFetcher{ + gatewayURL: gatewayURL, + httpClient: newRemoteHTTPClient(), + // Enables block validation by default. Important since we are + // proxying block requests to an untrusted gateway. 
+ validate: true, + rand: rand.New(rand.NewSource(time.Now().Unix())), + }, nil +} + +func (ps *remoteCarFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { + urlStr := fmt.Sprintf("%s%s", ps.getRandomGatewayURL(), path) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil) + if err != nil { + return err + } + log.Debugw("car fetch", "url", req.URL) + req.Header.Set("Accept", "application/vnd.ipld.car;order=dfs;dups=y") + resp, err := ps.httpClient.Do(req) + if err != nil { + return err + } + + if resp.StatusCode != http.StatusOK { + errData, err := io.ReadAll(resp.Body) + if err != nil { + err = fmt.Errorf("could not read error message: %w", err) + } else { + err = fmt.Errorf("%q", string(errData)) + } + return fmt.Errorf("http error from car gateway: %s: %w", resp.Status, err) + } + + err = cb(path, resp.Body) + if err != nil { + resp.Body.Close() + return err + } + return resp.Body.Close() +} + +func (ps *remoteCarFetcher) getRandomGatewayURL() string { + return ps.gatewayURL[ps.rand.Intn(len(ps.gatewayURL))] +} diff --git a/gateway/backend_graph_utils_test.go b/gateway/backend_graph_utils_test.go new file mode 100644 index 000000000..5b0ec3886 --- /dev/null +++ b/gateway/backend_graph_utils_test.go @@ -0,0 +1,97 @@ +package gateway + +import ( + "errors" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/ipfs/boxo/path" +) + +func TestContentPathToCarUrl(t *testing.T) { + negativeOffset := int64(-42) + testCases := []struct { + contentPath string // to be turned into ImmutablePath + carParams CarParams + expectedUrl string // url.URL.String() + }{ + { + contentPath: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", + carParams: CarParams{}, + expectedUrl: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?format=car", + }, + { + contentPath: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", + 
carParams: CarParams{Scope: "entity", Range: &DagByteRange{From: 0, To: nil}}, + expectedUrl: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?format=car&dag-scope=entity&entity-bytes=0:*", + }, + { + contentPath: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", + carParams: CarParams{Scope: "block"}, + expectedUrl: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?format=car&dag-scope=block", + }, + { + contentPath: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", + carParams: CarParams{Scope: "entity", Range: &DagByteRange{From: 4, To: &negativeOffset}}, + expectedUrl: "/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi?format=car&dag-scope=entity&entity-bytes=4:-42", + }, + { + // a regression test for case described in https://github.com/ipfs/gateway-conformance/issues/115 + contentPath: "/ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze/I/Auditorio_de_Tenerife%2C_Santa_Cruz_de_Tenerife%2C_España%2C_2012-12-15%2C_DD_02.jpg.webp", + carParams: CarParams{Scope: "entity", Range: &DagByteRange{From: 0, To: nil}}, + expectedUrl: "/ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze/I/Auditorio_de_Tenerife%252C_Santa_Cruz_de_Tenerife%252C_Espa%C3%B1a%252C_2012-12-15%252C_DD_02.jpg.webp?format=car&dag-scope=entity&entity-bytes=0:*", + }, + } + + for _, tc := range testCases { + t.Run("TestContentPathToCarUrl", func(t *testing.T) { + p, err := path.NewPath(tc.contentPath) + require.NoError(t, err) + + contentPath, err := path.NewImmutablePath(p) + require.NoError(t, err) + + result := contentPathToCarUrl(contentPath, tc.carParams).String() + if result != tc.expectedUrl { + t.Errorf("Expected %q, but got %q", tc.expectedUrl, result) + } + }) + } +} + +type testErr struct { + message string + retryAfter time.Duration +} + +func (e *testErr) Error() string { + return e.message +} + +func (e *testErr) RetryAfter() time.Duration { + return e.retryAfter +} + 
+func TestGatewayErrorRetryAfter(t *testing.T) { + originalErr := &testErr{message: "test", retryAfter: time.Minute} + var ( + convertedErr error + gatewayErr *ErrorRetryAfter + ) + + // Test unwrapped + convertedErr = GatewayError(originalErr) + ok := errors.As(convertedErr, &gatewayErr) + assert.True(t, ok) + assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) + + // Test wrapped. + convertedErr = GatewayError(fmt.Errorf("wrapped error: %w", originalErr)) + ok = errors.As(convertedErr, &gatewayErr) + assert.True(t, ok) + assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) +} diff --git a/gateway/testdata/directory-with-multilayer-hamt-and-multiblock-files.car b/gateway/testdata/directory-with-multilayer-hamt-and-multiblock-files.car new file mode 100644 index 0000000000000000000000000000000000000000..cb2a4875dc9169e97cd906a472d069b5bfd0f50f GIT binary patch literal 6045 zcma)A3vg7`8BSJ_?G{F6fo3Vt9*81pvV{Z?gfWm;MXHfU6hWEZ+p zMQuAW6lk@jJTyRSrxg$o1}a4hVh3gjV}(M6Mg~SSGx(q?h}hcyKaab2wKFTjY?8g_ z{?G6KzW?4^*HhPVeDBc{H_g7jq%@l(i$;FBXldPveb06cJ9lw$EcwgD3!8^+pA}y< z_Hx@ccaw_)?kC#$B{T$z4H_1&AExNGCqzT!u}pw`rD-_nf*R5sR0uo=i?CJrZ zfB(zJ=Id1tEl-yGS^MCx?tHOk(hnMp+5dg(FV?&V-dVk2=ZSNN z=H7N}b@OdA`X@_L9kHZ5{6%@!)u%_t7Pod!*=An*#Y@fQw`}>K>D^1u41M>9b$@%i4cteb_KT9KK1!ylvgveY&1*dU#K~k6W|$qeV(Jbm$ z5Z$pos{7OrT%s8)%RHUg8A46wr^#J`trOd^3A3`)H5m1Y4PT|n-LAuIqUmHopjjk< zuUShQ+(cO`wGE<~Y(YT5z&+ISSxaDoW6k!dj3MhXi+Kbrv}%D4*FEC6IwNMF`|vw> z2~6K*I`v4_@dD~nq6aO^WE%W*H9C>Zqy{lrCNMRTP(>s{Ps=dh2m6|<2Vks%h$y<<}yWAfoTluipW0j+?r8_Pf4-0dr7Pw%b4XWi*7rNm&hGs&-CNu+UkO7aa)U~MX2bL;$fxle7 zwz^|kj^RMPSx&Ryb_`ir3Pi}tH!)boW}e3^ehT_;Fz!WEsTjyDGC%Nqa5dE?Z2*8a zOjh$eO0uTrQ|JeS1S}7lFLTUM6H5W!02Ap%LdDq105pMDL8uL}Xg)DDfFAT%)U87H z&xI=)SIaR&!xMOeLT~uJrDbeQU?{u>)k$-lAbw5ih?^19*bkCpz{0L35vfJmZfIGI zOP(azz|1jQb0ywHm5NPwK12@t5YC7pG%e;Q8@SLbgc;-{z~UjWofgMM03*gK!X$H0 zXE{M>G6g3L(y*AkKMa!wq=mHru3+Yb0cdef1E9i0=we)W_f%1NKf%920Ff9^9l61?X;1$4%`0;+zTeO_>Fn+~({`Qw6Vv|q2`M1!2f9kS 
z58U7HsmAP2=(vYhCg<#*vT4$)^q%LB58BfFi^?iEyfD!Z$okSXd8S?{a8oRWscryd zmxGCb-qLoY9Sddxh6h2c0WQL;l8&;##0K<0(G%kq_)%zhYmi|uKQMww=MC)?Xx7(! zSU!{!p$CHbfy#jTIY(*Sqm5V)5g@T;VFuxD&MW z5n@Bzb(-}lgyg7OMD?Qr&HG5Gs8TV|VwAG)Jm84XhpA8?Zm3N|Aj(+KGzA`2R;(=q z5A}xBfbWs(6$&T|JWwDYT*L$q8R!X+f;7dlBS^&Xg$_|mq zhVvNaR*+3SU$pox!}Xsquq?~eH_^$NujqDODYp39>b-X)-@g2GuOX}K?(@$KukDx0 z%sRDh{HTL}xY9QD_C?9q*xJ;G{gd)|&#R}tmmJzO=#~Z)FvJ zJ~>NlzMqeodFj3M2Lm_EC^>v+&7+@Q=svvdjk*VW%-PrU{?Ev@6OuR9*2>%JEAdgK zQpf%pv{D(X^e8ndtH+H=ElDJXK}M-)CXu8j^Q@jvAS#NvY`WOH72*p=Ot2>47B*c)Q0OgA|xNbHSVd-*L-ouF#nD!ox#)2y^3n5~&}$ zm1U5Whvb7MpeHIMmPO`*%}ws*obS9m=QypcAZVx*YR1t(VFtW9xLy<%A#+ zaYakUS&9dpCQUMRaSHWIM#Q&9uCx-CDm(=piEZd!ST?zA0*mvQ!w{qx3X`hMf|eE7 zL8SX8xFp~%6X^}umtc@J4>1uD*1Wo)+z2lIN}Nh7p@CpMZv=V(52c}j@B%Kr@N_37 zSFkzUh%$*=4N^4x8EyxKjpbm${R>X3rvp9}`~wt9$YMb&r@~V*Y)hyGR0*n-H&sXw zKDCXAlFG_NmIDb>4!baG_#;*InJU&J6 z_}Hq{zTQcxDMerTBpMr~QGCIaxDnW&bd*evkp9yCfpl1nYQOkf>EO@i| zroov2sV*b}i4eZH2)xRB6;9>2=a)xL{Tv?nvV^bwA!;Qm@tus=%=uxVLHt)atZLfw zRzqmP^davf1Mr~u2j?ZxZSfi6onKKm^MS}ui(n8HT$(iF#N=aB&=RyMAjJ>m@rqFZ z@_6W>OP;$}jnia03Ly-xs9oXoaR=~Ng4qHk0fOKCgw6wgSnI7X5346RykrDKOtG2`wmngkRBJgk-?8w5RlvM~_4S zilSmHP^Adf<;fNqRLOzVqbTX(dmea>#fK$)BH@n-;Wy#qqnsOFGL&sjqVNt8W2uu5 QS)$l^wpMjk0ER~W54hCPHUIzs literal 0 HcmV?d00001 diff --git a/go.mod b/go.mod index 4ef91d9f8..353b2b945 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/google/uuid v1.5.0 github.com/gorilla/mux v1.8.1 + github.com/hashicorp/go-multierror v1.1.1 github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/ipfs/bbloom v0.0.4 github.com/ipfs/go-bitfield v1.1.0 @@ -30,6 +31,7 @@ require ( github.com/ipfs/go-metrics-interface v0.0.1 github.com/ipfs/go-peertaskqueue v0.8.1 github.com/ipfs/go-unixfsnode v1.9.0 + github.com/ipld/go-car v0.6.2 github.com/ipld/go-car/v2 v2.13.1 github.com/ipld/go-codec-dagpb v1.6.0 github.com/ipld/go-ipld-prime v0.21.0 @@ -103,14 +105,19 @@ require ( github.com/gorilla/websocket v1.5.0 // 
indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect - github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/huin/goupnp v1.3.0 // indirect + github.com/ipfs/go-blockservice v0.5.0 // indirect + github.com/ipfs/go-ipfs-blockstore v1.3.0 // indirect + github.com/ipfs/go-ipfs-ds-help v1.1.0 // indirect + github.com/ipfs/go-ipfs-exchange-interface v0.2.0 // indirect github.com/ipfs/go-ipfs-pq v0.0.3 // indirect github.com/ipfs/go-ipfs-util v0.0.3 // indirect github.com/ipfs/go-ipld-cbor v0.1.0 // indirect github.com/ipfs/go-log v1.0.5 // indirect + github.com/ipfs/go-merkledag v0.11.0 // indirect github.com/ipfs/go-unixfs v0.4.5 // indirect + github.com/ipfs/go-verifcid v0.0.2 // indirect github.com/jackpal/go-nat-pmp v1.0.2 // indirect github.com/jbenet/go-temp-err-catcher v0.1.0 // indirect github.com/klauspost/compress v1.17.4 // indirect @@ -156,6 +163,7 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 // indirect go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect + go.uber.org/atomic v1.11.0 // indirect go.uber.org/dig v1.17.1 // indirect go.uber.org/fx v1.20.1 // indirect go.uber.org/mock v0.4.0 // indirect diff --git a/go.sum b/go.sum index bf51a10ed..388ee6d9f 100644 --- a/go.sum +++ b/go.sum @@ -137,6 +137,7 @@ github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OI github.com/google/pprof v0.0.0-20231229205709-960ae82b1e42 h1:dHLYa5D8/Ta0aLR2XcPsrkpAgGeFs6thhMcQK0oQ0n8= github.com/google/pprof v0.0.0-20231229205709-960ae82b1e42/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -170,17 +171,21 @@ github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= github.com/ipfs/go-bitfield v1.1.0 h1:fh7FIo8bSwaJEh6DdTWbCeZ1eqOaOkKFI74SCnsWbGA= github.com/ipfs/go-bitfield v1.1.0/go.mod h1:paqf1wjq/D2BBmzfTVFlJQ9IlFOZpg422HL0HqsGWHU= +github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= +github.com/ipfs/go-bitswap v0.11.0/go.mod h1:05aE8H3XOU+LXpTedeAS0OZpcO1WFsj5niYQH9a1Tmk= github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= github.com/ipfs/go-blockservice v0.5.0 h1:B2mwhhhVQl2ntW2EIpaWPwSCxSuqr5fFA93Ms4bYLEY= github.com/ipfs/go-blockservice v0.5.0/go.mod h1:W6brZ5k20AehbmERplmERn8o2Ni3ZZubvAxaIUeaT6w= github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= +github.com/ipfs/go-cid v0.0.5/go.mod h1:plgt+Y5MnOey4vO4UlUazGqdbEXuFYitED67FexhXog= github.com/ipfs/go-cid v0.0.6/go.mod h1:6Ux9z5e+HpkQdckYoX1PG/6xqKspzlEIR5SDmgqgC/I= github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s= github.com/ipfs/go-cid v0.4.1/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk= github.com/ipfs/go-cidutil v0.1.0 h1:RW5hO7Vcf16dplUU60Hs0AKDkQAVPVplr7lk97CFL+Q= github.com/ipfs/go-cidutil v0.1.0/go.mod h1:e7OEVBMIv9JaOxt9zaGEmAoSlXW9jdFZ5lP/0PwcfpA= +github.com/ipfs/go-datastore v0.5.0/go.mod h1:9zhEApYMTl17C8YDp7JmU7sQZi2/wqiYh73hakZ90Bk= github.com/ipfs/go-datastore v0.6.0 h1:JKyz+Gvz1QEZw0LsX1IBn+JFCJQH4SJVFtM4uWU0Myk= github.com/ipfs/go-datastore v0.6.0/go.mod 
h1:rt5M3nNbSO/8q1t4LNkLyUwRs8HupMeN/8O4Vn9YAT8= github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= @@ -191,6 +196,7 @@ github.com/ipfs/go-ipfs-blocksutil v0.0.1 h1:Eh/H4pc1hsvhzsQoMEP3Bke/aW5P5rVM1IW github.com/ipfs/go-ipfs-blocksutil v0.0.1/go.mod h1:Yq4M86uIOmxmGPUHv/uI7uKqZNtLb449gwKqXjIsnRk= github.com/ipfs/go-ipfs-chunker v0.0.5 h1:ojCf7HV/m+uS2vhUGWcogIIxiO5ubl5O57Q7NapWLY8= github.com/ipfs/go-ipfs-chunker v0.0.5/go.mod h1:jhgdF8vxRHycr00k13FM8Y0E+6BoalYeobXmUyTreP8= +github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-delay v0.0.1/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-ds-help v1.1.0 h1:yLE2w9RAsl31LtfMt91tRZcrx+e61O5mDxFRR994w4Q= @@ -203,6 +209,8 @@ github.com/ipfs/go-ipfs-pq v0.0.3 h1:YpoHVJB+jzK15mr/xsWC574tyDLkezVrDNeaalQBsTE github.com/ipfs/go-ipfs-pq v0.0.3/go.mod h1:btNw5hsHBpRcSSgZtiNm/SLj5gYIZ18AKtv3kERkRb4= github.com/ipfs/go-ipfs-redirects-file v0.1.1 h1:Io++k0Vf/wK+tfnhEh63Yte1oQK5VGT2hIEYpD0Rzx8= github.com/ipfs/go-ipfs-redirects-file v0.1.1/go.mod h1:tAwRjCV0RjLTjH8DR/AU7VYvfQECg+lpUy2Mdzv7gyk= +github.com/ipfs/go-ipfs-routing v0.3.0 h1:9W/W3N+g+y4ZDeffSgqhgo7BsBSJwPMcyssET9OWevc= +github.com/ipfs/go-ipfs-routing v0.3.0/go.mod h1:dKqtTFIql7e1zYsEuWLyuOU+E0WJWW8JjbTPLParDWo= github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc= github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0= github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs= @@ -229,6 +237,8 @@ github.com/ipfs/go-unixfsnode v1.9.0 h1:ubEhQhr22sPAKO2DNsyVBW7YB/zA8Zkif25aBvz8 github.com/ipfs/go-unixfsnode v1.9.0/go.mod h1:HxRu9HYHOjK6HUqFBAi++7DVoWAHn0o4v/nZ/VA+0g8= github.com/ipfs/go-verifcid v0.0.2 
h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU= +github.com/ipld/go-car v0.6.2 h1:Hlnl3Awgnq8icK+ze3iRghk805lu8YNq3wlREDTF2qc= +github.com/ipld/go-car v0.6.2/go.mod h1:oEGXdwp6bmxJCZ+rARSkDliTeYnVzv3++eXajZ+Bmr8= github.com/ipld/go-car/v2 v2.13.1 h1:KnlrKvEPEzr5IZHKTXLAEub+tPrzeAFQVRlSQvuxBO4= github.com/ipld/go-car/v2 v2.13.1/go.mod h1:QkdjjFNGit2GIkpQ953KBwowuoukoM75nP/JI1iDJdo= github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= @@ -260,6 +270,7 @@ github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZY github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0= github.com/koron/go-ssdp v0.0.4/go.mod h1:oDXq+E5IL5q0U8uSBcoAXzTzInwy5lEgC91HoKtbmZk= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -720,6 +731,7 @@ google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7 google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod 
h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= From 2d615b36b03a20be2784e15c2517ec66210cc381 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Mon, 25 Mar 2024 17:36:36 +0100 Subject: [PATCH 03/19] reuse walkGatewaySimpleSelector --- gateway/backend_blocks.go | 69 ++++++---- gateway/backend_graph.go | 37 ++---- gateway/backend_graph_test.go | 206 +++++++++++++++-------------- gateway/backend_graph_traversal.go | 174 ------------------------ go.mod | 1 - 5 files changed, 155 insertions(+), 332 deletions(-) diff --git a/gateway/backend_blocks.go b/gateway/backend_blocks.go index 5d71a51eb..94a2551b9 100644 --- a/gateway/backend_blocks.go +++ b/gateway/backend_blocks.go @@ -351,9 +351,17 @@ func (bb *BlocksBackend) GetCAR(ctx context.Context, p path.ImmutablePath, param unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) lsys.StorageReadOpener = blockOpener(ctx, blockGetter) + // First resolve the path since we always need to. + lastCid, remainder, err := pathResolver.ResolveToLastNode(ctx, p) + if err != nil { + // io.PipeWriter.CloseWithError always returns nil. + _ = w.CloseWithError(err) + return + } + // TODO: support selectors passed as request param: https://github.com/ipfs/kubo/issues/8769 // TODO: this is very slow if blocks are remote due to linear traversal. Do we need deterministic traversals here? - carWriteErr := walkGatewaySimpleSelector(ctx, p, params, &lsys, pathResolver) + carWriteErr := walkGatewaySimpleSelector(ctx, lastCid, nil, remainder, params, &lsys) // io.PipeWriter.CloseWithError always returns nil. _ = w.CloseWithError(carWriteErr) @@ -363,29 +371,49 @@ func (bb *BlocksBackend) GetCAR(ctx context.Context, p path.ImmutablePath, param } // walkGatewaySimpleSelector walks the subgraph described by the path and terminal element parameters -func walkGatewaySimpleSelector(ctx context.Context, p path.ImmutablePath, params CarParams, lsys *ipld.LinkSystem, pathResolver resolver.Resolver) error { - // First resolve the path since we always need to. 
- lastCid, remainder, err := pathResolver.ResolveToLastNode(ctx, p) - if err != nil { - return err - } - +func walkGatewaySimpleSelector(ctx context.Context, lastCid cid.Cid, terminalBlk blocks.Block, remainder []string, params CarParams, lsys *ipld.LinkSystem) error { lctx := ipld.LinkContext{Ctx: ctx} pathTerminalCidLink := cidlink.Link{Cid: lastCid} // If the scope is the block, now we only need to retrieve the root block of the last element of the path. if params.Scope == DagScopeBlock { - _, err = lsys.LoadRaw(lctx, pathTerminalCidLink) + _, err := lsys.LoadRaw(lctx, pathTerminalCidLink) return err } - // If we're asking for everything then give it - if params.Scope == DagScopeAll { - lastCidNode, err := lsys.Load(lctx, pathTerminalCidLink, basicnode.Prototype.Any) + pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { + if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { + return tlnkNd.LinkTargetNodePrototype(), nil + } + return basicnode.Prototype.Any, nil + }) + + np, err := pc(pathTerminalCidLink, lctx) + if err != nil { + return err + } + + var lastCidNode datamodel.Node + if terminalBlk != nil { + decoder, err := lsys.DecoderChooser(pathTerminalCidLink) if err != nil { return err } + nb := np.NewBuilder() + blockData := terminalBlk.RawData() + if err := decoder(nb, bytes.NewReader(blockData)); err != nil { + return err + } + lastCidNode = nb.Build() + } else { + lastCidNode, err = lsys.Load(lctx, pathTerminalCidLink, np) + if err != nil { + return err + } + } + // If we're asking for everything then give it + if params.Scope == DagScopeAll { sel, err := selector.ParseSelector(selectorparse.CommonSelector_ExploreAllRecursively) if err != nil { return err @@ -411,23 +439,6 @@ func walkGatewaySimpleSelector(ctx context.Context, p path.ImmutablePath, params // From now on, dag-scope=entity! 
// Since we need more of the graph load it to figure out what we have // This includes determining if the terminal node is UnixFS or not - pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { - if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { - return tlnkNd.LinkTargetNodePrototype(), nil - } - return basicnode.Prototype.Any, nil - }) - - np, err := pc(pathTerminalCidLink, lctx) - if err != nil { - return err - } - - lastCidNode, err := lsys.Load(lctx, pathTerminalCidLink, np) - if err != nil { - return err - } - if pbn, ok := lastCidNode.(dagpb.PBNode); !ok { // If it's not valid dag-pb then we're done return nil diff --git a/gateway/backend_graph.go b/gateway/backend_graph.go index dac3c2784..b230bb20a 100644 --- a/gateway/backend_graph.go +++ b/gateway/backend_graph.go @@ -317,28 +317,11 @@ func resolvePathToLastWithRoots(ctx context.Context, fpath ipfspath.ImmutablePat return cids[:len(cids)-1], cids[len(cids)-1], nil, nil, nil } -func contentMetadataFromRootsAndRemainder(p ipfspath.Path, pathRoots []cid.Cid, terminalCid cid.Cid, remainder []string) (ContentPathMetadata, error) { - var rootCid cid.Cid - if len(pathRoots) > 0 { - rootCid = pathRoots[0] - } else { - rootCid = terminalCid - } - - p, err := path.Join(path.FromCid(rootCid), remainder...) 
- if err != nil { - return ContentPathMetadata{}, err - } - - imPath, err := path.NewImmutablePath(p) - if err != nil { - return ContentPathMetadata{}, err - } - +func contentMetadataFromRootsAndRemainder(p ipfspath.ImmutablePath, pathRoots []cid.Cid, remainder []string) (ContentPathMetadata, error) { md := ContentPathMetadata{ PathSegmentRoots: pathRoots, LastSegmentRemainder: remainder, - LastSegment: imPath, + LastSegment: p, } return md, nil } @@ -409,7 +392,6 @@ func (api *GraphGateway) Get(ctx context.Context, path path.ImmutablePath, byteR } return md, resp, nil - } // loadTerminalEntity returns either a [*GetResponse], [*backpressuredFile], or [*backpressuredHAMTDirIterNoRecursion] @@ -799,7 +781,8 @@ func fetchWithPartialRetries[T any](ctx context.Context, path path.ImmutablePath if err != nil { return err } - md, err := contentMetadataFromRootsAndRemainder(p, pathRootCids, terminalCid, remainder) + + md, err := contentMetadataFromRootsAndRemainder(p, pathRootCids, remainder) if err != nil { return err } @@ -925,7 +908,7 @@ func (api *GraphGateway) GetBlock(ctx context.Context, path path.ImmutablePath) } } - md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, terminalCid, remainder) + md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, remainder) if err != nil { return err } @@ -968,7 +951,7 @@ func (api *GraphGateway) Head(ctx context.Context, path path.ImmutablePath) (Con return err } - md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, terminalCid, remainder) + md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, remainder) if err != nil { return err } @@ -1100,12 +1083,12 @@ func (api *GraphGateway) ResolvePath(ctx context.Context, path path.ImmutablePat // First resolve the path since we always need to. 
// FIXME(HACDIAS): p := ipfspath.FromString(path.String()) p := path - pathRoots, terminalCid, remainder, _, err := resolvePathToLastWithRoots(ctx, p, lsys) + pathRoots, _, remainder, _, err := resolvePathToLastWithRoots(ctx, p, lsys) if err != nil { return err } - md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, terminalCid, remainder) + md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, remainder) return err }) @@ -1194,7 +1177,9 @@ func (api *GraphGateway) GetCAR(ctx context.Context, path path.ImmutablePath, pa blockBuffer = nil } - err = walkGatewaySimpleSelector2(ctx, terminalBlk, params.Scope, params.Range, l) + params.Duplicates = DuplicateBlocksIncluded + err = walkGatewaySimpleSelector(ctx, terminalBlk.Cid(), terminalBlk, []string{}, params, l) + // err = walkGatewaySimpleSelector2(ctx, terminalBlk, params.Scope, params.Range, l) if err != nil { return err } diff --git a/gateway/backend_graph_test.go b/gateway/backend_graph_test.go index da015d83f..89dc98603 100644 --- a/gateway/backend_graph_test.go +++ b/gateway/backend_graph_test.go @@ -8,6 +8,7 @@ import ( "io" "net/http" "net/http/httptest" + "strings" "testing" _ "embed" @@ -820,108 +821,109 @@ func TestGetFileWithBadBlockReturned(t *testing.T) { } } -// func TestGetHAMTDirectory(t *testing.T) { -// ctx, cancel := context.WithCancel(context.Background()) -// defer cancel() - -// requestNum := 0 -// s := httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { -// requestNum++ -// switch requestNum { -// case 1: -// // Expect the full request, but return one that terminates in the middle of the path -// expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/" -// if request.URL.Path != expectedUri { -// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) -// } - -// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ -// 
"bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir -// }); err != nil { -// panic(err) -// } -// case 2: -// // Expect the full request, but return one that terminates in the middle of the HAMT -// // Note: this is an implementation detail, it could be in the future that we request less data (e.g. partial path) -// expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/" -// if request.URL.Path != expectedUri { -// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) -// } - -// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ -// "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir -// "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root -// "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here -// }); err != nil { -// panic(err) -// } -// case 3: -// // Expect a request for a non-existent index.html file -// // Note: this is an implementation detail related to the directory request above -// // Note: the order of cases 3 and 4 here are implementation specific as well -// expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/index.html" -// if request.URL.Path != expectedUri { -// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) -// } - -// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ -// "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir -// "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root -// "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here -// }); err != nil { -// panic(err) -// } -// case 4: -// // Expect a request for the full HAMT and return it -// // Note: this is an implementation detail, it could be in the future that we request more or less data -// // (e.g. 
ask for the full path, ask for index.html first, make a spec change to allow asking for index.html with a fallback to the directory, etc.) -// expectedUri := "/ipfs/bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm" -// if request.URL.Path != expectedUri { -// panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) -// } - -// if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ -// "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root -// "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here -// "bafybeihjydob4eq5j4m43whjgf5cgftthc42kjno3g24sa3wcw7vonbmfy", -// }); err != nil { -// panic(err) -// } - -// default: -// t.Fatal("unsupported request number") -// } -// })) -// defer s.Close() - -// bs, err := newRemoteCarFetcher([]string{s.URL}) -// require.NoError(t, err) -// backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) -// if err != nil { -// t.Fatal(err) -// } - -// trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) -// defer trustedGatewayServer.Close() - -// resp, err := http.Get(trustedGatewayServer.URL + "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/") -// if err != nil { -// t.Fatal(err) -// } - -// data, err := io.ReadAll(resp.Body) -// if err != nil { -// t.Fatal(err) -// } - -// if strings.Count(string(data), ">exampleD-hamt-collide-exampleB-seed-364<") == 1 && -// strings.Count(string(data), ">exampleC-hamt-collide-exampleA-seed-52<") == 1 && -// strings.Count(string(data), ">exampleA<") == 1 && -// strings.Count(string(data), ">exampleB<") == 1 { -// return -// } -// t.Fatal("directory does not contain the expected links") -// } +func TestGetHAMTDirectory(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + requestNum := 0 + s := 
httptest.NewServer(http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + requestNum++ + fmt.Println(requestNum, request.URL.Path) + switch requestNum { + case 1: + // Expect the full request, but return one that terminates in the middle of the path + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + }); err != nil { + panic(err) + } + case 2: + // Expect the full request, but return one that terminates in the middle of the HAMT + // Note: this is an implementation detail, it could be in the future that we request less data (e.g. partial path) + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root + "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here + }); err != nil { + panic(err) + } + case 3: + // Expect a request for a non-existent index.html file + // Note: this is an implementation detail related to the directory request above + // Note: the order of cases 3 and 4 here are implementation specific as well + expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/index.html" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, 
writer, []string{ + "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root + "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here + }); err != nil { + panic(err) + } + case 4: + // Expect a request for the full HAMT and return it + // Note: this is an implementation detail, it could be in the future that we request more or less data + // (e.g. ask for the full path, ask for index.html first, make a spec change to allow asking for index.html with a fallback to the directory, etc.) + expectedUri := "/ipfs/bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm" + if request.URL.Path != expectedUri { + panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) + } + + if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ + "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root + "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here + "bafybeihjydob4eq5j4m43whjgf5cgftthc42kjno3g24sa3wcw7vonbmfy", + }); err != nil { + panic(err) + } + + default: + t.Fatal("unsupported request number") + } + })) + defer s.Close() + + bs, err := newRemoteCarFetcher([]string{s.URL}) + require.NoError(t, err) + backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) + if err != nil { + t.Fatal(err) + } + + trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) + defer trustedGatewayServer.Close() + + resp, err := http.Get(trustedGatewayServer.URL + "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/") + if err != nil { + t.Fatal(err) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } + + if strings.Count(string(data), ">exampleD-hamt-collide-exampleB-seed-364<") == 1 && + 
strings.Count(string(data), ">exampleC-hamt-collide-exampleA-seed-52<") == 1 && + strings.Count(string(data), ">exampleA<") == 1 && + strings.Count(string(data), ">exampleB<") == 1 { + return + } + t.Fatal("directory does not contain the expected links") +} func TestGetCAR(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) diff --git a/gateway/backend_graph_traversal.go b/gateway/backend_graph_traversal.go index 8f3d46232..ce47d2a92 100644 --- a/gateway/backend_graph_traversal.go +++ b/gateway/backend_graph_traversal.go @@ -9,23 +9,15 @@ import ( "sync" "time" - bsfetcher "github.com/ipfs/boxo/fetcher/impl/blockservice" "github.com/ipfs/boxo/verifcid" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" "github.com/ipfs/go-unixfsnode" - "github.com/ipfs/go-unixfsnode/data" "github.com/ipld/go-car" - dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/datamodel" "github.com/ipld/go-ipld-prime/linking" cidlink "github.com/ipld/go-ipld-prime/linking/cid" - "github.com/ipld/go-ipld-prime/node/basicnode" - "github.com/ipld/go-ipld-prime/schema" - "github.com/ipld/go-ipld-prime/traversal" - "github.com/ipld/go-ipld-prime/traversal/selector" - selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" "github.com/multiformats/go-multihash" ) @@ -155,169 +147,3 @@ func getLinksystem(fn getBlock) *ipld.LinkSystem { unixfsnode.AddUnixFSReificationToLinkSystem(&lsys) return &lsys } - -// walkGatewaySimpleSelector2 walks the subgraph described by the path and terminal element parameters -func walkGatewaySimpleSelector2(ctx context.Context, terminalBlk blocks.Block, dagScope DagScope, entityRange *DagByteRange, lsys *ipld.LinkSystem) error { - lctx := ipld.LinkContext{Ctx: ctx} - var err error - - // If the scope is the block, we only need the root block of the last element of the path, which we have. 
- if dagScope == DagScopeBlock { - return nil - } - - // decode the terminal block into a node - pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { - if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { - return tlnkNd.LinkTargetNodePrototype(), nil - } - return basicnode.Prototype.Any, nil - }) - - pathTerminalCidLink := cidlink.Link{Cid: terminalBlk.Cid()} - np, err := pc(pathTerminalCidLink, lctx) - if err != nil { - return err - } - - decoder, err := lsys.DecoderChooser(pathTerminalCidLink) - if err != nil { - return err - } - nb := np.NewBuilder() - blockData := terminalBlk.RawData() - if err := decoder(nb, bytes.NewReader(blockData)); err != nil { - return err - } - lastCidNode := nb.Build() - - // TODO: Evaluate: - // Does it matter that we're ignoring the "remainder" portion of the traversal in GetCAR? - // Does it matter that we're using a linksystem with the UnixFS reifier for dagscope=all? - - // If we're asking for everything then give it - if dagScope == DagScopeAll { - sel, err := selector.ParseSelector(selectorparse.CommonSelector_ExploreAllRecursively) - if err != nil { - return err - } - - progress := traversal.Progress{ - Cfg: &traversal.Config{ - Ctx: ctx, - LinkSystem: *lsys, - LinkTargetNodePrototypeChooser: bsfetcher.DefaultPrototypeChooser, - LinkVisitOnlyOnce: false, // Despite being safe for the "all" selector we do this walk anyway since this is how we will be receiving the blocks - }, - } - - if err := progress.WalkMatching(lastCidNode, sel, func(progress traversal.Progress, node datamodel.Node) error { - return nil - }); err != nil { - return err - } - return nil - } - - // From now on, dag-scope=entity! 
- // Since we need more of the graph load it to figure out what we have - // This includes determining if the terminal node is UnixFS or not - if pbn, ok := lastCidNode.(dagpb.PBNode); !ok { - // If it's not valid dag-pb then we're done - return nil - } else if !pbn.FieldData().Exists() { - // If it's not valid UnixFS then we're done - return nil - } else if unixfsFieldData, decodeErr := data.DecodeUnixFSData(pbn.Data.Must().Bytes()); decodeErr != nil { - // If it's not valid dag-pb and UnixFS then we're done - return nil - } else { - switch unixfsFieldData.FieldDataType().Int() { - case data.Data_Directory, data.Data_Symlink: - // These types are non-recursive so we're done - return nil - case data.Data_Raw, data.Data_Metadata: - // TODO: for now, we decided to return nil here. The different implementations are inconsistent - // and UnixFS is not properly specified: https://github.com/ipfs/specs/issues/316. - // - Is Data_Raw different from Data_File? - // - Data_Metadata is handled differently in boxo/ipld/unixfs and go-unixfsnode. - return nil - case data.Data_HAMTShard: - // Return all elements in the map - _, err := lsys.KnownReifiers["unixfs-preload"](lctx, lastCidNode, lsys) - if err != nil { - return err - } - return nil - case data.Data_File: - nd, err := unixfsnode.Reify(lctx, lastCidNode, lsys) - if err != nil { - return err - } - - fnd, ok := nd.(datamodel.LargeBytesNode) - if !ok { - return fmt.Errorf("could not process file since it did not present as large bytes") - } - f, err := fnd.AsLargeBytes() - if err != nil { - return err - } - - // Get the entity range. If it's empty, assume the defaults (whole file). - effectiveRange := entityRange - if effectiveRange == nil { - effectiveRange = &DagByteRange{ - From: 0, - } - } - - from := effectiveRange.From - - // If we're starting to read based on the end of the file, find out where that is. 
- var fileLength int64 - foundFileLength := false - if effectiveRange.From < 0 { - fileLength, err = f.Seek(0, io.SeekEnd) - if err != nil { - return err - } - from = fileLength + effectiveRange.From - foundFileLength = true - } - - // If we're reading until the end of the file then do it - if effectiveRange.To == nil { - if _, err := f.Seek(from, io.SeekStart); err != nil { - return err - } - _, err = io.Copy(io.Discard, f) - return err - } - - to := *effectiveRange.To - if (*effectiveRange.To) < 0 && !foundFileLength { - fileLength, err = f.Seek(0, io.SeekEnd) - if err != nil { - return err - } - to = fileLength + *effectiveRange.To - foundFileLength = true - } - - numToRead := 1 + to - from - if numToRead < 0 { - return fmt.Errorf("tried to read less than zero bytes") - } - - if _, err := f.Seek(from, io.SeekStart); err != nil { - return err - } - _, err = io.CopyN(io.Discard, f, numToRead) - return err - default: - // Not a supported type, so we're done - return nil - } - } -} diff --git a/go.mod b/go.mod index 353b2b945..a6c113337 100644 --- a/go.mod +++ b/go.mod @@ -163,7 +163,6 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 // indirect go.opentelemetry.io/otel/metric v1.24.0 // indirect go.opentelemetry.io/proto/otlp v1.0.0 // indirect - go.uber.org/atomic v1.11.0 // indirect go.uber.org/dig v1.17.1 // indirect go.uber.org/fx v1.20.1 // indirect go.uber.org/mock v0.4.0 // indirect From fe99a2c418f6e44a86bf6d2328c988c056922e75 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Tue, 26 Mar 2024 12:42:39 +0100 Subject: [PATCH 04/19] examples: graph gateway --- examples/README.md | 1 + examples/gateway/graph/README.md | 45 ++++++++++++++++++++++++++++ examples/gateway/graph/main.go | 50 ++++++++++++++++++++++++++++++++ gateway/backend_graph_test.go | 16 +++++----- gateway/backend_graph_utils.go | 2 +- 5 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 examples/gateway/graph/README.md create mode 100644 
examples/gateway/graph/main.go diff --git a/examples/README.md b/examples/README.md index fa5408732..30f6e6157 100644 --- a/examples/README.md +++ b/examples/README.md @@ -29,4 +29,5 @@ Once you have your example finished, do not forget to run `go mod tidy` and addi - [Fetching a UnixFS file by CID](./unixfs-file-cid) - [Gateway backed by a CAR file](./gateway/car) - [Gateway backed by a remote blockstore and IPNS resolver](./gateway/proxy) +- [Gateway backed by a remote Trustless Gateway](./gateway/graph) - [Delegated Routing V1 Command Line Client](./routing/delegated-routing-client/) diff --git a/examples/gateway/graph/README.md b/examples/gateway/graph/README.md new file mode 100644 index 000000000..be34b5b0c --- /dev/null +++ b/examples/gateway/graph/README.md @@ -0,0 +1,45 @@ +# Gateway as Proxy for Trustless Remote Backend + +This is an example of building a Gateway that uses `application/vnd.ipld.car` +responses from another gateway acting as a remote Trustless Gateway and IPNS resolver. + +## Build + +```bash +> go build -o graph-proxy +``` + +## Usage + +First, you need a compliant gateway that supports both [CAR requests](https://www.iana.org/assignments/media-types/application/vnd.ipld.car) and IPNS Record response +types. Once you have it, run the proxy gateway with its address as the host parameter: + +``` +./graph-proxy -g https://ipfs.io -p 8040 +``` + +### Subdomain gateway + +Now you can access the gateway in [localhost:8040](http://localhost:8040). It will +behave like a regular [Subdomain IPFS Gateway](https://docs.ipfs.tech/how-to/address-ipfs-on-web/#subdomain-gateway), +except for the fact that it runs no libp2p, and has no local blockstore. +All contents are provided by a remote gateway and fetched as CAR files and IPNS Records, and verified locally. 
+ +### Path gateway + +If you don't need Origin isolation and only care about hosting flat files, +a plain [path gateway](https://docs.ipfs.tech/how-to/address-ipfs-on-web/#path-gateway) at [127.0.0.1:8040](http://127.0.0.1:8040) +may suffice. + +### DNSLink gateway + +Gateway supports hosting of [DNSLink](https://dnslink.dev/) websites. All you need is to pass `Host` header with FQDN that has DNSLink set up: + +```console +$ curl -sH 'Host: en.wikipedia-on-ipfs.org' 'http://127.0.0.1:8040/wiki/' | head -3 + + + Wikipedia, the free encyclopedia +``` + +Put it behind a reverse proxy terminating TLS (like Nginx) and voila! diff --git a/examples/gateway/graph/main.go b/examples/gateway/graph/main.go new file mode 100644 index 000000000..9c2c04c3b --- /dev/null +++ b/examples/gateway/graph/main.go @@ -0,0 +1,50 @@ +package main + +import ( + "context" + "flag" + "log" + "net/http" + "strconv" + + "github.com/ipfs/boxo/examples/gateway/common" + "github.com/ipfs/boxo/gateway" +) + +func main() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + gatewayUrlPtr := flag.String("g", "", "gateway to proxy to") + port := flag.Int("p", 8040, "port to run this gateway from") + flag.Parse() + + // Sets up tracing. This is optional and only required if the implementer + // wants to be able to enable tracing. + tp, err := common.SetupTracing(ctx, "Graph Gateway Example") + if err != nil { + log.Fatal(err) + } + defer (func() { _ = tp.Shutdown(ctx) })() + + carFetcher, err := gateway.NewRemoteCarFetcher([]string{*gatewayUrlPtr}, nil) + if err != nil { + log.Fatal(err) + } + + // Creates the gateway with the remote graph backend.
+ backend, err := gateway.NewGraphGatewayBackend(carFetcher) + if err != nil { + log.Fatal(err) + } + + handler := common.NewHandler(backend) + + log.Printf("Listening on http://localhost:%d", *port) + log.Printf("Try loading an image: http://localhost:%d/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", *port) + log.Printf("Try browsing Wikipedia snapshot: http://localhost:%d/ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze", *port) + log.Printf("Metrics available at http://127.0.0.1:%d/debug/metrics/prometheus", *port) + if err := http.ListenAndServe(":"+strconv.Itoa(*port), handler); err != nil { + log.Fatal(err) + } +} diff --git a/gateway/backend_graph_test.go b/gateway/backend_graph_test.go index 89dc98603..8d69c3ca0 100644 --- a/gateway/backend_graph_test.go +++ b/gateway/backend_graph_test.go @@ -153,7 +153,7 @@ func TestTar(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -343,7 +343,7 @@ func TestTarAtEndOfPath(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -528,7 +528,7 @@ func TestGetFile(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -646,7 +646,7 @@ func TestGetFileRangeRequest(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := 
NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -776,7 +776,7 @@ func TestGetFileWithBadBlockReturned(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -896,7 +896,7 @@ func TestGetHAMTDirectory(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -1008,7 +1008,7 @@ func TestGetCAR(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) if err != nil { @@ -1112,7 +1112,7 @@ func TestPassthroughErrors(t *testing.T) { })) defer s.Close() - bs, err := newRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) p, err := path.NewPath("/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA") diff --git a/gateway/backend_graph_utils.go b/gateway/backend_graph_utils.go index aac8df62b..3f86e40e6 100644 --- a/gateway/backend_graph_utils.go +++ b/gateway/backend_graph_utils.go @@ -81,7 +81,7 @@ type remoteCarFetcher struct { rand *rand.Rand } -func newRemoteCarFetcher(gatewayURL []string) (CarFetcher, error) { +func NewRemoteCarFetcher(gatewayURL []string) (CarFetcher, error) { if len(gatewayURL) == 0 { return nil, errors.New("missing gateway URLs to 
which to proxy") } From 29a122dec90955bccccc598e6263a4f5101ad22b Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Tue, 26 Mar 2024 12:44:40 +0100 Subject: [PATCH 05/19] ci: conformance against graph gateway --- .github/workflows/gateway-conformance.yml | 66 ++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gateway-conformance.yml b/.github/workflows/gateway-conformance.yml index c9c3eb072..f3edc9bb7 100644 --- a/.github/workflows/gateway-conformance.yml +++ b/.github/workflows/gateway-conformance.yml @@ -11,8 +11,9 @@ concurrency: cancel-in-progress: true jobs: - gateway-conformance: + gateway-conformance-car: runs-on: ubuntu-latest + name: Gateway Conformance (CAR Gateway) steps: # 1. Download the gateway-conformance fixtures - name: Download gateway-conformance fixtures @@ -67,3 +68,66 @@ jobs: with: name: gateway-conformance.json path: output.json + + gateway-conformance-graph: + runs-on: ubuntu-latest + name: Gateway Conformance (Graph Gateway) + steps: + # 1. Download the gateway-conformance fixtures + - name: Download gateway-conformance fixtures + uses: ipfs/gateway-conformance/.github/actions/extract-fixtures@v0.5 + with: + output: fixtures + merged: true + + # 2. Build the graph-gateway + - name: Setup Go + uses: actions/setup-go@v4 + with: + go-version: 1.21.x + - name: Checkout boxo + uses: actions/checkout@v4 + with: + path: boxo + - name: Build car-gateway + run: go build -o car-gateway + working-directory: boxo/examples/gateway/car + - name: Build graph-gateway + run: go build -o graph-gateway + working-directory: boxo/examples/gateway/graph + + # 3. Start the car-gateway and the graph-gateway + - name: Start car-gateway + run: boxo/examples/gateway/car/car-gateway -c fixtures/fixtures.car -p 8030 & + - name: Start graph-gateway + run: boxo/examples/gateway/graph/graph-gateway -g http://127.0.0.1:8030 -p 8040 & + + # 4. 
Run the gateway-conformance tests + - name: Run gateway-conformance tests + uses: ipfs/gateway-conformance/.github/actions/test@v0.5 + with: + gateway-url: http://127.0.0.1:8040 + json: output.json + xml: output.xml + html: output.html + markdown: output.md + subdomain-url: http://example.net + specs: -trustless-ipns-gateway,-path-ipns-gateway,-subdomain-ipns-gateway,-dnslink-gateway + args: -skip 'TestGatewayCar/GET_response_for_application/vnd.ipld.car/Header_Content-Length' + + # 5. Upload the results + - name: Upload MD summary + if: failure() || success() + run: cat output.md >> $GITHUB_STEP_SUMMARY + - name: Upload HTML report + if: failure() || success() + uses: actions/upload-artifact@v3 + with: + name: gateway-conformance.html + path: output.html + - name: Upload JSON report + if: failure() || success() + uses: actions/upload-artifact@v3 + with: + name: gateway-conformance.json + path: output.json From 0d2d80a095f89e507a067b158e6fd50e4f44a642 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Tue, 26 Mar 2024 13:02:23 +0100 Subject: [PATCH 06/19] cleanups and fix example --- examples/gateway/graph/main.go | 4 +- gateway/backend.go | 45 +++++ gateway/backend_blocks.go | 50 +---- gateway/backend_graph.go | 133 +++++------- gateway/backend_graph_test.go | 302 ++++++++-------------------- gateway/backend_graph_traversal.go | 16 +- gateway/backend_graph_utils.go | 5 +- gateway/backend_graph_utils_test.go | 4 +- gateway/errors.go | 26 +++ gateway/remote_blocks_backend.go | 2 +- 10 files changed, 215 insertions(+), 372 deletions(-) diff --git a/examples/gateway/graph/main.go b/examples/gateway/graph/main.go index 9c2c04c3b..ec7a3582b 100644 --- a/examples/gateway/graph/main.go +++ b/examples/gateway/graph/main.go @@ -27,13 +27,13 @@ func main() { } defer (func() { _ = tp.Shutdown(ctx) })() - carFetcher, err := gateway.NewRemoteCarFetcher([]string{*gatewayUrlPtr}, nil) + carFetcher, err := gateway.NewRemoteCarFetcher([]string{*gatewayUrlPtr}) if err != nil { 
log.Fatal(err) } // Creates the gateway with the remote graph backend. - backend, err := gateway.NewGraphGatewayBackend(carFetcher) + backend, err := gateway.NewGraphBackend(carFetcher) if err != nil { log.Fatal(err) } diff --git a/gateway/backend.go b/gateway/backend.go index 280791c0d..c4db09798 100644 --- a/gateway/backend.go +++ b/gateway/backend.go @@ -10,11 +10,56 @@ import ( "github.com/ipfs/boxo/ipns" "github.com/ipfs/boxo/namesys" "github.com/ipfs/boxo/path" + "github.com/ipfs/boxo/path/resolver" "github.com/ipfs/go-cid" routinghelpers "github.com/libp2p/go-libp2p-routing-helpers" "github.com/libp2p/go-libp2p/core/routing" + "github.com/prometheus/client_golang/prometheus" ) +type backendOptions struct { + ns namesys.NameSystem + vs routing.ValueStore + r resolver.Resolver + promRegistry prometheus.Registerer +} + +// WithNameSystem sets the name system to use with the different backends. If not set +// it will use the default DNSLink resolver generated by [NewDNSResolver] along +// with any configured [routing.ValueStore]. +func WithNameSystem(ns namesys.NameSystem) BackendOption { + return func(opts *backendOptions) error { + opts.ns = ns + return nil + } +} + +// WithValueStore sets the [routing.ValueStore] to use with the different backends. +func WithValueStore(vs routing.ValueStore) BackendOption { + return func(opts *backendOptions) error { + opts.vs = vs + return nil + } +} + +// WithResolver sets the [resolver.Resolver] to use with the different backends. +func WithResolver(r resolver.Resolver) BackendOption { + return func(opts *backendOptions) error { + opts.r = r + return nil + } +} + +// WithPrometheusRegistry sets the registry to use with [GraphBackend]. 
+func WithPrometheusRegistry(reg prometheus.Registerer) BackendOption { + return func(opts *backendOptions) error { + opts.promRegistry = reg + return nil + } +} + +type BackendOption func(options *backendOptions) error + // baseBackend contains some common backend functionalities that are shared by // different backend implementations. type baseBackend struct { diff --git a/gateway/backend_blocks.go b/gateway/backend_blocks.go index 94a2551b9..8eafe06af 100644 --- a/gateway/backend_blocks.go +++ b/gateway/backend_blocks.go @@ -16,7 +16,6 @@ import ( "github.com/ipfs/boxo/ipld/merkledag" ufile "github.com/ipfs/boxo/ipld/unixfs/file" uio "github.com/ipfs/boxo/ipld/unixfs/io" - "github.com/ipfs/boxo/namesys" "github.com/ipfs/boxo/path" "github.com/ipfs/boxo/path/resolver" blocks "github.com/ipfs/go-block-format" @@ -35,9 +34,7 @@ import ( "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse" - "github.com/libp2p/go-libp2p/core/routing" mc "github.com/multiformats/go-multicodec" - "github.com/prometheus/client_golang/prometheus" // Ensure basic codecs are registered. _ "github.com/ipld/go-ipld-prime/codec/cbor" @@ -57,51 +54,8 @@ type BlocksBackend struct { var _ IPFSBackend = (*BlocksBackend)(nil) -type blocksBackendOptions struct { - ns namesys.NameSystem - vs routing.ValueStore - r resolver.Resolver - promRegistry prometheus.Registerer -} - -// WithNameSystem sets the name system to use with the [BlocksBackend]. If not set -// it will use the default DNSLink resolver generated by [NewDNSResolver] along -// with any configured [routing.ValueStore]. -func WithNameSystem(ns namesys.NameSystem) BlocksBackendOption { - return func(opts *blocksBackendOptions) error { - opts.ns = ns - return nil - } -} - -// WithValueStore sets the [routing.ValueStore] to use with the [BlocksBackend]. 
-func WithValueStore(vs routing.ValueStore) BlocksBackendOption { - return func(opts *blocksBackendOptions) error { - opts.vs = vs - return nil - } -} - -// WithResolver sets the [resolver.Resolver] to use with the [BlocksBackend]. -func WithResolver(r resolver.Resolver) BlocksBackendOption { - return func(opts *blocksBackendOptions) error { - opts.r = r - return nil - } -} - -// WithPrometheusRegistry sets the registry to use for metrics collection. -func WithPrometheusRegistry(reg prometheus.Registerer) BlocksBackendOption { - return func(opts *blocksBackendOptions) error { - opts.promRegistry = reg - return nil - } -} - -type BlocksBackendOption func(options *blocksBackendOptions) error - -func NewBlocksBackend(blockService blockservice.BlockService, opts ...BlocksBackendOption) (*BlocksBackend, error) { - var compiledOptions blocksBackendOptions +func NewBlocksBackend(blockService blockservice.BlockService, opts ...BackendOption) (*BlocksBackend, error) { + var compiledOptions backendOptions for _, o := range opts { if err := o(&compiledOptions); err != nil { return nil, err diff --git a/gateway/backend_graph.go b/gateway/backend_graph.go index b230bb20a..30da4b52e 100644 --- a/gateway/backend_graph.go +++ b/gateway/backend_graph.go @@ -15,7 +15,6 @@ import ( "github.com/ipfs/boxo/ipld/merkledag" "github.com/ipfs/boxo/ipld/unixfs" "github.com/ipfs/boxo/path" - ipfspath "github.com/ipfs/boxo/path" "github.com/ipfs/boxo/path/resolver" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" @@ -39,45 +38,24 @@ import ( const GetBlockTimeout = time.Second * 60 -// type DataCallback = func(resource string, reader io.Reader) error -// TODO: Don't use a caboose type, perhaps ask them to use a type alias instead of a type -// type DataCallback = caboose.DataCallback type DataCallback func(resource string, reader io.Reader) error -// TODO: Don't use a caboose type -// type ErrPartialResponse = caboose.ErrPartialResponse - -// ErrPartialResponse can be returned 
from a DataCallback to indicate that some of the requested resource -// was successfully fetched, and that instead of retrying the full resource, that there are -// one or more more specific resources that should be fetched (via StillNeed) to complete the request. -type ErrPartialResponse struct { - error - StillNeed []string -} - -func (epr ErrPartialResponse) Error() string { - if epr.error != nil { - return fmt.Sprintf("partial response: %s", epr.error.Error()) - } - return "caboose received a partial response" -} - var ErrFetcherUnexpectedEOF = fmt.Errorf("failed to fetch IPLD data") type CarFetcher interface { Fetch(ctx context.Context, path string, cb DataCallback) error } -type GraphGateway struct { +type GraphBackend struct { baseBackend fetcher CarFetcher pc traversal.LinkTargetNodePrototypeChooser - metrics *GraphGatewayMetrics + metrics *GraphBackendMetrics } -type GraphGatewayMetrics struct { +type GraphBackendMetrics struct { contextAlreadyCancelledMetric prometheus.Counter carFetchAttemptMetric prometheus.Counter carBlocksFetchedMetric prometheus.Counter @@ -87,8 +65,8 @@ type GraphGatewayMetrics struct { bytesRangeSizeMetric prometheus.Histogram } -func NewGraphGatewayBackend(f CarFetcher, opts ...BlocksBackendOption) (*GraphGateway, error) { - var compiledOptions blocksBackendOptions +func NewGraphBackend(f CarFetcher, opts ...BackendOption) (*GraphBackend, error) { + var compiledOptions backendOptions for _, o := range opts { if err := o(&compiledOptions); err != nil { return nil, err @@ -107,10 +85,10 @@ func NewGraphGatewayBackend(f CarFetcher, opts ...BlocksBackendOption) (*GraphGa promReg = compiledOptions.promRegistry } - return &GraphGateway{ + return &GraphBackend{ baseBackend: baseBackend, fetcher: f, - metrics: registerGraphGatewayMetrics(promReg), + metrics: registerGraphBackendMetrics(promReg), pc: dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { if tlnkNd, ok := 
lnkCtx.LinkNode.(schema.TypedLinkNode); ok { return tlnkNd.LinkTargetNodePrototype(), nil @@ -120,7 +98,7 @@ func NewGraphGatewayBackend(f CarFetcher, opts ...BlocksBackendOption) (*GraphGa }, nil } -func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGatewayMetrics { +func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMetrics { // How many CAR Fetch attempts we had? Need this to calculate % of various graph request types. // We only count attempts here, because success/failure with/without retries are provided by caboose: // - ipfs_caboose_fetch_duration_car_success_count @@ -133,7 +111,7 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway Name: "car_fetch_attempts", Help: "The number of times a CAR fetch was attempted by IPFSBackend.", }) - registerer.MustRegister(carFetchAttemptMetric) + promReg.MustRegister(carFetchAttemptMetric) contextAlreadyCancelledMetric := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "ipfs", @@ -141,7 +119,7 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway Name: "car_fetch_context_already_cancelled", Help: "The number of times context is already cancelled when a CAR fetch was attempted by IPFSBackend.", }) - registerer.MustRegister(contextAlreadyCancelledMetric) + promReg.MustRegister(contextAlreadyCancelledMetric) // How many blocks were read via CARs? // Need this as a baseline to reason about error ratio vs raw_block_recovery_attempts. 
@@ -151,7 +129,7 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway Name: "car_blocks_fetched", Help: "The number of blocks successfully read via CAR fetch.", }) - registerer.MustRegister(carBlocksFetchedMetric) + promReg.MustRegister(carBlocksFetchedMetric) carParamsMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: "ipfs", @@ -159,7 +137,7 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway Name: "car_fetch_params", Help: "How many times specific CAR parameter was used during CAR data fetch.", }, []string{"dagScope", "entityRanges"}) // we use 'ranges' instead of 'bytes' here because we only count the number of ranges present - registerer.MustRegister(carParamsMetric) + promReg.MustRegister(carParamsMetric) bytesRangeStartMetric := prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: "ipfs", @@ -168,7 +146,7 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway Help: "Tracks where did the range request start.", Buckets: prometheus.ExponentialBuckets(1024, 2, 24), // 1024 bytes to 8 GiB }) - registerer.MustRegister(bytesRangeStartMetric) + promReg.MustRegister(bytesRangeStartMetric) bytesRangeSizeMetric := prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: "ipfs", @@ -177,9 +155,9 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway Help: "Tracks the size of range requests.", Buckets: prometheus.ExponentialBuckets(256*1024, 2, 10), // From 256KiB to 100MiB }) - registerer.MustRegister(bytesRangeSizeMetric) + promReg.MustRegister(bytesRangeSizeMetric) - return &GraphGatewayMetrics{ + return &GraphBackendMetrics{ contextAlreadyCancelledMetric, carFetchAttemptMetric, carBlocksFetchedMetric, @@ -189,7 +167,7 @@ func registerGraphGatewayMetrics(registerer prometheus.Registerer) *GraphGateway } } -func (api *GraphGateway) fetchCAR(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) 
error { +func (api *GraphBackend) fetchCAR(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { urlWithoutHost := contentPathToCarUrl(path, params).String() api.metrics.carFetchAttemptMetric.Inc() @@ -203,15 +181,15 @@ func (api *GraphGateway) fetchCAR(ctx context.Context, path path.ImmutablePath, if ipldError != nil { fetchErr = ipldError } else if fetchErr != nil { - fetchErr = GatewayError(fetchErr) + fetchErr = blockstoreErrToGatewayErr(fetchErr) } return fetchErr } // resolvePathWithRootsAndBlock takes a path and linksystem and returns the set of non-terminal cids, the terminal cid, the remainder, and the block corresponding to the terminal cid -func resolvePathWithRootsAndBlock(ctx context.Context, fpath ipfspath.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { - pathRootCids, terminalCid, remainder, terminalBlk, err := resolvePathToLastWithRoots(ctx, fpath, unixFSLsys) +func resolvePathWithRootsAndBlock(ctx context.Context, p path.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { + pathRootCids, terminalCid, remainder, terminalBlk, err := resolvePathToLastWithRoots(ctx, p, unixFSLsys) if err != nil { return nil, cid.Undef, nil, nil, err } @@ -236,18 +214,17 @@ func resolvePathWithRootsAndBlock(ctx context.Context, fpath ipfspath.ImmutableP // the remainder pathing, the last block loaded, and the last node loaded. 
// // Note: the block returned will be nil if the terminal element is a link or the path is just a CID -func resolvePathToLastWithRoots(ctx context.Context, fpath ipfspath.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { - c, p := fpath.RootCid(), fpath.Segments()[2:] - - if len(p) == 0 { - return nil, c, nil, nil, nil +func resolvePathToLastWithRoots(ctx context.Context, p path.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { + root, segments := p.RootCid(), p.Segments()[2:] + if len(segments) == 0 { + return nil, root, nil, nil, nil } unixFSLsys.NodeReifier = unixfsnode.Reify defer func() { unixFSLsys.NodeReifier = nil }() var cids []cid.Cid - cids = append(cids, c) + cids = append(cids, root) pc := dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { @@ -274,13 +251,13 @@ func resolvePathToLastWithRoots(ctx context.Context, fpath ipfspath.ImmutablePat return blk, nd, nil } - nextBlk, nextNd, err := loadNode(ctx, c) + nextBlk, nextNd, err := loadNode(ctx, root) if err != nil { return nil, cid.Undef, nil, nil, err } depth := 0 - for i, elem := range p { + for i, elem := range segments { nextNd, err = nextNd.LookupBySegment(ipld.ParsePathSegment(elem)) if err != nil { return nil, cid.Undef, nil, nil, err @@ -297,7 +274,7 @@ func resolvePathToLastWithRoots(ctx context.Context, fpath ipfspath.ImmutablePat } cids = append(cids, cidLnk.Cid) - if i < len(p)-1 { + if i < len(segments)-1 { nextBlk, nextNd, err = loadNode(ctx, cidLnk.Cid) if err != nil { return nil, cid.Undef, nil, nil, err @@ -311,13 +288,13 @@ func resolvePathToLastWithRoots(ctx context.Context, fpath ipfspath.ImmutablePat // if last node is not a link, just return it's cid, add path to remainder and return if nextNd.Kind() != ipld.Kind_Link { // return the cid and the remainder of the path - return 
cids[:len(cids)-1], cids[len(cids)-1], p[len(p)-depth:], nextBlk, nil + return cids[:len(cids)-1], cids[len(cids)-1], segments[len(segments)-depth:], nextBlk, nil } return cids[:len(cids)-1], cids[len(cids)-1], nil, nil, nil } -func contentMetadataFromRootsAndRemainder(p ipfspath.ImmutablePath, pathRoots []cid.Cid, remainder []string) (ContentPathMetadata, error) { +func contentMetadataFromRootsAndRemainder(p path.ImmutablePath, pathRoots []cid.Cid, remainder []string) (ContentPathMetadata, error) { md := ContentPathMetadata{ PathSegmentRoots: pathRoots, LastSegmentRemainder: remainder, @@ -328,7 +305,7 @@ func contentMetadataFromRootsAndRemainder(p ipfspath.ImmutablePath, pathRoots [] var errNotUnixFS = fmt.Errorf("data was not unixfs") -func (api *GraphGateway) Get(ctx context.Context, path path.ImmutablePath, byteRanges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { +func (api *GraphBackend) Get(ctx context.Context, path path.ImmutablePath, byteRanges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { rangeCount := len(byteRanges) api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "entity", "entityRanges": strconv.Itoa(rangeCount)}).Inc() @@ -704,7 +681,7 @@ func (it *backpressuredHAMTDirIterNoRecursion) Err() error { var _ AwaitCloser = (*backpressuredHAMTDirIterNoRecursion)(nil) -func (api *GraphGateway) GetAll(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (api *GraphBackend) GetAll(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.Node, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "all", "entityRanges": "0"}).Inc() return fetchWithPartialRetries(ctx, path, CarParams{Scope: DagScopeAll}, loadTerminalUnixFSElementWithRecursiveDirectories, api.metrics, api.fetchCAR) } @@ -723,7 +700,7 @@ type nextReq struct { params CarParams } -func fetchWithPartialRetries[T any](ctx context.Context, path path.ImmutablePath, initialParams 
CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics *GraphGatewayMetrics, fetchCAR fetchCarFn) (ContentPathMetadata, T, error) { +func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, initialParams CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics *GraphBackendMetrics, fetchCAR fetchCarFn) (ContentPathMetadata, T, error) { var zeroReturnType T terminalPathElementCh := make(chan terminalPathType[T], 1) @@ -752,11 +729,9 @@ func fetchWithPartialRetries[T any](ctx context.Context, path path.ImmutablePath } } - // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) - p := path params := initialParams - err := fetchCAR(cctx, path, params, func(resource string, reader io.Reader) error { + err := fetchCAR(cctx, p, params, func(resource string, reader io.Reader) error { gb, err := carToLinearBlockGetter(cctx, reader, metrics) if err != nil { return err @@ -828,14 +803,12 @@ func fetchWithPartialRetries[T any](ctx context.Context, path path.ImmutablePath return closeErr case req := <-sendRequest: // set path and params for next iteration - p = ipfspath.FromCid(req.c) - // FIXME(hacdias) - imPath := p + p = path.FromCid(req.c) if err != nil { return err } params = req.params - remainderUrl := contentPathToCarUrl(imPath, params).String() + remainderUrl := contentPathToCarUrl(p, params).String() return ErrPartialResponse{StillNeed: []string{remainderUrl}} case <-cctx.Done(): return cctx.Err() @@ -875,15 +848,13 @@ func fetchWithPartialRetries[T any](ctx context.Context, path path.ImmutablePath } } -func (api *GraphGateway) GetBlock(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.File, error) { +func (api *GraphBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, files.File, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", "entityRanges": "0"}).Inc() - // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) - p := path var md 
ContentPathMetadata var f files.File // TODO: if path is `/ipfs/cid`, we should use ?format=raw - err := api.fetchCAR(ctx, path, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { + err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) if err != nil { return err @@ -924,21 +895,18 @@ func (api *GraphGateway) GetBlock(ctx context.Context, path path.ImmutablePath) return md, f, nil } -func (api *GraphGateway) Head(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { +func (api *GraphBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "entity", "entityRanges": "1"}).Inc() // TODO: we probably want to move this either to boxo, or at least to loadRequestIntoSharedBlockstoreAndBlocksGateway api.metrics.bytesRangeStartMetric.Observe(0) api.metrics.bytesRangeSizeMetric.Observe(3071) - // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) - p := path - var md ContentPathMetadata var n *HeadResponse // TODO: fallback to dynamic fetches in case we haven't requested enough data rangeTo := int64(3071) - err := api.fetchCAR(ctx, path, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: 0, To: &rangeTo}}, func(resource string, reader io.Reader) error { + err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: 0, To: &rangeTo}}, func(resource string, reader io.Reader) error { gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) if err != nil { return err @@ -1069,11 +1037,11 @@ func (api *GraphGateway) Head(ctx context.Context, path path.ImmutablePath) (Con return md, n, nil } -func (api *GraphGateway) ResolvePath(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, error) { +func (api *GraphBackend) ResolvePath(ctx 
context.Context, p path.ImmutablePath) (ContentPathMetadata, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", "entityRanges": "0"}).Inc() var md ContentPathMetadata - err := api.fetchCAR(ctx, path, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { + err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) if err != nil { return err @@ -1081,8 +1049,6 @@ func (api *GraphGateway) ResolvePath(ctx context.Context, path path.ImmutablePat lsys := getLinksystem(gb) // First resolve the path since we always need to. - // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) - p := path pathRoots, _, remainder, _, err := resolvePathToLastWithRoots(ctx, p, lsys) if err != nil { return err @@ -1100,18 +1066,16 @@ func (api *GraphGateway) ResolvePath(ctx context.Context, path path.ImmutablePat return md, nil } -func (api *GraphGateway) GetCAR(ctx context.Context, path path.ImmutablePath, params CarParams) (ContentPathMetadata, io.ReadCloser, error) { +func (api *GraphBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params CarParams) (ContentPathMetadata, io.ReadCloser, error) { numRanges := "0" if params.Range != nil { numRanges = "1" } api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": string(params.Scope), "entityRanges": numRanges}).Inc() - rootCid, err := getRootCid(path) + rootCid, err := getRootCid(p) if err != nil { return ContentPathMetadata{}, nil, err } - // FIXME(HACDIAS): p := ipfspath.FromString(path.String()) - p := path switch params.Order { case DagOrderUnspecified, DagOrderUnknown, DagOrderDFS: @@ -1124,7 +1088,7 @@ func (api *GraphGateway) GetCAR(ctx context.Context, path path.ImmutablePath, pa numBlocksSent := 0 var cw storage.WritableCar var blockBuffer []blocks.Block - err = api.fetchCAR(ctx, path, params, func(resource string, reader io.Reader) error { + err = 
api.fetchCAR(ctx, p, params, func(resource string, reader io.Reader) error { numBlocksThisCall := 0 gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) if err != nil { @@ -1190,9 +1154,8 @@ func (api *GraphGateway) GetCAR(ctx context.Context, path path.ImmutablePath, pa }() return ContentPathMetadata{ - // PathSegmentRoots: []cid.Cid{rootCid}, - PathSegmentRoots: nil, // FIXME(hacdias): originala bove - LastSegment: ipfspath.FromCid(rootCid), + PathSegmentRoots: []cid.Cid{rootCid}, + LastSegment: path.FromCid(rootCid), ContentType: "", }, r, nil } @@ -1212,11 +1175,11 @@ func getRootCid(imPath path.ImmutablePath) (cid.Cid, error) { return rootCid, nil } -func (api *GraphGateway) IsCached(ctx context.Context, path path.Path) bool { +func (api *GraphBackend) IsCached(ctx context.Context, path path.Path) bool { return false } -var _ IPFSBackend = (*GraphGateway)(nil) +var _ IPFSBackend = (*GraphBackend)(nil) func checkRetryableError(e *error, fn func() error) error { err := fn() diff --git a/gateway/backend_graph_test.go b/gateway/backend_graph_test.go index 8d69c3ca0..2ed0f237b 100644 --- a/gateway/backend_graph_test.go +++ b/gateway/backend_graph_test.go @@ -29,7 +29,7 @@ import ( //go:embed testdata/directory-with-multilayer-hamt-and-multiblock-files.car var dirWithMultiblockHAMTandFiles []byte -func TestTar(t *testing.T) { +func TestGraphBackendTar(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -155,32 +155,21 @@ func TestTar(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + require.NoError(t, err) p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) _, nd, err := 
backend.GetAll(ctx, p) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) assertNextEntryNameEquals := func(t *testing.T, dirIter files.DirIterator, expectedName string) { t.Helper() - if !dirIter.Next() { - iterErr := dirIter.Err() - t.Fatalf("expected entry, but errored with %s", iterErr.Error()) - } - if expectedName != dirIter.Name() { - t.Fatalf("expected %s, got %s", expectedName, dirIter.Name()) - } + require.True(t, dirIter.Next(), dirIter.Err()) + require.Equal(t, expectedName, dirIter.Name()) } robs, err := carbs.NewReadOnly(bytes.NewReader(dirWithMultiblockHAMTandFiles), nil) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dsrv := merkledag.NewDAGService(blockservice.New(robs, offline.Exchange(robs))) assertFileEqual := func(t *testing.T, expectedCidString string, receivedFile files.File) { @@ -188,25 +177,15 @@ func TestTar(t *testing.T) { expected := cid.MustParse(expectedCidString) receivedFileData, err := io.ReadAll(receivedFile) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) nd, err := dsrv.Get(ctx, expected) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) expectedFile, err := unixfile.NewUnixfsFile(ctx, dsrv, nd) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) expectedFileData, err := io.ReadAll(expectedFile.(files.File)) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(expectedFileData, receivedFileData) { - t.Fatalf("expected %s, got %s", string(expectedFileData), string(receivedFileData)) - } + require.NoError(t, err) + require.True(t, bytes.Equal(expectedFileData, receivedFileData)) } rootDirIter := nd.(files.Directory).Entries() @@ -234,12 +213,10 @@ func TestTar(t *testing.T) { assertNextEntryNameEquals(t, hamtDirIter, "exampleA") assertFileEqual(t, "bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa", hamtDirIter.Node().(files.File)) - if rootDirIter.Next() || basicDirIter.Next() || hamtDirIter.Next() { - t.Fatal("expected directories to be fully 
enumerated") - } + require.False(t, rootDirIter.Next() || basicDirIter.Next() || hamtDirIter.Next()) } -func TestTarAtEndOfPath(t *testing.T) { +func TestGraphBackendTarAtEndOfPath(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -345,40 +322,26 @@ func TestTarAtEndOfPath(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + require.NoError(t, err) p, err := path.Join(path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")), "hamtDir") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) imPath, err := path.NewImmutablePath(p) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) _, nd, err := backend.GetAll(ctx, imPath) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) assertNextEntryNameEquals := func(t *testing.T, dirIter files.DirIterator, expectedName string) { t.Helper() - if !dirIter.Next() { - t.Fatal("expected entry") - } - if expectedName != dirIter.Name() { - t.Fatalf("expected %s, got %s", expectedName, dirIter.Name()) - } + require.True(t, dirIter.Next()) + require.Equal(t, expectedName, dirIter.Name()) } robs, err := carbs.NewReadOnly(bytes.NewReader(dirWithMultiblockHAMTandFiles), nil) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dsrv := merkledag.NewDAGService(blockservice.New(robs, offline.Exchange(robs))) assertFileEqual := func(t *testing.T, expectedCidString string, receivedFile files.File) { @@ -386,25 +349,15 @@ func TestTarAtEndOfPath(t *testing.T) { expected := cid.MustParse(expectedCidString) receivedFileData, err := io.ReadAll(receivedFile) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) nd, err := dsrv.Get(ctx, 
expected) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) expectedFile, err := unixfile.NewUnixfsFile(ctx, dsrv, nd) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) expectedFileData, err := io.ReadAll(expectedFile.(files.File)) - if err != nil { - t.Fatal(err) - } - if !bytes.Equal(expectedFileData, receivedFileData) { - t.Fatalf("expected %s, got %s", string(expectedFileData), string(receivedFileData)) - } + require.NoError(t, err) + require.True(t, bytes.Equal(expectedFileData, receivedFileData)) } hamtDirIter := nd.(files.Directory).Entries() @@ -421,9 +374,7 @@ func TestTarAtEndOfPath(t *testing.T) { assertNextEntryNameEquals(t, hamtDirIter, "exampleA") assertFileEqual(t, "bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa", hamtDirIter.Node().(files.File)) - if hamtDirIter.Next() { - t.Fatal("expected directories to be fully enumerated") - } + require.False(t, hamtDirIter.Next()) } func sendBlocks(ctx context.Context, carFixture []byte, writer io.Writer, cidStrList []string) error { @@ -451,7 +402,7 @@ func sendBlocks(ctx context.Context, carFixture []byte, writer io.Writer, cidStr return nil } -func TestGetFile(t *testing.T) { +func TestGraphBackendGetFile(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -530,50 +481,33 @@ func TestGetFile(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) defer trustedGatewayServer.Close() resp, err := http.Get(trustedGatewayServer.URL + "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA") - if err != 
nil { - t.Fatal(err) - } + require.NoError(t, err) data, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) robs, err := carbs.NewReadOnly(bytes.NewReader(dirWithMultiblockHAMTandFiles), nil) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dsrv := merkledag.NewDAGService(blockservice.New(robs, offline.Exchange(robs))) fileRootNd, err := dsrv.Get(ctx, cid.MustParse("bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa")) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) uio, err := unixfile.NewUnixfsFile(ctx, dsrv, fileRootNd) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f := uio.(files.File) expectedFileData, err := io.ReadAll(f) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(data, expectedFileData) { - t.Fatalf("expected %s, got %s", string(expectedFileData), string(data)) - } + require.NoError(t, err) + require.True(t, bytes.Equal(data, expectedFileData)) } -func TestGetFileRangeRequest(t *testing.T) { +func TestGraphBackendGetFileRangeRequest(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -648,64 +582,41 @@ func TestGetFileRangeRequest(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) defer trustedGatewayServer.Close() req, err := http.NewRequestWithContext(ctx, "GET", trustedGatewayServer.URL+"/ipfs/bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa", nil) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) startIndex := 256 endIndex := 750 req.Header.Set("Range", 
fmt.Sprintf("bytes=%d-%d", startIndex, endIndex)) resp, err := http.DefaultClient.Do(req) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) data, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) robs, err := carbs.NewReadOnly(bytes.NewReader(dirWithMultiblockHAMTandFiles), nil) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dsrv := merkledag.NewDAGService(blockservice.New(robs, offline.Exchange(robs))) fileRootNd, err := dsrv.Get(ctx, cid.MustParse("bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa")) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) uio, err := unixfile.NewUnixfsFile(ctx, dsrv, fileRootNd) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f := uio.(files.File) - if _, err := f.Seek(int64(startIndex), io.SeekStart); err != nil { - t.Fatal(err) - } + _, err = f.Seek(int64(startIndex), io.SeekStart) + require.NoError(t, err) expectedFileData, err := io.ReadAll(io.LimitReader(f, int64(endIndex)-int64(startIndex)+1)) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(data, expectedFileData) { - t.Fatalf("expected %s, got %s", string(expectedFileData), string(data)) - } - - if requestNum != 4 { - t.Fatalf("expected exactly 4 requests, got %d", requestNum) - } + require.NoError(t, err) + require.True(t, bytes.Equal(data, expectedFileData)) + require.Equal(t, 4, requestNum) } -func TestGetFileWithBadBlockReturned(t *testing.T) { +func TestGraphBackendGetFileWithBadBlockReturned(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -778,50 +689,33 @@ func TestGetFileWithBadBlockReturned(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, 
retriesRemaining: 3}) + require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) defer trustedGatewayServer.Close() resp, err := http.Get(trustedGatewayServer.URL + "/ipfs/bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) data, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) robs, err := carbs.NewReadOnly(bytes.NewReader(dirWithMultiblockHAMTandFiles), nil) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) dsrv := merkledag.NewDAGService(blockservice.New(robs, offline.Exchange(robs))) fileRootNd, err := dsrv.Get(ctx, cid.MustParse("bafybeigcisqd7m5nf3qmuvjdbakl5bdnh4ocrmacaqkpuh77qjvggmt2sa")) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) uio, err := unixfile.NewUnixfsFile(ctx, dsrv, fileRootNd) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) f := uio.(files.File) expectedFileData, err := io.ReadAll(f) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(data, expectedFileData) { - t.Fatalf("expected %s, got %s", string(expectedFileData), string(data)) - } + require.NoError(t, err) + require.True(t, bytes.Equal(data, expectedFileData)) } -func TestGetHAMTDirectory(t *testing.T) { +func TestGraphBackendGetHAMTDirectory(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -898,23 +792,17 @@ func TestGetHAMTDirectory(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) defer 
trustedGatewayServer.Close() resp, err := http.Get(trustedGatewayServer.URL + "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) data, err := io.ReadAll(resp.Body) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) if strings.Count(string(data), ">exampleD-hamt-collide-exampleB-seed-364<") == 1 && strings.Count(string(data), ">exampleC-hamt-collide-exampleA-seed-52<") == 1 && @@ -925,7 +813,7 @@ func TestGetHAMTDirectory(t *testing.T) { t.Fatal("directory does not contain the expected links") } -func TestGetCAR(t *testing.T) { +func TestGraphBackendGetCAR(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1010,28 +898,20 @@ func TestGetCAR(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphGatewayBackend(&retryFetcher{inner: bs.(CarFetcher), allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + require.NoError(t, err) p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) var carReader io.Reader _, carReader, err = backend.GetCAR(ctx, p, CarParams{Scope: DagScopeAll}) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) carBytes, err := io.ReadAll(carReader) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) carReader = bytes.NewReader(carBytes) blkReader, err := carv2.NewBlockReader(carReader) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) responseCarBlock := []string{ "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir @@ -1057,22 +937,16 @@ func TestGetCAR(t *testing.T) { for i := 0; i < len(responseCarBlock); i++ { expectedCid := cid.MustParse(responseCarBlock[i]) blk, err := blkReader.Next() - if err != nil { - t.Fatal(err) - } - if 
!blk.Cid().Equals(expectedCid) { - t.Fatalf("expected cid %s, got %s", expectedCid, blk.Cid()) - } + require.NoError(t, err) + require.True(t, blk.Cid().Equals(expectedCid)) } _, err = blkReader.Next() - if !errors.Is(err, io.EOF) { - t.Fatal("expected an EOF") - } + require.ErrorIs(t, err, io.EOF) } -func TestPassthroughErrors(t *testing.T) { +func TestGraphBackendPassthroughErrors(t *testing.T) { t.Run("PathTraversalError", func(t *testing.T) { - pathTraversalTest := func(t *testing.T, traversal func(ctx context.Context, p path.ImmutablePath, backend *GraphGateway) error) { + pathTraversalTest := func(t *testing.T, traversal func(ctx context.Context, p path.ImmutablePath, backend *GraphBackend) error) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1116,30 +990,24 @@ func TestPassthroughErrors(t *testing.T) { require.NoError(t, err) p, err := path.NewPath("/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA") - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) imPath, err := path.NewImmutablePath(p) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) bogusErr := NewErrorStatusCode(fmt.Errorf("this is a test error"), 418) clientRequestNum := 0 - backend, err := NewGraphGatewayBackend(&retryFetcher{ + backend, err := NewGraphBackend(&retryFetcher{ inner: &fetcherWrapper{fn: func(ctx context.Context, path string, cb DataCallback) error { clientRequestNum++ if clientRequestNum > 2 { return bogusErr } - return bs.(CarFetcher).Fetch(ctx, path, cb) + return bs.Fetch(ctx, path, cb) }}, allowedRetries: 3, retriesRemaining: 3}) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) err = traversal(ctx, imPath, backend) parsedErr := &ErrorStatusCode{} @@ -1151,13 +1019,13 @@ func TestPassthroughErrors(t *testing.T) { t.Fatal("error did not pass through") } t.Run("Block", func(t *testing.T) { - pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphGateway) 
error { + pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphBackend) error { _, _, err := backend.GetBlock(ctx, p) return err }) }) t.Run("File", func(t *testing.T) { - pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphGateway) error { + pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphBackend) error { _, _, err := backend.Get(ctx, p) return err }) diff --git a/gateway/backend_graph_traversal.go b/gateway/backend_graph_traversal.go index ce47d2a92..2219d6803 100644 --- a/gateway/backend_graph_traversal.go +++ b/gateway/backend_graph_traversal.go @@ -23,21 +23,9 @@ import ( type getBlock func(ctx context.Context, cid cid.Cid) (blocks.Block, error) -// ErrInvalidResponse can be returned from a DataCallback to indicate that the data provided for the -// requested resource was explicitly 'incorrect' - that blocks not in the requested dag, or non-car-conforming -// data was returned. -type ErrInvalidResponse struct { - Message string -} - -func (e ErrInvalidResponse) Error() string { - return e.Message -} - -// var ErrNilBlock = caboose.ErrInvalidResponse{Message: "received a nil block with no error"} var ErrNilBlock = ErrInvalidResponse{Message: "received a nil block with no error"} -func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *GraphGatewayMetrics) (getBlock, error) { +func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *GraphBackendMetrics) (getBlock, error) { cr, err := car.NewCarReaderWithOptions(reader, car.WithErrorOnEmptyRoots(false)) if err != nil { return nil, err @@ -104,7 +92,7 @@ func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *Grap if !ok || errors.Is(blkRead.err, io.EOF) { return nil, io.ErrUnexpectedEOF } - return nil, GatewayError(blkRead.err) + return nil, blockstoreErrToGatewayErr(blkRead.err) } if blkRead.block != nil { metrics.carBlocksFetchedMetric.Inc() diff --git 
a/gateway/backend_graph_utils.go b/gateway/backend_graph_utils.go index 3f86e40e6..1df74761d 100644 --- a/gateway/backend_graph_utils.go +++ b/gateway/backend_graph_utils.go @@ -45,9 +45,9 @@ func carParamsToString(params CarParams) string { return paramsBuilder.String() } -// GatewayError translates underlying blockstore error into one that gateway code will return as HTTP 502 or 504 +// blockstoreErrToGatewayErr translates underlying blockstore error into one that gateway code will return as HTTP 502 or 504 // it also makes sure Retry-After hint from remote blockstore will be passed to HTTP client, if present. -func GatewayError(err error) error { +func blockstoreErrToGatewayErr(err error) error { if errors.Is(err, &ErrorStatusCode{}) || errors.Is(err, &ErrorRetryAfter{}) { // already correct error @@ -56,7 +56,6 @@ func GatewayError(err error) error { // All timeouts should produce 504 Gateway Timeout if errors.Is(err, context.DeadlineExceeded) || - // errors.Is(err, caboose.ErrTimeout) || // Unfortunately this is not an exported type so we have to check for the content. strings.Contains(err.Error(), "Client.Timeout exceeded") { return fmt.Errorf("%w: %s", ErrGatewayTimeout, err.Error()) diff --git a/gateway/backend_graph_utils_test.go b/gateway/backend_graph_utils_test.go index 5b0ec3886..3ff7cae3d 100644 --- a/gateway/backend_graph_utils_test.go +++ b/gateway/backend_graph_utils_test.go @@ -84,13 +84,13 @@ func TestGatewayErrorRetryAfter(t *testing.T) { ) // Test unwrapped - convertedErr = GatewayError(originalErr) + convertedErr = blockstoreErrToGatewayErr(originalErr) ok := errors.As(convertedErr, &gatewayErr) assert.True(t, ok) assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) // Test wrapped. 
- convertedErr = GatewayError(fmt.Errorf("wrapped error: %w", originalErr)) + convertedErr = blockstoreErrToGatewayErr(fmt.Errorf("wrapped error: %w", originalErr)) ok = errors.As(convertedErr, &gatewayErr) assert.True(t, ok) assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) diff --git a/gateway/errors.go b/gateway/errors.go index 79cedcee0..5c5b52fa7 100644 --- a/gateway/errors.go +++ b/gateway/errors.go @@ -127,6 +127,32 @@ func (e *ErrorStatusCode) Unwrap() error { return e.Err } +// ErrInvalidResponse can be returned from a [DataCallback] to indicate that the data provided for the +// requested resource was explicitly 'incorrect' - that blocks not in the requested dag, or non-car-conforming +// data was returned. +type ErrInvalidResponse struct { + Message string +} + +func (e ErrInvalidResponse) Error() string { + return e.Message +} + +// ErrPartialResponse can be returned from a [DataCallback] to indicate that some of the requested resource +// was successfully fetched, and that instead of retrying the full resource, that there are +// one or more more specific resources that should be fetched (via StillNeed) to complete the request. +type ErrPartialResponse struct { + error + StillNeed []string +} + +func (epr ErrPartialResponse) Error() string { + if epr.error != nil { + return fmt.Sprintf("partial response: %s", epr.error.Error()) + } + return "caboose received a partial response" +} + func webError(w http.ResponseWriter, r *http.Request, c *Config, err error, defaultCode int) { code := defaultCode diff --git a/gateway/remote_blocks_backend.go b/gateway/remote_blocks_backend.go index 5b96385d8..e020e0b1e 100644 --- a/gateway/remote_blocks_backend.go +++ b/gateway/remote_blocks_backend.go @@ -20,7 +20,7 @@ const getBlockTimeout = time.Second * 60 // If you want to create a more custom [BlocksBackend] with only remote IPNS // Record resolution, or only remote block fetching, we recommend using // [NewBlocksBackend] directly. 
-func NewRemoteBlocksBackend(gatewayURL []string, opts ...BlocksBackendOption) (*BlocksBackend, error) { +func NewRemoteBlocksBackend(gatewayURL []string, opts ...BackendOption) (*BlocksBackend, error) { blockStore, err := NewRemoteBlockstore(gatewayURL) if err != nil { return nil, err From 72561ca9030b0660d6b29261b9b585a44da13462 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 10 Apr 2024 11:54:13 +0200 Subject: [PATCH 07/19] rename graph backend to car backend --- .github/workflows/gateway-conformance.yml | 38 +++++------ examples/README.md | 6 +- examples/gateway/{car => car-file}/README.md | 4 +- examples/gateway/{car => car-file}/main.go | 0 .../gateway/{car => car-file}/main_test.go | 0 examples/gateway/{car => car-file}/test.car | Bin .../gateway/{proxy => proxy-blocks}/README.md | 4 +- .../gateway/{proxy => proxy-blocks}/main.go | 0 .../{proxy => proxy-blocks}/main_test.go | 0 .../gateway/{graph => proxy-car}/README.md | 6 +- examples/gateway/{graph => proxy-car}/main.go | 2 +- gateway/backend.go | 2 +- gateway/{backend_graph.go => backend_car.go} | 63 ++++++++---------- ...nd_graph_files.go => backend_car_files.go} | 6 +- ...kend_graph_test.go => backend_car_test.go} | 38 +++++------ ..._traversal.go => backend_car_traversal.go} | 8 +-- ...nd_graph_utils.go => backend_car_utils.go} | 0 ...tils_test.go => backend_car_utils_test.go} | 0 18 files changed, 86 insertions(+), 91 deletions(-) rename examples/gateway/{car => car-file}/README.md (94%) rename examples/gateway/{car => car-file}/main.go (100%) rename examples/gateway/{car => car-file}/main_test.go (100%) rename examples/gateway/{car => car-file}/test.car (100%) rename examples/gateway/{proxy => proxy-blocks}/README.md (97%) rename examples/gateway/{proxy => proxy-blocks}/main.go (100%) rename examples/gateway/{proxy => proxy-blocks}/main_test.go (100%) rename examples/gateway/{graph => proxy-car}/README.md (93%) rename examples/gateway/{graph => proxy-car}/main.go (96%) rename 
gateway/{backend_graph.go => backend_car.go} (94%) rename gateway/{backend_graph_files.go => backend_car_files.go} (98%) rename gateway/{backend_graph_test.go => backend_car_test.go} (97%) rename gateway/{backend_graph_traversal.go => backend_car_traversal.go} (95%) rename gateway/{backend_graph_utils.go => backend_car_utils.go} (100%) rename gateway/{backend_graph_utils_test.go => backend_car_utils_test.go} (100%) diff --git a/.github/workflows/gateway-conformance.yml b/.github/workflows/gateway-conformance.yml index f3edc9bb7..e61ea8540 100644 --- a/.github/workflows/gateway-conformance.yml +++ b/.github/workflows/gateway-conformance.yml @@ -11,9 +11,9 @@ concurrency: cancel-in-progress: true jobs: - gateway-conformance-car: + gateway-conformance-car-file: runs-on: ubuntu-latest - name: Gateway Conformance (CAR Gateway) + name: Gateway Conformance (CAR File Gateway) steps: # 1. Download the gateway-conformance fixtures - name: Download gateway-conformance fixtures @@ -22,7 +22,7 @@ jobs: output: fixtures merged: true - # 2. Build the car-gateway + # 2. Build the gateway binary - name: Setup Go uses: actions/setup-go@v4 with: @@ -32,12 +32,12 @@ jobs: with: path: boxo - name: Build car-gateway - run: go build -o car-gateway - working-directory: boxo/examples/gateway/car + run: go build -o gateway + working-directory: boxo/examples/gateway/car-file - # 3. Start the car-gateway + # 3. Start the gateway binary - name: Start car-gateway - run: boxo/examples/gateway/car/car-gateway -c fixtures/fixtures.car -p 8040 & + run: boxo/examples/gateway/car-file/gateway -c fixtures/fixtures.car -p 8040 & # 4. Run the gateway-conformance tests - name: Run gateway-conformance tests @@ -69,9 +69,9 @@ jobs: name: gateway-conformance.json path: output.json - gateway-conformance-graph: + gateway-conformance-remote-car: runs-on: ubuntu-latest - name: Gateway Conformance (Graph Gateway) + name: Gateway Conformance (Remote CAR Gateway) steps: # 1. 
Download the gateway-conformance fixtures - name: Download gateway-conformance fixtures @@ -80,7 +80,7 @@ jobs: output: fixtures merged: true - # 2. Build the graph-gateway + # 2. Build the gateway binaries - name: Setup Go uses: actions/setup-go@v4 with: @@ -90,17 +90,17 @@ jobs: with: path: boxo - name: Build car-gateway - run: go build -o car-gateway - working-directory: boxo/examples/gateway/car - - name: Build graph-gateway - run: go build -o graph-gateway - working-directory: boxo/examples/gateway/graph + run: go build -o gateway + working-directory: boxo/examples/gateway/car-file + - name: Build proxy-car-gateway + run: go build -o gateway + working-directory: boxo/examples/gateway/proxy-car - # 3. Start the car-gateway and the graph-gateway + # 3. Start the gateway binaries - name: Start car-gateway - run: boxo/examples/gateway/car/car-gateway -c fixtures/fixtures.car -p 8030 & - - name: Start graph-gateway - run: boxo/examples/gateway/graph/graph-gateway -g http://127.0.0.1:8030 -p 8040 & + run: boxo/examples/gateway/car-file/gateway -c fixtures/fixtures.car -p 8030 & + - name: Start proxy-car-gateway + run: boxo/examples/gateway/proxy-car/gateway -g http://127.0.0.1:8030 -p 8040 & # 4. 
Run the gateway-conformance tests - name: Run gateway-conformance tests diff --git a/examples/README.md b/examples/README.md index 30f6e6157..5f97cb61b 100644 --- a/examples/README.md +++ b/examples/README.md @@ -27,7 +27,7 @@ Once you have your example finished, do not forget to run `go mod tidy` and addi ## Examples and Tutorials - [Fetching a UnixFS file by CID](./unixfs-file-cid) -- [Gateway backed by a CAR file](./gateway/car) -- [Gateway backed by a remote blockstore and IPNS resolver](./gateway/proxy) -- [Gateway backed by a remote Trustless Gateway](./gateway/graph) +- [Gateway backed by a CAR file](./gateway/car-file) +- [Gateway backed by a remote blockstore and IPNS resolver](./gateway/proxy-blocks) +- [Gateway backed by a remote Trustless CAR Gateway](./gateway/proxy-car) - [Delegated Routing V1 Command Line Client](./routing/delegated-routing-client/) diff --git a/examples/gateway/car/README.md b/examples/gateway/car-file/README.md similarity index 94% rename from examples/gateway/car/README.md rename to examples/gateway/car-file/README.md index 2fea3fa66..5a95063d4 100644 --- a/examples/gateway/car/README.md +++ b/examples/gateway/car-file/README.md @@ -7,7 +7,7 @@ Addressable aRchive that contains blocks. 
## Build ```bash -> go build -o car-gateway +> go build -o gateway ``` ## Usage @@ -23,7 +23,7 @@ Then, you can start the gateway with: ``` -./car-gateway -c data.car -p 8040 +./gateway -c data.car -p 8040 ``` ### Subdomain gateway diff --git a/examples/gateway/car/main.go b/examples/gateway/car-file/main.go similarity index 100% rename from examples/gateway/car/main.go rename to examples/gateway/car-file/main.go diff --git a/examples/gateway/car/main_test.go b/examples/gateway/car-file/main_test.go similarity index 100% rename from examples/gateway/car/main_test.go rename to examples/gateway/car-file/main_test.go diff --git a/examples/gateway/car/test.car b/examples/gateway/car-file/test.car similarity index 100% rename from examples/gateway/car/test.car rename to examples/gateway/car-file/test.car diff --git a/examples/gateway/proxy/README.md b/examples/gateway/proxy-blocks/README.md similarity index 97% rename from examples/gateway/proxy/README.md rename to examples/gateway/proxy-blocks/README.md index 4164aad1e..9b4a273b6 100644 --- a/examples/gateway/proxy/README.md +++ b/examples/gateway/proxy-blocks/README.md @@ -18,7 +18,7 @@ gateway using `?format=ipns-record`. In addition, DNSLink lookups are done local ## Build ```bash -> go build -o verifying-proxy +> go build -o gateway ``` ## Usage @@ -28,7 +28,7 @@ types. 
Once you have it, run the proxy gateway with its address as the host para ``` -./verifying-proxy -g https://ipfs.io -p 8040 +./gateway -g https://ipfs.io -p 8040 ``` ### Subdomain gateway diff --git a/examples/gateway/proxy/main.go b/examples/gateway/proxy-blocks/main.go similarity index 100% rename from examples/gateway/proxy/main.go rename to examples/gateway/proxy-blocks/main.go diff --git a/examples/gateway/proxy/main_test.go b/examples/gateway/proxy-blocks/main_test.go similarity index 100% rename from examples/gateway/proxy/main_test.go rename to examples/gateway/proxy-blocks/main_test.go diff --git a/examples/gateway/graph/README.md b/examples/gateway/proxy-car/README.md similarity index 93% rename from examples/gateway/graph/README.md rename to examples/gateway/proxy-car/README.md index be34b5b0c..f05321e25 100644 --- a/examples/gateway/graph/README.md +++ b/examples/gateway/proxy-car/README.md @@ -1,4 +1,4 @@ -# Gateway as Proxy for Trustless Remote Backend +# Gateway as Proxy for Trustless CAR Remote Backend This is an example of building a Gateway that uses `application/vnd.ipld.car` responses from another gateway acting as a remote Trustless Gateway and IPNS resolver. @@ -6,7 +6,7 @@ responses from another gateway acting as a remote Trustless Gateway and IPNS res ## Build ```bash -> go build -o graph-proxy +> go build -o gateway ``` ## Usage @@ -15,7 +15,7 @@ First, you need a compliant gateway that supports both [CAR requests](https://ww types. 
Once you have it, run the proxy gateway with its address as the host parameter: ``` -./graph-proxy -g https://ipfs.io -p 8040 +./gateway -g https://ipfs.io -p 8040 ``` ### Subdomain gateway diff --git a/examples/gateway/graph/main.go b/examples/gateway/proxy-car/main.go similarity index 96% rename from examples/gateway/graph/main.go rename to examples/gateway/proxy-car/main.go index ec7a3582b..40eaff956 100644 --- a/examples/gateway/graph/main.go +++ b/examples/gateway/proxy-car/main.go @@ -33,7 +33,7 @@ func main() { } // Creates the gateway with the remote graph backend. - backend, err := gateway.NewGraphBackend(carFetcher) + backend, err := gateway.NewCarBackend(carFetcher) if err != nil { log.Fatal(err) } diff --git a/gateway/backend.go b/gateway/backend.go index c4db09798..5f15e83a9 100644 --- a/gateway/backend.go +++ b/gateway/backend.go @@ -50,7 +50,7 @@ func WithResolver(r resolver.Resolver) BackendOption { } } -// WithPrometheusRegistry sets the registry to use with [GraphBackend]. +// WithPrometheusRegistry sets the registry to use with [CarBackend]. 
func WithPrometheusRegistry(reg prometheus.Registerer) BackendOption { return func(opts *backendOptions) error { opts.promRegistry = reg diff --git a/gateway/backend_graph.go b/gateway/backend_car.go similarity index 94% rename from gateway/backend_graph.go rename to gateway/backend_car.go index 30da4b52e..6987ecadf 100644 --- a/gateway/backend_graph.go +++ b/gateway/backend_car.go @@ -8,7 +8,6 @@ import ( "io" "strconv" "strings" - "time" "github.com/hashicorp/go-multierror" "github.com/ipfs/boxo/files" @@ -36,26 +35,22 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const GetBlockTimeout = time.Second * 60 +var ErrFetcherUnexpectedEOF = fmt.Errorf("failed to fetch IPLD data") type DataCallback func(resource string, reader io.Reader) error -var ErrFetcherUnexpectedEOF = fmt.Errorf("failed to fetch IPLD data") - type CarFetcher interface { Fetch(ctx context.Context, path string, cb DataCallback) error } -type GraphBackend struct { +type CarBackend struct { baseBackend fetcher CarFetcher - - pc traversal.LinkTargetNodePrototypeChooser - - metrics *GraphBackendMetrics + pc traversal.LinkTargetNodePrototypeChooser + metrics *CarBackendMetrics } -type GraphBackendMetrics struct { +type CarBackendMetrics struct { contextAlreadyCancelledMetric prometheus.Counter carFetchAttemptMetric prometheus.Counter carBlocksFetchedMetric prometheus.Counter @@ -65,7 +60,7 @@ type GraphBackendMetrics struct { bytesRangeSizeMetric prometheus.Histogram } -func NewGraphBackend(f CarFetcher, opts ...BackendOption) (*GraphBackend, error) { +func NewCarBackend(f CarFetcher, opts ...BackendOption) (*CarBackend, error) { var compiledOptions backendOptions for _, o := range opts { if err := o(&compiledOptions); err != nil { @@ -85,10 +80,10 @@ func NewGraphBackend(f CarFetcher, opts ...BackendOption) (*GraphBackend, error) promReg = compiledOptions.promRegistry } - return &GraphBackend{ + return &CarBackend{ baseBackend: baseBackend, fetcher: f, - metrics: 
registerGraphBackendMetrics(promReg), + metrics: registerCarBackendMetrics(promReg), pc: dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { return tlnkNd.LinkTargetNodePrototype(), nil @@ -98,8 +93,8 @@ func NewGraphBackend(f CarFetcher, opts ...BackendOption) (*GraphBackend, error) }, nil } -func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMetrics { - // How many CAR Fetch attempts we had? Need this to calculate % of various graph request types. +func registerCarBackendMetrics(promReg prometheus.Registerer) *CarBackendMetrics { + // How many CAR Fetch attempts we had? Need this to calculate % of various car request types. // We only count attempts here, because success/failure with/without retries are provided by caboose: // - ipfs_caboose_fetch_duration_car_success_count // - ipfs_caboose_fetch_duration_car_failure_count @@ -107,7 +102,7 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet // - ipfs_caboose_fetch_duration_car_peer_failure_count carFetchAttemptMetric := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "ipfs", - Subsystem: "gw_graph_backend", + Subsystem: "gw_car_backend", Name: "car_fetch_attempts", Help: "The number of times a CAR fetch was attempted by IPFSBackend.", }) @@ -115,7 +110,7 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet contextAlreadyCancelledMetric := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "ipfs", - Subsystem: "gw_graph_backend", + Subsystem: "gw_car_backend", Name: "car_fetch_context_already_cancelled", Help: "The number of times context is already cancelled when a CAR fetch was attempted by IPFSBackend.", }) @@ -125,7 +120,7 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet // Need this as a baseline to reason about error ratio vs raw_block_recovery_attempts. 
carBlocksFetchedMetric := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "ipfs", - Subsystem: "gw_graph_backend", + Subsystem: "gw_car_backend", Name: "car_blocks_fetched", Help: "The number of blocks successfully read via CAR fetch.", }) @@ -133,7 +128,7 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet carParamsMetric := prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: "ipfs", - Subsystem: "gw_graph_backend", + Subsystem: "gw_car_backend", Name: "car_fetch_params", Help: "How many times specific CAR parameter was used during CAR data fetch.", }, []string{"dagScope", "entityRanges"}) // we use 'ranges' instead of 'bytes' here because we only count the number of ranges present @@ -141,7 +136,7 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet bytesRangeStartMetric := prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: "ipfs", - Subsystem: "gw_graph_backend", + Subsystem: "gw_car_backend", Name: "range_request_start", Help: "Tracks where did the range request start.", Buckets: prometheus.ExponentialBuckets(1024, 2, 24), // 1024 bytes to 8 GiB @@ -150,14 +145,14 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet bytesRangeSizeMetric := prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: "ipfs", - Subsystem: "gw_graph_backend", + Subsystem: "gw_car_backend", Name: "range_request_size", Help: "Tracks the size of range requests.", Buckets: prometheus.ExponentialBuckets(256*1024, 2, 10), // From 256KiB to 100MiB }) promReg.MustRegister(bytesRangeSizeMetric) - return &GraphBackendMetrics{ + return &CarBackendMetrics{ contextAlreadyCancelledMetric, carFetchAttemptMetric, carBlocksFetchedMetric, @@ -167,7 +162,7 @@ func registerGraphBackendMetrics(promReg prometheus.Registerer) *GraphBackendMet } } -func (api *GraphBackend) fetchCAR(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { +func (api 
*CarBackend) fetchCAR(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { urlWithoutHost := contentPathToCarUrl(path, params).String() api.metrics.carFetchAttemptMetric.Inc() @@ -305,7 +300,7 @@ func contentMetadataFromRootsAndRemainder(p path.ImmutablePath, pathRoots []cid. var errNotUnixFS = fmt.Errorf("data was not unixfs") -func (api *GraphBackend) Get(ctx context.Context, path path.ImmutablePath, byteRanges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { +func (api *CarBackend) Get(ctx context.Context, path path.ImmutablePath, byteRanges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { rangeCount := len(byteRanges) api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "entity", "entityRanges": strconv.Itoa(rangeCount)}).Inc() @@ -679,9 +674,9 @@ func (it *backpressuredHAMTDirIterNoRecursion) Err() error { return it.err } -var _ AwaitCloser = (*backpressuredHAMTDirIterNoRecursion)(nil) +var _ awaitCloser = (*backpressuredHAMTDirIterNoRecursion)(nil) -func (api *GraphBackend) GetAll(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.Node, error) { +func (api *CarBackend) GetAll(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.Node, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "all", "entityRanges": "0"}).Inc() return fetchWithPartialRetries(ctx, path, CarParams{Scope: DagScopeAll}, loadTerminalUnixFSElementWithRecursiveDirectories, api.metrics, api.fetchCAR) } @@ -700,7 +695,7 @@ type nextReq struct { params CarParams } -func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, initialParams CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics *GraphBackendMetrics, fetchCAR fetchCarFn) (ContentPathMetadata, T, error) { +func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, initialParams CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics 
*CarBackendMetrics, fetchCAR fetchCarFn) (ContentPathMetadata, T, error) { var zeroReturnType T terminalPathElementCh := make(chan terminalPathType[T], 1) @@ -780,7 +775,7 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i return err } - ndAc, ok := any(nd).(AwaitCloser) + ndAc, ok := any(nd).(awaitCloser) if !ok { terminalPathElementCh <- terminalPathType[T]{ resp: nd, @@ -848,7 +843,7 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i } } -func (api *GraphBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, files.File, error) { +func (api *CarBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, files.File, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", "entityRanges": "0"}).Inc() var md ContentPathMetadata @@ -895,7 +890,7 @@ func (api *GraphBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (Co return md, f, nil } -func (api *GraphBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { +func (api *CarBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, *HeadResponse, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "entity", "entityRanges": "1"}).Inc() // TODO: we probably want to move this either to boxo, or at least to loadRequestIntoSharedBlockstoreAndBlocksGateway @@ -1037,7 +1032,7 @@ func (api *GraphBackend) Head(ctx context.Context, p path.ImmutablePath) (Conten return md, n, nil } -func (api *GraphBackend) ResolvePath(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, error) { +func (api *CarBackend) ResolvePath(ctx context.Context, p path.ImmutablePath) (ContentPathMetadata, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", "entityRanges": "0"}).Inc() var md ContentPathMetadata @@ -1066,7 +1061,7 @@ func (api *GraphBackend) ResolvePath(ctx 
context.Context, p path.ImmutablePath) return md, nil } -func (api *GraphBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params CarParams) (ContentPathMetadata, io.ReadCloser, error) { +func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params CarParams) (ContentPathMetadata, io.ReadCloser, error) { numRanges := "0" if params.Range != nil { numRanges = "1" @@ -1175,11 +1170,11 @@ func getRootCid(imPath path.ImmutablePath) (cid.Cid, error) { return rootCid, nil } -func (api *GraphBackend) IsCached(ctx context.Context, path path.Path) bool { +func (api *CarBackend) IsCached(ctx context.Context, path path.Path) bool { return false } -var _ IPFSBackend = (*GraphBackend)(nil) +var _ IPFSBackend = (*CarBackend)(nil) func checkRetryableError(e *error, fn func() error) error { err := fn() diff --git a/gateway/backend_graph_files.go b/gateway/backend_car_files.go similarity index 98% rename from gateway/backend_graph_files.go rename to gateway/backend_car_files.go index 740b3f2e2..540916280 100644 --- a/gateway/backend_graph_files.go +++ b/gateway/backend_car_files.go @@ -21,7 +21,7 @@ import ( "github.com/multiformats/go-multicodec" ) -type AwaitCloser interface { +type awaitCloser interface { AwaitClose() <-chan error } @@ -107,7 +107,7 @@ func (b *backpressuredFile) Seek(offset int64, whence int) (int64, error) { } var _ files.File = (*backpressuredFile)(nil) -var _ AwaitCloser = (*backpressuredFile)(nil) +var _ awaitCloser = (*backpressuredFile)(nil) type singleUseDirectory struct { dirIter files.DirIterator @@ -133,7 +133,7 @@ func (b *singleUseDirectory) Entries() files.DirIterator { } var _ files.Directory = (*singleUseDirectory)(nil) -var _ AwaitCloser = (*singleUseDirectory)(nil) +var _ awaitCloser = (*singleUseDirectory)(nil) type backpressuredFlatDirIter struct { linksItr *dagpb.PBLinks__Itr diff --git a/gateway/backend_graph_test.go b/gateway/backend_car_test.go similarity index 97% rename from gateway/backend_graph_test.go 
rename to gateway/backend_car_test.go index 2ed0f237b..44852d80a 100644 --- a/gateway/backend_graph_test.go +++ b/gateway/backend_car_test.go @@ -29,7 +29,7 @@ import ( //go:embed testdata/directory-with-multilayer-hamt-and-multiblock-files.car var dirWithMultiblockHAMTandFiles []byte -func TestGraphBackendTar(t *testing.T) { +func TestCarBackendTar(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -155,7 +155,7 @@ func TestGraphBackendTar(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) @@ -216,7 +216,7 @@ func TestGraphBackendTar(t *testing.T) { require.False(t, rootDirIter.Next() || basicDirIter.Next() || hamtDirIter.Next()) } -func TestGraphBackendTarAtEndOfPath(t *testing.T) { +func TestCarBackendTarAtEndOfPath(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -322,7 +322,7 @@ func TestGraphBackendTarAtEndOfPath(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) p, err := path.Join(path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")), "hamtDir") @@ -402,7 +402,7 @@ func sendBlocks(ctx context.Context, carFixture []byte, writer io.Writer, cidStr return nil } -func TestGraphBackendGetFile(t *testing.T) { +func TestCarBackendGetFile(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -481,7 +481,7 @@ func 
TestGraphBackendGetFile(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -507,7 +507,7 @@ func TestGraphBackendGetFile(t *testing.T) { require.True(t, bytes.Equal(data, expectedFileData)) } -func TestGraphBackendGetFileRangeRequest(t *testing.T) { +func TestCarBackendGetFileRangeRequest(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -582,7 +582,7 @@ func TestGraphBackendGetFileRangeRequest(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -616,7 +616,7 @@ func TestGraphBackendGetFileRangeRequest(t *testing.T) { require.Equal(t, 4, requestNum) } -func TestGraphBackendGetFileWithBadBlockReturned(t *testing.T) { +func TestCarBackendGetFileWithBadBlockReturned(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -689,7 +689,7 @@ func TestGraphBackendGetFileWithBadBlockReturned(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, 
backend)) @@ -715,7 +715,7 @@ func TestGraphBackendGetFileWithBadBlockReturned(t *testing.T) { require.True(t, bytes.Equal(data, expectedFileData)) } -func TestGraphBackendGetHAMTDirectory(t *testing.T) { +func TestCarBackendGetHAMTDirectory(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -792,7 +792,7 @@ func TestGraphBackendGetHAMTDirectory(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -813,7 +813,7 @@ func TestGraphBackendGetHAMTDirectory(t *testing.T) { t.Fatal("directory does not contain the expected links") } -func TestGraphBackendGetCAR(t *testing.T) { +func TestCarBackendGetCAR(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -898,7 +898,7 @@ func TestGraphBackendGetCAR(t *testing.T) { bs, err := NewRemoteCarFetcher([]string{s.URL}) require.NoError(t, err) - backend, err := NewGraphBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) require.NoError(t, err) p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) @@ -944,9 +944,9 @@ func TestGraphBackendGetCAR(t *testing.T) { require.ErrorIs(t, err, io.EOF) } -func TestGraphBackendPassthroughErrors(t *testing.T) { +func TestCarBackendPassthroughErrors(t *testing.T) { t.Run("PathTraversalError", func(t *testing.T) { - pathTraversalTest := func(t *testing.T, traversal func(ctx context.Context, p path.ImmutablePath, backend *GraphBackend) error) { + pathTraversalTest := func(t *testing.T, traversal func(ctx 
context.Context, p path.ImmutablePath, backend *CarBackend) error) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -998,7 +998,7 @@ func TestGraphBackendPassthroughErrors(t *testing.T) { bogusErr := NewErrorStatusCode(fmt.Errorf("this is a test error"), 418) clientRequestNum := 0 - backend, err := NewGraphBackend(&retryFetcher{ + backend, err := NewCarBackend(&retryFetcher{ inner: &fetcherWrapper{fn: func(ctx context.Context, path string, cb DataCallback) error { clientRequestNum++ if clientRequestNum > 2 { @@ -1019,13 +1019,13 @@ func TestGraphBackendPassthroughErrors(t *testing.T) { t.Fatal("error did not pass through") } t.Run("Block", func(t *testing.T) { - pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphBackend) error { + pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *CarBackend) error { _, _, err := backend.GetBlock(ctx, p) return err }) }) t.Run("File", func(t *testing.T) { - pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *GraphBackend) error { + pathTraversalTest(t, func(ctx context.Context, p path.ImmutablePath, backend *CarBackend) error { _, _, err := backend.Get(ctx, p) return err }) diff --git a/gateway/backend_graph_traversal.go b/gateway/backend_car_traversal.go similarity index 95% rename from gateway/backend_graph_traversal.go rename to gateway/backend_car_traversal.go index 2219d6803..090495530 100644 --- a/gateway/backend_graph_traversal.go +++ b/gateway/backend_car_traversal.go @@ -25,7 +25,7 @@ type getBlock func(ctx context.Context, cid cid.Cid) (blocks.Block, error) var ErrNilBlock = ErrInvalidResponse{Message: "received a nil block with no error"} -func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *GraphBackendMetrics) (getBlock, error) { +func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *CarBackendMetrics) (getBlock, error) { cr, err := 
car.NewCarReaderWithOptions(reader, car.WithErrorOnEmptyRoots(false)) if err != nil { return nil, err @@ -73,9 +73,9 @@ func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *Grap // initially set a higher timeout here so that if there's an initial timeout error we get it from the car reader. var t *time.Timer if isFirstBlock { - t = time.NewTimer(GetBlockTimeout * 2) + t = time.NewTimer(getBlockTimeout * 2) } else { - t = time.NewTimer(GetBlockTimeout) + t = time.NewTimer(getBlockTimeout) } var blkRead blockRead var ok bool @@ -84,7 +84,7 @@ func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *Grap if !t.Stop() { <-t.C } - t.Reset(GetBlockTimeout) + t.Reset(getBlockTimeout) case <-t.C: return nil, ErrGatewayTimeout } diff --git a/gateway/backend_graph_utils.go b/gateway/backend_car_utils.go similarity index 100% rename from gateway/backend_graph_utils.go rename to gateway/backend_car_utils.go diff --git a/gateway/backend_graph_utils_test.go b/gateway/backend_car_utils_test.go similarity index 100% rename from gateway/backend_graph_utils_test.go rename to gateway/backend_car_utils_test.go From ec79bf88ca2ee1ecea60e97f829e3c6d889bd2b2 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 10 Apr 2024 14:57:38 +0200 Subject: [PATCH 08/19] fix ContentPathMetadata --- gateway/backend_car.go | 105 ++++++++++++++++++++---------------- gateway/backend_car_test.go | 3 +- 2 files changed, 60 insertions(+), 48 deletions(-) diff --git a/gateway/backend_car.go b/gateway/backend_car.go index 6987ecadf..380728ca1 100644 --- a/gateway/backend_car.go +++ b/gateway/backend_car.go @@ -183,36 +183,41 @@ func (api *CarBackend) fetchCAR(ctx context.Context, path path.ImmutablePath, pa } // resolvePathWithRootsAndBlock takes a path and linksystem and returns the set of non-terminal cids, the terminal cid, the remainder, and the block corresponding to the terminal cid -func resolvePathWithRootsAndBlock(ctx context.Context, p 
path.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { - pathRootCids, terminalCid, remainder, terminalBlk, err := resolvePathToLastWithRoots(ctx, p, unixFSLsys) +func resolvePathWithRootsAndBlock(ctx context.Context, p path.ImmutablePath, unixFSLsys *ipld.LinkSystem) (ContentPathMetadata, blocks.Block, error) { + md, terminalBlk, err := resolvePathToLastWithRoots(ctx, p, unixFSLsys) if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } + terminalCid := md.LastSegment.RootCid() + if terminalBlk == nil { lctx := ipld.LinkContext{Ctx: ctx} lnk := cidlink.Link{Cid: terminalCid} blockData, err := unixFSLsys.LoadRaw(lctx, lnk) if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } terminalBlk, err = blocks.NewBlockWithCid(blockData, terminalCid) if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } } - return pathRootCids, terminalCid, remainder, terminalBlk, err + return md, terminalBlk, err } // resolvePathToLastWithRoots takes a path and linksystem and returns the set of non-terminal cids, the terminal cid, // the remainder pathing, the last block loaded, and the last node loaded. 
// // Note: the block returned will be nil if the terminal element is a link or the path is just a CID -func resolvePathToLastWithRoots(ctx context.Context, p path.ImmutablePath, unixFSLsys *ipld.LinkSystem) ([]cid.Cid, cid.Cid, []string, blocks.Block, error) { +func resolvePathToLastWithRoots(ctx context.Context, p path.ImmutablePath, unixFSLsys *ipld.LinkSystem) (ContentPathMetadata, blocks.Block, error) { root, segments := p.RootCid(), p.Segments()[2:] if len(segments) == 0 { - return nil, root, nil, nil, nil + return ContentPathMetadata{ + PathSegmentRoots: []cid.Cid{}, + LastSegment: p, + }, nil, nil } unixFSLsys.NodeReifier = unixfsnode.Reify @@ -248,31 +253,31 @@ func resolvePathToLastWithRoots(ctx context.Context, p path.ImmutablePath, unixF nextBlk, nextNd, err := loadNode(ctx, root) if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } depth := 0 for i, elem := range segments { nextNd, err = nextNd.LookupBySegment(ipld.ParsePathSegment(elem)) if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } if nextNd.Kind() == ipld.Kind_Link { depth = 0 lnk, err := nextNd.AsLink() if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } cidLnk, ok := lnk.(cidlink.Link) if !ok { - return nil, cid.Undef, nil, nil, fmt.Errorf("link is not a cidlink: %v", cidLnk) + return ContentPathMetadata{}, nil, fmt.Errorf("link is not a cidlink: %v", cidLnk) } cids = append(cids, cidLnk.Cid) if i < len(segments)-1 { nextBlk, nextNd, err = loadNode(ctx, cidLnk.Cid) if err != nil { - return nil, cid.Undef, nil, nil, err + return ContentPathMetadata{}, nil, err } } } else { @@ -282,18 +287,38 @@ func resolvePathToLastWithRoots(ctx context.Context, p path.ImmutablePath, unixF // if last node is not a link, just return it's cid, add path to remainder and return if nextNd.Kind() != ipld.Kind_Link { + md, err := contentMetadataFromRootsAndRemainder(cids, 
segments[len(segments)-depth:]) + if err != nil { + return ContentPathMetadata{}, nil, err + } + // return the cid and the remainder of the path - return cids[:len(cids)-1], cids[len(cids)-1], segments[len(segments)-depth:], nextBlk, nil + return md, nextBlk, nil } - return cids[:len(cids)-1], cids[len(cids)-1], nil, nil, nil + md, err := contentMetadataFromRootsAndRemainder(cids, nil) + return md, nil, err } -func contentMetadataFromRootsAndRemainder(p path.ImmutablePath, pathRoots []cid.Cid, remainder []string) (ContentPathMetadata, error) { +func contentMetadataFromRootsAndRemainder(roots []cid.Cid, remainder []string) (ContentPathMetadata, error) { + if len(roots) == 0 { + return ContentPathMetadata{}, errors.New("invalid pathRoots given with length 0") + } + + p, err := path.Join(path.FromCid(roots[len(roots)-1]), remainder...) + if err != nil { + return ContentPathMetadata{}, err + } + + imPath, err := path.NewImmutablePath(p) + if err != nil { + return ContentPathMetadata{}, err + } + md := ContentPathMetadata{ - PathSegmentRoots: pathRoots, + PathSegmentRoots: roots[:len(roots)-1], LastSegmentRemainder: remainder, - LastSegment: p, + LastSegment: imPath, } return md, nil } @@ -735,7 +760,7 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i lsys := getLinksystem(gb) if hasSentAsyncData { - _, _, _, _, err = resolvePathToLastWithRoots(cctx, p, lsys) + _, _, err = resolvePathToLastWithRoots(cctx, p, lsys) if err != nil { return err } @@ -747,17 +772,12 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i } } else { // First resolve the path since we always need to. 
- pathRootCids, terminalCid, remainder, terminalBlk, err := resolvePathWithRootsAndBlock(cctx, p, lsys) - if err != nil { - return err - } - - md, err := contentMetadataFromRootsAndRemainder(p, pathRootCids, remainder) + md, terminalBlk, err := resolvePathWithRootsAndBlock(cctx, p, lsys) if err != nil { return err } - if len(remainder) > 0 { + if len(md.LastSegmentRemainder) > 0 { terminalPathElementCh <- terminalPathType[T]{err: errNotUnixFS} return nil } @@ -770,6 +790,8 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i } } + terminalCid := md.LastSegment.RootCid() + nd, err := resolveTerminalElementFn(cctx, terminalCid, terminalBlk, lsys, params, getLsys) if err != nil { return err @@ -857,7 +879,8 @@ func (api *CarBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (Cont lsys := getLinksystem(gb) // First resolve the path since we always need to. - pathRoots, terminalCid, remainder, terminalBlk, err := resolvePathToLastWithRoots(ctx, p, lsys) + var terminalBlk blocks.Block + md, terminalBlk, err = resolvePathToLastWithRoots(ctx, p, lsys) if err != nil { return err } @@ -867,18 +890,13 @@ func (api *CarBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (Cont blockData = terminalBlk.RawData() } else { lctx := ipld.LinkContext{Ctx: ctx} - lnk := cidlink.Link{Cid: terminalCid} + lnk := cidlink.Link{Cid: md.LastSegment.RootCid()} blockData, err = lsys.LoadRaw(lctx, lnk) if err != nil { return err } } - md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, remainder) - if err != nil { - return err - } - f = files.NewBytesFile(blockData) return nil }) @@ -909,16 +927,13 @@ func (api *CarBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentP lsys := getLinksystem(gb) // First resolve the path since we always need to. 
- pathRoots, terminalCid, remainder, terminalBlk, err := resolvePathWithRootsAndBlock(ctx, p, lsys) - if err != nil { - return err - } - - md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, remainder) + var terminalBlk blocks.Block + md, terminalBlk, err = resolvePathWithRootsAndBlock(ctx, p, lsys) if err != nil { return err } + terminalCid := md.LastSegment.RootCid() lctx := ipld.LinkContext{Ctx: ctx} pathTerminalCidLink := cidlink.Link{Cid: terminalCid} @@ -926,7 +941,7 @@ func (api *CarBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentP dataBytes := terminalBlk.RawData() // It's not UnixFS if there is a remainder or it's not dag-pb - if len(remainder) > 0 || terminalCid.Type() != uint64(multicodec.DagPb) { + if len(md.LastSegmentRemainder) > 0 || terminalCid.Type() != uint64(multicodec.DagPb) { n = NewHeadResponseForFile(files.NewBytesFile(dataBytes), int64(len(dataBytes))) return nil } @@ -1044,13 +1059,11 @@ func (api *CarBackend) ResolvePath(ctx context.Context, p path.ImmutablePath) (C lsys := getLinksystem(gb) // First resolve the path since we always need to. - pathRoots, _, remainder, _, err := resolvePathToLastWithRoots(ctx, p, lsys) + md, _, err = resolvePathToLastWithRoots(ctx, p, lsys) if err != nil { return err } - md, err = contentMetadataFromRootsAndRemainder(p, pathRoots, remainder) - return err }) @@ -1111,16 +1124,16 @@ func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params l := getLinksystem(teeBlock) // First resolve the path since we always need to. 
- _, terminalCid, remainder, terminalBlk, err := resolvePathWithRootsAndBlock(ctx, p, l) + md, terminalBlk, err := resolvePathWithRootsAndBlock(ctx, p, l) if err != nil { return err } - if len(remainder) > 0 { + if len(md.LastSegmentRemainder) > 0 { return nil } if cw == nil { - cw, err = storage.NewWritable(w, []cid.Cid{terminalCid}, carv2.WriteAsCarV1(true), carv2.AllowDuplicatePuts(params.Duplicates.Bool())) + cw, err = storage.NewWritable(w, []cid.Cid{md.LastSegment.RootCid()}, carv2.WriteAsCarV1(true), carv2.AllowDuplicatePuts(params.Duplicates.Bool())) if err != nil { // io.PipeWriter.CloseWithError always returns nil. _ = w.CloseWithError(err) diff --git a/gateway/backend_car_test.go b/gateway/backend_car_test.go index 44852d80a..f356da0ef 100644 --- a/gateway/backend_car_test.go +++ b/gateway/backend_car_test.go @@ -755,13 +755,12 @@ func TestCarBackendGetHAMTDirectory(t *testing.T) { // Expect a request for a non-existent index.html file // Note: this is an implementation detail related to the directory request above // Note: the order of cases 3 and 4 here are implementation specific as well - expectedUri := "/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/index.html" + expectedUri := "/ipfs/bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm/index.html" if request.URL.Path != expectedUri { panic(fmt.Errorf("expected URI %s, got %s", expectedUri, request.RequestURI)) } if err := sendBlocks(ctx, dirWithMultiblockHAMTandFiles, writer, []string{ - "bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi", // root dir "bafybeignui4g7l6cvqyy4t6vnbl2fjtego4ejmpcia77jhhwnksmm4bejm", // hamt root "bafybeiccgo7euew77gkqkhezn3pozfrciiibqz2u3spdqmgjvd5wqskipm", // inner hamt nodes start here }); err != nil { From 37437ddfc5daac68bc935b2b01666db98c2e8eb9 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 10 Apr 2024 15:38:26 +0200 Subject: [PATCH 09/19] return car with enough data to prove path does not resolve --- 
gateway/backend_car.go | 30 +++++++++++++++++++++++------- gateway/errors.go | 3 ++- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/gateway/backend_car.go b/gateway/backend_car.go index 380728ca1..efd0a9f0e 100644 --- a/gateway/backend_car.go +++ b/gateway/backend_car.go @@ -1123,17 +1123,31 @@ func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params } l := getLinksystem(teeBlock) + var isNotFound bool + // First resolve the path since we always need to. md, terminalBlk, err := resolvePathWithRootsAndBlock(ctx, p, l) if err != nil { - return err + if isErrNotFound(err) { + isNotFound = true + } else { + return err + } } + if len(md.LastSegmentRemainder) > 0 { return nil } if cw == nil { - cw, err = storage.NewWritable(w, []cid.Cid{md.LastSegment.RootCid()}, carv2.WriteAsCarV1(true), carv2.AllowDuplicatePuts(params.Duplicates.Bool())) + var roots []cid.Cid + if isNotFound { + roots = emptyRoot + } else { + roots = []cid.Cid{md.LastSegment.RootCid()} + } + + cw, err = storage.NewWritable(w, roots, carv2.WriteAsCarV1(true), carv2.AllowDuplicatePuts(params.Duplicates.Bool())) if err != nil { // io.PipeWriter.CloseWithError always returns nil. 
_ = w.CloseWithError(err) @@ -1149,12 +1163,14 @@ func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params blockBuffer = nil } - params.Duplicates = DuplicateBlocksIncluded - err = walkGatewaySimpleSelector(ctx, terminalBlk.Cid(), terminalBlk, []string{}, params, l) - // err = walkGatewaySimpleSelector2(ctx, terminalBlk, params.Scope, params.Range, l) - if err != nil { - return err + if !isNotFound { + params.Duplicates = DuplicateBlocksIncluded + err = walkGatewaySimpleSelector(ctx, terminalBlk.Cid(), terminalBlk, []string{}, params, l) + if err != nil { + return err + } } + return nil }) diff --git a/gateway/errors.go b/gateway/errors.go index 5c5b52fa7..4ec1d9735 100644 --- a/gateway/errors.go +++ b/gateway/errors.go @@ -13,6 +13,7 @@ import ( "github.com/ipfs/boxo/path/resolver" "github.com/ipfs/go-cid" "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/schema" ) var ( @@ -210,7 +211,7 @@ func webError(w http.ResponseWriter, r *http.Request, c *Config, err error, defa // isErrNotFound returns true for IPLD errors that should return 4xx errors (e.g. the path doesn't exist, the data is // the wrong type, etc.), rather than issues with just finding and retrieving the data. 
func isErrNotFound(err error) bool { - if errors.Is(err, &resolver.ErrNoLink{}) { + if errors.Is(err, &resolver.ErrNoLink{}) || errors.Is(err, schema.ErrNoSuchField{}) { return true } From 6fe39c8c29e09cad5238961a4e2f0b19a76edbfc Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Wed, 10 Apr 2024 16:37:40 +0200 Subject: [PATCH 10/19] add special case for text/html+codec+range --- gateway/handler_codec.go | 24 ++++++++++++++++++------ gateway/handler_defaults.go | 2 +- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/gateway/handler_codec.go b/gateway/handler_codec.go index 89bff966e..72d8abd59 100644 --- a/gateway/handler_codec.go +++ b/gateway/handler_codec.go @@ -75,10 +75,10 @@ func (i *handler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http return false } - return i.renderCodec(ctx, w, r, rq, blockSize, data) + return i.renderCodec(ctx, w, r, rq, blockSize, data, false) } -func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, rq *requestData, blockSize int64, blockData io.ReadSeekCloser) bool { +func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, rq *requestData, blockSize int64, blockData io.ReadSeekCloser, isRangeRequest bool) bool { resolvedPath := rq.pathMetadata.LastSegment ctx, span := spanTrace(ctx, "Handler.RenderCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", rq.responseFormat))) defer span.End() @@ -124,7 +124,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt download := r.URL.Query().Get("download") == "true" if isDAG && acceptsHTML && !download { - return i.serveCodecHTML(ctx, w, r, blockCid, blockData, resolvedPath, rq.contentPath) + return i.serveCodecHTML(ctx, w, r, blockCid, blockData, resolvedPath, rq.contentPath, isRangeRequest) } else { // This covers CIDs with codec 'json' and 'cbor' as those do not have // an explicit requested 
content type. @@ -156,7 +156,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt return i.serveCodecConverted(ctx, w, r, blockCid, blockData, rq.contentPath, toCodec, modtime, rq.begin) } -func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.Reader, resolvedPath path.ImmutablePath, contentPath path.Path) bool { +func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.Reader, resolvedPath path.ImmutablePath, contentPath path.Path, isRangeRequest bool) bool { // WithHostname may have constructed an IPFS (or IPNS) path using the Host header. // In this case, we need the original path for constructing the redirect. requestURI, err := url.ParseRequestURI(r.RequestURI) @@ -201,7 +201,7 @@ func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r * CID: resolvedPath.RootCid().String(), CodecName: cidCodec.String(), CodecHex: fmt.Sprintf("0x%x", uint64(cidCodec)), - Node: parseNode(blockCid, blockData), + Node: i.parseNode(ctx, blockCid, blockData, isRangeRequest), }) if err != nil { _, _ = w.Write([]byte(fmt.Sprintf("error during body generation: %v", err))) @@ -213,7 +213,7 @@ func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r * // parseNode does a best effort attempt to parse this request's block such that // a preview can be displayed in the gateway. If something fails along the way, // returns nil, therefore not displaying the preview. 
-func parseNode(blockCid cid.Cid, blockData io.Reader) *assets.ParsedNode { +func (i *handler) parseNode(ctx context.Context, blockCid cid.Cid, blockData io.Reader, tryGetBlockIfFailed bool) *assets.ParsedNode { codec := blockCid.Prefix().Codec decoder, err := multicodec.LookupDecoder(codec) if err != nil { @@ -222,6 +222,18 @@ func parseNode(blockCid cid.Cid, blockData io.Reader) *assets.ParsedNode { nodeBuilder := basicnode.Prototype.Any.NewBuilder() err = decoder(nodeBuilder, blockData) + if err != nil && tryGetBlockIfFailed { + // It is possible we don't have the whole data for this block, e.g., + // if range request is made from a browser where we want to display HTML. + // This does one attempt of fetching the data. + _, blockData, err = i.backend.GetBlock(ctx, path.FromCid(blockCid)) + if err != nil { + return nil + } + + nodeBuilder = basicnode.Prototype.Any.NewBuilder() + err = decoder(nodeBuilder, blockData) + } if err != nil { return nil } diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index 78e5af952..19c34c118 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -112,7 +112,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h dataToRender = dataAsReadSeekCloser } - return i.renderCodec(r.Context(), w, r, rq, blockSize, dataToRender) + return i.renderCodec(r.Context(), w, r, rq, blockSize, dataToRender, len(ranges) > 0) default: rq.logger.Debugw("serving unixfs", "path", rq.contentPath) ctx, span := spanTrace(ctx, "Handler.ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) From ffd4fe86efc93e28ed5245710eabacfed3d1bda2 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 12:32:02 +0200 Subject: [PATCH 11/19] Apply suggestions from code review Co-authored-by: Marcin Rataj --- examples/gateway/proxy-blocks/README.md | 2 +- examples/gateway/proxy-car/README.md | 4 +++- examples/gateway/proxy-car/main.go | 4 ++-- 3 files changed, 
6 insertions(+), 4 deletions(-) diff --git a/examples/gateway/proxy-blocks/README.md b/examples/gateway/proxy-blocks/README.md index 9b4a273b6..505ecb131 100644 --- a/examples/gateway/proxy-blocks/README.md +++ b/examples/gateway/proxy-blocks/README.md @@ -28,7 +28,7 @@ types. Once you have it, run the proxy gateway with its address as the host para ``` -./gateway -g https://ipfs.io -p 8040 +./gateway -g https://trustless-gateway.link -p 8040 ``` ### Subdomain gateway diff --git a/examples/gateway/proxy-car/README.md b/examples/gateway/proxy-car/README.md index f05321e25..bc5ef9b2b 100644 --- a/examples/gateway/proxy-car/README.md +++ b/examples/gateway/proxy-car/README.md @@ -3,6 +3,8 @@ This is an example of building a Gateway that uses `application/vnd.ipld.car` responses from another gateway acting as a remote Trustless Gateway and IPNS resolver. +**NOTE:** the CAR backend MUST implement [IPIP-0402: Partial CAR Support on Trustless Gateways](https://specs.ipfs.tech/ipips/ipip-0402/) + ## Build ```bash @@ -15,7 +17,7 @@ First, you need a compliant gateway that supports both [CAR requests](https://ww types. Once you have it, run the proxy gateway with its address as the host parameter: ``` -./gateway -g https://ipfs.io -p 8040 +./gateway -g https://trustless-gateway.link -p 8040 ``` ### Subdomain gateway diff --git a/examples/gateway/proxy-car/main.go b/examples/gateway/proxy-car/main.go index 40eaff956..220c57c22 100644 --- a/examples/gateway/proxy-car/main.go +++ b/examples/gateway/proxy-car/main.go @@ -21,7 +21,7 @@ func main() { // Setups up tracing. This is optional and only required if the implementer // wants to be able to enable tracing. - tp, err := common.SetupTracing(ctx, "Graph Gateway Example") + tp, err := common.SetupTracing(ctx, "CAR Gateway Example") if err != nil { log.Fatal(err) } @@ -32,7 +32,7 @@ func main() { log.Fatal(err) } - // Creates the gateway with the remote graph backend. 
+ // Creates the gateway with the remote car (IPIP-402) backend. backend, err := gateway.NewCarBackend(carFetcher) if err != nil { log.Fatal(err) From 84c7d89f729a403817365e4566bea0c7e7426fb6 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 12:38:36 +0200 Subject: [PATCH 12/19] Revert "add special case for text/html+codec+range" This reverts commit 6fe39c8c29e09cad5238961a4e2f0b19a76edbfc. --- gateway/handler_codec.go | 24 ++++++------------------ gateway/handler_defaults.go | 2 +- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/gateway/handler_codec.go b/gateway/handler_codec.go index 72d8abd59..89bff966e 100644 --- a/gateway/handler_codec.go +++ b/gateway/handler_codec.go @@ -75,10 +75,10 @@ func (i *handler) serveCodec(ctx context.Context, w http.ResponseWriter, r *http return false } - return i.renderCodec(ctx, w, r, rq, blockSize, data, false) + return i.renderCodec(ctx, w, r, rq, blockSize, data) } -func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, rq *requestData, blockSize int64, blockData io.ReadSeekCloser, isRangeRequest bool) bool { +func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *http.Request, rq *requestData, blockSize int64, blockData io.ReadSeekCloser) bool { resolvedPath := rq.pathMetadata.LastSegment ctx, span := spanTrace(ctx, "Handler.RenderCodec", trace.WithAttributes(attribute.String("path", resolvedPath.String()), attribute.String("requestedContentType", rq.responseFormat))) defer span.End() @@ -124,7 +124,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt download := r.URL.Query().Get("download") == "true" if isDAG && acceptsHTML && !download { - return i.serveCodecHTML(ctx, w, r, blockCid, blockData, resolvedPath, rq.contentPath, isRangeRequest) + return i.serveCodecHTML(ctx, w, r, blockCid, blockData, resolvedPath, rq.contentPath) } else { // This covers CIDs with codec 'json' and 'cbor' as those do not 
have // an explicit requested content type. @@ -156,7 +156,7 @@ func (i *handler) renderCodec(ctx context.Context, w http.ResponseWriter, r *htt return i.serveCodecConverted(ctx, w, r, blockCid, blockData, rq.contentPath, toCodec, modtime, rq.begin) } -func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.Reader, resolvedPath path.ImmutablePath, contentPath path.Path, isRangeRequest bool) bool { +func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r *http.Request, blockCid cid.Cid, blockData io.Reader, resolvedPath path.ImmutablePath, contentPath path.Path) bool { // WithHostname may have constructed an IPFS (or IPNS) path using the Host header. // In this case, we need the original path for constructing the redirect. requestURI, err := url.ParseRequestURI(r.RequestURI) @@ -201,7 +201,7 @@ func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r * CID: resolvedPath.RootCid().String(), CodecName: cidCodec.String(), CodecHex: fmt.Sprintf("0x%x", uint64(cidCodec)), - Node: i.parseNode(ctx, blockCid, blockData, isRangeRequest), + Node: parseNode(blockCid, blockData), }) if err != nil { _, _ = w.Write([]byte(fmt.Sprintf("error during body generation: %v", err))) @@ -213,7 +213,7 @@ func (i *handler) serveCodecHTML(ctx context.Context, w http.ResponseWriter, r * // parseNode does a best effort attempt to parse this request's block such that // a preview can be displayed in the gateway. If something fails along the way, // returns nil, therefore not displaying the preview. 
-func (i *handler) parseNode(ctx context.Context, blockCid cid.Cid, blockData io.Reader, tryGetBlockIfFailed bool) *assets.ParsedNode { +func parseNode(blockCid cid.Cid, blockData io.Reader) *assets.ParsedNode { codec := blockCid.Prefix().Codec decoder, err := multicodec.LookupDecoder(codec) if err != nil { @@ -222,18 +222,6 @@ func (i *handler) parseNode(ctx context.Context, blockCid cid.Cid, blockData io. nodeBuilder := basicnode.Prototype.Any.NewBuilder() err = decoder(nodeBuilder, blockData) - if err != nil && tryGetBlockIfFailed { - // It is possible we don't have the whole data for this block, e.g., - // if range request is made from a browser where we want to display HTML. - // This does one attempt of fetching the data. - _, blockData, err = i.backend.GetBlock(ctx, path.FromCid(blockCid)) - if err != nil { - return nil - } - - nodeBuilder = basicnode.Prototype.Any.NewBuilder() - err = decoder(nodeBuilder, blockData) - } if err != nil { return nil } diff --git a/gateway/handler_defaults.go b/gateway/handler_defaults.go index 19c34c118..78e5af952 100644 --- a/gateway/handler_defaults.go +++ b/gateway/handler_defaults.go @@ -112,7 +112,7 @@ func (i *handler) serveDefaults(ctx context.Context, w http.ResponseWriter, r *h dataToRender = dataAsReadSeekCloser } - return i.renderCodec(r.Context(), w, r, rq, blockSize, dataToRender, len(ranges) > 0) + return i.renderCodec(r.Context(), w, r, rq, blockSize, dataToRender) default: rq.logger.Debugw("serving unixfs", "path", rq.contentPath) ctx, span := spanTrace(ctx, "Handler.ServeUnixFS", trace.WithAttributes(attribute.String("path", resolvedPath.String()))) From 7078cd96fb9a12393879130af77826c39c585fcd Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 13:34:05 +0200 Subject: [PATCH 13/19] cleanups, add options --- examples/gateway/proxy-blocks/main.go | 2 +- examples/gateway/proxy-blocks/main_test.go | 2 +- examples/gateway/proxy-car/main.go | 7 +- gateway/backend.go | 41 +++- 
gateway/backend_blocks.go | 23 ++ gateway/backend_car.go | 242 +++++---------------- gateway/backend_car_fetcher.go | 131 +++++++++++ gateway/backend_car_files.go | 164 ++++++++++++++ gateway/backend_car_test.go | 107 ++++----- gateway/backend_car_traversal.go | 14 +- gateway/backend_car_utils.go | 60 ----- gateway/blockstore.go | 23 +- gateway/gateway_test.go | 24 +- gateway/handler_unixfs_dir.go | 6 +- gateway/handler_unixfs_dir_test.go | 2 +- gateway/remote_blocks_backend.go | 53 ----- gateway/utilities_test.go | 4 +- gateway/value_store.go | 14 +- 18 files changed, 509 insertions(+), 410 deletions(-) create mode 100644 gateway/backend_car_fetcher.go delete mode 100644 gateway/remote_blocks_backend.go diff --git a/examples/gateway/proxy-blocks/main.go b/examples/gateway/proxy-blocks/main.go index b1c155015..2953133c0 100644 --- a/examples/gateway/proxy-blocks/main.go +++ b/examples/gateway/proxy-blocks/main.go @@ -28,7 +28,7 @@ func main() { defer (func() { _ = tp.Shutdown(ctx) })() // Creates the gateway with the remote block store backend. 
- backend, err := gateway.NewRemoteBlocksBackend([]string{*gatewayUrlPtr}) + backend, err := gateway.NewRemoteBlocksBackend([]string{*gatewayUrlPtr}, nil) if err != nil { log.Fatal(err) } diff --git a/examples/gateway/proxy-blocks/main_test.go b/examples/gateway/proxy-blocks/main_test.go index 309ffb59e..8cb86bbff 100644 --- a/examples/gateway/proxy-blocks/main_test.go +++ b/examples/gateway/proxy-blocks/main_test.go @@ -21,7 +21,7 @@ const ( ) func newProxyGateway(t *testing.T, rs *httptest.Server) *httptest.Server { - backend, err := gateway.NewRemoteBlocksBackend([]string{rs.URL}) + backend, err := gateway.NewRemoteBlocksBackend([]string{rs.URL}, nil) require.NoError(t, err) handler := common.NewHandler(backend) ts := httptest.NewServer(handler) diff --git a/examples/gateway/proxy-car/main.go b/examples/gateway/proxy-car/main.go index 220c57c22..d03904549 100644 --- a/examples/gateway/proxy-car/main.go +++ b/examples/gateway/proxy-car/main.go @@ -27,13 +27,8 @@ func main() { } defer (func() { _ = tp.Shutdown(ctx) })() - carFetcher, err := gateway.NewRemoteCarFetcher([]string{*gatewayUrlPtr}) - if err != nil { - log.Fatal(err) - } - // Creates the gateway with the remote car (IPIP-402) backend. 
- backend, err := gateway.NewCarBackend(carFetcher) + backend, err := gateway.NewRemoteCarBackend([]string{*gatewayUrlPtr}, nil) if err != nil { log.Fatal(err) } diff --git a/gateway/backend.go b/gateway/backend.go index 5f15e83a9..ae54b14f1 100644 --- a/gateway/backend.go +++ b/gateway/backend.go @@ -15,13 +15,17 @@ import ( routinghelpers "github.com/libp2p/go-libp2p-routing-helpers" "github.com/libp2p/go-libp2p/core/routing" "github.com/prometheus/client_golang/prometheus" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) type backendOptions struct { - ns namesys.NameSystem - vs routing.ValueStore - r resolver.Resolver - promRegistry prometheus.Registerer + ns namesys.NameSystem + vs routing.ValueStore + r resolver.Resolver + + // Only used by [CarBackend]: + promRegistry prometheus.Registerer + getBlockTimeout time.Duration } // WithNameSystem sets the name system to use with the different backends. If not set @@ -58,6 +62,18 @@ func WithPrometheusRegistry(reg prometheus.Registerer) BackendOption { } } +const DefaultGetBlockTimeout = time.Second * 60 + +// WithGetBlockTimeout sets a custom timeout when getting blocks from the +// [CarFetcher] to use with [CarBackend]. By default, [DefaultGetBlockTimeout] +// is used. +func WithGetBlockTimeout(dur time.Duration) BackendOption { + return func(opts *backendOptions) error { + opts.getBlockTimeout = dur + return nil + } +} + type BackendOption func(options *backendOptions) error // baseBackend contains some common backend functionalities that are shared by @@ -138,3 +154,20 @@ func (bb *baseBackend) GetDNSLinkRecord(ctx context.Context, hostname string) (p return nil, NewErrorStatusCode(errors.New("not implemented"), http.StatusNotImplemented) } + +// newRemoteHTTPClient creates a new [http.Client] that is optimized for retrieving +// multiple blocks from a single gateway concurrently. 
+func newRemoteHTTPClient() *http.Client { + transport := &http.Transport{ + MaxIdleConns: 1000, + MaxConnsPerHost: 100, + MaxIdleConnsPerHost: 100, + IdleConnTimeout: 90 * time.Second, + ForceAttemptHTTP2: true, + } + + return &http.Client{ + Timeout: DefaultGetBlockTimeout, + Transport: otelhttp.NewTransport(transport), + } +} diff --git a/gateway/backend_blocks.go b/gateway/backend_blocks.go index 8eafe06af..42440dfcd 100644 --- a/gateway/backend_blocks.go +++ b/gateway/backend_blocks.go @@ -6,10 +6,12 @@ import ( "errors" "fmt" "io" + "net/http" "strings" "github.com/ipfs/boxo/blockservice" blockstore "github.com/ipfs/boxo/blockstore" + "github.com/ipfs/boxo/exchange/offline" "github.com/ipfs/boxo/fetcher" bsfetcher "github.com/ipfs/boxo/fetcher/impl/blockservice" "github.com/ipfs/boxo/files" @@ -54,6 +56,7 @@ type BlocksBackend struct { var _ IPFSBackend = (*BlocksBackend)(nil) +// NewBlocksBackend creates a new [BlocksBackend] backed by a [blockservice.BlockService]. func NewBlocksBackend(blockService blockservice.BlockService, opts ...BackendOption) (*BlocksBackend, error) { var compiledOptions backendOptions for _, o := range opts { @@ -90,6 +93,26 @@ func NewBlocksBackend(blockService blockservice.BlockService, opts ...BackendOpt }, nil } +// NewRemoteBlocksBackend creates a new [BlocksBackend] backed by one or more +// gateways. These gateways must support RAW block requests and IPNS Record +// requests. See [NewRemoteBlockstore] and [NewRemoteValueStore] for more details. +// +// To create a more custom [BlocksBackend], please use [NewBlocksBackend] directly. 
+func NewRemoteBlocksBackend(gatewayURL []string, httpClient *http.Client, opts ...BackendOption) (*BlocksBackend, error) { + blockStore, err := NewRemoteBlockstore(gatewayURL, httpClient) + if err != nil { + return nil, err + } + + valueStore, err := NewRemoteValueStore(gatewayURL, httpClient) + if err != nil { + return nil, err + } + + blockService := blockservice.New(blockStore, offline.Exchange(blockStore)) + return NewBlocksBackend(blockService, append(opts, WithValueStore(valueStore))...) +} + func (bb *BlocksBackend) Get(ctx context.Context, path path.ImmutablePath, ranges ...ByteRange) (ContentPathMetadata, *GetResponse, error) { md, nd, err := bb.getNode(ctx, path) if err != nil { diff --git a/gateway/backend_car.go b/gateway/backend_car.go index efd0a9f0e..010a13ed3 100644 --- a/gateway/backend_car.go +++ b/gateway/backend_car.go @@ -6,8 +6,10 @@ import ( "errors" "fmt" "io" + "net/http" "strconv" "strings" + "time" "github.com/hashicorp/go-multierror" "github.com/ipfs/boxo/files" @@ -20,8 +22,6 @@ import ( format "github.com/ipfs/go-ipld-format" "github.com/ipfs/go-unixfsnode" ufsData "github.com/ipfs/go-unixfsnode/data" - "github.com/ipfs/go-unixfsnode/hamt" - ufsiter "github.com/ipfs/go-unixfsnode/iter" carv2 "github.com/ipld/go-car/v2" "github.com/ipld/go-car/v2/storage" dagpb "github.com/ipld/go-codec-dagpb" @@ -37,17 +37,12 @@ import ( var ErrFetcherUnexpectedEOF = fmt.Errorf("failed to fetch IPLD data") -type DataCallback func(resource string, reader io.Reader) error - -type CarFetcher interface { - Fetch(ctx context.Context, path string, cb DataCallback) error -} - type CarBackend struct { baseBackend - fetcher CarFetcher - pc traversal.LinkTargetNodePrototypeChooser - metrics *CarBackendMetrics + fetcher CarFetcher + pc traversal.LinkTargetNodePrototypeChooser + metrics *CarBackendMetrics + getBlockTimeout time.Duration } type CarBackendMetrics struct { @@ -60,8 +55,11 @@ type CarBackendMetrics struct { bytesRangeSizeMetric prometheus.Histogram } 
+// NewCarBackend returns an [IPFSBackend] backed by a [CarFetcher]. func NewCarBackend(f CarFetcher, opts ...BackendOption) (*CarBackend, error) { - var compiledOptions backendOptions + compiledOptions := backendOptions{ + getBlockTimeout: DefaultGetBlockTimeout, + } for _, o := range opts { if err := o(&compiledOptions); err != nil { return nil, err @@ -81,9 +79,10 @@ func NewCarBackend(f CarFetcher, opts ...BackendOption) (*CarBackend, error) { } return &CarBackend{ - baseBackend: baseBackend, - fetcher: f, - metrics: registerCarBackendMetrics(promReg), + baseBackend: baseBackend, + fetcher: f, + metrics: registerCarBackendMetrics(promReg), + getBlockTimeout: compiledOptions.getBlockTimeout, pc: dagpb.AddSupportToChooser(func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { return tlnkNd.LinkTargetNodePrototype(), nil @@ -93,6 +92,30 @@ func NewCarBackend(f CarFetcher, opts ...BackendOption) (*CarBackend, error) { }, nil } +// NewRemoteCarBackend creates a new [CarBackend] instance backed by one or more +// gateways. These gateways must support partial CAR requests, as described in +// [IPIP-402], as well as IPNS Record requests. See [NewRemoteCarFetcher] and +// [NewRemoteValueStore] for more details. +// +// If you want to create a more custom [CarBackend] with only remote IPNS Record +// resolution, or only remote CAR fetching, we recommend using [NewCarBackend] +// directly. +// +// [IPIP-402]: https://specs.ipfs.tech/ipips/ipip-0402/ +func NewRemoteCarBackend(gatewayURL []string, httpClient *http.Client, opts ...BackendOption) (*CarBackend, error) { + carFetcher, err := NewRemoteCarFetcher(gatewayURL, httpClient) + if err != nil { + return nil, err + } + + valueStore, err := NewRemoteValueStore(gatewayURL, httpClient) + if err != nil { + return nil, err + } + + return NewCarBackend(carFetcher, append(opts, WithValueStore(valueStore))...) 
+} + func registerCarBackendMetrics(promReg prometheus.Registerer) *CarBackendMetrics { // How many CAR Fetch attempts we had? Need this to calculate % of various car request types. // We only count attempts here, because success/failure with/without retries are provided by caboose: @@ -351,7 +374,7 @@ func (api *CarBackend) Get(ctx context.Context, path path.ImmutablePath, byteRan } } - md, terminalElem, err := fetchWithPartialRetries(ctx, path, carParams, loadTerminalEntity, api.metrics, api.fetchCAR) + md, terminalElem, err := fetchWithPartialRetries(ctx, path, carParams, loadTerminalEntity, api.metrics, api.fetchCAR, api.getBlockTimeout) if err != nil { return ContentPathMetadata{}, nil, err } @@ -540,170 +563,9 @@ func loadTerminalEntity(ctx context.Context, c cid.Cid, blk blocks.Block, lsys * } } -type backpressuredHAMTDirIterNoRecursion struct { - dagSize uint64 - linksItr ipld.MapIterator - dirCid cid.Cid - - lsys *ipld.LinkSystem - getLsys lsysGetter - ctx context.Context - - curLnk unixfs.LinkResult - curProcessed int - - closed chan error - hasClosed bool - err error -} - -func (it *backpressuredHAMTDirIterNoRecursion) AwaitClose() <-chan error { - return it.closed -} - -func (it *backpressuredHAMTDirIterNoRecursion) Link() unixfs.LinkResult { - return it.curLnk -} - -func (it *backpressuredHAMTDirIterNoRecursion) Next() bool { - defer func() { - if it.linksItr.Done() || it.err != nil { - if !it.hasClosed { - it.hasClosed = true - close(it.closed) - } - } - }() - - if it.err != nil { - return false - } - - iter := it.linksItr - if iter.Done() { - return false - } - - /* - Since there is no way to make a graph request for part of a HAMT during errors we can either fill in the HAMT with - block requests, or we can re-request the HAMT and skip over the parts we already have. - - Here we choose the latter, however in the event of a re-request we request the entity rather than the entire DAG as - a compromise between more requests and over-fetching data. 
- */ - - var err error - for { - if it.ctx.Err() != nil { - it.err = it.ctx.Err() - return false - } - - retry, processedErr := isRetryableError(err) - if !retry { - it.err = processedErr - return false - } - - var nd ipld.Node - if err != nil { - var lsys *ipld.LinkSystem - lsys, err = it.getLsys(it.ctx, it.dirCid, CarParams{Scope: DagScopeEntity}) - if err != nil { - continue - } - - _, pbn, ufsFieldData, _, ufsBaseErr := loadUnixFSBase(it.ctx, it.dirCid, nil, lsys) - if ufsBaseErr != nil { - err = ufsBaseErr - continue - } - - nd, err = hamt.NewUnixFSHAMTShard(it.ctx, pbn, ufsFieldData, lsys) - if err != nil { - err = fmt.Errorf("could not reify sharded directory: %w", err) - continue - } - - iter = nd.MapIterator() - for i := 0; i < it.curProcessed; i++ { - _, _, err = iter.Next() - if err != nil { - continue - } - } - - it.linksItr = iter - } - - var k, v ipld.Node - k, v, err = iter.Next() - if err != nil { - retry, processedErr = isRetryableError(err) - if retry { - err = processedErr - continue - } - it.err = processedErr - return false - } - - var name string - name, err = k.AsString() - if err != nil { - it.err = err - return false - } - - var lnk ipld.Link - lnk, err = v.AsLink() - if err != nil { - it.err = err - return false - } - - cl, ok := lnk.(cidlink.Link) - if !ok { - it.err = fmt.Errorf("link not a cidlink") - return false - } - - c := cl.Cid - - pbLnk, ok := v.(*ufsiter.IterLink) - if !ok { - it.err = fmt.Errorf("HAMT value is not a dag-pb link") - return false - } - - cumulativeDagSize := uint64(0) - if pbLnk.Substrate.Tsize.Exists() { - cumulativeDagSize = uint64(pbLnk.Substrate.Tsize.Must().Int()) - } - - it.curLnk = unixfs.LinkResult{ - Link: &format.Link{ - Name: name, - Size: cumulativeDagSize, - Cid: c, - }, - } - it.curProcessed++ - break - } - - return true -} - -func (it *backpressuredHAMTDirIterNoRecursion) Err() error { - return it.err -} - -var _ awaitCloser = (*backpressuredHAMTDirIterNoRecursion)(nil) - func (api *CarBackend) 
GetAll(ctx context.Context, path path.ImmutablePath) (ContentPathMetadata, files.Node, error) { api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "all", "entityRanges": "0"}).Inc() - return fetchWithPartialRetries(ctx, path, CarParams{Scope: DagScopeAll}, loadTerminalUnixFSElementWithRecursiveDirectories, api.metrics, api.fetchCAR) + return fetchWithPartialRetries(ctx, path, CarParams{Scope: DagScopeAll}, loadTerminalUnixFSElementWithRecursiveDirectories, api.metrics, api.fetchCAR, api.getBlockTimeout) } type loadTerminalElement[T any] func(ctx context.Context, c cid.Cid, blk blocks.Block, lsys *ipld.LinkSystem, params CarParams, getLsys lsysGetter) (T, error) @@ -720,7 +582,7 @@ type nextReq struct { params CarParams } -func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, initialParams CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics *CarBackendMetrics, fetchCAR fetchCarFn) (ContentPathMetadata, T, error) { +func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, initialParams CarParams, resolveTerminalElementFn loadTerminalElement[T], metrics *CarBackendMetrics, fetchCAR fetchCarFn, timeout time.Duration) (ContentPathMetadata, T, error) { var zeroReturnType T terminalPathElementCh := make(chan terminalPathType[T], 1) @@ -752,12 +614,12 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i params := initialParams err := fetchCAR(cctx, p, params, func(resource string, reader io.Reader) error { - gb, err := carToLinearBlockGetter(cctx, reader, metrics) + gb, err := carToLinearBlockGetter(cctx, reader, timeout, metrics) if err != nil { return err } - lsys := getLinksystem(gb) + lsys := getCarLinksystem(gb) if hasSentAsyncData { _, _, err = resolvePathToLastWithRoots(cctx, p, lsys) @@ -838,7 +700,7 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i } if err != nil { - lsys := getLinksystem(func(ctx context.Context, cid 
cid.Cid) (blocks.Block, error) { + lsys := getCarLinksystem(func(ctx context.Context, cid cid.Cid) (blocks.Block, error) { return nil, multierror.Append(ErrFetcherUnexpectedEOF, format.ErrNotFound{Cid: cid}) }) for { @@ -872,11 +734,11 @@ func (api *CarBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (Cont var f files.File // TODO: if path is `/ipfs/cid`, we should use ?format=raw err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { - gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err } - lsys := getLinksystem(gb) + lsys := getCarLinksystem(gb) // First resolve the path since we always need to. var terminalBlk blocks.Block @@ -920,11 +782,11 @@ func (api *CarBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentP // TODO: fallback to dynamic fetches in case we haven't requested enough data rangeTo := int64(3071) err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: 0, To: &rangeTo}}, func(resource string, reader io.Reader) error { - gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err } - lsys := getLinksystem(gb) + lsys := getCarLinksystem(gb) // First resolve the path since we always need to. 
var terminalBlk blocks.Block @@ -1052,11 +914,11 @@ func (api *CarBackend) ResolvePath(ctx context.Context, p path.ImmutablePath) (C var md ContentPathMetadata err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { - gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err } - lsys := getLinksystem(gb) + lsys := getCarLinksystem(gb) // First resolve the path since we always need to. md, _, err = resolvePathToLastWithRoots(ctx, p, lsys) @@ -1098,7 +960,7 @@ func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params var blockBuffer []blocks.Block err = api.fetchCAR(ctx, p, params, func(resource string, reader io.Reader) error { numBlocksThisCall := 0 - gb, err := carToLinearBlockGetter(ctx, reader, api.metrics) + gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err } @@ -1121,7 +983,7 @@ func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params numBlocksThisCall++ return blk, nil } - l := getLinksystem(teeBlock) + l := getCarLinksystem(teeBlock) var isNotFound bool diff --git a/gateway/backend_car_fetcher.go b/gateway/backend_car_fetcher.go new file mode 100644 index 000000000..cc9f9647d --- /dev/null +++ b/gateway/backend_car_fetcher.go @@ -0,0 +1,131 @@ +package gateway + +import ( + "context" + "errors" + "fmt" + "io" + "math/rand" + "net/http" + "time" +) + +type DataCallback func(resource string, reader io.Reader) error + +// CarFetcher powers a [CarBackend]. 
+type CarFetcher interface { + Fetch(ctx context.Context, path string, cb DataCallback) error +} + +type remoteCarFetcher struct { + httpClient *http.Client + gatewayURL []string + rand *rand.Rand +} + +// NewRemoteCarFetcher returns a [CarFetcher] that is backed by one or more gateways +// that support partial CAR requests, as described in [IPIP-402]. You can optionally +// pass your own [http.Client]. +// +// [IPIP-402]: https://specs.ipfs.tech/ipips/ipip-0402 +func NewRemoteCarFetcher(gatewayURL []string, httpClient *http.Client) (CarFetcher, error) { + if len(gatewayURL) == 0 { + return nil, errors.New("missing gateway URLs to which to proxy") + } + + if httpClient == nil { + httpClient = newRemoteHTTPClient() + } + + return &remoteCarFetcher{ + gatewayURL: gatewayURL, + httpClient: httpClient, + rand: rand.New(rand.NewSource(time.Now().Unix())), + }, nil +} + +func (ps *remoteCarFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { + urlStr := fmt.Sprintf("%s%s", ps.getRandomGatewayURL(), path) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil) + if err != nil { + return err + } + log.Debugw("car fetch", "url", req.URL) + req.Header.Set("Accept", "application/vnd.ipld.car;order=dfs;dups=y") + resp, err := ps.httpClient.Do(req) + if err != nil { + return err + } + + if resp.StatusCode != http.StatusOK { + errData, err := io.ReadAll(resp.Body) + if err != nil { + err = fmt.Errorf("could not read error message: %w", err) + } else { + err = fmt.Errorf("%q", string(errData)) + } + return fmt.Errorf("http error from car gateway: %s: %w", resp.Status, err) + } + + err = cb(path, resp.Body) + if err != nil { + resp.Body.Close() + return err + } + return resp.Body.Close() +} + +func (ps *remoteCarFetcher) getRandomGatewayURL() string { + return ps.gatewayURL[ps.rand.Intn(len(ps.gatewayURL))] +} + +type retryCarFetcher struct { + inner CarFetcher + retries int +} + +// NewRetryCarFetcher returns a [CarFetcher] that retries to 
fetch up to the given +// [allowedRetries] using the [inner] [CarFetcher]. If the inner fetcher returns +// an [ErrPartialResponse] error, then the number of retries is reset to the initial +// maximum allowed retries. +func NewRetryCarFetcher(inner CarFetcher, allowedRetries int) (CarFetcher, error) { + if allowedRetries <= 0 { + return nil, errors.New("number of retries must be a number larger than 0") + } + + return &retryCarFetcher{ + inner: inner, + retries: allowedRetries, + }, nil +} + +func (r *retryCarFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { + return r.fetch(ctx, path, cb, r.retries) +} + +func (r *retryCarFetcher) fetch(ctx context.Context, path string, cb DataCallback, retriesLeft int) error { + err := r.inner.Fetch(ctx, path, cb) + if err == nil { + return nil + } + + if retriesLeft > 0 { + retriesLeft-- + } else { + return fmt.Errorf("retry fetcher out of retries: %w", err) + } + + switch t := err.(type) { + case ErrPartialResponse: + if len(t.StillNeed) > 1 { + return errors.New("only a single request at a time is supported") + } + + // Resets the number of retries for partials, mimicking Caboose logic. 
+ retriesLeft = r.retries + + return r.fetch(ctx, t.StillNeed[0], cb, retriesLeft) + default: + return r.fetch(ctx, path, cb, retriesLeft) + } +} diff --git a/gateway/backend_car_files.go b/gateway/backend_car_files.go index 540916280..c384bbe2c 100644 --- a/gateway/backend_car_files.go +++ b/gateway/backend_car_files.go @@ -7,11 +7,14 @@ import ( "io" "github.com/ipfs/boxo/files" + "github.com/ipfs/boxo/ipld/unixfs" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" + format "github.com/ipfs/go-ipld-format" "github.com/ipfs/go-unixfsnode" ufsData "github.com/ipfs/go-unixfsnode/data" "github.com/ipfs/go-unixfsnode/hamt" + ufsiter "github.com/ipfs/go-unixfsnode/iter" dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/datamodel" @@ -369,6 +372,167 @@ func (it *backpressuredHAMTDirIter) Err() error { var _ files.DirIterator = (*backpressuredHAMTDirIter)(nil) +type backpressuredHAMTDirIterNoRecursion struct { + dagSize uint64 + linksItr ipld.MapIterator + dirCid cid.Cid + + lsys *ipld.LinkSystem + getLsys lsysGetter + ctx context.Context + + curLnk unixfs.LinkResult + curProcessed int + + closed chan error + hasClosed bool + err error +} + +func (it *backpressuredHAMTDirIterNoRecursion) AwaitClose() <-chan error { + return it.closed +} + +func (it *backpressuredHAMTDirIterNoRecursion) Link() unixfs.LinkResult { + return it.curLnk +} + +func (it *backpressuredHAMTDirIterNoRecursion) Next() bool { + defer func() { + if it.linksItr.Done() || it.err != nil { + if !it.hasClosed { + it.hasClosed = true + close(it.closed) + } + } + }() + + if it.err != nil { + return false + } + + iter := it.linksItr + if iter.Done() { + return false + } + + /* + Since there is no way to make a graph request for part of a HAMT during errors we can either fill in the HAMT with + block requests, or we can re-request the HAMT and skip over the parts we already have. 
+ + Here we choose the latter, however in the event of a re-request we request the entity rather than the entire DAG as + a compromise between more requests and over-fetching data. + */ + + var err error + for { + if it.ctx.Err() != nil { + it.err = it.ctx.Err() + return false + } + + retry, processedErr := isRetryableError(err) + if !retry { + it.err = processedErr + return false + } + + var nd ipld.Node + if err != nil { + var lsys *ipld.LinkSystem + lsys, err = it.getLsys(it.ctx, it.dirCid, CarParams{Scope: DagScopeEntity}) + if err != nil { + continue + } + + _, pbn, ufsFieldData, _, ufsBaseErr := loadUnixFSBase(it.ctx, it.dirCid, nil, lsys) + if ufsBaseErr != nil { + err = ufsBaseErr + continue + } + + nd, err = hamt.NewUnixFSHAMTShard(it.ctx, pbn, ufsFieldData, lsys) + if err != nil { + err = fmt.Errorf("could not reify sharded directory: %w", err) + continue + } + + iter = nd.MapIterator() + for i := 0; i < it.curProcessed; i++ { + _, _, err = iter.Next() + if err != nil { + continue + } + } + + it.linksItr = iter + } + + var k, v ipld.Node + k, v, err = iter.Next() + if err != nil { + retry, processedErr = isRetryableError(err) + if retry { + err = processedErr + continue + } + it.err = processedErr + return false + } + + var name string + name, err = k.AsString() + if err != nil { + it.err = err + return false + } + + var lnk ipld.Link + lnk, err = v.AsLink() + if err != nil { + it.err = err + return false + } + + cl, ok := lnk.(cidlink.Link) + if !ok { + it.err = fmt.Errorf("link not a cidlink") + return false + } + + c := cl.Cid + + pbLnk, ok := v.(*ufsiter.IterLink) + if !ok { + it.err = fmt.Errorf("HAMT value is not a dag-pb link") + return false + } + + cumulativeDagSize := uint64(0) + if pbLnk.Substrate.Tsize.Exists() { + cumulativeDagSize = uint64(pbLnk.Substrate.Tsize.Must().Int()) + } + + it.curLnk = unixfs.LinkResult{ + Link: &format.Link{ + Name: name, + Size: cumulativeDagSize, + Cid: c, + }, + } + it.curProcessed++ + break + } + + return true 
+} + +func (it *backpressuredHAMTDirIterNoRecursion) Err() error { + return it.err +} + +var _ awaitCloser = (*backpressuredHAMTDirIterNoRecursion)(nil) + /* 1. Run traversal to get the top-level response 2. Response can do a callback for another response diff --git a/gateway/backend_car_test.go b/gateway/backend_car_test.go index f356da0ef..37f99d46c 100644 --- a/gateway/backend_car_test.go +++ b/gateway/backend_car_test.go @@ -153,9 +153,13 @@ func TestCarBackendTar(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) @@ -320,9 +324,12 @@ func TestCarBackendTarAtEndOfPath(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) p, err := path.Join(path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")), "hamtDir") @@ -479,9 +486,12 @@ func TestCarBackendGetFile(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) 
trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -580,9 +590,12 @@ func TestCarBackendGetFileRangeRequest(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -687,9 +700,12 @@ func TestCarBackendGetFileWithBadBlockReturned(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -789,9 +805,12 @@ func TestCarBackendGetHAMTDirectory(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) trustedGatewayServer := httptest.NewServer(NewHandler(Config{DeserializedResponses: true}, backend)) @@ -895,9 +914,12 @@ func TestCarBackendGetCAR(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, 
err) - backend, err := NewCarBackend(&retryFetcher{inner: bs, allowedRetries: 3, retriesRemaining: 3}) + fetcher, err := NewRetryCarFetcher(bs, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) p := path.FromCid(cid.MustParse("bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi")) @@ -985,7 +1007,7 @@ func TestCarBackendPassthroughErrors(t *testing.T) { })) defer s.Close() - bs, err := NewRemoteCarFetcher([]string{s.URL}) + bs, err := NewRemoteCarFetcher([]string{s.URL}, nil) require.NoError(t, err) p, err := path.NewPath("/ipfs/bafybeid3fd2xxdcd3dbj7trb433h2aqssn6xovjbwnkargjv7fuog4xjdi/hamtDir/exampleA") @@ -997,15 +1019,17 @@ func TestCarBackendPassthroughErrors(t *testing.T) { bogusErr := NewErrorStatusCode(fmt.Errorf("this is a test error"), 418) clientRequestNum := 0 - backend, err := NewCarBackend(&retryFetcher{ - inner: &fetcherWrapper{fn: func(ctx context.Context, path string, cb DataCallback) error { - clientRequestNum++ - if clientRequestNum > 2 { - return bogusErr - } - return bs.Fetch(ctx, path, cb) - }}, - allowedRetries: 3, retriesRemaining: 3}) + + fetcher, err := NewRetryCarFetcher(&fetcherWrapper{fn: func(ctx context.Context, path string, cb DataCallback) error { + clientRequestNum++ + if clientRequestNum > 2 { + return bogusErr + } + return bs.Fetch(ctx, path, cb) + }}, 3) + require.NoError(t, err) + + backend, err := NewCarBackend(fetcher) require.NoError(t, err) err = traversal(ctx, imPath, backend) @@ -1039,38 +1063,3 @@ type fetcherWrapper struct { func (w *fetcherWrapper) Fetch(ctx context.Context, path string, cb DataCallback) error { return w.fn(ctx, path, cb) } - -type retryFetcher struct { - inner CarFetcher - allowedRetries int - retriesRemaining int -} - -func (r *retryFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { - err := r.inner.Fetch(ctx, path, cb) - if err == nil { - return nil - } - - if r.retriesRemaining > 0 { - r.retriesRemaining-- - } else { - 
return fmt.Errorf("retry fetcher out of retries: %w", err) - } - - switch t := err.(type) { - case ErrPartialResponse: - if len(t.StillNeed) > 1 { - panic("only a single request at a time supported") - } - - // Mimicking the Caboose logic reset the number of retries for partials - r.retriesRemaining = r.allowedRetries - - return r.Fetch(ctx, t.StillNeed[0], cb) - default: - return r.Fetch(ctx, path, cb) - } -} - -var _ CarFetcher = (*retryFetcher)(nil) diff --git a/gateway/backend_car_traversal.go b/gateway/backend_car_traversal.go index 090495530..544935b04 100644 --- a/gateway/backend_car_traversal.go +++ b/gateway/backend_car_traversal.go @@ -23,9 +23,9 @@ import ( type getBlock func(ctx context.Context, cid cid.Cid) (blocks.Block, error) -var ErrNilBlock = ErrInvalidResponse{Message: "received a nil block with no error"} +var errNilBlock = ErrInvalidResponse{Message: "received a nil block with no error"} -func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *CarBackendMetrics) (getBlock, error) { +func carToLinearBlockGetter(ctx context.Context, reader io.Reader, timeout time.Duration, metrics *CarBackendMetrics) (getBlock, error) { cr, err := car.NewCarReaderWithOptions(reader, car.WithErrorOnEmptyRoots(false)) if err != nil { return nil, err @@ -73,9 +73,9 @@ func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *CarB // initially set a higher timeout here so that if there's an initial timeout error we get it from the car reader. 
var t *time.Timer if isFirstBlock { - t = time.NewTimer(getBlockTimeout * 2) + t = time.NewTimer(timeout * 2) } else { - t = time.NewTimer(getBlockTimeout) + t = time.NewTimer(timeout) } var blkRead blockRead var ok bool @@ -84,7 +84,7 @@ func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *CarB if !t.Stop() { <-t.C } - t.Reset(getBlockTimeout) + t.Reset(timeout) case <-t.C: return nil, ErrGatewayTimeout } @@ -101,7 +101,7 @@ func carToLinearBlockGetter(ctx context.Context, reader io.Reader, metrics *CarB } return blkRead.block, nil } - return nil, ErrNilBlock + return nil, errNilBlock }, nil } @@ -121,7 +121,7 @@ func extractIdentityMultihashCIDContents(k cid.Cid) (bool, []byte) { return true, dmh.Digest } -func getLinksystem(fn getBlock) *ipld.LinkSystem { +func getCarLinksystem(fn getBlock) *ipld.LinkSystem { lsys := cidlink.DefaultLinkSystem() lsys.StorageReadOpener = func(linkContext linking.LinkContext, link datamodel.Link) (io.Reader, error) { c := link.(cidlink.Link).Cid diff --git a/gateway/backend_car_utils.go b/gateway/backend_car_utils.go index 1df74761d..0612f374e 100644 --- a/gateway/backend_car_utils.go +++ b/gateway/backend_car_utils.go @@ -4,9 +4,6 @@ import ( "context" "errors" "fmt" - "io" - "math/rand" - "net/http" "net/url" "strconv" "strings" @@ -72,60 +69,3 @@ func blockstoreErrToGatewayErr(err error) error { // everything else returns 502 Bad Gateway return fmt.Errorf("%w: %s", ErrBadGateway, err.Error()) } - -type remoteCarFetcher struct { - httpClient *http.Client - gatewayURL []string - validate bool - rand *rand.Rand -} - -func NewRemoteCarFetcher(gatewayURL []string) (CarFetcher, error) { - if len(gatewayURL) == 0 { - return nil, errors.New("missing gateway URLs to which to proxy") - } - - return &remoteCarFetcher{ - gatewayURL: gatewayURL, - httpClient: newRemoteHTTPClient(), - // Enables block validation by default. Important since we are - // proxying block requests to an untrusted gateway. 
- validate: true, - rand: rand.New(rand.NewSource(time.Now().Unix())), - }, nil -} - -func (ps *remoteCarFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { - urlStr := fmt.Sprintf("%s%s", ps.getRandomGatewayURL(), path) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil) - if err != nil { - return err - } - log.Debugw("car fetch", "url", req.URL) - req.Header.Set("Accept", "application/vnd.ipld.car;order=dfs;dups=y") - resp, err := ps.httpClient.Do(req) - if err != nil { - return err - } - - if resp.StatusCode != http.StatusOK { - errData, err := io.ReadAll(resp.Body) - if err != nil { - err = fmt.Errorf("could not read error message: %w", err) - } else { - err = fmt.Errorf("%q", string(errData)) - } - return fmt.Errorf("http error from car gateway: %s: %w", resp.Status, err) - } - - err = cb(path, resp.Body) - if err != nil { - resp.Body.Close() - return err - } - return resp.Body.Close() -} - -func (ps *remoteCarFetcher) getRandomGatewayURL() string { - return ps.gatewayURL[ps.rand.Intn(len(ps.gatewayURL))] -} diff --git a/gateway/blockstore.go b/gateway/blockstore.go index f5043abe0..c1c4b067b 100644 --- a/gateway/blockstore.go +++ b/gateway/blockstore.go @@ -34,12 +34,19 @@ var _ blockstore.Blockstore = (*cacheBlockStore)(nil) // NewCacheBlockStore creates a new [blockstore.Blockstore] that caches blocks // in memory using a two queue cache. It can be useful, for example, when paired // with a proxy blockstore (see [NewRemoteBlockstore]). -func NewCacheBlockStore(size int) (blockstore.Blockstore, error) { +// +// If the given [prometheus.Registerer] is nil, a new one will be created using +// [prometheus.NewRegistry]. 
+func NewCacheBlockStore(size int, reg prometheus.Registerer) (blockstore.Blockstore, error) { c, err := lru.New2Q[string, []byte](size) if err != nil { return nil, err } + if reg == nil { + reg = prometheus.NewRegistry() + } + cacheHitsMetric := prometheus.NewCounter(prometheus.CounterOpts{ Namespace: "ipfs", Subsystem: "http", @@ -54,12 +61,12 @@ func NewCacheBlockStore(size int) (blockstore.Blockstore, error) { Help: "The number of global block cache requests.", }) - err = prometheus.Register(cacheHitsMetric) + err = reg.Register(cacheHitsMetric) if err != nil { return nil, err } - err = prometheus.Register(cacheRequestsMetric) + err = reg.Register(cacheRequestsMetric) if err != nil { return nil, err } @@ -152,17 +159,21 @@ type remoteBlockstore struct { // NewRemoteBlockstore creates a new [blockstore.Blockstore] that is backed by one // or more gateways that support RAW block requests. See the [Trustless Gateway] -// specification for more details. +// specification for more details. You can optionally pass your own [http.Client]. // // [Trustless Gateway]: https://specs.ipfs.tech/http-gateways/trustless-gateway/ -func NewRemoteBlockstore(gatewayURL []string) (blockstore.Blockstore, error) { +func NewRemoteBlockstore(gatewayURL []string, httpClient *http.Client) (blockstore.Blockstore, error) { if len(gatewayURL) == 0 { return nil, errors.New("missing gateway URLs to which to proxy") } + if httpClient == nil { + httpClient = newRemoteHTTPClient() + } + return &remoteBlockstore{ gatewayURL: gatewayURL, - httpClient: newRemoteHTTPClient(), + httpClient: httpClient, rand: rand.New(rand.NewSource(time.Now().Unix())), // Enables block validation by default. Important since we are // proxying block requests to untrusted gateways. 
diff --git a/gateway/gateway_test.go b/gateway/gateway_test.go index 031a184a5..289faad01 100644 --- a/gateway/gateway_test.go +++ b/gateway/gateway_test.go @@ -20,7 +20,7 @@ import ( ) func TestGatewayGet(t *testing.T) { - ts, backend, root := newTestServerAndNode(t, nil, "fixtures.car") + ts, backend, root := newTestServerAndNode(t, "fixtures.car") ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -96,7 +96,7 @@ func TestGatewayGet(t *testing.T) { func TestHeaders(t *testing.T) { t.Parallel() - ts, backend, root := newTestServerAndNode(t, nil, "headers-test.car") + ts, backend, root := newTestServerAndNode(t, "headers-test.car") var ( rootCID = "bafybeidbcy4u6y55gsemlubd64zk53xoxs73ifd6rieejxcr7xy46mjvky" @@ -121,7 +121,7 @@ func TestHeaders(t *testing.T) { t.Run("Cache-Control uses TTL for /ipns/ when it is known", func(t *testing.T) { t.Parallel() - ts, backend, root := newTestServerAndNode(t, nil, "ipns-hostname-redirects.car") + ts, backend, root := newTestServerAndNode(t, "ipns-hostname-redirects.car") backend.namesys["/ipns/example.net"] = newMockNamesysItem(path.FromCid(root), time.Second*30) backend.namesys["/ipns/example.com"] = newMockNamesysItem(path.FromCid(root), time.Second*55) backend.namesys["/ipns/unknown.com"] = newMockNamesysItem(path.FromCid(root), 0) @@ -420,7 +420,7 @@ func TestHeaders(t *testing.T) { } func TestGoGetSupport(t *testing.T) { - ts, _, root := newTestServerAndNode(t, nil, "fixtures.car") + ts, _, root := newTestServerAndNode(t, "fixtures.car") // mimic go-get req := mustNewRequest(t, http.MethodGet, ts.URL+"/ipfs/"+root.String()+"?go-get=1", nil) @@ -432,7 +432,7 @@ func TestRedirects(t *testing.T) { t.Parallel() t.Run("IPNS Base58 Multihash Redirect", func(t *testing.T) { - ts, _, _ := newTestServerAndNode(t, nil, "fixtures.car") + ts, _, _ := newTestServerAndNode(t, "fixtures.car") t.Run("ED25519 Base58-encoded key", func(t *testing.T) { t.Parallel() @@ -453,7 +453,7 @@ func TestRedirects(t *testing.T) 
{ t.Run("URI Query Redirects", func(t *testing.T) { t.Parallel() - ts, _, _ := newTestServerAndNode(t, mockNamesys{}, "fixtures.car") + ts, _, _ := newTestServerAndNode(t, "fixtures.car") cid := "QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR" for _, test := range []struct { @@ -492,7 +492,7 @@ func TestRedirects(t *testing.T) { t.Run("IPNS Hostname Redirects", func(t *testing.T) { t.Parallel() - ts, backend, root := newTestServerAndNode(t, nil, "ipns-hostname-redirects.car") + ts, backend, root := newTestServerAndNode(t, "ipns-hostname-redirects.car") backend.namesys["/ipns/example.net"] = newMockNamesysItem(path.FromCid(root), 0) // make request to directory containing index.html @@ -555,9 +555,11 @@ func TestRedirects(t *testing.T) { // Check statuses and body. require.Equal(t, http.StatusOK, res.StatusCode) - body, err := io.ReadAll(res.Body) - require.NoError(t, err) - require.Equal(t, "hello world\n", string(body)) + if method != http.MethodHead { + body, err := io.ReadAll(res.Body) + require.NoError(t, err) + require.Equal(t, "hello world\n", string(body)) + } // Check Etag. 
etag := res.Header.Get("Etag") @@ -948,7 +950,7 @@ func TestPanicStatusCode(t *testing.T) { func TestBrowserErrorHTML(t *testing.T) { t.Parallel() - ts, _, root := newTestServerAndNode(t, nil, "fixtures.car") + ts, _, root := newTestServerAndNode(t, "fixtures.car") t.Run("plain error if request does not have Accept: text/html", func(t *testing.T) { t.Parallel() diff --git a/gateway/handler_unixfs_dir.go b/gateway/handler_unixfs_dir.go index 098a77b6a..7a49dcafc 100644 --- a/gateway/handler_unixfs_dir.go +++ b/gateway/handler_unixfs_dir.go @@ -121,11 +121,9 @@ func (i *handler) serveDirectory(ctx context.Context, w http.ResponseWriter, r * i.unixfsDirIndexGetMetric.WithLabelValues(originalContentPath.Namespace()).Observe(time.Since(rq.begin).Seconds()) } return success - } - - if isErrNotFound(err) { + } else if isErrNotFound(err) { rq.logger.Debugw("no index.html; noop", "path", idxPath) - } else if err != nil { + } else { i.webError(w, r, err, http.StatusInternalServerError) return false } diff --git a/gateway/handler_unixfs_dir_test.go b/gateway/handler_unixfs_dir_test.go index e44708687..5727d50c5 100644 --- a/gateway/handler_unixfs_dir_test.go +++ b/gateway/handler_unixfs_dir_test.go @@ -12,7 +12,7 @@ import ( func TestIPNSHostnameBacklinks(t *testing.T) { // Test if directory listing on DNSLink Websites have correct backlinks. 
- ts, backend, root := newTestServerAndNode(t, nil, "dir-special-chars.car") + ts, backend, root := newTestServerAndNode(t, "dir-special-chars.car") ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/gateway/remote_blocks_backend.go b/gateway/remote_blocks_backend.go deleted file mode 100644 index e020e0b1e..000000000 --- a/gateway/remote_blocks_backend.go +++ /dev/null @@ -1,53 +0,0 @@ -package gateway - -import ( - "net/http" - "time" - - "github.com/ipfs/boxo/blockservice" - "github.com/ipfs/boxo/exchange/offline" - "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" -) - -// TODO: make this configurable via BlocksBackendOption -const getBlockTimeout = time.Second * 60 - -// NewRemoteBlocksBackend creates a new [BlocksBackend] instance backed by one -// or more gateways. These gateways must support RAW block requests and IPNS -// Record requests. See [NewRemoteBlockstore] and [NewRemoteValueStore] for -// more details. -// -// If you want to create a more custom [BlocksBackend] with only remote IPNS -// Record resolution, or only remote block fetching, we recommend using -// [NewBlocksBackend] directly. -func NewRemoteBlocksBackend(gatewayURL []string, opts ...BackendOption) (*BlocksBackend, error) { - blockStore, err := NewRemoteBlockstore(gatewayURL) - if err != nil { - return nil, err - } - - valueStore, err := NewRemoteValueStore(gatewayURL) - if err != nil { - return nil, err - } - - blockService := blockservice.New(blockStore, offline.Exchange(blockStore)) - return NewBlocksBackend(blockService, append(opts, WithValueStore(valueStore))...) -} - -// newRemoteHTTPClient creates a new [http.Client] that is optimized for retrieving -// multiple blocks from a single gateway concurrently. 
-func newRemoteHTTPClient() *http.Client { - transport := &http.Transport{ - MaxIdleConns: 1000, - MaxConnsPerHost: 100, - MaxIdleConnsPerHost: 100, - IdleConnTimeout: 90 * time.Second, - ForceAttemptHTTP2: true, - } - - return &http.Client{ - Timeout: getBlockTimeout, - Transport: otelhttp.NewTransport(transport), - } -} diff --git a/gateway/utilities_test.go b/gateway/utilities_test.go index 68db84041..22f5750fa 100644 --- a/gateway/utilities_test.go +++ b/gateway/utilities_test.go @@ -27,7 +27,7 @@ import ( ) func mustNewRequest(t *testing.T, method string, path string, body io.Reader) *http.Request { - r, err := http.NewRequest(http.MethodGet, path, body) + r, err := http.NewRequest(method, path, body) require.NoError(t, err) return r } @@ -224,7 +224,7 @@ func (mb *mockBackend) resolvePathNoRootsReturned(ctx context.Context, ip path.P return md.LastSegment, nil } -func newTestServerAndNode(t *testing.T, ns mockNamesys, fixturesFile string) (*httptest.Server, *mockBackend, cid.Cid) { +func newTestServerAndNode(t *testing.T, fixturesFile string) (*httptest.Server, *mockBackend, cid.Cid) { backend, root := newMockBackend(t, fixturesFile) ts := newTestServer(t, backend) return ts, backend, root diff --git a/gateway/value_store.go b/gateway/value_store.go index d494fc212..ead5a44e7 100644 --- a/gateway/value_store.go +++ b/gateway/value_store.go @@ -20,19 +20,23 @@ type remoteValueStore struct { rand *rand.Rand } -// NewRemoteValueStore creates a new [routing.ValueStore] that is backed by one -// or more gateways that support IPNS Record requests. See the [Trustless Gateway] -// specification for more details. +// NewRemoteValueStore creates a new [routing.ValueStore] backed by one or more +// gateways that support IPNS Record requests. See the [Trustless Gateway] +// specification for more details. You can optionally pass your own [http.Client]. 
// // [Trustless Gateway]: https://specs.ipfs.tech/http-gateways/trustless-gateway/ -func NewRemoteValueStore(gatewayURL []string) (routing.ValueStore, error) { +func NewRemoteValueStore(gatewayURL []string, httpClient *http.Client) (routing.ValueStore, error) { if len(gatewayURL) == 0 { return nil, errors.New("missing gateway URLs to which to proxy") } + if httpClient == nil { + httpClient = newRemoteHTTPClient() + } + return &remoteValueStore{ gatewayURL: gatewayURL, - httpClient: newRemoteHTTPClient(), + httpClient: httpClient, rand: rand.New(rand.NewSource(time.Now().Unix())), }, nil } From fe546c3ba35f3664bad990fc353cb9ffd4e4abf5 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 13:35:08 +0200 Subject: [PATCH 14/19] docs: changelog --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2b810bac..ef6a25e86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,9 @@ The following emojis are used to highlight certain changes: ### Added -* `gateway` now includes `NewRemoteBlocksBackend` which allows you to create a gateway backend that uses one or multiple other gateways as backend. These gateways must support RAW block requests (`application/vnd.ipld.raw`), as well as IPNS Record requests (`application/vnd.ipfs.ipns-record`). With this, we also introduced a `NewCacheBlockStore`, `NewRemoteBlockstore` and `NewRemoteValueStore`. +* ✨ `gateway` has new backend possibilities: + * `NewRemoteBlocksBackend` allows you to create a gateway backend that uses one or multiple other gateways as backend. These gateways must support RAW block requests (`application/vnd.ipld.raw`), as well as IPNS Record requests (`application/vnd.ipfs.ipns-record`). With this, we also introduced `NewCacheBlockStore`, `NewRemoteBlockstore` and `NewRemoteValueStore`. + * `NewRemoteCarBackend` allows you to create a gateway backend that uses one or multiple Trustless Gateways as backend. 
These gateways must support CAR requests (`application/vnd.ipld.car`), as well as the extensions described in [IPIP-402](https://specs.ipfs.tech/ipips/ipip-0402/). With this, we also introduced `NewCarBackend`, `NewRemoteCarFetcher` and `NewRetryCarFetcher`. ### Changed From 5b5a0a45cb672a26f798bd35d57ee269301fd420 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 13:46:57 +0200 Subject: [PATCH 15/19] make CarFetcher more generic --- gateway/backend_car.go | 49 +++++++++---- gateway/backend_car_fetcher.go | 57 ++++++++++++--- ...ls_test.go => backend_car_fetcher_test.go} | 37 ---------- gateway/backend_car_test.go | 45 ++++++++++-- gateway/backend_car_utils.go | 71 ------------------- gateway/errors.go | 8 ++- 6 files changed, 131 insertions(+), 136 deletions(-) rename gateway/{backend_car_utils_test.go => backend_car_fetcher_test.go} (73%) delete mode 100644 gateway/backend_car_utils.go diff --git a/gateway/backend_car.go b/gateway/backend_car.go index 010a13ed3..d2b33a0fc 100644 --- a/gateway/backend_car.go +++ b/gateway/backend_car.go @@ -185,14 +185,12 @@ func registerCarBackendMetrics(promReg prometheus.Registerer) *CarBackendMetrics } } -func (api *CarBackend) fetchCAR(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { - urlWithoutHost := contentPathToCarUrl(path, params).String() - +func (api *CarBackend) fetchCAR(ctx context.Context, p path.ImmutablePath, params CarParams, cb DataCallback) error { api.metrics.carFetchAttemptMetric.Inc() var ipldError error - fetchErr := api.fetcher.Fetch(ctx, urlWithoutHost, func(resource string, reader io.Reader) error { + fetchErr := api.fetcher.Fetch(ctx, p, params, func(p path.ImmutablePath, reader io.Reader) error { return checkRetryableError(&ipldError, func() error { - return cb(resource, reader) + return cb(p, reader) }) }) @@ -613,7 +611,7 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i params := initialParams - err := 
fetchCAR(cctx, p, params, func(resource string, reader io.Reader) error { + err := fetchCAR(cctx, p, params, func(_ path.ImmutablePath, reader io.Reader) error { gb, err := carToLinearBlockGetter(cctx, reader, timeout, metrics) if err != nil { return err @@ -687,8 +685,7 @@ func fetchWithPartialRetries[T any](ctx context.Context, p path.ImmutablePath, i return err } params = req.params - remainderUrl := contentPathToCarUrl(p, params).String() - return ErrPartialResponse{StillNeed: []string{remainderUrl}} + return ErrPartialResponse{StillNeed: []CarResource{{Path: p, Params: params}}} case <-cctx.Done(): return cctx.Err() } @@ -733,7 +730,7 @@ func (api *CarBackend) GetBlock(ctx context.Context, p path.ImmutablePath) (Cont var md ContentPathMetadata var f files.File // TODO: if path is `/ipfs/cid`, we should use ?format=raw - err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { + err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(_ path.ImmutablePath, reader io.Reader) error { gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err @@ -781,7 +778,7 @@ func (api *CarBackend) Head(ctx context.Context, p path.ImmutablePath) (ContentP var n *HeadResponse // TODO: fallback to dynamic fetches in case we haven't requested enough data rangeTo := int64(3071) - err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: 0, To: &rangeTo}}, func(resource string, reader io.Reader) error { + err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeEntity, Range: &DagByteRange{From: 0, To: &rangeTo}}, func(_ path.ImmutablePath, reader io.Reader) error { gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err @@ -913,7 +910,7 @@ func (api *CarBackend) ResolvePath(ctx context.Context, p path.ImmutablePath) (C api.metrics.carParamsMetric.With(prometheus.Labels{"dagScope": "block", 
"entityRanges": "0"}).Inc() var md ContentPathMetadata - err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(resource string, reader io.Reader) error { + err := api.fetchCAR(ctx, p, CarParams{Scope: DagScopeBlock}, func(_ path.ImmutablePath, reader io.Reader) error { gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { return err @@ -958,7 +955,7 @@ func (api *CarBackend) GetCAR(ctx context.Context, p path.ImmutablePath, params numBlocksSent := 0 var cw storage.WritableCar var blockBuffer []blocks.Block - err = api.fetchCAR(ctx, p, params, func(resource string, reader io.Reader) error { + err = api.fetchCAR(ctx, p, params, func(_ path.ImmutablePath, reader io.Reader) error { numBlocksThisCall := 0 gb, err := carToLinearBlockGetter(ctx, reader, api.getBlockTimeout, api.metrics) if err != nil { @@ -1120,3 +1117,31 @@ func isRetryableError(err error) (bool, error) { } } } + +// blockstoreErrToGatewayErr translates underlying blockstore error into one that gateway code will return as HTTP 502 or 504 +// it also makes sure Retry-After hint from remote blockstore will be passed to HTTP client, if present. +func blockstoreErrToGatewayErr(err error) error { + if errors.Is(err, &ErrorStatusCode{}) || + errors.Is(err, &ErrorRetryAfter{}) { + // already correct error + return err + } + + // All timeouts should produce 504 Gateway Timeout + if errors.Is(err, context.DeadlineExceeded) || + // Unfortunately this is not an exported type so we have to check for the content. + strings.Contains(err.Error(), "Client.Timeout exceeded") { + return fmt.Errorf("%w: %s", ErrGatewayTimeout, err.Error()) + } + + // (Saturn) errors that support the RetryAfter interface need to be converted + // to the correct gateway error, such that the HTTP header is set. 
+ for v := err; v != nil; v = errors.Unwrap(v) { + if r, ok := v.(interface{ RetryAfter() time.Duration }); ok { + return NewErrorRetryAfter(err, r.RetryAfter()) + } + } + + // everything else returns 502 Bad Gateway + return fmt.Errorf("%w: %s", ErrBadGateway, err.Error()) +} diff --git a/gateway/backend_car_fetcher.go b/gateway/backend_car_fetcher.go index cc9f9647d..ec348694a 100644 --- a/gateway/backend_car_fetcher.go +++ b/gateway/backend_car_fetcher.go @@ -7,14 +7,19 @@ import ( "io" "math/rand" "net/http" + "net/url" + "strconv" + "strings" "time" + + "github.com/ipfs/boxo/path" ) -type DataCallback func(resource string, reader io.Reader) error +type DataCallback func(p path.ImmutablePath, reader io.Reader) error // CarFetcher powers a [CarBackend]. type CarFetcher interface { - Fetch(ctx context.Context, path string, cb DataCallback) error + Fetch(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error } type remoteCarFetcher struct { @@ -44,8 +49,10 @@ func NewRemoteCarFetcher(gatewayURL []string, httpClient *http.Client) (CarFetch }, nil } -func (ps *remoteCarFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { - urlStr := fmt.Sprintf("%s%s", ps.getRandomGatewayURL(), path) +func (ps *remoteCarFetcher) Fetch(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { + url := contentPathToCarUrl(path, params) + + urlStr := fmt.Sprintf("%s%s", ps.getRandomGatewayURL(), url.String()) req, err := http.NewRequestWithContext(ctx, http.MethodGet, urlStr, nil) if err != nil { return err @@ -79,6 +86,36 @@ func (ps *remoteCarFetcher) getRandomGatewayURL() string { return ps.gatewayURL[ps.rand.Intn(len(ps.gatewayURL))] } +// contentPathToCarUrl returns an URL that allows retrieval of specified resource +// from a trustless gateway that implements IPIP-402 +func contentPathToCarUrl(path path.ImmutablePath, params CarParams) *url.URL { + return &url.URL{ + Path: path.String(), + 
RawQuery: carParamsToString(params), + } +} + +// carParamsToString converts CarParams to URL parameters compatible with IPIP-402 +func carParamsToString(params CarParams) string { + paramsBuilder := strings.Builder{} + paramsBuilder.WriteString("format=car") // always send explicit format in URL, this makes debugging easier, even when Accept header was set + if params.Scope != "" { + paramsBuilder.WriteString("&dag-scope=") + paramsBuilder.WriteString(string(params.Scope)) + } + if params.Range != nil { + paramsBuilder.WriteString("&entity-bytes=") + paramsBuilder.WriteString(strconv.FormatInt(params.Range.From, 10)) + paramsBuilder.WriteString(":") + if params.Range.To != nil { + paramsBuilder.WriteString(strconv.FormatInt(*params.Range.To, 10)) + } else { + paramsBuilder.WriteString("*") + } + } + return paramsBuilder.String() +} + type retryCarFetcher struct { inner CarFetcher retries int @@ -99,12 +136,12 @@ func NewRetryCarFetcher(inner CarFetcher, allowedRetries int) (CarFetcher, error }, nil } -func (r *retryCarFetcher) Fetch(ctx context.Context, path string, cb DataCallback) error { - return r.fetch(ctx, path, cb, r.retries) +func (r *retryCarFetcher) Fetch(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { + return r.fetch(ctx, path, params, cb, r.retries) } -func (r *retryCarFetcher) fetch(ctx context.Context, path string, cb DataCallback, retriesLeft int) error { - err := r.inner.Fetch(ctx, path, cb) +func (r *retryCarFetcher) fetch(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback, retriesLeft int) error { + err := r.inner.Fetch(ctx, path, params, cb) if err == nil { return nil } @@ -124,8 +161,8 @@ func (r *retryCarFetcher) fetch(ctx context.Context, path string, cb DataCallbac // Resets the number of retries for partials, mimicking Caboose logic. 
retriesLeft = r.retries - return r.fetch(ctx, t.StillNeed[0], cb, retriesLeft) + return r.fetch(ctx, t.StillNeed[0].Path, t.StillNeed[0].Params, cb, retriesLeft) default: - return r.fetch(ctx, path, cb, retriesLeft) + return r.fetch(ctx, path, params, cb, retriesLeft) } } diff --git a/gateway/backend_car_utils_test.go b/gateway/backend_car_fetcher_test.go similarity index 73% rename from gateway/backend_car_utils_test.go rename to gateway/backend_car_fetcher_test.go index 3ff7cae3d..383f20c2d 100644 --- a/gateway/backend_car_utils_test.go +++ b/gateway/backend_car_fetcher_test.go @@ -1,12 +1,8 @@ package gateway import ( - "errors" - "fmt" "testing" - "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/ipfs/boxo/path" @@ -62,36 +58,3 @@ func TestContentPathToCarUrl(t *testing.T) { }) } } - -type testErr struct { - message string - retryAfter time.Duration -} - -func (e *testErr) Error() string { - return e.message -} - -func (e *testErr) RetryAfter() time.Duration { - return e.retryAfter -} - -func TestGatewayErrorRetryAfter(t *testing.T) { - originalErr := &testErr{message: "test", retryAfter: time.Minute} - var ( - convertedErr error - gatewayErr *ErrorRetryAfter - ) - - // Test unwrapped - convertedErr = blockstoreErrToGatewayErr(originalErr) - ok := errors.As(convertedErr, &gatewayErr) - assert.True(t, ok) - assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) - - // Test wrapped. 
- convertedErr = blockstoreErrToGatewayErr(fmt.Errorf("wrapped error: %w", originalErr)) - ok = errors.As(convertedErr, &gatewayErr) - assert.True(t, ok) - assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) -} diff --git a/gateway/backend_car_test.go b/gateway/backend_car_test.go index 37f99d46c..eebd8e19b 100644 --- a/gateway/backend_car_test.go +++ b/gateway/backend_car_test.go @@ -10,6 +10,7 @@ import ( "net/http/httptest" "strings" "testing" + "time" _ "embed" @@ -23,6 +24,7 @@ import ( carv2 "github.com/ipld/go-car/v2" carbs "github.com/ipld/go-car/v2/blockstore" "github.com/ipld/go-car/v2/storage" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -1020,12 +1022,12 @@ func TestCarBackendPassthroughErrors(t *testing.T) { clientRequestNum := 0 - fetcher, err := NewRetryCarFetcher(&fetcherWrapper{fn: func(ctx context.Context, path string, cb DataCallback) error { + fetcher, err := NewRetryCarFetcher(&fetcherWrapper{fn: func(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { clientRequestNum++ if clientRequestNum > 2 { return bogusErr } - return bs.Fetch(ctx, path, cb) + return bs.Fetch(ctx, path, params, cb) }}, 3) require.NoError(t, err) @@ -1057,9 +1059,42 @@ func TestCarBackendPassthroughErrors(t *testing.T) { } type fetcherWrapper struct { - fn func(ctx context.Context, path string, cb DataCallback) error + fn func(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error } -func (w *fetcherWrapper) Fetch(ctx context.Context, path string, cb DataCallback) error { - return w.fn(ctx, path, cb) +func (w *fetcherWrapper) Fetch(ctx context.Context, path path.ImmutablePath, params CarParams, cb DataCallback) error { + return w.fn(ctx, path, params, cb) +} + +type testErr struct { + message string + retryAfter time.Duration +} + +func (e *testErr) Error() string { + return e.message +} + +func (e *testErr) RetryAfter() time.Duration { + return 
e.retryAfter +} + +func TestGatewayErrorRetryAfter(t *testing.T) { + originalErr := &testErr{message: "test", retryAfter: time.Minute} + var ( + convertedErr error + gatewayErr *ErrorRetryAfter + ) + + // Test unwrapped + convertedErr = blockstoreErrToGatewayErr(originalErr) + ok := errors.As(convertedErr, &gatewayErr) + assert.True(t, ok) + assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) + + // Test wrapped. + convertedErr = blockstoreErrToGatewayErr(fmt.Errorf("wrapped error: %w", originalErr)) + ok = errors.As(convertedErr, &gatewayErr) + assert.True(t, ok) + assert.EqualValues(t, originalErr.retryAfter, gatewayErr.RetryAfter) } diff --git a/gateway/backend_car_utils.go b/gateway/backend_car_utils.go deleted file mode 100644 index 0612f374e..000000000 --- a/gateway/backend_car_utils.go +++ /dev/null @@ -1,71 +0,0 @@ -package gateway - -import ( - "context" - "errors" - "fmt" - "net/url" - "strconv" - "strings" - "time" - - "github.com/ipfs/boxo/path" -) - -// contentPathToCarUrl returns an URL that allows retrieval of specified resource -// from a trustless gateway that implements IPIP-402 -func contentPathToCarUrl(path path.ImmutablePath, params CarParams) *url.URL { - return &url.URL{ - Path: path.String(), - RawQuery: carParamsToString(params), - } -} - -// carParamsToString converts CarParams to URL parameters compatible with IPIP-402 -func carParamsToString(params CarParams) string { - paramsBuilder := strings.Builder{} - paramsBuilder.WriteString("format=car") // always send explicit format in URL, this makes debugging easier, even when Accept header was set - if params.Scope != "" { - paramsBuilder.WriteString("&dag-scope=") - paramsBuilder.WriteString(string(params.Scope)) - } - if params.Range != nil { - paramsBuilder.WriteString("&entity-bytes=") - paramsBuilder.WriteString(strconv.FormatInt(params.Range.From, 10)) - paramsBuilder.WriteString(":") - if params.Range.To != nil { - 
paramsBuilder.WriteString(strconv.FormatInt(*params.Range.To, 10)) - } else { - paramsBuilder.WriteString("*") - } - } - return paramsBuilder.String() -} - -// blockstoreErrToGatewayErr translates underlying blockstore error into one that gateway code will return as HTTP 502 or 504 -// it also makes sure Retry-After hint from remote blockstore will be passed to HTTP client, if present. -func blockstoreErrToGatewayErr(err error) error { - if errors.Is(err, &ErrorStatusCode{}) || - errors.Is(err, &ErrorRetryAfter{}) { - // already correct error - return err - } - - // All timeouts should produce 504 Gateway Timeout - if errors.Is(err, context.DeadlineExceeded) || - // Unfortunately this is not an exported type so we have to check for the content. - strings.Contains(err.Error(), "Client.Timeout exceeded") { - return fmt.Errorf("%w: %s", ErrGatewayTimeout, err.Error()) - } - - // (Saturn) errors that support the RetryAfter interface need to be converted - // to the correct gateway error, such that the HTTP header is set. - for v := err; v != nil; v = errors.Unwrap(v) { - if r, ok := v.(interface{ RetryAfter() time.Duration }); ok { - return NewErrorRetryAfter(err, r.RetryAfter()) - } - } - - // everything else returns 502 Bad Gateway - return fmt.Errorf("%w: %s", ErrBadGateway, err.Error()) -} diff --git a/gateway/errors.go b/gateway/errors.go index 4ec1d9735..31ac5e91c 100644 --- a/gateway/errors.go +++ b/gateway/errors.go @@ -10,6 +10,7 @@ import ( "time" "github.com/ipfs/boxo/gateway/assets" + "github.com/ipfs/boxo/path" "github.com/ipfs/boxo/path/resolver" "github.com/ipfs/go-cid" "github.com/ipld/go-ipld-prime/datamodel" @@ -144,7 +145,12 @@ func (e ErrInvalidResponse) Error() string { // one or more more specific resources that should be fetched (via StillNeed) to complete the request. 
type ErrPartialResponse struct { error - StillNeed []string + StillNeed []CarResource +} + +type CarResource struct { + Path path.ImmutablePath + Params CarParams } func (epr ErrPartialResponse) Error() string { From 9d4ebb52c7d36f4efc4fbe0d4d0b160c81e80632 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 17:05:40 +0200 Subject: [PATCH 16/19] conformance: skip correct tests --- .github/workflows/gateway-conformance.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/gateway-conformance.yml b/.github/workflows/gateway-conformance.yml index e61ea8540..f9ced9f70 100644 --- a/.github/workflows/gateway-conformance.yml +++ b/.github/workflows/gateway-conformance.yml @@ -50,7 +50,6 @@ jobs: markdown: output.md subdomain-url: http://example.net specs: -trustless-ipns-gateway,-path-ipns-gateway,-subdomain-ipns-gateway,-dnslink-gateway - args: -skip 'TestGatewayCar/GET_response_for_application/vnd.ipld.car/Header_Content-Length' # 5. Upload the results - name: Upload MD summary @@ -113,7 +112,7 @@ jobs: markdown: output.md subdomain-url: http://example.net specs: -trustless-ipns-gateway,-path-ipns-gateway,-subdomain-ipns-gateway,-dnslink-gateway - args: -skip 'TestGatewayCar/GET_response_for_application/vnd.ipld.car/Header_Content-Length' + args: -skip 'TestGatewayCache/.*_for_/ipfs/_with_only-if-cached_succeeds_when_in_local_datastore' # 5. Upload the results - name: Upload MD summary From 548a2c5842862ebf94616f87bd824b5e447c1b21 Mon Sep 17 00:00:00 2001 From: Henrique Dias Date: Thu, 11 Apr 2024 17:11:38 +0200 Subject: [PATCH 17/19] percent escaped? 
--- .github/workflows/gateway-conformance.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gateway-conformance.yml b/.github/workflows/gateway-conformance.yml index f9ced9f70..d5461e443 100644 --- a/.github/workflows/gateway-conformance.yml +++ b/.github/workflows/gateway-conformance.yml @@ -112,7 +112,7 @@ jobs: markdown: output.md subdomain-url: http://example.net specs: -trustless-ipns-gateway,-path-ipns-gateway,-subdomain-ipns-gateway,-dnslink-gateway - args: -skip 'TestGatewayCache/.*_for_/ipfs/_with_only-if-cached_succeeds_when_in_local_datastore' + args: -skip 'TestGatewayCache/.*_for_%2Fipfs%2F_with_only-if-cached_succeeds_when_in_local_datastore' # 5. Upload the results - name: Upload MD summary From 2ce7502f2b7b3af9eed59aa1c5958243a5d1d508 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 11 Apr 2024 22:35:08 +0200 Subject: [PATCH 18/19] test(gateway): remote-block-backend (#600) --- .github/workflows/gateway-conformance.yml | 141 +++++++++++++++++----- examples/gateway/car-file/README.md | 5 +- gateway/backend_car_fetcher.go | 3 +- gateway/blockstore.go | 3 +- 4 files changed, 118 insertions(+), 34 deletions(-) diff --git a/.github/workflows/gateway-conformance.yml b/.github/workflows/gateway-conformance.yml index d5461e443..0928d2650 100644 --- a/.github/workflows/gateway-conformance.yml +++ b/.github/workflows/gateway-conformance.yml @@ -1,19 +1,24 @@ name: Gateway Conformance +# This workflow runs https://github.com/ipfs/gateway-conformance +# against different backend implementations of boxo/gateway on: push: branches: - main pull_request: + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event_name == 'push' && github.sha || github.ref }} cancel-in-progress: true jobs: - gateway-conformance-car-file: + # This test uses a static CAR file as a local blockstore, + # allowing us to test conformance against BlocksBackend (gateway/backend_blocks.go) + # which is 
used by implementations like Kubo + local-block-backend: runs-on: ubuntu-latest - name: Gateway Conformance (CAR File Gateway) steps: # 1. Download the gateway-conformance fixtures - name: Download gateway-conformance fixtures @@ -23,21 +28,22 @@ jobs: merged: true # 2. Build the gateway binary - - name: Setup Go - uses: actions/setup-go@v4 - with: - go-version: 1.21.x - name: Checkout boxo uses: actions/checkout@v4 with: path: boxo - - name: Build car-gateway - run: go build -o gateway + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'boxo/examples/go.mod' + cache-dependency-path: "boxo/**/*.sum" + - name: Build test-gateway + run: go build -o test-gateway working-directory: boxo/examples/gateway/car-file # 3. Start the gateway binary - - name: Start car-gateway - run: boxo/examples/gateway/car-file/gateway -c fixtures/fixtures.car -p 8040 & + - name: Start test-gateway + run: boxo/examples/gateway/car-file/test-gateway -c fixtures/fixtures.car -p 8040 & # 4. Run the gateway-conformance tests - name: Run gateway-conformance tests @@ -57,20 +63,24 @@ jobs: run: cat output.md >> $GITHUB_STEP_SUMMARY - name: Upload HTML report if: failure() || success() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: gateway-conformance.html + name: gateway-conformance_local-block-backend.html path: output.html - name: Upload JSON report if: failure() || success() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: gateway-conformance.json + name: gateway-conformance_local-block-backend.json path: output.json - gateway-conformance-remote-car: + # This test uses remote block gateway (?format=raw) as a remote blockstore, + # allowing us to test conformance against RemoteBlocksBackend + # (gateway/backend_blocks.go) which is used by implementations like + # rainbow configured to use with remote block backend + # Ref. 
https://specs.ipfs.tech/http-gateways/trustless-gateway/#block-responses-application-vnd-ipld-raw + remote-block-backend: runs-on: ubuntu-latest - name: Gateway Conformance (Remote CAR Gateway) steps: # 1. Download the gateway-conformance fixtures - name: Download gateway-conformance fixtures @@ -80,32 +90,101 @@ jobs: merged: true # 2. Build the gateway binaries + - name: Checkout boxo + uses: actions/checkout@v4 + with: + path: boxo - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 + with: + go-version-file: 'boxo/examples/go.mod' + cache-dependency-path: "boxo/**/*.sum" + - name: Build remote-block-backend # it will act as a trustless CAR gateway + run: go build -o remote-block-backend + working-directory: boxo/examples/gateway/car-file + - name: Build test-gateway # this one will be used for tests, it will use previous one as its remote block backend + run: go build -o test-gateway + working-directory: boxo/examples/gateway/proxy-blocks + + # 3. Start the gateway binaries + - name: Start remote HTTP backend that serves application/vnd.ipld.raw + run: boxo/examples/gateway/car-file/remote-block-backend -c fixtures/fixtures.car -p 8030 & # this endpoint will respond to application/vnd.ipld.raw requests + - name: Start gateway that uses the remote block backend + run: boxo/examples/gateway/proxy-blocks/test-gateway -g http://127.0.0.1:8030 -p 8040 & + + # 4. Run the gateway-conformance tests + - name: Run gateway-conformance tests + uses: ipfs/gateway-conformance/.github/actions/test@v0.5 + with: + gateway-url: http://127.0.0.1:8040 # we test gateway that is backed by a remote block gateway + json: output.json + xml: output.xml + html: output.html + markdown: output.md + subdomain-url: http://example.net + specs: -trustless-ipns-gateway,-path-ipns-gateway,-subdomain-ipns-gateway,-dnslink-gateway + args: -skip 'TestGatewayCache/.*_for_%2Fipfs%2F_with_only-if-cached_succeeds_when_in_local_datastore' + + # 5. 
Upload the results + - name: Upload MD summary + if: failure() || success() + run: cat output.md >> $GITHUB_STEP_SUMMARY + - name: Upload HTML report + if: failure() || success() + uses: actions/upload-artifact@v4 with: - go-version: 1.21.x + name: gateway-conformance_remote-block-backend.html + path: output.html + - name: Upload JSON report + if: failure() || success() + uses: actions/upload-artifact@v4 + with: + name: gateway-conformance_remote-block-backend.json + path: output.json + + # This test uses remote CAR gateway (?format=car, IPIP-402) + # allowing us to test conformance against remote CarFetcher backend. + # (gateway/backend_car_fetcher.go) which is used by implementations like + # rainbow configured to use with remote car backend + # Ref. https://specs.ipfs.tech/http-gateways/trustless-gateway/#car-responses-application-vnd-ipld-car + remote-car-backend: + runs-on: ubuntu-latest + steps: + # 1. Download the gateway-conformance fixtures + - name: Download gateway-conformance fixtures + uses: ipfs/gateway-conformance/.github/actions/extract-fixtures@v0.5 + with: + output: fixtures + merged: true + + # 2. Build the gateway binaries - name: Checkout boxo uses: actions/checkout@v4 with: path: boxo - - name: Build car-gateway - run: go build -o gateway + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version-file: 'boxo/examples/go.mod' + cache-dependency-path: "boxo/**/*.sum" + - name: Build remote-car-backend # it will act as a trustless CAR gateway + run: go build -o remote-car-backend working-directory: boxo/examples/gateway/car-file - - name: Build proxy-car-gateway - run: go build -o gateway + - name: Build test-gateway # this one will be used for tests, it will use previous one as its remote CAR backend + run: go build -o test-gateway working-directory: boxo/examples/gateway/proxy-car # 3. 
Start the gateway binaries - - name: Start car-gateway - run: boxo/examples/gateway/car-file/gateway -c fixtures/fixtures.car -p 8030 & - - name: Start proxy-car-gateway - run: boxo/examples/gateway/proxy-car/gateway -g http://127.0.0.1:8030 -p 8040 & + - name: Start remote HTTP backend that serves application/vnd.ipld.car (IPIP-402) + run: boxo/examples/gateway/car-file/remote-car-backend -c fixtures/fixtures.car -p 8030 & # this endpoint will respond to application/vnd.ipld.car requests + - name: Start gateway that uses the remote CAR backend + run: boxo/examples/gateway/proxy-car/test-gateway -g http://127.0.0.1:8030 -p 8040 & # 4. Run the gateway-conformance tests - name: Run gateway-conformance tests uses: ipfs/gateway-conformance/.github/actions/test@v0.5 with: - gateway-url: http://127.0.0.1:8040 + gateway-url: http://127.0.0.1:8040 # we test gateway that is backed by a remote car gateway json: output.json xml: output.xml html: output.html @@ -120,13 +199,13 @@ jobs: run: cat output.md >> $GITHUB_STEP_SUMMARY - name: Upload HTML report if: failure() || success() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: gateway-conformance.html + name: gateway-conformance_remote-car-backend.html path: output.html - name: Upload JSON report if: failure() || success() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: gateway-conformance.json + name: gateway-conformance_remote-car-backend.json path: output.json diff --git a/examples/gateway/car-file/README.md b/examples/gateway/car-file/README.md index 5a95063d4..2645d7b17 100644 --- a/examples/gateway/car-file/README.md +++ b/examples/gateway/car-file/README.md @@ -1,9 +1,12 @@ -# HTTP Gateway backed by a CAR File +# HTTP Gateway backed by a CAR File as BlocksBackend This is an example that shows how to build a Gateway backed by the contents of a CAR file. 
A [CAR file](https://ipld.io/specs/transport/car/) is a Content Addressable aRchive that contains blocks. +The `main.go` sets up a `blockService` backed by a static CAR file, +and then uses it to initialize `gateway.NewBlocksBackend(blockService)`. + ## Build ```bash diff --git a/gateway/backend_car_fetcher.go b/gateway/backend_car_fetcher.go index ec348694a..cf9d2ec04 100644 --- a/gateway/backend_car_fetcher.go +++ b/gateway/backend_car_fetcher.go @@ -29,9 +29,10 @@ type remoteCarFetcher struct { } // NewRemoteCarFetcher returns a [CarFetcher] that is backed by one or more gateways -// that support partial CAR requests, as described in [IPIP-402]. You can optionally +// that support partial [CAR requests], as described in [IPIP-402]. You can optionally // pass your own [http.Client]. // +// [CAR requests]: https://www.iana.org/assignments/media-types/application/vnd.ipld.car // [IPIP-402]: https://specs.ipfs.tech/ipips/ipip-0402 func NewRemoteCarFetcher(gatewayURL []string, httpClient *http.Client) (CarFetcher, error) { if len(gatewayURL) == 0 { diff --git a/gateway/blockstore.go b/gateway/blockstore.go index c1c4b067b..89ae0a27d 100644 --- a/gateway/blockstore.go +++ b/gateway/blockstore.go @@ -158,10 +158,11 @@ type remoteBlockstore struct { } // NewRemoteBlockstore creates a new [blockstore.Blockstore] that is backed by one -// or more gateways that support RAW block requests. See the [Trustless Gateway] +// or more gateways that support [RAW block] requests. See the [Trustless Gateway] // specification for more details. You can optionally pass your own [http.Client]. 
// // [Trustless Gateway]: https://specs.ipfs.tech/http-gateways/trustless-gateway/ +// [RAW block]: https://www.iana.org/assignments/media-types/application/vnd.ipld.raw func NewRemoteBlockstore(gatewayURL []string, httpClient *http.Client) (blockstore.Blockstore, error) { if len(gatewayURL) == 0 { return nil, errors.New("missing gateway URLs to which to proxy") From 32ddd9298580b7f0362f2c0a1cddb04aae6d7d8e Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Sat, 13 Apr 2024 00:33:33 +0200 Subject: [PATCH 19/19] chore: cleanup docs and add comments --- examples/README.md | 6 +++--- examples/gateway/proxy-car/README.md | 18 +++++++++++------- gateway/blockstore.go | 4 ++-- gateway/errors.go | 13 +++++++++---- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/examples/README.md b/examples/README.md index 5f97cb61b..d1d0021d8 100644 --- a/examples/README.md +++ b/examples/README.md @@ -27,7 +27,7 @@ Once you have your example finished, do not forget to run `go mod tidy` and addi ## Examples and Tutorials - [Fetching a UnixFS file by CID](./unixfs-file-cid) -- [Gateway backed by a CAR file](./gateway/car-file) -- [Gateway backed by a remote blockstore and IPNS resolver](./gateway/proxy-blocks) -- [Gateway backed by a remote Trustless CAR Gateway](./gateway/proxy-car) +- [Gateway backed by a local blockstore in form of a CAR file](./gateway/car-file) +- [Gateway backed by a remote (HTTP) blockstore and IPNS resolver](./gateway/proxy-blocks) +- [Gateway backed by a remote (HTTP) CAR Gateway](./gateway/proxy-car) - [Delegated Routing V1 Command Line Client](./routing/delegated-routing-client/) diff --git a/examples/gateway/proxy-car/README.md b/examples/gateway/proxy-car/README.md index bc5ef9b2b..c06a1a657 100644 --- a/examples/gateway/proxy-car/README.md +++ b/examples/gateway/proxy-car/README.md @@ -1,9 +1,12 @@ # Gateway as Proxy for Trustless CAR Remote Backend -This is an example of building a Gateway that uses `application/vnd.ipld.car` -responses from 
another gateway acting as a remote Trustless Gateway and IPNS resolver. +This is an example of building a "verifying proxy" Gateway that has no +local on-disk blockstore, but instead, uses `application/vnd.ipld.car` and +`application/vnd.ipfs.ipns-record` responses from a remote HTTP server that +implements CAR support from [Trustless Gateway +Specification](https://specs.ipfs.tech/http-gateways/trustless-gateway/). -**NOTE:** the CAR backend MUST implement [IPIP-0402: Partial CAR Support on Trustless Gateways](https://specs.ipfs.tech/ipips/ipip-0402/) +**NOTE:** the remote CAR backend MUST implement [IPIP-0402: Partial CAR Support on Trustless Gateways](https://specs.ipfs.tech/ipips/ipip-0402/) ## Build @@ -22,15 +25,16 @@ types. Once you have it, run the proxy gateway with its address as the host para ### Subdomain gateway -Now you can access the gateway in [localhost:8040](http://localhost:8040). It will -behave like a regular [Subdomain IPFS Gateway](https://docs.ipfs.tech/how-to/address-ipfs-on-web/#subdomain-gateway), +Now you can access the gateway in [`localhost:8040`](http://localhost:8040/ipfs/bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze). It will +behave like a regular [subdomain gateway](https://docs.ipfs.tech/how-to/address-ipfs-on-web/#subdomain-gateway), except for the fact that it runs no libp2p, and has no local blockstore. -All contents are provided by a remote gateway and fetched as CAR files and IPNS Records, and verified locally. +All data is provided by a remote trustless gateway, fetched as CAR files and IPNS Records, and verified locally. 
### Path gateway If you don't need Origin isolation and only care about hosting flat files, -a plain [path gateway](https://docs.ipfs.tech/how-to/address-ipfs-on-web/#path-gateway) at [127.0.0.1:8040](http://127.0.0.1:8040) +a plain [path gateway](https://docs.ipfs.tech/how-to/address-ipfs-on-web/#path-gateway) at +[`127.0.0.1:8040`](http://127.0.0.1:8040/ipfs/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi) may suffice. ### DNSLink gateway diff --git a/gateway/blockstore.go b/gateway/blockstore.go index 89ae0a27d..11e51b93e 100644 --- a/gateway/blockstore.go +++ b/gateway/blockstore.go @@ -165,7 +165,7 @@ type remoteBlockstore struct { // [RAW block]: https://www.iana.org/assignments/media-types/application/vnd.ipld.raw func NewRemoteBlockstore(gatewayURL []string, httpClient *http.Client) (blockstore.Blockstore, error) { if len(gatewayURL) == 0 { - return nil, errors.New("missing gateway URLs to which to proxy") + return nil, errors.New("missing remote block backend URL") } if httpClient == nil { @@ -197,7 +197,7 @@ func (ps *remoteBlockstore) fetch(ctx context.Context, c cid.Cid) (blocks.Block, defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("http error from block gateway: %s", resp.Status) + return nil, fmt.Errorf("http error from remote block backend: %s", resp.Status) } rb, err := io.ReadAll(resp.Body) diff --git a/gateway/errors.go b/gateway/errors.go index 31ac5e91c..c245ae4c1 100644 --- a/gateway/errors.go +++ b/gateway/errors.go @@ -129,9 +129,10 @@ func (e *ErrorStatusCode) Unwrap() error { return e.Err } -// ErrInvalidResponse can be returned from a [DataCallback] to indicate that the data provided for the -// requested resource was explicitly 'incorrect' - that blocks not in the requested dag, or non-car-conforming -// data was returned. 
+// ErrInvalidResponse can be returned from a [DataCallback] to indicate that +// the data provided for the requested resource was explicitly 'incorrect', +// for example, when received blocks did not belong to the requested dag, +// or non-car-conforming data was returned. type ErrInvalidResponse struct { Message string } @@ -143,6 +144,10 @@ func (e ErrInvalidResponse) Error() string { // ErrPartialResponse can be returned from a [DataCallback] to indicate that some of the requested resource // was successfully fetched, and that instead of retrying the full resource, that there are // one or more more specific resources that should be fetched (via StillNeed) to complete the request. +// +// This primitive allows for a resume mechanism that is useful when a big CAR +// stream gets truncated due to a network error, HTTP middleware timeout, etc., +// but some useful blocks were received and should not be fetched again. type ErrPartialResponse struct { error StillNeed []CarResource @@ -157,7 +162,7 @@ func (epr ErrPartialResponse) Error() string { if epr.error != nil { return fmt.Sprintf("partial response: %s", epr.error.Error()) } - return "caboose received a partial response" + return "received a partial CAR response from the backend" } func webError(w http.ResponseWriter, r *http.Request, c *Config, err error, defaultCode int) {