diff --git a/docs/roundabout.md b/docs/roundabout.md index d7ff2f6c..8aa99299 100644 --- a/docs/roundabout.md +++ b/docs/roundabout.md @@ -6,14 +6,16 @@ The given API is currently public. -### `GET /{carCid}` +### `GET /{cid}` -Redirects to a presigned URL where the requested CAR file (by its CID) can be downloaded from. This will use web3.storage `carpark` as the location of the requested CARs. The request will return a `302 Redirect` to a created presigned URL. +Redirects to a presigned URL where the requested CAR file (by its CID) can be downloaded from. The given CID can be the CAR CID, or a CID equivalent to it, such as a PieceCIDv2. The request will return a `302 Redirect` to a created presigned URL. It also supports a query parameter `expires` with the number of seconds this presigned URL should be valid for. You can set a value from one second to 7 days (604,800 seconds). By default the expiration is set for 3 days (259,200 seconds). ### `GET /key/{key}?bucket=bucket-name` +> Deprecated and should not be used in production + Redirects to a presigned URL where the requested bucket value can be downloaded from by its key. Unlike `GET /{cid}`, this endpoint takes a key and is compatible with any web3.storage account bucket. The request will return a `302 Redirect` to a created presigned URL. It also supports a query parameter `expires` with the number of seconds this presigned URL should be valid for. You can set a value from one second to 7 days (604,800 seconds). By default the expiration is set for 3 days (259,200 seconds). 
diff --git a/package-lock.json b/package-lock.json index f67a5baa..0eab7600 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6739,7 +6739,6 @@ }, "node_modules/aggregate-error": { "version": "4.0.1", - "dev": true, "license": "MIT", "dependencies": { "clean-stack": "^4.0.0", @@ -8410,7 +8409,6 @@ }, "node_modules/clean-stack": { "version": "4.2.0", - "dev": true, "license": "MIT", "dependencies": { "escape-string-regexp": "5.0.0" @@ -9455,7 +9453,6 @@ }, "node_modules/escape-string-regexp": { "version": "5.0.0", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -11172,7 +11169,6 @@ }, "node_modules/indent-string": { "version": "5.0.0", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -13258,6 +13254,29 @@ "node": ">= 0.8.0" } }, + "node_modules/p-any": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/p-any/-/p-any-4.0.0.tgz", + "integrity": "sha512-S/B50s+pAVe0wmEZHmBs/9yJXeZ5KhHzOsgKzt0hRdgkoR3DxW9ts46fcsWi/r3VnzsnkKS7q4uimze+zjdryw==", + "dependencies": { + "p-cancelable": "^3.0.0", + "p-some": "^6.0.0" + }, + "engines": { + "node": ">=12.20" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/p-cancelable": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-3.0.0.tgz", + "integrity": "sha512-mlVgR3PGuzlo0MmTdk4cXqXWlwQDLnONTAg6sm62XkMJEiRxN3GL3SffkYvqwonbkJBcrI7Uvv5Zh9yjvn2iUw==", + "engines": { + "node": ">=12.20" + } + }, "node_modules/p-defer": { "version": "4.0.0", "license": "MIT", @@ -13357,6 +13376,21 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/p-some": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/p-some/-/p-some-6.0.0.tgz", + "integrity": "sha512-CJbQCKdfSX3fIh8/QKgS+9rjm7OBNUTmwWswAFQAhc8j1NR1dsEDETUEuVUtQHZpV+J03LqWBEwvu0g1Yn+TYg==", + "dependencies": { + "aggregate-error": "^4.0.0", + "p-cancelable": "^3.0.0" + }, + "engines": { + "node": ">=12.20" + }, + 
/**
 * @typedef {import('multiformats').UnknownLink} UnknownLink
 * @typedef {import('@ucanto/client').URI} URI
 * @typedef {import('@web3-storage/w3up-client/types').CARLink} CARLink
 * @typedef {import('@web3-storage/content-claims/client/api').Claim} Claim
 **/

/**
 * Find the set of CAR CIDs that are claimed to be equivalent to the Piece CID.
 *
 * Results are de-duplicated by the string form of the CID: the same CAR CID
 * reported by several claims arrives as distinct objects, which a plain `Set`
 * would keep as separate entries.
 *
 * `asCarCid` is the helper this module imports from `./piece.js`.
 *
 * @param {UnknownLink} piece
 * @param {(link: UnknownLink) => Promise<Claim[]>} [fetchClaims] - returns content claims for a cid
 * @returns {Promise<Set<CARLink>>} unique CAR CIDs, in the order they were first claimed
 */
export async function findEquivalentCarCids (piece, fetchClaims = createClaimsClientForEnv()) {
  /** @type {Map<string, CARLink>} */
  const carsByString = new Map()
  const claims = await fetchClaims(piece)
  for (const claim of claims) {
    // claims will include _all_ claims about this cid, so we filter to `equals`
    if (claim.type !== 'assert/equals') {
      continue
    }
    // an equivalence claim may have the pieceCid as the content cid _or_ the equals cid
    // so check both properties for the car cid.
    const carCid = asCarCid(claim.equals) ?? asCarCid(claim.content)
    if (!carCid) {
      continue
    }
    const id = carCid.toString()
    // keep the first instance seen for each distinct CID
    if (!carsByString.has(id)) {
      carsByString.set(id, carCid)
    }
  }
  return new Set(carsByString.values())
}

/**
 * Find the set of locations where the given CID is claimed to be present.
 *
 * @param {UnknownLink} link
 * @param {(link: UnknownLink) => Promise<Claim[]>} [fetchClaims] - returns content claims for a cid
 * @returns {Promise<Set<URI>>} every location listed by the `assert/location` claims
 */
export async function findLocationsForLink (link, fetchClaims = createClaimsClientForEnv()) {
  const claims = await fetchClaims(link)
  /** @type {Set<URI>} */
  const locations = new Set()

  for (const claim of claims) {
    // claims will include _all_ claims about this cid, so we filter to `location`
    if (claim.type !== 'assert/location') {
      continue
    }

    for (const location of claim.location) {
      locations.add(location)
    }
  }
  return locations
}

/**
 * Pick the content claims reader for a deployment stage.
 *
 * For `prod` the client's `read` function (imported by this module from
 * `@web3-storage/content-claims/client`) is returned untouched. Any other
 * stage gets a wrapper that points `read` at the staging claims service,
 * unless the caller overrides `serviceURL` in `opts`.
 *
 * @param {'prod' | *} env
 */
export function createClaimsClientForEnv (env = process.env.SST_STAGE) {
  if (env === 'prod') {
    return read
  }
  return (cid, opts) => read(cid, { serviceURL: 'https://staging.claims.web3.storage', ...opts })
}
Piece CID. * * @param {import('aws-lambda').APIGatewayProxyEventV2} request */ export async function redirectCarGet(request) { + const { + BUCKET_NAME, + } = getEnv() + let cid, expiresIn try { const parsedQueryParams = parseQueryStringParameters(request.queryStringParameters) @@ -29,9 +43,9 @@ export async function redirectCarGet(request) { } const locateCar = carLocationResolver({ - bucket: getEnv().BUCKET_NAME, s3Client: getS3Client(), - expiresIn + expiresIn, + defaultBucketName: BUCKET_NAME }) const response = await resolveCar(cid, locateCar) ?? await resolvePiece(cid, locateCar) @@ -43,72 +57,9 @@ export async function redirectCarGet(request) { } /** - * Return response for a car CID, or undefined for other CID types - * - * @param {CID} cid - * @param {(cid: CID) => Promise } locateCar - */ -async function resolveCar (cid, locateCar) { - if (asCarCid(cid) !== undefined) { - const url = await locateCar(cid) - if (url) { - return redirectTo(url) - } - return { statusCode: 404, body: 'CAR Not found'} - } -} - -/** - * Return response for a Piece CID, or undefined for other CID types - * - * @param {CID} cid - * @param {(cid: CID) => Promise } locateCar - */ -async function resolvePiece (cid, locateCar) { - if (asPieceCidV2(cid) !== undefined) { - const cars = await findEquivalentCarCids(cid) - if (cars.size === 0) { - return { statusCode: 404, body: 'No equivalent CAR CID for Piece CID found' } - } - for (const cid of cars) { - const url = await locateCar(cid) - if (url) { - return redirectTo(url) - } - } - return { statusCode: 404, body: 'No CARs found for Piece CID' } - } - - if (asPieceCidV1(cid) !== undefined) { - return { - statusCode: 415, - body: 'v1 Piece CIDs are not supported yet. Please provide a V2 Piece CID. https://github.com/filecoin-project/FIPs/blob/master/FRCs/frc-0069.md' - } - } -} - -/** - * Creates a helper function that returns signed bucket url for a car cid, - * or undefined if the CAR does not exist in the bucket. 
- * - * @param {object} config - * @param {S3Client} config.s3Client - * @param {string} config.bucket - * @param {number} config.expiresIn - */ -function carLocationResolver ({ s3Client, bucket, expiresIn }) { - const signer = getSigner(s3Client, bucket) - /** - * @param {CID} cid - */ - return async function locateCar (cid) { - const key = `${cid}/${cid}.car` - return signer.getUrl(key, { expiresIn }) - } -} - -/** - * AWS HTTP Gateway handler for GET /key/{key} by bucket key + * AWS HTTP Gateway handler for GET /key/{key} by bucket key. + * Note that this is currently used by dagcargo old system and + * should be deprecated once it is decomissioned. * * @param {import('aws-lambda').APIGatewayProxyEventV2} request */ @@ -117,9 +68,9 @@ export async function redirectKeyGet(request) { let key, expiresIn, bucketName try { - const parsedQueryParams = parseQueryStringParameters(request.queryStringParameters) + const parsedQueryParams = parseKeyQueryStringParameters(request.queryStringParameters) expiresIn = parsedQueryParams.expiresIn - bucketName = parsedQueryParams.bucketName + bucketName = parsedQueryParams.bucketName || 'carpark-prod-0' key = request.pathParameters?.key if (!key) { @@ -153,18 +104,6 @@ function toLambdaResponse(signedUrl) { return redirectTo(signedUrl) } -/** - * @param {string} url - */ -function redirectTo (url) { - return { - statusCode: 302, - headers: { - Location: url - } - } -} - function getS3Client(){ const { BUCKET_ENDPOINT, diff --git a/roundabout/index.js b/roundabout/index.js index 3e70307d..4f9195b5 100644 --- a/roundabout/index.js +++ b/roundabout/index.js @@ -1,12 +1,21 @@ -import { getSignedUrl as getR2SignedUrl } from "@aws-sdk/s3-request-presigner" +import { getSignedUrl as getR2SignedUrl } from '@aws-sdk/s3-request-presigner' import { GetObjectCommand, HeadObjectCommand -} from "@aws-sdk/client-s3" +} from '@aws-sdk/client-s3' +import pAny from 'p-any' + +import { asPieceCidV1, asPieceCidV2, asCarCid } from './piece.js' +import 
/**
 * Answer a request for a CAR CID.
 *
 * Yields a redirect to the signed URL when the CAR can be located, a 404
 * response when it cannot, and undefined when the CID is not a CAR CID.
 *
 * @param {UnknownLink} cid
 * @param {(cid: UnknownLink) => Promise<string | undefined>} locateCar
 */
export async function resolveCar (cid, locateCar) {
  if (asCarCid(cid) === undefined) {
    return undefined
  }
  const signedUrl = await locateCar(cid)
  return signedUrl
    ? redirectTo(signedUrl)
    : { statusCode: 404, body: 'CAR Not found' }
}

/**
 * Build a `locateCar` function that returns a signed bucket URL for a CAR CID,
 * or undefined when no URL could be obtained.
 *
 * Location claims for the CID decide which bucket/key pairs are tried. When
 * the claims yield no usable pair, the default bucket is tried with the
 * conventional `<cid>/<cid>.car` key.
 *
 * @param {object} config
 * @param {S3Client} config.s3Client
 * @param {number} config.expiresIn
 * @param {string} config.defaultBucketName
 * @param {(link: UnknownLink) => Promise<Claim[]>} [config.fetchClaims]
 * @param {string[]} [config.validR2Buckets]
 * @param {string[]} [config.validS3Buckets]
 */
export function carLocationResolver ({ s3Client, expiresIn, fetchClaims, validR2Buckets, validS3Buckets, defaultBucketName }) {
  /**
   * @param {string} bucketName
   * @param {string} key
   */
  const signUrl = (bucketName, key) =>
    getSigner(s3Client, bucketName).getUrl(key, { expiresIn })

  /**
   * @param {UnknownLink} cid
   */
  return async function locateCar (cid) {
    const claimedLocations = await findLocationsForLink(cid, fetchClaims)
    const candidates = getBucketKeyPairToRedirect(claimedLocations, {
      validR2Buckets,
      validS3Buckets
    })

    // No claim points at a bucket we may redirect to: attempt the old bucket
    if (candidates.length === 0) {
      return signUrl(defaultBucketName, `${cid}/${cid}.car`)
    }

    // Settle on the first candidate that yields a truthy URL
    try {
      const attempts = candidates.map(({ bucketName, key }) => signUrl(bucketName, key))
      return await pAny(attempts, { filter: Boolean })
    } catch {
      // Nothing resolved for any claimed location, so there is nowhere to redirect
      return undefined
    }
  }
}

/**
 * Answer a request for a Piece CID.
 *
 * A v2 Piece CID is mapped to its equivalent CAR CIDs, which are tried in turn
 * until one can be located. A v1 Piece CID gets a 415 response. Any other CID
 * type yields undefined.
 *
 * @param {UnknownLink} cid
 * @param {(cid: UnknownLink) => Promise<string | undefined>} locateCar
 */
export async function resolvePiece (cid, locateCar) {
  if (asPieceCidV2(cid) === undefined) {
    if (asPieceCidV1(cid) === undefined) {
      return undefined
    }
    return {
      statusCode: 415,
      body: 'v1 Piece CIDs are not supported yet. Please provide a V2 Piece CID. https://github.com/filecoin-project/FIPs/blob/master/FRCs/frc-0069.md'
    }
  }

  const carCids = await findEquivalentCarCids(cid)
  if (carCids.size === 0) {
    return { statusCode: 404, body: 'No equivalent CAR CID for Piece CID found' }
  }

  // One lookup at a time; stop at the first CAR that can be located
  for (const carCid of carCids) {
    const signedUrl = await locateCar(carCid)
    if (signedUrl) {
      return redirectTo(signedUrl)
    }
  }
  return { statusCode: 404, body: 'No CARs found for Piece CID' }
}

/**
 * Build a `302` response pointing at the given URL.
 *
 * @param {string} url
 */
export function redirectTo (url) {
  const headers = { Location: url }
  return { statusCode: 302, headers }
}
-// TODO: migrate to sst v2 and nodejs v18+ -import './globals.js' - -import { read } from '@web3-storage/content-claims/client' import * as Raw from 'multiformats/codecs/raw' /** https://github.com/multiformats/multicodec/blob/master/table.csv#L140 */ @@ -18,7 +13,7 @@ export const PIECE_V1_MULTIHASH = 0x10_12 export const PIECE_V2_MULTIHASH = 0x10_11 /** - * @typedef {import('multiformats/cid').Link} Link + * @typedef {import('multiformats').UnknownLink} UnknownLink * @typedef {import('@web3-storage/w3up-client/types').CARLink} CARLink * @typedef {import('@web3-storage/content-claims/client/api').Claim} Claim **/ @@ -26,7 +21,7 @@ export const PIECE_V2_MULTIHASH = 0x10_11 /** * Return the cid if it is a Piece CID or undefined if not * - * @param {Link} cid + * @param {UnknownLink} cid */ export function asPieceCidV2 (cid) { if (cid.multihash.code === PIECE_V2_MULTIHASH && cid.code === Raw.code) { @@ -37,7 +32,7 @@ export function asPieceCidV2 (cid) { /** * Return the cid if it is a v1 Piece CID or undefined if not * - * @param {Link} cid + * @param {UnknownLink} cid */ export function asPieceCidV1 (cid) { if (cid.multihash.code === PIECE_V1_MULTIHASH && cid.code === PIECE_V1_CODE) { @@ -48,43 +43,12 @@ export function asPieceCidV1 (cid) { /** * Return the cid if it is a CAR CID or undefined if not * - * @param {Link} cid + * @param {UnknownLink} cid + * @returns {CARLink | undefined} */ export function asCarCid(cid) { if (cid.code === CAR_CODE) { + // @ts-expect-error types fail to understand this is CAR Link return cid } } - -/** - * Find the set of CAR CIDs that are claimed to be equivalent to the Piece CID. 
- * - * @param {Link} piece - * @param {(Link) => Promise} [fetchClaims] - returns content claims for a cid - */ -export async function findEquivalentCarCids (piece, fetchClaims = createClaimsClientForEnv()) { - /** @type {Set} */ - const cids = new Set() - const claims = await fetchClaims(piece) - for (const claim of claims) { - // claims will include _all_ claims about this cid, so we filter to `equals` - if (claim.type !== 'assert/equals') { - continue - } - // an equivalence claim may have the pieceCid as the content cid _or_ the equals cid - // so check both properties for the car cid. - const carCid = asCarCid(claim.equals) ?? asCarCid(claim.content) - if (carCid) { - cids.add(carCid) - } - } - return cids -} - -/** @param {'prod' | *} env */ -export function createClaimsClientForEnv (env = process.env.SST_STAGE) { - if (env === 'prod') { - return read - } - return (cid, opts) => read(cid, { serviceURL: 'https://staging.claims.web3.storage', ...opts }) -} diff --git a/roundabout/test/car.test.js b/roundabout/test/car.test.js new file mode 100644 index 00000000..5bf04eb3 --- /dev/null +++ b/roundabout/test/car.test.js @@ -0,0 +1,174 @@ +import { test } from './helpers/context.js' + +import { + PutObjectCommand, +} from '@aws-sdk/client-s3' + +import { encode } from 'multiformats/block' +import { identity } from 'multiformats/hashes/identity' +import { sha256 as hasher } from 'multiformats/hashes/sha2' +import * as pb from '@ipld/dag-pb' +import { CarBufferWriter } from '@ipld/car' +import { CAR } from '@ucanto/transport' + +import { resolveCar, carLocationResolver } from '../index.js' + +import { createS3, createBucket } from './helpers/resources.js' + +test.before(async t => { + const { client } = await createS3({ port: 9000 }) + + t.context.s3Client = client +}) + +test('resolves a CAR in a valid R2 bucket claim', async t => { + const bucketName = await createBucket(t.context.s3Client) + const carCid = await putCarToBucket(t.context.s3Client, bucketName) + 
const expiresIn = 3 * 24 * 60 * 60 // 3 days in seconds + + const locateCar = carLocationResolver({ + s3Client: t.context.s3Client, + expiresIn, + fetchClaims: (link) => { + return Promise.resolve([ + { type: 'assert/location', content: link, location: [`https://fffa4b4363a7e5250af8357087263b3a.r2.cloudflarestorage.com/${bucketName}/${link.toString()}/${link.toString()}.car`] } + ]) + }, + validR2Buckets: [bucketName] + }) + + const response = await resolveCar(carCid, locateCar) + t.assert(response) + t.deepEqual(response?.statusCode, 302) + t.assert(response?.headers.Location) +}) + +test('resolves CAR in a valid R2 bucket via a S3 bucket claim', async t => { + const bucketName = await createBucket(t.context.s3Client) + const carCid = await putCarToBucket(t.context.s3Client, bucketName) + const expiresIn = 3 * 24 * 60 * 60 // 3 days in seconds + + const locateCar = carLocationResolver({ + s3Client: t.context.s3Client, + expiresIn, + fetchClaims: (link) => { + return Promise.resolve([ + { type: 'assert/location', content: link, location: [ + `https://${bucketName}.s3.amazonaws.com/${link.toString()}/${link.toString()}.car`, + ] } + ]) + }, + validS3Buckets: [bucketName] + }) + + const response = await resolveCar(carCid, locateCar) + t.assert(response) + t.deepEqual(response?.statusCode, 302) + t.assert(response?.headers.Location) +}) + +test('falls back to resolve a CAR if not in a valid bucket for claims, but on default', async t => { + const bucketName = await createBucket(t.context.s3Client) + const carCid = await putCarToBucket(t.context.s3Client, bucketName) + const expiresIn = 3 * 24 * 60 * 60 // 3 days in seconds + + const locateCar = carLocationResolver({ + s3Client: t.context.s3Client, + expiresIn, + fetchClaims: (link) => { + return Promise.resolve([ + { type: 'assert/location', content: link, location: [ + `https://${bucketName}.s3.amazonaws.com/${link.toString()}/${link.toString()}.car`, + 
`https://fffa4b4363a7e5250af8357087263b3a.r2.cloudflarestorage.com/${bucketName}/${link.toString()}/${link.toString()}.car` + ] } + ]) + }, + defaultBucketName: bucketName + }) + + const response = await resolveCar(carCid, locateCar) + t.assert(response) + t.deepEqual(response?.statusCode, 302) +}) + +test('does not resolve a CAR if not in a valid bucket', async t => { + const bucketName = await createBucket(t.context.s3Client) + const carCid = await CAR.codec.link(new Uint8Array([80, 82, 84, 86])) + const expiresIn = 3 * 24 * 60 * 60 // 3 days in seconds + + const locateCar = carLocationResolver({ + s3Client: t.context.s3Client, + expiresIn, + fetchClaims: (link) => { + return Promise.resolve([ + { type: 'assert/location', content: link, location: [ + `https://${bucketName}.s3.amazonaws.com/${link.toString()}/${link.toString()}.car`, + `https://fffa4b4363a7e5250af8357087263b3a.r2.cloudflarestorage.com/${bucketName}/${link.toString()}/${link.toString()}.car` + ] } + ]) + }, + defaultBucketName: bucketName + }) + + const response = await resolveCar(carCid, locateCar) + t.assert(response) + t.deepEqual(response?.statusCode, 404) +}) + +test('does not resolve a CAR if not available in the bucket but a claim exists', async t => { + const bucketName = await createBucket(t.context.s3Client) + await putCarToBucket(t.context.s3Client, bucketName) + const otherCarLink = await CAR.codec.link(new Uint8Array([80, 82, 84, 86])) + const expiresIn = 3 * 24 * 60 * 60 // 3 days in seconds + + const locateCar = carLocationResolver({ + s3Client: t.context.s3Client, + expiresIn, + fetchClaims: (link) => { + return Promise.resolve([ + { type: 'assert/location', content: link, location: [`https://fffa4b4363a7e5250af8357087263b3a.r2.cloudflarestorage.com/${bucketName}/${otherCarLink.toString()}/${otherCarLink.toString()}.car`] } + ]) + }, + validR2Buckets: [bucketName] + }) + + const response = await resolveCar(otherCarLink, locateCar) + t.assert(response) + 
t.deepEqual(response?.statusCode, 404) +}) + +/** + * @param {import('@aws-sdk/client-s3').S3Client} s3Client + * @param {string} bucketName + */ +async function putCarToBucket (s3Client, bucketName) { + // Write original car to origin bucket + const id = await encode({ + value: pb.prepare({ Data: 'a red car on the street!' }), + codec: pb, + hasher: identity, + }) + const parent = await encode({ + value: pb.prepare({ Links: [id.cid] }), + codec: pb, + hasher, + }) + const car = CarBufferWriter.createWriter(Buffer.alloc(1000), { + roots: [parent.cid], + }) + car.write(parent) + + const Body = car.close() + + const link = await CAR.codec.link(car.bytes) + const key = `${link.toString()}/${link.toString()}.car` + await s3Client.send( + new PutObjectCommand({ + Bucket: bucketName, + Key: key, + Body, + }) + ) + + return link +} diff --git a/roundabout/test/index.test.js b/roundabout/test/index.test.js index 14ff369a..12b282fc 100644 --- a/roundabout/test/index.test.js +++ b/roundabout/test/index.test.js @@ -10,14 +10,9 @@ import { identity } from 'multiformats/hashes/identity' import { sha256 as hasher } from 'multiformats/hashes/sha2' import * as pb from '@ipld/dag-pb' import { CarBufferWriter } from '@ipld/car' +import { CAR } from '@ucanto/transport' import { getSigner } from '../index.js' -import { - parseQueryStringParameters, - MAX_EXPIRES_IN, - MIN_EXPIRES_IN, - DEFAULT_EXPIRES_IN -} from '../utils.js' import { createS3, createBucket } from './helpers/resources.js' @@ -54,57 +49,6 @@ test('fails to create signed url for object not in bucket', async t => { t.falsy(signedUrl) }) -test('parses valid expires', t => { - const queryParams = { - expires: '900' - } - const param = parseQueryStringParameters(queryParams) - t.is(param.expiresIn, parseInt(queryParams.expires)) -}) - -test('parses bucket name', t => { - const queryParams = { - bucket: 'dagcargo' - } - const param = parseQueryStringParameters(queryParams) - t.is(param.bucketName, queryParams.bucket) -}) - 
-test('fails to parse bucket name not accepted', t => { - const queryParams = { - bucket: 'dagcargo-not-this' - } - t.throws(() => parseQueryStringParameters(queryParams)) -}) - -test('parses valid expires query parameter', t => { - const queryParams = { - expires: '900' - } - const param = parseQueryStringParameters(queryParams) - t.is(param.expiresIn, parseInt(queryParams.expires)) -}) - -test('defaults expires when there is no query parameter', t => { - const queryParams = { - nosearch: '900' - } - const param = parseQueryStringParameters(queryParams) - t.is(param.expiresIn, DEFAULT_EXPIRES_IN) -}) - -test('fails to parse expires query parameter when not acceptable value', t => { - const queryParamsBigger = { - expires: `${MAX_EXPIRES_IN + 1}` - } - t.throws(() => parseQueryStringParameters(queryParamsBigger)) - - const queryParamsSmaller = { - expires: `${MIN_EXPIRES_IN - 1}` - } - t.throws(() => parseQueryStringParameters(queryParamsSmaller)) -}) - /** * @param {import('@aws-sdk/client-s3').S3Client} s3Client * @param {string} bucketName @@ -128,7 +72,8 @@ async function putCarToBucket (s3Client, bucketName) { const Body = car.close() - const key = `${parent.cid.toString()}/${parent.cid.toString()}.car` + const link = await CAR.codec.link(car.bytes) + const key = `${link.toString()}/${link.toString()}.car` await s3Client.send( new PutObjectCommand({ Bucket: bucketName, @@ -137,5 +82,5 @@ async function putCarToBucket (s3Client, bucketName) { }) ) - return parent.cid + return link } diff --git a/roundabout/test/piece.test.js b/roundabout/test/piece.test.js index b4462aaa..1a57377a 100644 --- a/roundabout/test/piece.test.js +++ b/roundabout/test/piece.test.js @@ -4,13 +4,14 @@ import * as Raw from 'multiformats/codecs/raw' import { sha256 } from 'multiformats/hashes/sha2' import * as Digest from 'multiformats/hashes/digest' import { Piece, MIN_PAYLOAD_SIZE } from '@web3-storage/data-segment' -import { findEquivalentCarCids, asCarCid, asPieceCidV1, asPieceCidV2, 
CAR_CODE } from '../piece.js' +import { asCarCid, asPieceCidV1, asPieceCidV2, CAR_CODE } from '../piece.js' +import { findEquivalentCarCids } from '../claims.js' test('findEquivalentCarCids', async t => { const bytes = new Uint8Array(MIN_PAYLOAD_SIZE) const pieceCid = Piece.fromPayload(bytes).link - const carCid = CID.createV1(CAR_CODE, sha256.digest(bytes)) - const rawCid = CID.createV1(Raw.code, sha256.digest(bytes)) + const carCid = CID.createV1(CAR_CODE, await sha256.digest(bytes)) + const rawCid = CID.createV1(Raw.code, await sha256.digest(bytes)) const carSet = await findEquivalentCarCids(pieceCid, async () => { return [ { type: 'assert/equals', content: pieceCid, equals: carCid }, // yes! is equivalent carCid @@ -38,36 +39,36 @@ test('findEquivalentCarCids from content-claims api', async t => { t.is(found.toString(), carCid.toString()) }) -test('asCarCid', t => { +test('asCarCid', async t => { const bytes = new Uint8Array(MIN_PAYLOAD_SIZE) const pieceCid = Piece.fromPayload(bytes).link - const carCid = CID.createV1(CAR_CODE, sha256.digest(bytes)) - const rawCid = CID.createV1(Raw.code, sha256.digest(bytes)) + const carCid = CID.createV1(CAR_CODE, await sha256.digest(bytes)) + const rawCid = CID.createV1(Raw.code, await sha256.digest(bytes)) t.is(asCarCid(pieceCid), undefined) t.is(asCarCid(carCid), carCid) t.is(asCarCid(rawCid), undefined) }) -test('asPieceCidv2', t => { +test('asPieceCidv2', async t => { const bytes = new Uint8Array(MIN_PAYLOAD_SIZE) const piece = Piece.fromPayload(bytes) const pieceCidV2 = piece.link const pieceCidV1 = CID.createV1(Piece.FilCommitmentUnsealed, Digest.create(Piece.Sha256Trunc254Padded, piece.root)) - const carCid = CID.createV1(CAR_CODE, sha256.digest(bytes)) - const rawCid = CID.createV1(Raw.code, sha256.digest(bytes)) + const carCid = CID.createV1(CAR_CODE, await sha256.digest(bytes)) + const rawCid = CID.createV1(Raw.code, await sha256.digest(bytes)) t.is(asPieceCidV2(pieceCidV1), undefined) 
t.is(asPieceCidV2(pieceCidV2), pieceCidV2) t.is(asPieceCidV2(carCid), undefined) t.is(asPieceCidV2(rawCid), undefined) }) -test('asPieceCidv1', t => { +test('asPieceCidv1', async t => { const bytes = new Uint8Array(MIN_PAYLOAD_SIZE) const piece = Piece.fromPayload(bytes) const pieceCidV2 = piece.link const pieceCidV1 = CID.createV1(Piece.FilCommitmentUnsealed, Digest.create(Piece.Sha256Trunc254Padded, piece.root)) - const carCid = CID.createV1(CAR_CODE, sha256.digest(bytes)) - const rawCid = CID.createV1(Raw.code, sha256.digest(bytes)) + const carCid = CID.createV1(CAR_CODE, await sha256.digest(bytes)) + const rawCid = CID.createV1(Raw.code, await sha256.digest(bytes)) t.is(asPieceCidV1(pieceCidV1), pieceCidV1) t.is(asPieceCidV1(pieceCidV2),undefined) t.is(asPieceCidV1(carCid), undefined) diff --git a/roundabout/test/utils.test.js b/roundabout/test/utils.test.js new file mode 100644 index 00000000..126821bc --- /dev/null +++ b/roundabout/test/utils.test.js @@ -0,0 +1,60 @@ +import { test } from './helpers/context.js' + +import { + parseQueryStringParameters, + parseKeyQueryStringParameters, + MAX_EXPIRES_IN, + MIN_EXPIRES_IN, + DEFAULT_EXPIRES_IN +} from '../utils.js' + +test('parses valid expires', t => { + const queryParams = { + expires: '900' + } + const param = parseQueryStringParameters(queryParams) + t.is(param.expiresIn, parseInt(queryParams.expires)) +}) + +test('parses bucket name with key', t => { + const queryParams = { + bucket: 'dagcargo' + } + const param = parseKeyQueryStringParameters(queryParams) + t.is(param.bucketName, queryParams.bucket) +}) + +test('fails to parse bucket name not accepted', t => { + const queryParams = { + bucket: 'dagcargo-not-this' + } + t.throws(() => parseKeyQueryStringParameters(queryParams)) +}) + +test('parses valid expires query parameter', t => { + const queryParams = { + expires: '900' + } + const param = parseQueryStringParameters(queryParams) + t.is(param.expiresIn, parseInt(queryParams.expires)) +}) + 
+test('defaults expires when there is no query parameter', t => { + const queryParams = { + nosearch: '900' + } + const param = parseQueryStringParameters(queryParams) + t.is(param.expiresIn, DEFAULT_EXPIRES_IN) +}) + +test('fails to parse expires query parameter when not acceptable value', t => { + const queryParamsBigger = { + expires: `${MAX_EXPIRES_IN + 1}` + } + t.throws(() => parseQueryStringParameters(queryParamsBigger)) + + const queryParamsSmaller = { + expires: `${MIN_EXPIRES_IN - 1}` + } + t.throws(() => parseQueryStringParameters(queryParamsSmaller)) +}) diff --git a/roundabout/utils.js b/roundabout/utils.js index 0716a601..445fd113 100644 --- a/roundabout/utils.js +++ b/roundabout/utils.js @@ -3,7 +3,74 @@ export const MAX_EXPIRES_IN = 3 * 24 * 60 * 60 // 7 days in seconds export const MIN_EXPIRES_IN = 1 export const DEFAULT_EXPIRES_IN = 3 * 24 * 60 * 60 // 3 days in seconds by default -export const VALID_BUCKETS = ['dagcargo'] +export const VALID_BUCKETS_BY_KEY = ['dagcargo'] +export const VALID_R2_BUCKETS_DEFAULT = ['carpark-prod-0', 'carpark-prod-1', 'dagcargo'] +export const VALID_S3_BUCKETS_DEFAULT = ['carpark-prod-0'] +export const CF_R2_DOMAIN = 'fffa4b4363a7e5250af8357087263b3a.r2.cloudflarestorage.com' +export const AWS_S3_DOMAIN = 's3.amazonaws.com' + +/** + * Filters location claims to get the R2 buckets valid for redirect. + * In case of not existing any R2 bucket, verifies if there is an AWS bucket name that could be attempted in CF. 
+ * + * @param {Set} locations + * @param {object} [options] + * @param {string[]} [options.validR2Buckets] + * @param {string[]} [options.validS3Buckets] + */ +export function getBucketKeyPairToRedirect (locations, options = {}) { + const validR2Buckets = options.validR2Buckets || VALID_R2_BUCKETS_DEFAULT + const validS3Buckets = options.validS3Buckets || VALID_S3_BUCKETS_DEFAULT + // Keep the locations that point at Cloudflare R2 + const r2Urls = Array.from(locations) + .filter( + // URL contains the R2 domain + l => l.includes(CF_R2_DOMAIN) && + // and contains at least one accepted R2 bucket name. NOTE(review): substring match anywhere in the URL, not only the bucket path segment + validR2Buckets.filter(b => l.includes(b)).length + ) + + // R2 locations found: return their bucket name and key, S3 locations are then not considered + if (r2Urls.length) { + return r2Urls.map(url => { + // Expected format: https://account-id.r2.cloudflarestorage.com/bucket-name/key + const domainSplit = url.split(CF_R2_DOMAIN)[1] + const bucketName = domainSplit.split('/')[1] + const key = domainSplit.split(`${bucketName}/`)[1] /* NOTE(review): a key that itself contains the bucket name followed by a slash is cut at that point */ + + return { + bucketName, + key + } + }) + } + + // No R2 location: fall back to S3 locations whose bucket name can be tried in R2 + const s3Urls = Array.from(locations) + .filter( + // URL contains the S3 domain + l => l.includes(AWS_S3_DOMAIN) && + // and contains at least one accepted S3 bucket name (substring match anywhere in the URL) + validS3Buckets.filter(b => l.includes(b)).length + ) + + // S3 locations found: return their bucket name and key + if (s3Urls.length) { + return s3Urls.map(url => { + // Expected format: 'https://bucket-name.s3.amazonaws.com/key' + const domainParts = url.split(`.${AWS_S3_DOMAIN}`) + const bucketName = domainParts[0].replace('https://', '') + const key = domainParts[1].slice(1) + + return { + bucketName, + key + } + }) + } + + return [] /* no accepted R2 or S3 location */ +} /** * @param {import('aws-lambda').APIGatewayProxyEventPathParameters | undefined} queryStringParameters @@ -16,9 +83,25 @@ export function parseQueryStringParameters (queryStringParameters) { throw new Error(`Bad request with not acceptable expires parameter: ${queryStringParameters?.expires}`) } + return { + expiresIn + } +} + +/** + * @param {import('aws-lambda').APIGatewayProxyEventPathParameters | undefined} queryStringParameters
+ */ +export function parseKeyQueryStringParameters (queryStringParameters) { + const expiresIn = queryStringParameters?.expires ? + parseInt(queryStringParameters?.expires) : DEFAULT_EXPIRES_IN /* NOTE(review): a non-numeric expires parses to NaN, and NaN fails neither comparison below, so it is not rejected */ + + if (expiresIn > MAX_EXPIRES_IN || expiresIn < MIN_EXPIRES_IN) { + throw new Error(`Bad request with not acceptable expires parameter: ${queryStringParameters?.expires}`) + } + const bucketName = queryStringParameters?.bucket - if (bucketName && !VALID_BUCKETS.includes(bucketName)) { + if (bucketName && !VALID_BUCKETS_BY_KEY.includes(bucketName)) { /* bucket is optional; when present it must be listed in VALID_BUCKETS_BY_KEY */ throw new Error(`Bad requested with not acceptable bucket: ${bucketName}`) } @@ -35,9 +118,9 @@ export function getEnv() { return { BUCKET_ENDPOINT: mustGetEnv('BUCKET_ENDPOINT'), BUCKET_REGION: mustGetEnv('BUCKET_REGION'), + BUCKET_NAME: mustGetEnv('BUCKET_NAME'), BUCKET_ACCESS_KEY_ID: mustGetEnv('BUCKET_ACCESS_KEY_ID'), - BUCKET_SECRET_ACCESS_KEY: mustGetEnv('BUCKET_SECRET_ACCESS_KEY'), - BUCKET_NAME: mustGetEnv('BUCKET_NAME') + BUCKET_SECRET_ACCESS_KEY: mustGetEnv('BUCKET_SECRET_ACCESS_KEY') } } diff --git a/test/roundabout.test.js b/test/roundabout.test.js index 3ea35b6b..e9669226 100644 --- a/test/roundabout.test.js +++ b/test/roundabout.test.js @@ -13,7 +13,7 @@ test.before(t => { }) test('HEAD /{cid}', async t => { - const carparkCid = 'bagbaiera223xmiutg62dsthdyd6kqgsft25knslnlaxxvwe6nc4zrwezezeq' + const carparkCid = 'bagbaieraky3zsxcozokb33wunu5bmxixfpkz2t2pe25rs6tokqcgm3h3d5ya' const response = await fetch( `${t.context.roundaboutEndpoint}/${carparkCid}`, {