From d76a23fdf20ad8c4c1546e84aea1ad32b9fed0d1 Mon Sep 17 00:00:00 2001 From: Eric Guan Date: Thu, 18 Jan 2024 13:28:02 -0800 Subject: [PATCH] fix: detect cids in query parameters --- src/sw/controller.js | 14 ++++---- src/sw/interceptor.js | 7 ++-- src/utils.js | 72 ++++++++++++++++++++++++++++---------- test/utils.spec.js | 80 ++++++++++++++++++++++++++++++++----------- 4 files changed, 122 insertions(+), 51 deletions(-) diff --git a/src/sw/controller.js b/src/sw/controller.js index 2a78ba9..ffda054 100644 --- a/src/sw/controller.js +++ b/src/sw/controller.js @@ -4,7 +4,7 @@ import { v4 as uuidv4 } from 'uuid' import * as Sentry from '@sentry/browser' import { Interceptor } from './interceptor.js' -import { findCIDInURL } from '../utils.js' +import { findCIDPathInURL } from '../utils.js' const FILTERED_HOSTS = [ 'images.studio.metaplex.com', @@ -48,11 +48,11 @@ export class Controller { } const { url } = event.request - const cid = findCIDInURL(url) + const cidPath = findCIDPathInURL(url) - if (cid) { - debug('cid', cid, url) - event.respondWith(fetchCID(cid, this.saturn, this.clientId, event)) + if (cidPath) { + debug('cidPath', cidPath, url) + event.respondWith(fetchCID(cidPath, this.saturn, this.clientId, event)) } }) } @@ -76,12 +76,12 @@ function getClientKey() { return clientKey } -async function fetchCID (cid, saturn, clientId, event) { +async function fetchCID(cidPath, saturn, clientId, event) { let response = null const { request } = event try { - const interceptor = new Interceptor(cid, saturn, clientId, event) + const interceptor = new Interceptor(cidPath, saturn, clientId, event) response = await interceptor.fetch() } catch (err) { debug(`${request.url}: fetchCID err %O`, err) diff --git a/src/sw/interceptor.js b/src/sw/interceptor.js index 4ffbb7e..fe70cda 100644 --- a/src/sw/interceptor.js +++ b/src/sw/interceptor.js @@ -2,8 +2,6 @@ import toIterable from 'browser-readablestream-to-it' import createDebug from 'debug' import * as Sentry from '@sentry/browser' -import { getCidPathFromURL } from '../utils.js' - const debug = createDebug('sw') const cl = console.log @@ -11,9 +9,8 @@ export class Interceptor { static nocache = false // request/response skips L1 cache entirely static bypasscache = false // request skips L1 cache, response gets cached. - constructor(cid, saturn, clientId, event) { - this.cid = cid - this.cidPath = getCidPathFromURL(event.request.url, cid) + constructor(cidPath, saturn, clientId, event) { + this.cidPath = cidPath this.saturn = saturn this.clientId = clientId this.event = event diff --git a/src/utils.js b/src/utils.js index a6a6c33..8450f9b 100644 --- a/src/utils.js +++ b/src/utils.js @@ -52,31 +52,65 @@ export class Deferred { } // Modified from https://github.com/PinataCloud/ipfs-gateway-tools/blob/34533f3d5f3c0dd616327e2e5443072c27ea569d/src/index.js#L6 -export function findCIDInURL (url) { - const splitUrl = url.split('?')[0].split('/') - for (const split of splitUrl) { - if (isIPFS.cid(split)) { - return split - } - const splitOnDot = split.split('.')[0] - if(isIPFS.cid(splitOnDot)) { - return splitOnDot +export function findCIDPathInURL(url) { + let urlObj + try { + urlObj = new URL(url) + } catch (err) { + return null + } + + let cid = '' + let path = '' + + const { hostname, pathname, searchParams, href } = urlObj + + const searchStrings = [ + hostname + pathname, + ...searchParams.values() + ] + + for (const str of searchStrings) { + const result = findCIDPathInUrlComponent(str) + + // sanity check if parsed cid appears in URL + if (result.cid && href.includes(result.cid)) { + ({ cid, path } = result) + break } } - return null + const cidPath = path ? `${cid}/${path}` : cid + + return cidPath } -export function getCidPathFromURL(url, cid) { - const { hostname, pathname } = new URL(url) - let cidPath +function findCIDPathInUrlComponent(str) { + let cid = '' + let path = '' + + const splitStr = str.split('/') + const isMaybeHost = splitStr[0].includes('.') + + const segmentsToPath = i => splitStr.slice(i).join('/') ?? '' + + for (let i = 0; i < splitStr.length; i++) { + const segment = splitStr[i] + if (isIPFS.cid(segment)) { + cid = segment + path = segmentsToPath(i + 1) + break + } - if (pathname.startsWith('/ipfs/')) { - cidPath = pathname.replace('/ipfs/', '') - } else if (hostname.includes(cid)) { - // https://.ipfs.dweb.link/cat.png -> https://saturn.ms/ipfs//cat.png - cidPath = cid + pathname + const splitOnDot = segment.split('.')[0] + if(isIPFS.cid(splitOnDot)) { + cid = splitOnDot + if (isMaybeHost) { + path = segmentsToPath(1) + } + break + } } - return cidPath + return { cid, path } } diff --git a/test/utils.spec.js b/test/utils.spec.js index 2c7c755..ee5b6bb 100644 --- a/test/utils.spec.js +++ b/test/utils.spec.js @@ -1,41 +1,81 @@ import assert from 'node:assert/strict' import { describe, it } from 'node:test' -import { findCIDInURL, getCidPathFromURL } from '#src/utils.js' +import { findCIDPathInURL } from '#src/utils.js' describe('controller', () => { - it('should find cid in the subdomain', () => { + it('finds the cid in the subdomain', () => { const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym' const url = `https://${cid}.ipfs.dweb.link` - const foundCid = findCIDInURL(url) - assert.strictEqual(foundCid, cid) + assert.strictEqual(findCIDPathInURL(url), cid) }) - it('should find cid in the url path', () => { + it('finds the cidPath in the subdomain', () => { + const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily' + const path = 'test/cat.png' + const cidPath = `${cid}/${path}` + const url = `https://${cid}.ipfs.dweb.link/${path}` + + assert.strictEqual(findCIDPathInURL(url), cidPath) + }) + + it('finds the subdomain cid in an encoded query param', () => { + const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily' + const param = `${cid}.ipfs.dweb.link` + const url = `https://proxy.com/?url=${param}` + + assert.strictEqual(findCIDPathInURL(url), cid) + }) + + it('finds the cid in the url path', () => { const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV' const url = `https://ipfs.io/ipfs/${cid}` - const foundCid = findCIDInURL(url) - assert.strictEqual(foundCid, cid) + assert.strictEqual(findCIDPathInURL(url), cid) }) - it('should find cidPath in the subdomain', () => { - const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym' - const path = 'hello/world.png' - const cidPath = `${cid}/${path}` - const url = `https://${cid}.ipfs.dweb.link/${path}` + it('finds the cidPath in the url path', () => { + const cidPath = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV/cat.png' + const url = `https://ipfs.io/ipfs/${cidPath}` - const foundCidPath = getCidPathFromURL(url, cid) - assert.strictEqual(foundCidPath, cidPath) + assert.strictEqual(findCIDPathInURL(url), cidPath) }) - it('should find cidPath in the url path', () => { - const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV' - const path = 'hello/world.png' + it('finds the cid in an encoded query param', () => { + const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily' + const url = `https://proxy.com/?url=ipfs.io%2Fipfs%2F${cid}/` + + assert.strictEqual(findCIDPathInURL(url), cid) + }) + + it('finds the cidPath in an encoded query param', () => { + const cidPath = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily/test/cat.png' + const url = `https://proxy.com/?url=https%3A%2F%2Fipfs.io%2Fipfs%2F${cidPath}` + + assert.strictEqual(findCIDPathInURL(url), cidPath) + }) + + it('finds the subdomain cidPath in an encoded query param', () => { + const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily' + const path = 'dog/cow/cat.png' const cidPath = `${cid}/${path}` - const url = `https://ipfs.io/ipfs/${cid}/${path}` + const param = `${cid}.ipfs.dweb.link/${path}` + const url = `https://proxy.com/?url=${param}` + + assert.strictEqual(findCIDPathInURL(url), cidPath) + }) + + it('finds the plain cid (no /ipfs/ prefix) in an encoded query param', () => { + const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily' + const url = `https://proxy.com/?cid=${cid}` + + assert.strictEqual(findCIDPathInURL(url), cid) + }) + + it('finds the plain cidPath (no /ipfs/ prefix) in an encoded query param', () => { + const cidPath = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily/test/cat.png' + const url = `https://proxy.com/?cid=${cidPath}` - const foundCidPath = getCidPathFromURL(url, cid) - assert.strictEqual(foundCidPath, cidPath) + assert.strictEqual(findCIDPathInURL(url), cidPath) }) })