Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion api/_cors.test.mjs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { strict as assert } from 'node:assert';
import test from 'node:test';
import { getCorsHeaders, isDisallowedOrigin } from './_cors.js';
import { getCorsHeaders, getPublicCorsHeaders, isDisallowedOrigin } from './_cors.js';

function makeRequest(origin) {
const headers = new Headers();
Expand Down Expand Up @@ -38,3 +38,9 @@ test('requests without origin remain allowed', () => {
const req = makeRequest(null);
assert.equal(isDisallowedOrigin(req), false);
});

test('getPublicCorsHeaders returns ACAO: * with no Vary', () => {
const headers = getPublicCorsHeaders();
assert.equal(headers['Access-Control-Allow-Origin'], '*');
assert.equal(headers['Vary'], undefined, 'Should not include Vary header');
});
4 changes: 2 additions & 2 deletions middleware.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,15 @@ export default function middleware(request: Request) {
if (BOT_UA.test(ua)) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=86400' },
});
Comment on lines 122 to 126
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 public on bot-block 403 can be served as a CDN cache hit to legitimate users

The middleware returns Cache-Control: public, max-age=86400 on both bot-UA 403 responses (line 124) and short-UA 403 responses (line 132). Per Vercel's execution model, CDN cache checks happen before middleware runs — so once any CDN layer in the request path caches a 403 response keyed only on the URL (no Vary: User-Agent), legitimate users requesting that same URL will receive the cached 403 without middleware ever executing.

Vercel CDN itself only stores responses with s-maxage or CDN-Cache-Control, so Vercel's own cache is safe here. However, the code comments confirm Cloudflare sits in front of api.worldmonitor.app — Cloudflare respects Cache-Control: public for its cache rules, and while it does not cache 4xx by default, a custom Page Rule or Cache Rule (present in many production CF setups) could enable it. Any such CF config would cause all requests to a URL hit by a bot to return 403 to real users for 24 hours.

public is the wrong directive for a security-gate error response; the intent (telling bots to back off) is better served by no-store or omitting Cache-Control entirely:

Suggested change
if (BOT_UA.test(ua)) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=86400' },
});
headers: { 'Content-Type': 'application/json', 'Cache-Control': 'no-store' },

The same fix applies at line 132–134.

}

// No user-agent or suspiciously short — likely a script
if (!ua || ua.length < 10) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=86400' },
});
}
}
Expand Down
14 changes: 14 additions & 0 deletions server/cors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,20 @@ export function getCorsHeaders(req: Request): Record<string, string> {
};
}

/**
* CORS headers for public cacheable responses (no per-user variation).
* Uses ACAO: * so Vercel CDN stores ONE cache entry per URL instead of one per
* unique Origin. Eliminates Vary: Origin cache fragmentation.
*/
export function getPublicCorsHeaders(): Record<string, string> {
return {
'Access-Control-Allow-Origin': '*',
'Access-Control-Allow-Methods': 'GET, POST, OPTIONS',
'Access-Control-Allow-Headers': 'Content-Type, Authorization, X-WorldMonitor-Key, X-Widget-Key, X-Pro-Key',
'Access-Control-Max-Age': '3600',
};
}

export function isDisallowedOrigin(req: Request): boolean {
const origin = req.headers.get('origin');
if (!origin) return false;
Expand Down
19 changes: 12 additions & 7 deletions server/gateway.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
*/

import { createRouter, type RouteDescriptor } from './router';
import { getCorsHeaders, isDisallowedOrigin, isAllowedOrigin } from './cors';
import { getCorsHeaders, getPublicCorsHeaders, isDisallowedOrigin, isAllowedOrigin } from './cors';
// @ts-expect-error — JS module, no declaration file
import { validateApiKey } from '../api/_api-key.js';
import { mapErrorToResponse } from './error-mapper';
Expand Down Expand Up @@ -370,13 +370,18 @@ export function createDomainGateway(
// bypassing auth entirely.
const reqOrigin = request.headers.get('origin') || '';
const cdnCache = isAllowedOrigin(reqOrigin) ? TIER_CDN_CACHE[tier] : null;
if (cdnCache) mergedHeaders.set('CDN-Cache-Control', cdnCache);
if (cdnCache) {
mergedHeaders.set('CDN-Cache-Control', cdnCache);
// For non-premium public GET routes: use ACAO: * to eliminate
// per-origin CDN cache fragmentation. Premium routes keep per-origin
// CORS to prevent cache-level auth bypass.
if (!PREMIUM_RPC_PATHS.has(pathname)) {
const pub = getPublicCorsHeaders();
for (const [k, v] of Object.entries(pub)) mergedHeaders.set(k, v);
mergedHeaders.delete('Vary');
}
Comment on lines +378 to +382
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 ACAO: * on CDN-cached responses bypasses isDisallowedOrigin() for browser clients

The code removed this warning from the previous version:

ACAO: * with no Vary would collapse all origins into one cache entry, bypassing isDisallowedOrigin() for cache hits — Vercel CDN serves s-maxage responses without re-invoking the function, so a disallowed origin could read a cached ACAO: * response.

That observation is still true after this PR. The flow is now:

  1. Request from allowed origin hits a non-premium route → edge function runs → isDisallowedOrigin() passes → response is cached by Vercel CDN with ACAO: *, Vary deleted.
  2. Request from a disallowed origin (e.g. https://evil.example.com) for the same URL → Vercel CDN cache hit → edge function never runs → isDisallowedOrigin() never executes → browser receives ACAO: * response and the cross-origin read succeeds.

This is intentional for "public" data routes, and the premium guard is in place, so this isn't a catastrophic auth bypass. But it is a meaningful change in the access-control surface: previously, disallowed origins could not read any route via the browser (they got a 403 from the function). Now they can read all non-premium CDN-cached routes.

If the intent is that non-premium routes should be globally readable, this is fine — but it's worth explicitly documenting that isDisallowedOrigin() is no longer enforced at the CDN cache layer for public routes, so future maintainers don't add sensitive-but-non-premium data under the assumption that the origin guard protects it.

}
mergedHeaders.set('X-Cache-Tier', tier);

// Keep per-origin ACAO (already set from corsHeaders above) and preserve Vary: Origin.
// ACAO: * with no Vary would collapse all origins into one cache entry, bypassing
// isDisallowedOrigin() for cache hits — Vercel CDN serves s-maxage responses without
// re-invoking the function, so a disallowed origin could read a cached ACAO: * response.
}
mergedHeaders.delete('X-No-Cache');
if (!new URL(request.url).searchParams.has('_debug')) {
Expand Down
33 changes: 33 additions & 0 deletions tests/cors-public-headers.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const corsSrc = readFileSync(resolve(__dirname, '..', 'server', 'cors.ts'), 'utf-8');

describe('getPublicCorsHeaders', () => {
it('returns ACAO: * without Vary header', () => {
// Verify the function body contains ACAO: * and does NOT include Vary
const fnMatch = corsSrc.match(/export function getPublicCorsHeaders[\s\S]*?^}/m);
assert.ok(fnMatch, 'getPublicCorsHeaders function not found in server/cors.ts');
const fnBody = fnMatch![0];
assert.match(fnBody, /'Access-Control-Allow-Origin':\s*'\*'/, 'Should set ACAO: *');
assert.doesNotMatch(fnBody, /Vary/, 'Should NOT include Vary header');
});

it('includes same Allow-Methods as getCorsHeaders', () => {
const pubMethods = corsSrc.match(/getPublicCorsHeaders[\s\S]*?Allow-Methods':\s*'([^']+)'/);
const perOriginMethods = corsSrc.match(/getCorsHeaders[\s\S]*?Allow-Methods':\s*'([^']+)'/);
assert.ok(pubMethods && perOriginMethods, 'Could not extract Allow-Methods from both functions');
assert.equal(pubMethods![1], perOriginMethods![1], 'Allow-Methods should match between public and per-origin');
});

it('includes same Allow-Headers as getCorsHeaders', () => {
const pubHeaders = corsSrc.match(/getPublicCorsHeaders[\s\S]*?Allow-Headers':\s*'([^']+)'/);
const perOriginHeaders = corsSrc.match(/getCorsHeaders[\s\S]*?Allow-Headers':\s*'([^']+)'/);
Comment on lines +7 to +29
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Tests inspect source text rather than exercising runtime behavior

Both tests/cors-public-headers.test.mts and tests/gateway-cdn-cors.test.mts use readFileSync + regex over the TypeScript source files instead of importing and calling the actual functions. This approach:

  • Will false-pass if the source text matches but the compiled output behaves differently (e.g., an environment variable override, dead branch, or build-time transformation changes the effective value).
  • Will false-fail if anyone switches quote style ('"), reformats, or renames a local variable, even though the runtime behavior is unchanged.
  • Doesn't catch mismatches between cors.ts and gateway.ts at integration level — only surface-level text patterns.

The api/_cors.test.mjs (also changed in this PR) shows the correct pattern: import the function, call it, and assert on the returned value. Bringing the new tests in line with that approach would give much stronger correctness guarantees:

// tests/cors-public-headers.test.mts — preferred approach
import { getPublicCorsHeaders, getCorsHeaders } from '../server/cors.ts';
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';

describe('getPublicCorsHeaders', () => {
  it('returns ACAO: * without Vary header', () => {
    const h = getPublicCorsHeaders();
    assert.equal(h['Access-Control-Allow-Origin'], '*');
    assert.equal(h['Vary'], undefined);
  });
  // ...
});

The same applies to tests/gateway-cdn-cors.test.mts — the assertions there are checking for specific source text patterns (e.g. !PREMIUM_RPC_PATHS\.has\(pathname\)) that would be better covered by an integration test that exercises createDomainGateway end-to-end.

assert.ok(pubHeaders && perOriginHeaders, 'Could not extract Allow-Headers from both functions');
assert.equal(pubHeaders![1], perOriginHeaders![1], 'Allow-Headers should match between public and per-origin');
});
});
71 changes: 71 additions & 0 deletions tests/gateway-cdn-cors.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const src = readFileSync(resolve(__dirname, '..', 'server', 'gateway.ts'), 'utf-8');

// Extract the GET 200 cache block (between "response.status === 200 && request.method === 'GET'" and the next closing brace at indentation 4)
const cacheBlock = (() => {
const start = src.indexOf("response.status === 200 && request.method === 'GET' && response.body");
if (start === -1) return '';
return src.slice(start, start + 3000);
})();

describe('gateway CDN CORS policy', () => {
it('sets ACAO: * when CDN-Cache-Control is present and route is not premium', () => {
assert.match(
cacheBlock,
/getPublicCorsHeaders/,
'Cache block should call getPublicCorsHeaders for CDN-cached routes',
);
assert.match(
cacheBlock,
/!PREMIUM_RPC_PATHS\.has\(pathname\)/,
'Should guard public CORS behind premium path check',
);
});

it('deletes Vary header when CDN-Cache-Control is present and route is not premium', () => {
assert.match(
cacheBlock,
/mergedHeaders\.delete\('Vary'\)/,
'Should delete Vary header for non-premium CDN-cached routes',
);
});

it('preserves per-origin ACAO for premium routes even with CDN-Cache-Control', () => {
// The ACAO: * block is guarded by !PREMIUM_RPC_PATHS.has(pathname),
// so premium routes skip it and keep per-origin CORS from corsHeaders
assert.match(
cacheBlock,
/if\s*\(!PREMIUM_RPC_PATHS\.has\(pathname\)\)/,
'Public CORS should only apply when NOT a premium path',
);
});

it('preserves per-origin ACAO for no-store tier', () => {
// no-store tier has cdnCache = null, so the ACAO: * block never runs
const tierMap = src.match(/'no-store':\s*null/);
assert.ok(tierMap, 'no-store CDN tier should be null (no CDN-Cache-Control)');
});

it('preserves per-origin ACAO for POST requests', () => {
// The entire GET 200 block is guarded by request.method === 'GET'
assert.match(
cacheBlock,
/request\.method\s*===\s*'GET'/,
'CDN cache block only applies to GET requests',
);
});

it('imports getPublicCorsHeaders from cors module', () => {
assert.match(
src,
/import\s*\{[^}]*getPublicCorsHeaders[^}]*\}\s*from\s*'\.\/cors'/,
'gateway.ts should import getPublicCorsHeaders',
);
});
});
35 changes: 35 additions & 0 deletions tests/middleware-bot-cache.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';

const __dirname = dirname(fileURLToPath(import.meta.url));
const src = readFileSync(resolve(__dirname, '..', 'middleware.ts'), 'utf-8');

describe('middleware bot responses', () => {
it('bot UA returns 403 with Cache-Control header', () => {
// Find the bot UA block (BOT_UA.test(ua)) followed by its Response
const botBlock = src.match(/if\s*\(BOT_UA\.test\(ua\)\)[\s\S]*?return new Response[\s\S]*?\}\);/);
assert.ok(botBlock, 'BOT_UA response block not found');
assert.match(botBlock![0], /Cache-Control/, 'Bot 403 should include Cache-Control header');
assert.match(botBlock![0], /max-age=86400/, 'Bot 403 should cache for 24h');
});

it('short UA returns 403 with Cache-Control header', () => {
// Find the short UA block
const shortBlock = src.match(/ua\.length\s*<\s*10\)\s*\{[\s\S]*?return new Response[\s\S]*?\}\);/);
assert.ok(shortBlock, 'Short UA response block not found');
assert.match(shortBlock![0], /Cache-Control/, 'Short UA 403 should include Cache-Control header');
assert.match(shortBlock![0], /max-age=86400/, 'Short UA 403 should cache for 24h');
});

it('social preview bots are allowed on /api/story', () => {
assert.match(src, /SOCIAL_PREVIEW_UA\.test\(ua\)/, 'Should check for social preview bots');
assert.match(src, /SOCIAL_PREVIEW_PATHS\.has\(path\)/, 'Should allow social bots on specific paths');
});

it('public API paths bypass bot filtering', () => {
assert.match(src, /PUBLIC_API_PATHS\.has\(path\)/, 'Should bypass bot filter for public paths');
});
});
Loading