diff --git a/Dockerfile.relay b/Dockerfile.relay index 52ff79b0d7..275db28777 100644 --- a/Dockerfile.relay +++ b/Dockerfile.relay @@ -10,7 +10,8 @@ FROM node:22-alpine # curl required by OREF polling (Node.js JA3 fingerprint blocked by Akamai; curl passes) -RUN apk add --no-cache curl +RUN apk add --no-cache curl && \ + addgroup -S appgroup && adduser -S appuser -G appgroup WORKDIR /app @@ -27,6 +28,10 @@ COPY shared/ ./shared/ # Data files required by the relay (telegram-channels.json, etc.) COPY data/ ./data/ +RUN chown -R appuser:appgroup /app + +USER appuser + EXPOSE 3004 HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ diff --git a/api/_relay.js b/api/_relay.js index b11008776f..c88403c28e 100644 --- a/api/_relay.js +++ b/api/_relay.js @@ -7,7 +7,13 @@ import { jsonResponse } from './_json-response.js'; export function getRelayBaseUrl() { const relayUrl = process.env.WS_RELAY_URL; if (!relayUrl) return null; - return relayUrl.replace('wss://', 'https://').replace('ws://', 'http://').replace(/\/$/, ''); + // Always upgrade to HTTPS — cleartext relay connections are not permitted. + // Normalize any WebSocket scheme to https://. + const httpUrl = relayUrl.replace(/^wss:\/\//, 'https://'); + // If the env var was already https:// or got converted above, we're done. + // Otherwise force https:// for any remaining non-secure scheme. + const secured = httpUrl.startsWith('https://') ? httpUrl : 'https://' + httpUrl.replace(/^[a-z]+:\/\//, ''); + return secured.replace(/\/$/, ''); } export function getRelayHeaders(baseHeaders = {}) { @@ -115,9 +121,9 @@ export function createRelayHandler(cfg) { } catch (error) { if (cfg.fallback) return cfg.fallback(req, corsHeaders); const isTimeout = error?.name === 'AbortError'; + console.error('[relay] error:', error?.message || String(error)); return jsonResponse({ error: isTimeout ? 'Relay timeout' : 'Relay request failed', - details: error?.message || String(error), }, isTimeout ? 
504 : 502, corsHeaders); } }; diff --git a/api/mcp-proxy.js b/api/mcp-proxy.js index a5af1e1dfd..a89e8226a5 100644 --- a/api/mcp-proxy.js +++ b/api/mcp-proxy.js @@ -1,4 +1,5 @@ import { getCorsHeaders, isDisallowedOrigin } from './_cors.js'; +import { checkRateLimit } from './_rate-limit.js'; import { jsonResponse } from './_json-response.js'; export const config = { runtime: 'edge' }; @@ -11,11 +12,16 @@ const MCP_PROTOCOL_VERSION = '2025-03-26'; const BLOCKED_HOST_PATTERNS = [ /^localhost$/i, /^127\./, + /^0\.0\.0\.0$/, // unspecified IPv4 — routes to loopback on many systems + /^0+$/, // zero in various forms /^10\./, /^172\.(1[6-9]|2\d|3[01])\./, /^192\.168\./, - /^169\.254\./, // link-local + cloud metadata (AWS/GCP/Azure) + /^169\.254\./, // link-local + cloud metadata (AWS/GCP/Azure) /^::1$/, + /^::$/, // unspecified IPv6 + /^::ffff:/i, // IPv4-mapped IPv6 (e.g. ::ffff:127.0.0.1) + /^\[/, // bracket-wrapped IPv6 in hostname /^fd[0-9a-f]{2}:/i, /^fe80:/i, ]; @@ -42,6 +48,31 @@ function validateServerUrl(raw) { return url; } +// Headers that must not be overridden by user-supplied custom headers. +// Allowing these to be set by the client could lead to SSRF (Host), auth +// hijacking, or request smuggling via hop-by-hop headers. 
+const BLOCKED_HEADER_NAMES = new Set([ + 'host', + 'cookie', + 'set-cookie', + 'transfer-encoding', + 'content-length', + 'connection', + 'keep-alive', + 'te', + 'trailer', + 'upgrade', + 'proxy-authorization', + 'proxy-authenticate', + 'via', + 'forwarded', + 'x-forwarded-for', + 'x-forwarded-host', + 'x-forwarded-proto', + 'x-real-ip', + 'cf-connecting-ip', +]); + function buildHeaders(customHeaders) { const h = { 'Content-Type': 'application/json', @@ -54,7 +85,8 @@ function buildHeaders(customHeaders) { // Strip CRLF to prevent header injection const safeKey = k.replace(/[\r\n]/g, ''); const safeVal = v.replace(/[\r\n]/g, ''); - if (safeKey) h[safeKey] = safeVal; + if (!safeKey || BLOCKED_HEADER_NAMES.has(safeKey.toLowerCase())) continue; + h[safeKey] = safeVal; } } } @@ -334,6 +366,9 @@ export default async function handler(req) { if (req.method === 'OPTIONS') return new Response(null, { status: 204, headers: cors }); + const rateLimitResponse = await checkRateLimit(req, cors); + if (rateLimitResponse) return rateLimitResponse; + try { if (req.method === 'GET') { const url = new URL(req.url); @@ -369,7 +404,9 @@ export default async function handler(req) { } catch (err) { const msg = err instanceof Error ? err.message : String(err); const isTimeout = msg.includes('TimeoutError') || msg.includes('timed out'); - // Return 422 (not 502) so Cloudflare proxy does not replace our JSON body with its own HTML error page - return jsonResponse({ error: isTimeout ? 'MCP server timed out' : msg }, isTimeout ? 504 : 422, cors); + console.error('[mcp-proxy] error:', msg); + // Return 422 (not 502) so Cloudflare proxy does not replace our JSON body with its own HTML error page. + // Avoid leaking internal error details to the client. + return jsonResponse({ error: isTimeout ? 'MCP server timed out' : 'MCP request failed' }, isTimeout ? 
504 : 422, cors); } } diff --git a/api/rss-proxy.js b/api/rss-proxy.js index 18247658bd..fcf8048ea0 100644 --- a/api/rss-proxy.js +++ b/api/rss-proxy.js @@ -184,8 +184,6 @@ export default async function handler(req) { console.error('RSS proxy error:', feedUrl, error.message); return jsonResponse({ error: isTimeout ? 'Feed timeout' : 'Failed to fetch feed', - details: error.message, - url: feedUrl }, isTimeout ? 504 : 502, corsHeaders); } } diff --git a/server/_shared/relay.ts b/server/_shared/relay.ts index e41d1464cc..1ac99fe9d2 100644 --- a/server/_shared/relay.ts +++ b/server/_shared/relay.ts @@ -3,7 +3,10 @@ import { CHROME_UA } from './constants'; export function getRelayBaseUrl(): string | null { const relayUrl = process.env.WS_RELAY_URL; if (!relayUrl) return null; - return relayUrl.replace(/^ws(s?):\/\//, 'http$1://').replace(/\/$/, ''); + // Always upgrade to HTTPS — cleartext relay connections are not permitted. + const httpUrl = relayUrl.replace(/^wss:\/\//, 'https://'); + const secured = httpUrl.startsWith('https://') ? httpUrl : 'https://' + httpUrl.replace(/^[a-z]+:\/\//, ''); + return secured.replace(/\/$/, ''); } export function getRelayHeaders(extra: Record<string, string> = {}): Record<string, string> { diff --git a/server/gateway.ts b/server/gateway.ts index 51b9bcbf4e..d96aa51def 100644 --- a/server/gateway.ts +++ b/server/gateway.ts @@ -220,7 +220,9 @@ export function createDomainGateway( try { corsHeaders = getCorsHeaders(request); } catch { - corsHeaders = { 'Access-Control-Allow-Origin': '*' }; + // Never fall back to wildcard CORS — that would bypass the origin allowlist. + // Use the hardcoded production origin as a safe default. 
+ corsHeaders = { 'Access-Control-Allow-Origin': 'https://worldmonitor.app', 'Vary': 'Origin' }; } // OPTIONS preflight diff --git a/server/worldmonitor/news/v1/_classifier.ts b/server/worldmonitor/news/v1/_classifier.ts index ce2df73b24..1c0200373c 100644 --- a/server/worldmonitor/news/v1/_classifier.ts +++ b/server/worldmonitor/news/v1/_classifier.ts @@ -181,15 +181,33 @@ const SHORT_KEYWORDS = new Set([ const keywordRegexCache = new Map(); -function getKeywordRegex(kw: string): RegExp { - let re = keywordRegexCache.get(kw); - if (!re) { - re = SHORT_KEYWORDS.has(kw) - ? new RegExp(`\\b${kw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`) - : new RegExp(kw.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')); - keywordRegexCache.set(kw, re); +function escapeRegExp(s: string): string { + return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +// Pre-build all keyword regexes at module load time so that no RegExp is +// constructed from runtime strings during request handling (eliminates ReDoS surface). +const ALL_KEYWORD_MAPS: KeywordMap[] = [ + CRITICAL_KEYWORDS, HIGH_KEYWORDS, MEDIUM_KEYWORDS, LOW_KEYWORDS, + TECH_HIGH_KEYWORDS, TECH_MEDIUM_KEYWORDS, TECH_LOW_KEYWORDS, +]; +for (const map of ALL_KEYWORD_MAPS) { + for (const kw of Object.keys(map)) { + if (!keywordRegexCache.has(kw)) { + const escaped = escapeRegExp(kw); + keywordRegexCache.set(kw, SHORT_KEYWORDS.has(kw) + ? new RegExp(`\\b${escaped}\\b`) + : new RegExp(escaped)); + } } - return re; +} + +function keywordMatches(kw: string, text: string): boolean { + const cached = keywordRegexCache.get(kw); + if (cached) return cached.test(text); + // Fallback for unknown keywords (should not happen with hardcoded maps). + // Use plain string search — no dynamic RegExp construction at runtime. 
+ return text.includes(kw); } function matchKeywords( @@ -197,7 +215,7 @@ function matchKeywords( keywords: KeywordMap ): { keyword: string; category: EventCategory } | null { for (const [kw, cat] of Object.entries(keywords)) { - if (getKeywordRegex(kw).test(titleLower)) { + if (keywordMatches(kw, titleLower)) { return { keyword: kw, category: cat }; } } diff --git a/server/worldmonitor/news/v1/list-feed-digest.ts b/server/worldmonitor/news/v1/list-feed-digest.ts index 2b2f7fedc7..a2507c6ca9 100644 --- a/server/worldmonitor/news/v1/list-feed-digest.ts +++ b/server/worldmonitor/news/v1/list-feed-digest.ts @@ -182,15 +182,19 @@ for (const tag of KNOWN_TAGS) { }); } +/** + * Extract the text content of an XML tag. Only pre-cached tag names (see + * KNOWN_TAGS) are accepted — unknown tags return '' immediately. This avoids + * constructing RegExp from runtime strings, eliminating any ReDoS risk. + */ function extractTag(xml: string, tag: string): string { const cached = TAG_REGEX_CACHE.get(tag); - const cdataRe = cached?.cdata ?? new RegExp(`<${tag}[^>]*>\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*<\\/${tag}>`, 'i'); - const plainRe = cached?.plain ?? new RegExp(`<${tag}[^>]*>([^<]*)<\\/${tag}>`, 'i'); + if (!cached) return ''; - const cdataMatch = xml.match(cdataRe); + const cdataMatch = xml.match(cached.cdata); if (cdataMatch) return cdataMatch[1]!.trim(); - const match = xml.match(plainRe); + const match = xml.match(cached.plain); return match ? 
decodeXmlEntities(match[1]!.trim()) : ''; } diff --git a/src/components/DeductionPanel.ts b/src/components/DeductionPanel.ts index b7314db75b..e3d5a87e71 100644 --- a/src/components/DeductionPanel.ts +++ b/src/components/DeductionPanel.ts @@ -1,5 +1,5 @@ import { Panel } from './Panel'; -import { getRpcBaseUrl } from '@/services/rpc-client'; +import { getRpcBaseUrl } from '@/services/rpc-client'; import { IntelligenceServiceClient } from '@/generated/client/worldmonitor/intelligence/v1/service_client'; import { h, replaceChildren } from '@/utils/dom-utils'; import { marked } from 'marked'; @@ -129,7 +129,13 @@ export class DeductionPanel extends Panel { if (resp.analysis) { const parsed = await marked.parse(resp.analysis); if (!this.element?.isConnected) return; - this.resultContainer.innerHTML = DOMPurify.sanitize(parsed); + this.resultContainer.innerHTML = DOMPurify.sanitize(parsed, { + ALLOWED_TAGS: ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ul', 'ol', 'li', + 'strong', 'em', 'b', 'i', 'br', 'hr', 'code', 'pre', 'blockquote', + 'table', 'thead', 'tbody', 'tr', 'th', 'td', 'span', 'div', 'small'], + ALLOWED_ATTR: ['class'], + ALLOW_DATA_ATTR: false, + }); const meta = h('div', { style: 'margin-top: 12px; font-size: 0.75em; color: #888;' }, `Generated by ${resp.provider || 'AI'}${resp.model ? 
` (${resp.model})` : ''}` diff --git a/src/components/LiveWebcamsPanel.ts b/src/components/LiveWebcamsPanel.ts index 3a8b33cbf5..fb996079fe 100644 --- a/src/components/LiveWebcamsPanel.ts +++ b/src/components/LiveWebcamsPanel.ts @@ -415,10 +415,20 @@ export class LiveWebcamsPanel extends Panel { container.appendChild(overlay); } + private static readonly TRUSTED_ORIGINS = new Set([ + 'https://www.youtube.com', + 'https://www.youtube-nocookie.com', + 'https://webcams.windy.com', + ]); + private handleEmbedMessage(e: MessageEvent): void { const iframe = this.findIframeBySource(e.source); if (!iframe) return; + // Validate origin: only accept messages from YouTube, Windy, or the local sidecar. + const localOrigin = isDesktopRuntime() ? `http://localhost:${getLocalApiPort()}` : null; + if (!LiveWebcamsPanel.TRUSTED_ORIGINS.has(e.origin) && e.origin !== localOrigin) return; + // Desktop sidecar posts { type: 'yt-ready' | 'yt-state' | 'yt-error' } const msg = e.data as { type?: string; state?: number; code?: number; event?: string; info?: unknown } | string | null; diff --git a/src/utils/widget-sanitizer.ts b/src/utils/widget-sanitizer.ts index b0433f6f3d..c3d8b713b9 100644 --- a/src/utils/widget-sanitizer.ts +++ b/src/utils/widget-sanitizer.ts @@ -44,7 +44,9 @@ export function wrapWidgetHtml(html: string, extraClass = ''): string { function escapeSrcdoc(str: string): string { return str .replace(/&/g, '&amp;') - .replace(/"/g, '&quot;'); + .replace(/"/g, '&quot;') + .replace(/</g, '&lt;') + .replace(/>/g, '&gt;'); } export function wrapProWidgetHtml(bodyContent: string): string { diff --git a/tests/mcp-proxy.test.mjs b/tests/mcp-proxy.test.mjs index b36dd5bd2d..c1df4b491c 100644 --- a/tests/mcp-proxy.test.mjs +++ b/tests/mcp-proxy.test.mjs @@ -183,7 +183,7 @@ describe('api/mcp-proxy', () => { const res = await handler(makeGetRequest({ serverUrl: 'https://mcp.example.com/mcp' })); assert.equal(res.status, 422); const data = await res.json(); - assert.match(data.error, /Method not found/i); + 
assert.match(data.error, /MCP request failed/i); }); it('returns 504 on fetch timeout', async () => { @@ -303,7 +303,7 @@ describe('api/mcp-proxy', () => { })); assert.equal(res.status, 422); const data = await res.json(); - assert.match(data.error, /Unknown tool/i); + assert.match(data.error, /MCP request failed/i); }); it('returns 504 on timeout during tool call', async () => { @@ -390,7 +390,8 @@ describe('api/mcp-proxy', () => { const res = await handler(makeGetRequest({ serverUrl: 'https://mcp.example.com/sse' })); assert.equal(res.status, 422); const data = await res.json(); - assert.match(data.error, /blocked|SSRF|endpoint/i); + // Error message is intentionally generic to avoid leaking internals + assert.match(data.error, /MCP request failed|blocked|SSRF|endpoint/i); }); }); diff --git a/tests/relay-helper.test.mjs b/tests/relay-helper.test.mjs index 76461fb007..39e0d7db4e 100644 --- a/tests/relay-helper.test.mjs +++ b/tests/relay-helper.test.mjs @@ -56,9 +56,9 @@ describe('getRelayBaseUrl', () => { assert.equal(getRelayBaseUrl(), 'https://relay.example.com'); }); - it('converts ws:// to http://', () => { + it('converts insecure websocket scheme to https://', () => { process.env.WS_RELAY_URL = 'ws://relay.example.com'; - assert.equal(getRelayBaseUrl(), 'http://relay.example.com'); + assert.equal(getRelayBaseUrl(), 'https://relay.example.com'); }); it('strips trailing slash', () => { @@ -310,7 +310,9 @@ describe('createRelayHandler', () => { assert.equal(res.status, 502); const body = await res.json(); assert.equal(body.error, 'Relay request failed'); - assert.equal(body.details, 'Connection refused'); + // Internal error details are intentionally omitted from the response + // to prevent information leakage — they are logged server-side only. 
+ assert.equal(body.details, undefined); }); it('calls fallback when relay unavailable', async () => { diff --git a/tests/shared-relay.test.mjs b/tests/shared-relay.test.mjs index 16b949e86e..cbda615773 100644 --- a/tests/shared-relay.test.mjs +++ b/tests/shared-relay.test.mjs @@ -45,7 +45,9 @@ function loadRelayFunctions() { const getRelayBaseUrl = function () { const relayUrl = process.env.WS_RELAY_URL; if (!relayUrl) return null; - return relayUrl.replace(/^ws(s?):\/\//, 'http$1://').replace(/\/$/, ''); + const httpUrl = relayUrl.replace(/^wss:\/\//, 'https://'); + const secured = httpUrl.startsWith('https://') ? httpUrl : 'https://' + httpUrl.replace(/^[a-z]+:\/\//, ''); + return secured.replace(/\/$/, ''); }; const getRelayHeaders = function (extra = {}) { @@ -65,7 +67,7 @@ function loadRelayFunctions() { }; // Verify source file still matches expected logic shape - assert.ok(src.includes('replace(/^ws(s?):\\/\\//'), 'relay.ts must use single-regex wss:// transform'); + assert.ok(src.includes('wss:'), 'relay.ts must handle wss:// transform'); assert.ok(src.includes('...extra'), 'relay.ts must spread extra before auth headers'); assert.ok(src.includes("relayHeader !== 'authorization'"), 'relay.ts must guard against Authorization header collision'); @@ -89,9 +91,9 @@ describe('getRelayBaseUrl', () => { }); }); - it('transforms ws:// to http://', () => { + it('transforms insecure websocket scheme to https://', () => { withEnv({ WS_RELAY_URL: 'ws://relay.example.com' }, () => { - assert.equal(getRelayBaseUrl(), 'http://relay.example.com'); + assert.equal(getRelayBaseUrl(), 'https://relay.example.com'); }); }); diff --git a/vercel.json b/vercel.json index 7c1bf94dad..eb1e6db864 100644 --- a/vercel.json +++ b/vercel.json @@ -13,9 +13,7 @@ { "source": "/api/(.*)", "headers": [ - { "key": "Access-Control-Allow-Origin", "value": "*" }, - { "key": "Access-Control-Allow-Methods", "value": "GET, POST, OPTIONS" }, - { "key": "Access-Control-Allow-Headers", "value": 
"Content-Type, Authorization, X-WorldMonitor-Key" } + { "key": "X-Content-Type-Options", "value": "nosniff" } ] }, {