From 753c6766a0b28d66105dc1a6684f6ab7bd1b3f96 Mon Sep 17 00:00:00 2001 From: Greg Scher Date: Fri, 20 Mar 2026 16:49:58 -0400 Subject: [PATCH 1/5] Remove text truncation limits from Telegram posts Posts were being cut to 300 chars (source ingestion) and 150 chars (alert evaluation), losing valuable OSINT context. The sendMessage chunker already handles the 4096-char Telegram API limit. Co-Authored-By: Claude Opus 4.6 (1M context) --- apis/sources/telegram.mjs | 5 ++--- lib/alerts/telegram.mjs | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/apis/sources/telegram.mjs b/apis/sources/telegram.mjs index eb65384..c4f6e17 100644 --- a/apis/sources/telegram.mjs +++ b/apis/sources/telegram.mjs @@ -94,7 +94,7 @@ export async function getChat(chatId) { // Compact a Bot API message for briefing output function compactBotMessage(msg) { return { - text: (msg.text || msg.caption || '').slice(0, 300), + text: msg.text || msg.caption || '', date: msg.date ? new Date(msg.date * 1000).toISOString() : null, chat: msg.chat?.title || msg.chat?.username || 'unknown', views: msg.views || 0, @@ -171,8 +171,7 @@ function parseWebPreview(html, channelId) { .replace(/"/g, '"') .replace(/'/g, "'") .replace(/ /g, ' ') - .trim() - .slice(0, 300); + .trim(); } // Extract view count diff --git a/lib/alerts/telegram.mjs b/lib/alerts/telegram.mjs index 4c3ac3a..4288b02 100644 --- a/lib/alerts/telegram.mjs +++ b/lib/alerts/telegram.mjs @@ -681,7 +681,7 @@ Respond with ONLY valid JSON: if (osintSignals.length > 0) { sections.push('๐Ÿ“ก OSINT SIGNALS:\n' + osintSignals.map(s => { const post = s.item || s; - return ` [${post.channel || 'UNKNOWN'}] ${(post.text || s.reason || '').substring(0, 150)}`; + return ` [${post.channel || 'UNKNOWN'}] ${post.text || s.reason || ''}`; }).join('\n')); } From 2d166c20e81055842fb60c4be6fdadeb2e5ed3f1 Mon Sep 17 00:00:00 2001 From: Greg Scher Date: Sat, 21 Mar 2026 12:59:30 -0400 Subject: [PATCH 2/5] Remove remaining text truncation across delta 
engine, memory, and ideas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior fix (753c676) only removed truncation at source ingestion and alert formatting. Signals were still being cut to 120 chars in the delta engine, 80 chars in memory snapshots, and 120 chars in the ideas LLM context — so OSINT posts arrived at the alerter already truncated. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/alerts/telegram.mjs | 7 +++++-- lib/delta/engine.mjs | 2 +- lib/delta/memory.mjs | 2 +- lib/llm/ideas.mjs | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/alerts/telegram.mjs b/lib/alerts/telegram.mjs index 4288b02..36a06cf 100644 --- a/lib/alerts/telegram.mjs +++ b/lib/alerts/telegram.mjs @@ -271,7 +271,7 @@ export class TelegramAlerter { headline: `OSINT Surge: ${osintNew.length} New Urgent Posts`, reason: `${osintNew.length} new urgent OSINT signals detected. Elevated conflict reporting tempo.`, actionable: 'Review OSINT stream for pattern. 
Cross-check with satellite and ACLED data.', - signals: osintNew.map(s => (s.text || '').substring(0, 40)).slice(0, 3), + signals: osintNew.map(s => s.text || s.label || s.key).slice(0, 5), crossCorrelation: 'telegram OSINT', }; } @@ -728,7 +728,10 @@ Respond with ONLY valid JSON: } if (evaluation.signals?.length) { - lines.push('', `Signals: ${evaluation.signals.join(' ยท ')}`); + lines.push('', `*Signals:*`); + for (const sig of evaluation.signals) { + lines.push(`โ€ข ${sig}`); + } } lines.push('', `_${new Date().toISOString().replace('T', ' ').substring(0, 19)} UTC_`); diff --git a/lib/delta/engine.mjs b/lib/delta/engine.mjs index d42a958..e285473 100644 --- a/lib/delta/engine.mjs +++ b/lib/delta/engine.mjs @@ -161,7 +161,7 @@ export function computeDelta(current, previous, thresholdOverrides = {}) { if (hash && !prevHashes.has(hash)) { signals.new.push({ key: `tg_urgent:${hash}`, - text: post.text?.substring(0, 120), + text: post.text, item: post, reason: 'New urgent OSINT post', }); diff --git a/lib/delta/memory.mjs b/lib/delta/memory.mjs index 238c014..9d3c3b3 100644 --- a/lib/delta/memory.mjs +++ b/lib/delta/memory.mjs @@ -199,7 +199,7 @@ export class MemoryManager { bls: data.bls, treasury: data.treasury, gscpi: data.gscpi, - tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text?.substring(0, 80), date: p.date })) }, + tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text, date: p.date })) }, thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })), air: (data.air || []).map(a => ({ region: a.region, total: a.total })), nuke: (data.nuke || []).map(n => ({ site: n.site, anom: n.anom, cpm: n.cpm })), diff --git a/lib/llm/ideas.mjs b/lib/llm/ideas.mjs index 08e6603..12a0fc1 100644 --- a/lib/llm/ideas.mjs +++ b/lib/llm/ideas.mjs @@ -91,7 +91,7 @@ function compactSweepForLLM(data, delta, previousIdeas) { // Geopolitical signals const urgentPosts = 
(data.tg?.urgent || []).slice(0, 5); if (urgentPosts.length) { - sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${(p.text || '').substring(0, 120)}`).join('\n')}`); + sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${p.text || ''}`).join('\n')}`); } // Thermal / fire detections From 31c305cbbb8ce729733a1c6f7a2d2fba6799ac63 Mon Sep 17 00:00:00 2001 From: Greg Scher Date: Mon, 23 Mar 2026 12:57:37 -0400 Subject: [PATCH 3/5] Escape Markdown in alert signals and cap OSINT text in ideas prompt Addresses PR review: escape Markdown-sensitive characters in _formatTieredAlert signal bullets to prevent Telegram Bot API rejections, and add a 1500-char budget for URGENT_OSINT in compactSweepForLLM to bound prompt size while keeping full text upstream. Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/alerts/telegram.mjs | 7 ++++++- lib/llm/ideas.mjs | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/alerts/telegram.mjs b/lib/alerts/telegram.mjs index 36a06cf..1a7f6ae 100644 --- a/lib/alerts/telegram.mjs +++ b/lib/alerts/telegram.mjs @@ -730,7 +730,7 @@ Respond with ONLY valid JSON: if (evaluation.signals?.length) { lines.push('', `*Signals:*`); for (const sig of evaluation.signals) { - lines.push(`โ€ข ${sig}`); + lines.push(`โ€ข ${escapeMd(sig)}`); } } @@ -742,6 +742,11 @@ Respond with ONLY valid JSON: // โ”€โ”€โ”€ Helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +function escapeMd(text) { + if (!text) return ''; + return text.replace(/([_*\[\]()~`>#+\-=|{}.!\\])/g, '\\$1'); +} + function parseJSON(text) { if (!text) return null; let cleaned = text.trim(); diff --git a/lib/llm/ideas.mjs b/lib/llm/ideas.mjs index 12a0fc1..1e78e23 100644 --- a/lib/llm/ideas.mjs +++ b/lib/llm/ideas.mjs @@ -88,10 +88,20 @@ function compactSweepForLLM(data, delta, previousIdeas) { 
sections.push(`SUPPLY_CHAIN: GSCPI=${data.gscpi.value} (${data.gscpi.interpretation})`); } - // Geopolitical signals + // Geopolitical signals (cap total OSINT text to ~1500 chars to keep prompt compact) const urgentPosts = (data.tg?.urgent || []).slice(0, 5); if (urgentPosts.length) { - sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${p.text || ''}`).join('\n')}`); + const MAX_OSINT_CHARS = 1500; + let remaining = MAX_OSINT_CHARS; + const lines = []; + for (const p of urgentPosts) { + const text = p.text || ''; + if (remaining <= 0) break; + const trimmed = text.length > remaining ? text.substring(0, remaining) + 'โ€ฆ' : text; + lines.push(`- ${trimmed}`); + remaining -= trimmed.length; + } + sections.push(`URGENT_OSINT:\n${lines.join('\n')}`); } // Thermal / fire detections From b7322f1c7e7ee34984e65609ee174b4f2fdc7613 Mon Sep 17 00:00:00 2001 From: Greg Scher Date: Mon, 23 Mar 2026 13:01:32 -0400 Subject: [PATCH 4/5] Fix HTML entity decoding and broaden OSINT dedup window - Replace single ' handler with generic numeric/hex entity decoder so ' and other unpadded entities are properly converted - Dedup urgent OSINT posts against all hot memory runs (last 3 sweeps) instead of only the previous sweep, preventing posts that drop out of one sweep from reappearing as "new" in the next Co-Authored-By: Claude Opus 4.6 (1M context) --- apis/sources/telegram.mjs | 5 ++++- lib/delta/engine.mjs | 7 +++++-- lib/delta/memory.mjs | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/apis/sources/telegram.mjs b/apis/sources/telegram.mjs index c4f6e17..5157d3d 100644 --- a/apis/sources/telegram.mjs +++ b/apis/sources/telegram.mjs @@ -169,7 +169,10 @@ function parseWebPreview(html, channelId) { .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') - .replace(/'/g, "'") + .replace(/�*39;/g, "'") + .replace(/�*27;/gi, "'") + .replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n))) + .replace(/&#x([0-9a-f]+);/gi, (_, h) => 
String.fromCharCode(parseInt(h, 16))) .replace(/ /g, ' ') .trim(); } diff --git a/lib/delta/engine.mjs b/lib/delta/engine.mjs index e285473..c98d50f 100644 --- a/lib/delta/engine.mjs +++ b/lib/delta/engine.mjs @@ -90,7 +90,7 @@ function contentHash(text) { * @param {object|null} previous - previous sweep's synthesized data (null on first run) * @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} } */ -export function computeDelta(current, previous, thresholdOverrides = {}) { +export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) { if (!previous) return null; if (!current) return null; @@ -152,8 +152,11 @@ export function computeDelta(current, previous, thresholdOverrides = {}) { // โ”€โ”€โ”€ New urgent Telegram posts (semantic dedup) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + // Dedup against all recent runs (not just the last one) to catch posts that + // drop out of one sweep but reappear in a later one. + const sources = priorRuns.length > 0 ? 
priorRuns : [previous]; const prevHashes = new Set( - (previous.tg?.urgent || []).map(p => contentHash(p.text)) + sources.flatMap(run => (run?.tg?.urgent || []).map(p => contentHash(p.text))) ); for (const post of (current.tg?.urgent || [])) { diff --git a/lib/delta/memory.mjs b/lib/delta/memory.mjs index 9d3c3b3..a551ebb 100644 --- a/lib/delta/memory.mjs +++ b/lib/delta/memory.mjs @@ -74,7 +74,9 @@ export class MemoryManager { // Add a new run to hot memory addRun(synthesizedData) { const previous = this.getLastRun(); - const delta = computeDelta(synthesizedData, previous); + // Collect urgent post hashes from all hot runs for broader dedup window + const priorRuns = this.hot.runs.map(r => r.data); + const delta = computeDelta(synthesizedData, previous, {}, priorRuns); // Compact the data for storage (strip large arrays) const compact = this._compactForStorage(synthesizedData); From 5c08355e38d30045d72a0dfb37d72913d21c0d66 Mon Sep 17 00:00:00 2001 From: calesthio Date: Tue, 24 Mar 2026 18:48:55 -0700 Subject: [PATCH 5/5] Fix Telegram dedup identity and legacy Markdown escaping --- lib/alerts/telegram.mjs | 4 +++- lib/delta/engine.mjs | 34 ++++++++++++++++++++++++++++------ lib/delta/memory.mjs | 10 +++++++++- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/lib/alerts/telegram.mjs b/lib/alerts/telegram.mjs index 1a7f6ae..580a902 100644 --- a/lib/alerts/telegram.mjs +++ b/lib/alerts/telegram.mjs @@ -744,7 +744,9 @@ Respond with ONLY valid JSON: function escapeMd(text) { if (!text) return ''; - return text.replace(/([_*\[\]()~`>#+\-=|{}.!\\])/g, '\\$1'); + // The bot sends alerts with legacy Markdown parse mode, not MarkdownV2. + // Escape only the characters that legacy Markdown actually treats as markup. 
+ return text.replace(/([_*`\[])/g, '\\$1'); } function parseJSON(text) { diff --git a/lib/delta/engine.mjs b/lib/delta/engine.mjs index c98d50f..ec45ddb 100644 --- a/lib/delta/engine.mjs +++ b/lib/delta/engine.mjs @@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm // โ”€โ”€โ”€ Semantic Hashing for Telegram Posts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ /** - * Produce a normalized hash of a post's content. - * Strips timestamps, normalizes numbers, lowercases โ€” so "BREAKING: 5 missiles at 14:32" - * and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals). + * Produce a normalized semantic hash of a post's content. + * This is intentionally lossy and is only safe as a fallback when a stable + * post identity is unavailable. */ function contentHash(text) { if (!text) return ''; @@ -83,12 +83,32 @@ function contentHash(text) { return createHash('sha256').update(normalized).digest('hex').substring(0, 12); } +function stablePostKey(post) { + if (!post) return ''; + + const sourceId = post.postId || post.messageId || ''; + const channelId = post.channel || post.chat || ''; + const date = post.date || ''; + const text = (post.text || '').trim().substring(0, 200); + + if (sourceId) return `id:${sourceId}`; + if (channelId && date) { + return createHash('sha256') + .update(`${channelId}|${date}|${text}`) + .digest('hex') + .substring(0, 16); + } + + return `semantic:${contentHash(post.text)}`; +} + // โ”€โ”€โ”€ Core Delta Computation โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ /** * @param {object} current - current sweep's synthesized data * @param {object|null} previous - previous sweep's synthesized data (null on first run) * @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} } + 
* @param {Array} [priorRuns] - optional compacted prior runs for broader dedup */ export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) { if (!previous) return null; @@ -153,14 +173,16 @@ export function computeDelta(current, previous, thresholdOverrides = {}, priorRu // โ”€โ”€โ”€ New urgent Telegram posts (semantic dedup) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ // Dedup against all recent runs (not just the last one) to catch posts that - // drop out of one sweep but reappear in a later one. + // drop out of one sweep but reappear in a later one. Use stable post identity + // where possible so updated posts are not collapsed into earlier alerts just + // because their text is semantically similar. const sources = priorRuns.length > 0 ? priorRuns : [previous]; const prevHashes = new Set( - sources.flatMap(run => (run?.tg?.urgent || []).map(p => contentHash(p.text))) + sources.flatMap(run => (run?.tg?.urgent || []).map(stablePostKey)).filter(Boolean) ); for (const post of (current.tg?.urgent || [])) { - const hash = contentHash(post.text); + const hash = stablePostKey(post); if (hash && !prevHashes.has(hash)) { signals.new.push({ key: `tg_urgent:${hash}`, diff --git a/lib/delta/memory.mjs b/lib/delta/memory.mjs index a551ebb..66986f0 100644 --- a/lib/delta/memory.mjs +++ b/lib/delta/memory.mjs @@ -201,7 +201,15 @@ export class MemoryManager { bls: data.bls, treasury: data.treasury, gscpi: data.gscpi, - tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text, date: p.date })) }, + tg: { + posts: data.tg?.posts, + urgent: (data.tg?.urgent || []).map(p => ({ + text: p.text, + date: p.date, + channel: p.channel || p.chat || null, + postId: p.postId || null, + })), + }, thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })), air: (data.air || []).map(a => ({ region: a.region, total: a.total })), nuke: (data.nuke || []).map(n => ({ 
site: n.site, anom: n.anom, cpm: n.cpm })),