diff --git a/apis/sources/telegram.mjs b/apis/sources/telegram.mjs index eb65384..5157d3d 100644 --- a/apis/sources/telegram.mjs +++ b/apis/sources/telegram.mjs @@ -94,7 +94,7 @@ export async function getChat(chatId) { // Compact a Bot API message for briefing output function compactBotMessage(msg) { return { - text: (msg.text || msg.caption || '').slice(0, 300), + text: msg.text || msg.caption || '', date: msg.date ? new Date(msg.date * 1000).toISOString() : null, chat: msg.chat?.title || msg.chat?.username || 'unknown', views: msg.views || 0, @@ -169,10 +169,12 @@ function parseWebPreview(html, channelId) { .replace(/</g, '<') .replace(/>/g, '>') .replace(/"/g, '"') - .replace(/'/g, "'") + .replace(/�*39;/g, "'") + .replace(/�*27;/gi, "'") + .replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n))) + .replace(/&#x([0-9a-f]+);/gi, (_, h) => String.fromCharCode(parseInt(h, 16))) .replace(/ /g, ' ') - .trim() - .slice(0, 300); + .trim(); } // Extract view count diff --git a/lib/alerts/telegram.mjs b/lib/alerts/telegram.mjs index 4c3ac3a..580a902 100644 --- a/lib/alerts/telegram.mjs +++ b/lib/alerts/telegram.mjs @@ -271,7 +271,7 @@ export class TelegramAlerter { headline: `OSINT Surge: ${osintNew.length} New Urgent Posts`, reason: `${osintNew.length} new urgent OSINT signals detected. Elevated conflict reporting tempo.`, actionable: 'Review OSINT stream for pattern. Cross-check with satellite and ACLED data.', - signals: osintNew.map(s => (s.text || '').substring(0, 40)).slice(0, 3), + signals: osintNew.map(s => s.text || s.label || s.key).slice(0, 5), crossCorrelation: 'telegram OSINT', }; } @@ -681,7 +681,7 @@ Respond with ONLY valid JSON: if (osintSignals.length > 0) { sections.push('๐Ÿ“ก OSINT SIGNALS:\n' + osintSignals.map(s => { const post = s.item || s; - return ` [${post.channel || 'UNKNOWN'}] ${(post.text || s.reason || '').substring(0, 150)}`; + return ` [${post.channel || 'UNKNOWN'}] ${post.text || s.reason || ''}`; }).join('\n')); } @@ -728,7 +728,10 @@ Respond with ONLY valid JSON: } if (evaluation.signals?.length) { - lines.push('', `Signals: ${evaluation.signals.join(' ยท ')}`); + lines.push('', `*Signals:*`); + for (const sig of evaluation.signals) { + lines.push(`โ€ข ${escapeMd(sig)}`); + } } lines.push('', `_${new Date().toISOString().replace('T', ' ').substring(0, 19)} UTC_`); @@ -739,6 +742,13 @@ Respond with ONLY valid JSON: // โ”€โ”€โ”€ Helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ +function escapeMd(text) { + if (!text) return ''; + // The bot sends alerts with legacy Markdown parse mode, not MarkdownV2. + // Escape only the characters that legacy Markdown actually treats as markup. + return text.replace(/([_*`\[])/g, '\\$1'); +} + function parseJSON(text) { if (!text) return null; let cleaned = text.trim(); diff --git a/lib/delta/engine.mjs b/lib/delta/engine.mjs index d42a958..ec45ddb 100644 --- a/lib/delta/engine.mjs +++ b/lib/delta/engine.mjs @@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm // โ”€โ”€โ”€ Semantic Hashing for Telegram Posts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ /** - * Produce a normalized hash of a post's content. - * Strips timestamps, normalizes numbers, lowercases โ€” so "BREAKING: 5 missiles at 14:32" - * and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals). + * Produce a normalized semantic hash of a post's content. + * This is intentionally lossy and is only safe as a fallback when a stable + * post identity is unavailable. */ function contentHash(text) { if (!text) return ''; @@ -83,14 +83,34 @@ function contentHash(text) { return createHash('sha256').update(normalized).digest('hex').substring(0, 12); } +function stablePostKey(post) { + if (!post) return ''; + + const sourceId = post.postId || post.messageId || ''; + const channelId = post.channel || post.chat || ''; + const date = post.date || ''; + const text = (post.text || '').trim().substring(0, 200); + + if (sourceId) return `id:${sourceId}`; + if (channelId && date) { + return createHash('sha256') + .update(`${channelId}|${date}|${text}`) + .digest('hex') + .substring(0, 16); + } + + return `semantic:${contentHash(post.text)}`; +} + // โ”€โ”€โ”€ Core Delta Computation โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ /** * @param {object} current - current sweep's synthesized data * @param {object|null} previous - previous sweep's synthesized data (null on first run) * @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} } + * @param {Array} [priorRuns] - optional compacted prior runs for broader dedup */ -export function computeDelta(current, previous, thresholdOverrides = {}) { +export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) { if (!previous) return null; if (!current) return null; @@ -152,16 +172,21 @@ export function computeDelta(current, previous, thresholdOverrides = {}) { // โ”€โ”€โ”€ New urgent Telegram posts (semantic dedup) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + // Dedup against all recent runs (not just the last one) to catch posts that + // drop out of one sweep but reappear in a later one. Use stable post identity + // where possible so updated posts are not collapsed into earlier alerts just + // because their text is semantically similar. + const sources = priorRuns.length > 0 ? priorRuns : [previous]; const prevHashes = new Set( - (previous.tg?.urgent || []).map(p => contentHash(p.text)) + sources.flatMap(run => (run?.tg?.urgent || []).map(stablePostKey)).filter(Boolean) ); for (const post of (current.tg?.urgent || [])) { - const hash = contentHash(post.text); + const hash = stablePostKey(post); if (hash && !prevHashes.has(hash)) { signals.new.push({ key: `tg_urgent:${hash}`, - text: post.text?.substring(0, 120), + text: post.text, item: post, reason: 'New urgent OSINT post', }); diff --git a/lib/delta/memory.mjs b/lib/delta/memory.mjs index 238c014..66986f0 100644 --- a/lib/delta/memory.mjs +++ b/lib/delta/memory.mjs @@ -74,7 +74,9 @@ export class MemoryManager { // Add a new run to hot memory addRun(synthesizedData) { const previous = this.getLastRun(); - const delta = computeDelta(synthesizedData, previous); + // Collect urgent post hashes from all hot runs for broader dedup window + const priorRuns = this.hot.runs.map(r => r.data); + const delta = computeDelta(synthesizedData, previous, {}, priorRuns); // Compact the data for storage (strip large arrays) const compact = this._compactForStorage(synthesizedData); @@ -199,7 +201,15 @@ export class MemoryManager { bls: data.bls, treasury: data.treasury, gscpi: data.gscpi, - tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text?.substring(0, 80), date: p.date })) }, + tg: { + posts: data.tg?.posts, + urgent: (data.tg?.urgent || []).map(p => ({ + text: p.text, + date: p.date, + channel: p.channel || p.chat || null, + postId: p.postId || null, + })), + }, thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })), air: (data.air || []).map(a => ({ region: a.region, total: a.total })), nuke: (data.nuke || []).map(n => ({ site: n.site, anom: n.anom, cpm: n.cpm })), diff --git a/lib/llm/ideas.mjs b/lib/llm/ideas.mjs index 08e6603..1e78e23 100644 --- a/lib/llm/ideas.mjs +++ b/lib/llm/ideas.mjs @@ -88,10 +88,20 @@ function compactSweepForLLM(data, delta, previousIdeas) { sections.push(`SUPPLY_CHAIN: GSCPI=${data.gscpi.value} (${data.gscpi.interpretation})`); } - // Geopolitical signals + // Geopolitical signals (cap total OSINT text to ~1500 chars to keep prompt compact) const urgentPosts = (data.tg?.urgent || []).slice(0, 5); if (urgentPosts.length) { - sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${(p.text || '').substring(0, 120)}`).join('\n')}`); + const MAX_OSINT_CHARS = 1500; + let remaining = MAX_OSINT_CHARS; + const lines = []; + for (const p of urgentPosts) { + const text = p.text || ''; + if (remaining <= 0) break; + const trimmed = text.length > remaining ? text.substring(0, remaining) + 'โ€ฆ' : text; + lines.push(`- ${trimmed}`); + remaining -= trimmed.length; + } + sections.push(`URGENT_OSINT:\n${lines.join('\n')}`); } // Thermal / fire detections