From cc5903d007cdd2d5299837859eed1b3bac2546bc Mon Sep 17 00:00:00 2001 From: lspassos1 Date: Tue, 31 Mar 2026 00:34:02 +0100 Subject: [PATCH 1/3] feat(regulatory): add regulatory RSS fetch seeder Add a standalone seeder that fetches and normalizes SEC, CFTC, Federal Reserve, FDIC, and FINRA regulatory feeds without introducing new dependencies. The script stays import-safe, tolerates partial feed failure, and emits JSON for the fetch/parse-only phase of the pipeline. Unit tests cover RSS/Atom parsing, deduplication, ordering, and degraded-feed behavior. Refs #2492 Refs #2493 Refs #2494 Refs #2495 --- scripts/seed-regulatory-actions.mjs | 257 ++++++++++++++++++++++++++++ tests/regulatory-seed-unit.test.mjs | 185 ++++++++++++++++++++ 2 files changed, 442 insertions(+) create mode 100644 scripts/seed-regulatory-actions.mjs create mode 100644 tests/regulatory-seed-unit.test.mjs diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs new file mode 100644 index 000000000..569de5366 --- /dev/null +++ b/scripts/seed-regulatory-actions.mjs @@ -0,0 +1,257 @@ +#!/usr/bin/env node + +import { pathToFileURL } from 'node:url'; +import { CHROME_UA } from './_seed-utils.mjs'; + +const FEED_TIMEOUT_MS = 15_000; +const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*'; +const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)'; + +const REGULATORY_FEEDS = [ + { agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT }, + { agency: 'CFTC', url: 'https://www.cftc.gov/RSS/RSSENF/rssenf.xml' }, + { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' }, + { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' }, + { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' }, +]; + +function decodeEntities(input) { + if (!input) return ''; + const named = input + .replace(/&/gi, '&') + .replace(/</gi, '<') + .replace(/>/gi, '>') + .replace(/"/gi, '"') + .replace(/'/gi, "'") + .replace(/ /gi, ' '); + + return named + .replace(/&#(\d+);/g, (_, code) => String.fromCodePoint(Number(code))) + .replace(/&#x([0-9a-f]+);/gi, (_, code) => String.fromCodePoint(parseInt(code, 16))); +} + +function stripHtml(input) { + return decodeEntities( + String(input || '') + .replace(//g, '$1') + .replace(/<[^>]+>/g, ' ') + ).replace(/\s+/g, ' ').trim(); +} + +function getTagValue(block, tagName) { + const match = block.match(new RegExp(`<${tagName}[^>]*>([\\s\\S]*?)<\\/${tagName}>`, 'i')); + return stripHtml(match?.[1] || ''); +} + +function extractAtomLink(block) { + const linkTags = [...block.matchAll(/]*)\/?>/gi)]; + if (linkTags.length === 0) return ''; + + for (const [, attrs] of linkTags) { + const href = attrs.match(/\bhref=["']([^"']+)["']/i)?.[1]; + const rel = attrs.match(/\brel=["']([^"']+)["']/i)?.[1]?.toLowerCase() || ''; + if (href && (!rel || rel === 'alternate')) return decodeEntities(href.trim()); + } + + for (const [, attrs] of linkTags) { + const href = attrs.match(/\bhref=["']([^"']+)["']/i)?.[1]; + if (href) return decodeEntities(href.trim()); + } + + return ''; +} + +function resolveFeedLink(link, feedUrl) { + if (!link) return ''; + try { + return new URL(link).href; + } catch {} + try { + return new URL(link, feedUrl).href; + } catch { + return ''; + } +} + +function canonicalizeLink(link, feedUrl = '') { + const resolved = resolveFeedLink(link, feedUrl); + if (!resolved) return ''; + try { + const url = new URL(resolved); + url.hash = ''; + return url.href; + } catch { + return ''; + } +} + +function toIsoDate(rawDate) { + const value = stripHtml(rawDate); + if (!value) return ''; + const ts = Date.parse(value); + return Number.isFinite(ts) ? new Date(ts).toISOString() : ''; +} + +function slugifyTitle(title) { + return stripHtml(title) + .normalize('NFKD') + .replace(/[\u0300-\u036f]/g, '') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 80); +} + +function yyyymmdd(isoDate) { + return String(isoDate || '').slice(0, 10).replace(/-/g, ''); +} + +function buildActionId(agency, title, publishedAt) { + const agencySlug = slugifyTitle(agency) || 'agency'; + const titleSlug = slugifyTitle(title) || 'untitled'; + const datePart = yyyymmdd(publishedAt) || 'undated'; + return `${agencySlug}-${titleSlug}-${datePart}`; +} + +function parseRssItems(xml, feedUrl) { + const items = []; + const itemRegex = /]*>([\s\S]*?)<\/item>/gi; + let match; + while ((match = itemRegex.exec(xml)) !== null) { + const block = match[1]; + const title = getTagValue(block, 'title'); + const link = canonicalizeLink(getTagValue(block, 'link'), feedUrl); + const publishedAt = toIsoDate(getTagValue(block, 'pubDate') || getTagValue(block, 'updated')); + items.push({ title, link, publishedAt }); + } + return items; +} + +function parseAtomEntries(xml, feedUrl) { + const entries = []; + const entryRegex = /]*>([\s\S]*?)<\/entry>/gi; + let match; + while ((match = entryRegex.exec(xml)) !== null) { + const block = match[1]; + const title = getTagValue(block, 'title'); + const link = canonicalizeLink(extractAtomLink(block), feedUrl); + const publishedAt = toIsoDate( + getTagValue(block, 'updated') || getTagValue(block, 'published') || getTagValue(block, 'pubDate') + ); + entries.push({ title, link, publishedAt }); + } + return entries; +} + +function parseFeed(xml, feedUrl) { + if (/ item.title && item.link && item.publishedAt) + .map((item) => ({ + id: buildActionId(agency, item.title, item.publishedAt), + agency, + title: item.title, + link: item.link, + publishedAt: item.publishedAt, + })); +} + +function dedupeAndSortActions(actions) { + const seen = new Set(); + const deduped = []; + for (const action of actions) { + const key = canonicalizeLink(action.link); + if (!key || seen.has(key)) continue; + seen.add(key); + deduped.push({ ...action, link: key }); + } + + deduped.sort((a, b) => Date.parse(b.publishedAt) - Date.parse(a.publishedAt)); + return deduped; +} + +async function fetchFeed(feed, fetchImpl = globalThis.fetch) { + const headers = { + Accept: XML_ACCEPT, + 'User-Agent': feed.userAgent || CHROME_UA, + }; + + const response = await fetchImpl(feed.url, { + headers, + signal: AbortSignal.timeout(FEED_TIMEOUT_MS), + }); + + if (!response.ok) { + throw new Error(`${feed.agency}: HTTP ${response.status}`); + } + + const xml = await response.text(); + const parsed = parseFeed(xml, feed.url); + return normalizeFeedItems(parsed, feed.agency); +} + +async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FEEDS) { + const results = await Promise.allSettled(feeds.map((feed) => fetchFeed(feed, fetchImpl))); + const actions = []; + let successCount = 0; + + for (let index = 0; index < results.length; index += 1) { + const result = results[index]; + const feed = feeds[index]; + if (result.status === 'fulfilled') { + successCount += 1; + actions.push(...result.value); + continue; + } + console.error(`[regulatory] ${feed.agency}: ${result.reason?.message || result.reason}`); + } + + if (successCount === 0) { + throw new Error('All regulatory feeds failed'); + } + + return dedupeAndSortActions(actions); +} + +async function main(fetchImpl = globalThis.fetch) { + const actions = await fetchAllFeeds(fetchImpl); + process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`); + return actions; +} + +const isDirectRun = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href; + +if (isDirectRun) { + main().catch((err) => { + console.error(`FETCH FAILED: ${err.message || err}`); + process.exit(1); + }); +} + +export { + CHROME_UA, + FEED_TIMEOUT_MS, + REGULATORY_FEEDS, + SEC_USER_AGENT, + buildActionId, + canonicalizeLink, + decodeEntities, + dedupeAndSortActions, + extractAtomLink, + fetchAllFeeds, + fetchFeed, + getTagValue, + main, + normalizeFeedItems, + parseAtomEntries, + parseFeed, + parseRssItems, + resolveFeedLink, + slugifyTitle, + stripHtml, + toIsoDate, +}; diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs new file mode 100644 index 000000000..852d1ddb3 --- /dev/null +++ b/tests/regulatory-seed-unit.test.mjs @@ -0,0 +1,185 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import vm from 'node:vm'; + +function normalize(value) { + return JSON.parse(JSON.stringify(value)); +} + +const seedSrc = readFileSync('scripts/seed-regulatory-actions.mjs', 'utf8'); + +const pureSrc = seedSrc + .replace(/^import\s.*$/gm, '') + .replace(/const isDirectRun[\s\S]*?}\n\nexport\s*{[\s\S]*?};?\s*$/m, ''); + +const ctx = vm.createContext({ + console, + Date, + Math, + Number, + Array, + Set, + String, + RegExp, + URL, + URLSearchParams, + AbortSignal, + CHROME_UA: 'Mozilla/5.0 (test)', +}); + +vm.runInContext(pureSrc, ctx); + +const { + decodeEntities, + stripHtml, + extractAtomLink, + parseRssItems, + parseAtomEntries, + parseFeed, + normalizeFeedItems, + dedupeAndSortActions, + fetchAllFeeds, +} = ctx; + +describe('decodeEntities', () => { + it('decodes named and numeric entities', () => { + assert.equal(decodeEntities('Tom & Jerry & &'), 'Tom & Jerry & &'); + }); +}); + +describe('stripHtml', () => { + it('removes tags and CDATA while preserving text', () => { + assert.equal(stripHtml('world]]>'), 'Hello world'); + }); +}); + +describe('parseRssItems', () => { + it('extracts RSS items with normalized links and pubDate', () => { + const xml = ` + + + <![CDATA[SEC & Co. Charges <b>Issuer</b>]]> + /news/press-release/2026-10 + Mon, 30 Mar 2026 18:00:00 GMT + + `; + + assert.deepEqual(normalize(parseRssItems(xml, 'https://www.sec.gov/news/pressreleases.rss')), [{ + title: 'SEC & Co. Charges Issuer', + link: 'https://www.sec.gov/news/press-release/2026-10', + publishedAt: '2026-03-30T18:00:00.000Z', + }]); + }); +}); + +describe('extractAtomLink + parseAtomEntries', () => { + it('prefers alternate href and normalizes publishedAt from updated', () => { + const xml = ` + + + Fed issues notice + + + 2026-03-29T12:30:00Z + + `; + + assert.equal( + extractAtomLink(''), + '/press/notice-a' + ); + + assert.deepEqual(normalize(parseAtomEntries(xml, 'https://www.federalreserve.gov/feeds/press_all.xml')), [{ + title: 'Fed issues notice', + link: 'https://www.federalreserve.gov/press/notice-a', + publishedAt: '2026-03-29T12:30:00.000Z', + }]); + }); +}); + +describe('parseFeed', () => { + it('detects Atom feeds automatically', () => { + const atom = 'A2026-03-28T00:00:00Z'; + const parsed = normalize(parseFeed(atom, 'https://example.test/feed')); + assert.equal(parsed.length, 1); + assert.equal(parsed[0].link, 'https://example.test/a'); + }); +}); + +describe('normalizeFeedItems', () => { + it('skips incomplete entries and generates deterministic ids', () => { + const normalized = normalize(normalizeFeedItems([ + { title: 'SEC Charges XYZ Corp', link: 'https://example.test/sec', publishedAt: '2026-03-29T14:00:00.000Z' }, + { title: '', link: 'https://example.test/missing', publishedAt: '2026-03-29T14:00:00.000Z' }, + ], 'SEC')); + + assert.equal(normalized.length, 1); + assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329'); + }); +}); + +describe('dedupeAndSortActions', () => { + it('deduplicates by canonical link and sorts newest first', () => { + const actions = normalize(dedupeAndSortActions([ + { + id: 'older', + agency: 'SEC', + title: 'Older', + link: 'https://example.test/path#frag', + publishedAt: '2026-03-28T10:00:00.000Z', + }, + { + id: 'newer', + agency: 'FDIC', + title: 'Newer', + link: 'https://example.test/new', + publishedAt: '2026-03-30T10:00:00.000Z', + }, + { + id: 'duplicate', + agency: 'SEC', + title: 'Duplicate', + link: 'https://example.test/path', + publishedAt: '2026-03-29T10:00:00.000Z', + }, + ])); + + assert.deepEqual(actions.map((item) => item.id), ['newer', 'older']); + assert.equal(actions[1].link, 'https://example.test/path'); + }); +}); + +describe('fetchAllFeeds', () => { + const feeds = [ + { agency: 'SEC', url: 'https://feeds.test/sec', userAgent: 'Custom-SEC-UA' }, + { agency: 'FDIC', url: 'https://feeds.test/fdic' }, + ]; + + it('returns normalized aggregate when at least one feed succeeds', async () => { + const requests = []; + const fetchStub = async (url, options) => { + requests.push({ url, options }); + if (url.endsWith('/sec')) { + return { + ok: true, + text: async () => `SEC Charges Bankhttps://sec.test/aMon, 30 Mar 2026 18:00:00 GMT`, + }; + } + throw new Error('FDIC timeout'); + }; + + const result = normalize(await fetchAllFeeds(fetchStub, feeds)); + assert.equal(result.length, 1); + assert.equal(result[0].agency, 'SEC'); + assert.equal(requests[0].options.headers['User-Agent'], 'Custom-SEC-UA'); + assert.equal(requests[1].options.headers['User-Agent'], ctx.CHROME_UA); + }); + + it('throws when all feeds fail', async () => { + await assert.rejects( + fetchAllFeeds(async () => { throw new Error('nope'); }, feeds), + /All regulatory feeds failed/ + ); + }); +}); From f117d7bc8a5f6eaa49a8cd5158fa94fe1a4e44c8 Mon Sep 17 00:00:00 2001 From: lspassos1 Date: Tue, 31 Mar 2026 01:20:29 +0100 Subject: [PATCH 2/3] fix(regulatory): harden feed fetch defaults and action ids Use the repository-standard fetch wrapper in the seeder defaults, keep the documented FINRA HTTP exception in place, and include publish time in generated action ids to avoid same-day collisions. Validated with: node --test tests/regulatory-seed-unit.test.mjs; node scripts/seed-regulatory-actions.mjs | head -n 20 --- scripts/seed-regulatory-actions.mjs | 16 ++++++++++++---- tests/regulatory-seed-unit.test.mjs | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs index 569de5366..646e705cf 100644 --- a/scripts/seed-regulatory-actions.mjs +++ b/scripts/seed-regulatory-actions.mjs @@ -6,12 +6,15 @@ import { CHROME_UA } from './_seed-utils.mjs'; const FEED_TIMEOUT_MS = 15_000; const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*'; const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)'; +const DEFAULT_FETCH = (...args) => globalThis.fetch(...args); const REGULATORY_FEEDS = [ { agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT }, { agency: 'CFTC', url: 'https://www.cftc.gov/RSS/RSSENF/rssenf.xml' }, { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' }, { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' }, + // FINRA still publishes this RSS endpoint over plain HTTP; HTTPS requests fail + // from both Node fetch and curl in validation, so keep the official feed URL. { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' }, ]; @@ -106,11 +109,16 @@ function yyyymmdd(isoDate) { return String(isoDate || '').slice(0, 10).replace(/-/g, ''); } +function hhmmss(isoDate) { + return String(isoDate || '').slice(11, 19).replace(/:/g, ''); +} + function buildActionId(agency, title, publishedAt) { const agencySlug = slugifyTitle(agency) || 'agency'; const titleSlug = slugifyTitle(title) || 'untitled'; const datePart = yyyymmdd(publishedAt) || 'undated'; - return `${agencySlug}-${titleSlug}-${datePart}`; + const timePart = hhmmss(publishedAt) || '000000'; + return `${agencySlug}-${titleSlug}-${datePart}-${timePart}`; } function parseRssItems(xml, feedUrl) { @@ -174,7 +182,7 @@ function dedupeAndSortActions(actions) { return deduped; } -async function fetchFeed(feed, fetchImpl = globalThis.fetch) { +async function fetchFeed(feed, fetchImpl = DEFAULT_FETCH) { const headers = { Accept: XML_ACCEPT, 'User-Agent': feed.userAgent || CHROME_UA, @@ -194,7 +202,7 @@ async function fetchFeed(feed, fetchImpl = globalThis.fetch) { return normalizeFeedItems(parsed, feed.agency); } -async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FEEDS) { +async function fetchAllFeeds(fetchImpl = DEFAULT_FETCH, feeds = REGULATORY_FEEDS) { const results = await Promise.allSettled(feeds.map((feed) => fetchFeed(feed, fetchImpl))); const actions = []; let successCount = 0; @@ -217,7 +225,7 @@ async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FE return dedupeAndSortActions(actions); } -async function main(fetchImpl = globalThis.fetch) { +async function main(fetchImpl = DEFAULT_FETCH) { const actions = await fetchAllFeeds(fetchImpl); process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`); return actions; diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs index 852d1ddb3..40a5d1314 100644 --- a/tests/regulatory-seed-unit.test.mjs +++ b/tests/regulatory-seed-unit.test.mjs @@ -115,7 +115,7 @@ describe('normalizeFeedItems', () => { ], 'SEC')); assert.equal(normalized.length, 1); - assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329'); + assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329-140000'); }); }); From 52eb8e900503be6117c3174464b2c29a9f8e4f93 Mon Sep 17 00:00:00 2001 From: lspassos1 Date: Wed, 1 Apr 2026 22:41:48 +0100 Subject: [PATCH 3/3] feat(regulatory): capture feed descriptions in action records Extract RSS and Atom descriptions into the normalized action payload so later classifier work can use the same parsed feed output. Also adds @ts-check and documents the FINRA HTTP feed constraint. --- scripts/seed-regulatory-actions.mjs | 11 ++++++++--- tests/regulatory-seed-unit.test.mjs | 30 ++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs index 646e705cf..14b955902 100644 --- a/scripts/seed-regulatory-actions.mjs +++ b/scripts/seed-regulatory-actions.mjs @@ -1,4 +1,5 @@ #!/usr/bin/env node +// @ts-check import { pathToFileURL } from 'node:url'; import { CHROME_UA } from './_seed-utils.mjs'; @@ -14,7 +15,8 @@ const REGULATORY_FEEDS = [ { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' }, { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' }, // FINRA still publishes this RSS endpoint over plain HTTP; HTTPS requests fail - // from both Node fetch and curl in validation, so keep the official feed URL. + // from both Node fetch and curl in validation, so keep the official feed URL + // and periodically recheck whether HTTPS starts working. { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' }, ]; @@ -128,9 +130,10 @@ function parseRssItems(xml, feedUrl) { while ((match = itemRegex.exec(xml)) !== null) { const block = match[1]; const title = getTagValue(block, 'title'); + const description = getTagValue(block, 'description'); const link = canonicalizeLink(getTagValue(block, 'link'), feedUrl); const publishedAt = toIsoDate(getTagValue(block, 'pubDate') || getTagValue(block, 'updated')); - items.push({ title, link, publishedAt }); + items.push({ title, description, link, publishedAt }); } return items; } @@ -142,11 +145,12 @@ function parseAtomEntries(xml, feedUrl) { while ((match = entryRegex.exec(xml)) !== null) { const block = match[1]; const title = getTagValue(block, 'title'); + const description = getTagValue(block, 'summary') || getTagValue(block, 'content'); const link = canonicalizeLink(extractAtomLink(block), feedUrl); const publishedAt = toIsoDate( getTagValue(block, 'updated') || getTagValue(block, 'published') || getTagValue(block, 'pubDate') ); - entries.push({ title, link, publishedAt }); + entries.push({ title, description, link, publishedAt }); } return entries; } @@ -163,6 +167,7 @@ function normalizeFeedItems(items, agency) { id: buildActionId(agency, item.title, item.publishedAt), agency, title: item.title, + description: item.description || '', link: item.link, publishedAt: item.publishedAt, })); diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs index 40a5d1314..173b63b2a 100644 --- a/tests/regulatory-seed-unit.test.mjs +++ b/tests/regulatory-seed-unit.test.mjs @@ -55,11 +55,12 @@ describe('stripHtml', () => { }); describe('parseRssItems', () => { - it('extracts RSS items with normalized links and pubDate', () => { + it('extracts RSS items with description, normalized links, and pubDate', () => { const xml = ` <![CDATA[SEC & Co. Charges <b>Issuer</b>]]> + fraud & disclosure failures]]> /news/press-release/2026-10 Mon, 30 Mar 2026 18:00:00 GMT @@ -67,6 +68,7 @@ describe('parseRssItems', () => { assert.deepEqual(normalize(parseRssItems(xml, 'https://www.sec.gov/news/pressreleases.rss')), [{ title: 'SEC & Co. Charges Issuer', + description: 'Alleges fraud & disclosure failures', link: 'https://www.sec.gov/news/press-release/2026-10', publishedAt: '2026-03-30T18:00:00.000Z', }]); @@ -74,11 +76,12 @@ describe('parseRssItems', () => { }); describe('extractAtomLink + parseAtomEntries', () => { - it('prefers alternate href and normalizes publishedAt from updated', () => { + it('prefers alternate href and extracts summary/content with normalized publishedAt', () => { const xml = ` Fed issues notice + policy summary]]> 2026-03-29T12:30:00Z @@ -92,9 +95,27 @@ describe('extractAtomLink + parseAtomEntries', () => { assert.deepEqual(normalize(parseAtomEntries(xml, 'https://www.federalreserve.gov/feeds/press_all.xml')), [{ title: 'Fed issues notice', + description: 'Detailed policy summary', link: 'https://www.federalreserve.gov/press/notice-a', publishedAt: '2026-03-29T12:30:00.000Z', }]); + + const contentXml = ` + + + FDIC update + Formal administrative note

]]>
+ + 2026-03-28T09:15:00Z +
+
`; + + assert.deepEqual(normalize(parseAtomEntries(contentXml, 'https://www.fdic.gov/feed')), [{ + title: 'FDIC update', + description: 'Formal administrative note', + link: 'https://fdic.example.test/a', + publishedAt: '2026-03-28T09:15:00.000Z', + }]); }); }); @@ -111,11 +132,14 @@ describe('normalizeFeedItems', () => { it('skips incomplete entries and generates deterministic ids', () => { const normalized = normalize(normalizeFeedItems([ { title: 'SEC Charges XYZ Corp', link: 'https://example.test/sec', publishedAt: '2026-03-29T14:00:00.000Z' }, + { title: 'SEC Summary', description: 'extra context', link: 'https://example.test/sec-2', publishedAt: '2026-03-29T14:30:00.000Z' }, { title: '', link: 'https://example.test/missing', publishedAt: '2026-03-29T14:00:00.000Z' }, ], 'SEC')); - assert.equal(normalized.length, 1); + assert.equal(normalized.length, 2); assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329-140000'); + assert.equal(normalized[0].description, ''); + assert.equal(normalized[1].description, 'extra context'); }); });