From cc5903d007cdd2d5299837859eed1b3bac2546bc Mon Sep 17 00:00:00 2001
From: lspassos1
Date: Tue, 31 Mar 2026 00:34:02 +0100
Subject: [PATCH 1/3] feat(regulatory): add regulatory RSS fetch seeder
Add a standalone seeder that fetches and normalizes SEC, CFTC, Federal Reserve, FDIC, and FINRA regulatory feeds without introducing new dependencies.
The script stays import-safe, tolerates partial feed failure, and emits JSON for the fetch/parse-only phase of the pipeline. Unit tests cover RSS/Atom parsing, deduplication, ordering, and degraded-feed behavior.
Refs #2492
Refs #2493
Refs #2494
Refs #2495
---
scripts/seed-regulatory-actions.mjs | 257 ++++++++++++++++++++++++++++
tests/regulatory-seed-unit.test.mjs | 185 ++++++++++++++++++++
2 files changed, 442 insertions(+)
create mode 100644 scripts/seed-regulatory-actions.mjs
create mode 100644 tests/regulatory-seed-unit.test.mjs
diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
new file mode 100644
index 000000000..569de5366
--- /dev/null
+++ b/scripts/seed-regulatory-actions.mjs
@@ -0,0 +1,257 @@
+#!/usr/bin/env node
+
+import { pathToFileURL } from 'node:url';
+import { CHROME_UA } from './_seed-utils.mjs';
+
+const FEED_TIMEOUT_MS = 15_000;
+const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
+const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
+
+const REGULATORY_FEEDS = [
+ { agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
+ { agency: 'CFTC', url: 'https://www.cftc.gov/RSS/RSSENF/rssenf.xml' },
+ { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' },
+ { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' },
+ { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' },
+];
+
+function decodeEntities(input) {
+ if (!input) return '';
+ const named = input
+ .replace(/&amp;/gi, '&')
+ .replace(/&lt;/gi, '<')
+ .replace(/&gt;/gi, '>')
+ .replace(/&quot;/gi, '"')
+ .replace(/&apos;/gi, "'")
+ .replace(/&nbsp;/gi, ' ');
+
+ return named
+ .replace(/&#(\d+);/g, (_, code) => String.fromCodePoint(Number(code)))
+ .replace(/&#x([0-9a-f]+);/gi, (_, code) => String.fromCodePoint(parseInt(code, 16)));
+}
+
+function stripHtml(input) {
+ return decodeEntities(
+ String(input || '')
+ .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
+ .replace(/<[^>]+>/g, ' ')
+ ).replace(/\s+/g, ' ').trim();
+}
+
+function getTagValue(block, tagName) {
+ const match = block.match(new RegExp(`<${tagName}[^>]*>([\\s\\S]*?)<\\/${tagName}>`, 'i'));
+ return stripHtml(match?.[1] || '');
+}
+
+function extractAtomLink(block) {
+ const linkTags = [...block.matchAll(/<link\b([^>]*)\/?>/gi)];
+ if (linkTags.length === 0) return '';
+
+ for (const [, attrs] of linkTags) {
+ const href = attrs.match(/\bhref=["']([^"']+)["']/i)?.[1];
+ const rel = attrs.match(/\brel=["']([^"']+)["']/i)?.[1]?.toLowerCase() || '';
+ if (href && (!rel || rel === 'alternate')) return decodeEntities(href.trim());
+ }
+
+ for (const [, attrs] of linkTags) {
+ const href = attrs.match(/\bhref=["']([^"']+)["']/i)?.[1];
+ if (href) return decodeEntities(href.trim());
+ }
+
+ return '';
+}
+
+function resolveFeedLink(link, feedUrl) {
+ if (!link) return '';
+ try {
+ return new URL(link).href;
+ } catch {}
+ try {
+ return new URL(link, feedUrl).href;
+ } catch {
+ return '';
+ }
+}
+
+function canonicalizeLink(link, feedUrl = '') {
+ const resolved = resolveFeedLink(link, feedUrl);
+ if (!resolved) return '';
+ try {
+ const url = new URL(resolved);
+ url.hash = '';
+ return url.href;
+ } catch {
+ return '';
+ }
+}
+
+function toIsoDate(rawDate) {
+ const value = stripHtml(rawDate);
+ if (!value) return '';
+ const ts = Date.parse(value);
+ return Number.isFinite(ts) ? new Date(ts).toISOString() : '';
+}
+
+function slugifyTitle(title) {
+ return stripHtml(title)
+ .normalize('NFKD')
+ .replace(/[\u0300-\u036f]/g, '')
+ .toLowerCase()
+ .replace(/[^a-z0-9]+/g, '-')
+ .replace(/^-+|-+$/g, '')
+ .slice(0, 80);
+}
+
+function yyyymmdd(isoDate) {
+ return String(isoDate || '').slice(0, 10).replace(/-/g, '');
+}
+
+function buildActionId(agency, title, publishedAt) {
+ const agencySlug = slugifyTitle(agency) || 'agency';
+ const titleSlug = slugifyTitle(title) || 'untitled';
+ const datePart = yyyymmdd(publishedAt) || 'undated';
+ return `${agencySlug}-${titleSlug}-${datePart}`;
+}
+
+function parseRssItems(xml, feedUrl) {
+ const items = [];
+ const itemRegex = /<item\b[^>]*>([\s\S]*?)<\/item>/gi;
+ let match;
+ while ((match = itemRegex.exec(xml)) !== null) {
+ const block = match[1];
+ const title = getTagValue(block, 'title');
+ const link = canonicalizeLink(getTagValue(block, 'link'), feedUrl);
+ const publishedAt = toIsoDate(getTagValue(block, 'pubDate') || getTagValue(block, 'updated'));
+ items.push({ title, link, publishedAt });
+ }
+ return items;
+}
+
+function parseAtomEntries(xml, feedUrl) {
+ const entries = [];
+ const entryRegex = /<entry\b[^>]*>([\s\S]*?)<\/entry>/gi;
+ let match;
+ while ((match = entryRegex.exec(xml)) !== null) {
+ const block = match[1];
+ const title = getTagValue(block, 'title');
+ const link = canonicalizeLink(extractAtomLink(block), feedUrl);
+ const publishedAt = toIsoDate(
+ getTagValue(block, 'updated') || getTagValue(block, 'published') || getTagValue(block, 'pubDate')
+ );
+ entries.push({ title, link, publishedAt });
+ }
+ return entries;
+}
+
+function parseFeed(xml, feedUrl) {
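+ if (/<feed\b/i.test(xml)) return parseAtomEntries(xml, feedUrl);
+ return parseRssItems(xml, feedUrl);
+}
+
+function normalizeFeedItems(items, agency) {
+ return items
+ .filter((item) => item.title && item.link && item.publishedAt)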
+ .map((item) => ({
+ id: buildActionId(agency, item.title, item.publishedAt),
+ agency,
+ title: item.title,
+ link: item.link,
+ publishedAt: item.publishedAt,
+ }));
+}
+
+function dedupeAndSortActions(actions) {
+ const seen = new Set();
+ const deduped = [];
+ for (const action of actions) {
+ const key = canonicalizeLink(action.link);
+ if (!key || seen.has(key)) continue;
+ seen.add(key);
+ deduped.push({ ...action, link: key });
+ }
+
+ deduped.sort((a, b) => Date.parse(b.publishedAt) - Date.parse(a.publishedAt));
+ return deduped;
+}
+
+async function fetchFeed(feed, fetchImpl = globalThis.fetch) {
+ const headers = {
+ Accept: XML_ACCEPT,
+ 'User-Agent': feed.userAgent || CHROME_UA,
+ };
+
+ const response = await fetchImpl(feed.url, {
+ headers,
+ signal: AbortSignal.timeout(FEED_TIMEOUT_MS),
+ });
+
+ if (!response.ok) {
+ throw new Error(`${feed.agency}: HTTP ${response.status}`);
+ }
+
+ const xml = await response.text();
+ const parsed = parseFeed(xml, feed.url);
+ return normalizeFeedItems(parsed, feed.agency);
+}
+
+async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FEEDS) {
+ const results = await Promise.allSettled(feeds.map((feed) => fetchFeed(feed, fetchImpl)));
+ const actions = [];
+ let successCount = 0;
+
+ for (let index = 0; index < results.length; index += 1) {
+ const result = results[index];
+ const feed = feeds[index];
+ if (result.status === 'fulfilled') {
+ successCount += 1;
+ actions.push(...result.value);
+ continue;
+ }
+ console.error(`[regulatory] ${feed.agency}: ${result.reason?.message || result.reason}`);
+ }
+
+ if (successCount === 0) {
+ throw new Error('All regulatory feeds failed');
+ }
+
+ return dedupeAndSortActions(actions);
+}
+
+async function main(fetchImpl = globalThis.fetch) {
+ const actions = await fetchAllFeeds(fetchImpl);
+ process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`);
+ return actions;
+}
+
+const isDirectRun = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href;
+
+if (isDirectRun) {
+ main().catch((err) => {
+ console.error(`FETCH FAILED: ${err.message || err}`);
+ process.exit(1);
+ });
+}
+
+export {
+ CHROME_UA,
+ FEED_TIMEOUT_MS,
+ REGULATORY_FEEDS,
+ SEC_USER_AGENT,
+ buildActionId,
+ canonicalizeLink,
+ decodeEntities,
+ dedupeAndSortActions,
+ extractAtomLink,
+ fetchAllFeeds,
+ fetchFeed,
+ getTagValue,
+ main,
+ normalizeFeedItems,
+ parseAtomEntries,
+ parseFeed,
+ parseRssItems,
+ resolveFeedLink,
+ slugifyTitle,
+ stripHtml,
+ toIsoDate,
+};
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
new file mode 100644
index 000000000..852d1ddb3
--- /dev/null
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -0,0 +1,185 @@
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { readFileSync } from 'node:fs';
+import vm from 'node:vm';
+
+function normalize(value) {
+ return JSON.parse(JSON.stringify(value));
+}
+
+const seedSrc = readFileSync('scripts/seed-regulatory-actions.mjs', 'utf8');
+
+const pureSrc = seedSrc
+ .replace(/^import\s.*$/gm, '')
+ .replace(/const isDirectRun[\s\S]*?}\n\nexport\s*{[\s\S]*?};?\s*$/m, '');
+
+const ctx = vm.createContext({
+ console,
+ Date,
+ Math,
+ Number,
+ Array,
+ Set,
+ String,
+ RegExp,
+ URL,
+ URLSearchParams,
+ AbortSignal,
+ CHROME_UA: 'Mozilla/5.0 (test)',
+});
+
+vm.runInContext(pureSrc, ctx);
+
+const {
+ decodeEntities,
+ stripHtml,
+ extractAtomLink,
+ parseRssItems,
+ parseAtomEntries,
+ parseFeed,
+ normalizeFeedItems,
+ dedupeAndSortActions,
+ fetchAllFeeds,
+} = ctx;
+
+describe('decodeEntities', () => {
+ it('decodes named and numeric entities', () => {
+ assert.equal(decodeEntities('Tom &amp; Jerry &#38; &#x26;'), 'Tom & Jerry & &');
+ });
+});
+
+describe('stripHtml', () => {
+ it('removes tags and CDATA while preserving text', () => {
+ assert.equal(stripHtml('<![CDATA[Hello <b>world</b>]]>'), 'Hello world');
+ });
+});
+
+describe('parseRssItems', () => {
+ it('extracts RSS items with normalized links and pubDate', () => {
+ const xml = `
+
+
-
+ Issuer]]>
+ /news/press-release/2026-10
+ Mon, 30 Mar 2026 18:00:00 GMT
+
+ `;
+
+ assert.deepEqual(normalize(parseRssItems(xml, 'https://www.sec.gov/news/pressreleases.rss')), [{
+ title: 'SEC & Co. Charges Issuer',
+ link: 'https://www.sec.gov/news/press-release/2026-10',
+ publishedAt: '2026-03-30T18:00:00.000Z',
+ }]);
+ });
+});
+
+describe('extractAtomLink + parseAtomEntries', () => {
+ it('prefers alternate href and normalizes publishedAt from updated', () => {
+ const xml = `
+
+
+ Fed issues notice
+
+
+ 2026-03-29T12:30:00Z
+
+ `;
+
+ assert.equal(
+ extractAtomLink(''),
+ '/press/notice-a'
+ );
+
+ assert.deepEqual(normalize(parseAtomEntries(xml, 'https://www.federalreserve.gov/feeds/press_all.xml')), [{
+ title: 'Fed issues notice',
+ link: 'https://www.federalreserve.gov/press/notice-a',
+ publishedAt: '2026-03-29T12:30:00.000Z',
+ }]);
+ });
+});
+
+describe('parseFeed', () => {
+ it('detects Atom feeds automatically', () => {
+ const atom = 'A2026-03-28T00:00:00Z';
+ const parsed = normalize(parseFeed(atom, 'https://example.test/feed'));
+ assert.equal(parsed.length, 1);
+ assert.equal(parsed[0].link, 'https://example.test/a');
+ });
+});
+
+describe('normalizeFeedItems', () => {
+ it('skips incomplete entries and generates deterministic ids', () => {
+ const normalized = normalize(normalizeFeedItems([
+ { title: 'SEC Charges XYZ Corp', link: 'https://example.test/sec', publishedAt: '2026-03-29T14:00:00.000Z' },
+ { title: '', link: 'https://example.test/missing', publishedAt: '2026-03-29T14:00:00.000Z' },
+ ], 'SEC'));
+
+ assert.equal(normalized.length, 1);
+ assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329');
+ });
+});
+
+describe('dedupeAndSortActions', () => {
+ it('deduplicates by canonical link and sorts newest first', () => {
+ const actions = normalize(dedupeAndSortActions([
+ {
+ id: 'older',
+ agency: 'SEC',
+ title: 'Older',
+ link: 'https://example.test/path#frag',
+ publishedAt: '2026-03-28T10:00:00.000Z',
+ },
+ {
+ id: 'newer',
+ agency: 'FDIC',
+ title: 'Newer',
+ link: 'https://example.test/new',
+ publishedAt: '2026-03-30T10:00:00.000Z',
+ },
+ {
+ id: 'duplicate',
+ agency: 'SEC',
+ title: 'Duplicate',
+ link: 'https://example.test/path',
+ publishedAt: '2026-03-29T10:00:00.000Z',
+ },
+ ]));
+
+ assert.deepEqual(actions.map((item) => item.id), ['newer', 'older']);
+ assert.equal(actions[1].link, 'https://example.test/path');
+ });
+});
+
+describe('fetchAllFeeds', () => {
+ const feeds = [
+ { agency: 'SEC', url: 'https://feeds.test/sec', userAgent: 'Custom-SEC-UA' },
+ { agency: 'FDIC', url: 'https://feeds.test/fdic' },
+ ];
+
+ it('returns normalized aggregate when at least one feed succeeds', async () => {
+ const requests = [];
+ const fetchStub = async (url, options) => {
+ requests.push({ url, options });
+ if (url.endsWith('/sec')) {
+ return {
+ ok: true,
+ text: async () => `- SEC Charges Bankhttps://sec.test/aMon, 30 Mar 2026 18:00:00 GMT
`,
+ };
+ }
+ throw new Error('FDIC timeout');
+ };
+
+ const result = normalize(await fetchAllFeeds(fetchStub, feeds));
+ assert.equal(result.length, 1);
+ assert.equal(result[0].agency, 'SEC');
+ assert.equal(requests[0].options.headers['User-Agent'], 'Custom-SEC-UA');
+ assert.equal(requests[1].options.headers['User-Agent'], ctx.CHROME_UA);
+ });
+
+ it('throws when all feeds fail', async () => {
+ await assert.rejects(
+ fetchAllFeeds(async () => { throw new Error('nope'); }, feeds),
+ /All regulatory feeds failed/
+ );
+ });
+});
From f117d7bc8a5f6eaa49a8cd5158fa94fe1a4e44c8 Mon Sep 17 00:00:00 2001
From: lspassos1
Date: Tue, 31 Mar 2026 01:20:29 +0100
Subject: [PATCH 2/3] fix(regulatory): harden feed fetch defaults and action ids
Default the seeder's fetch parameters to a DEFAULT_FETCH wrapper that resolves globalThis.fetch at call time, keep the documented FINRA HTTP exception in place, and include the publish time in generated action ids to avoid same-day collisions.
Validated with: node --test tests/regulatory-seed-unit.test.mjs; node scripts/seed-regulatory-actions.mjs | head -n 20
---
scripts/seed-regulatory-actions.mjs | 16 ++++++++++++----
tests/regulatory-seed-unit.test.mjs | 2 +-
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
index 569de5366..646e705cf 100644
--- a/scripts/seed-regulatory-actions.mjs
+++ b/scripts/seed-regulatory-actions.mjs
@@ -6,12 +6,15 @@ import { CHROME_UA } from './_seed-utils.mjs';
const FEED_TIMEOUT_MS = 15_000;
const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
+const DEFAULT_FETCH = (...args) => globalThis.fetch(...args);
const REGULATORY_FEEDS = [
{ agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
{ agency: 'CFTC', url: 'https://www.cftc.gov/RSS/RSSENF/rssenf.xml' },
{ agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' },
{ agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' },
+ // FINRA still publishes this RSS endpoint over plain HTTP; HTTPS requests fail
+ // from both Node fetch and curl in validation, so keep the official feed URL.
{ agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' },
];
@@ -106,11 +109,16 @@ function yyyymmdd(isoDate) {
return String(isoDate || '').slice(0, 10).replace(/-/g, '');
}
+function hhmmss(isoDate) {
+ return String(isoDate || '').slice(11, 19).replace(/:/g, '');
+}
+
function buildActionId(agency, title, publishedAt) {
const agencySlug = slugifyTitle(agency) || 'agency';
const titleSlug = slugifyTitle(title) || 'untitled';
const datePart = yyyymmdd(publishedAt) || 'undated';
- return `${agencySlug}-${titleSlug}-${datePart}`;
+ const timePart = hhmmss(publishedAt) || '000000';
+ return `${agencySlug}-${titleSlug}-${datePart}-${timePart}`;
}
function parseRssItems(xml, feedUrl) {
@@ -174,7 +182,7 @@ function dedupeAndSortActions(actions) {
return deduped;
}
-async function fetchFeed(feed, fetchImpl = globalThis.fetch) {
+async function fetchFeed(feed, fetchImpl = DEFAULT_FETCH) {
const headers = {
Accept: XML_ACCEPT,
'User-Agent': feed.userAgent || CHROME_UA,
@@ -194,7 +202,7 @@ async function fetchFeed(feed, fetchImpl = globalThis.fetch) {
return normalizeFeedItems(parsed, feed.agency);
}
-async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FEEDS) {
+async function fetchAllFeeds(fetchImpl = DEFAULT_FETCH, feeds = REGULATORY_FEEDS) {
const results = await Promise.allSettled(feeds.map((feed) => fetchFeed(feed, fetchImpl)));
const actions = [];
let successCount = 0;
@@ -217,7 +225,7 @@ async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FE
return dedupeAndSortActions(actions);
}
-async function main(fetchImpl = globalThis.fetch) {
+async function main(fetchImpl = DEFAULT_FETCH) {
const actions = await fetchAllFeeds(fetchImpl);
process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`);
return actions;
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
index 852d1ddb3..40a5d1314 100644
--- a/tests/regulatory-seed-unit.test.mjs
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -115,7 +115,7 @@ describe('normalizeFeedItems', () => {
], 'SEC'));
assert.equal(normalized.length, 1);
- assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329');
+ assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329-140000');
});
});
From 52eb8e900503be6117c3174464b2c29a9f8e4f93 Mon Sep 17 00:00:00 2001
From: lspassos1
Date: Wed, 1 Apr 2026 22:41:48 +0100
Subject: [PATCH 3/3] feat(regulatory): capture feed descriptions in action records
Extract RSS and Atom descriptions into the normalized action payload so later classifier work can use the same parsed feed output. Also adds @ts-check and documents the FINRA HTTP feed constraint.
---
scripts/seed-regulatory-actions.mjs | 11 ++++++++---
tests/regulatory-seed-unit.test.mjs | 30 ++++++++++++++++++++++++++---
2 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
index 646e705cf..14b955902 100644
--- a/scripts/seed-regulatory-actions.mjs
+++ b/scripts/seed-regulatory-actions.mjs
@@ -1,4 +1,5 @@
#!/usr/bin/env node
+// @ts-check
import { pathToFileURL } from 'node:url';
import { CHROME_UA } from './_seed-utils.mjs';
@@ -14,7 +15,8 @@ const REGULATORY_FEEDS = [
{ agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' },
{ agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' },
// FINRA still publishes this RSS endpoint over plain HTTP; HTTPS requests fail
- // from both Node fetch and curl in validation, so keep the official feed URL.
+ // from both Node fetch and curl in validation, so keep the official feed URL
+ // and periodically recheck whether HTTPS starts working.
{ agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' },
];
@@ -128,9 +130,10 @@ function parseRssItems(xml, feedUrl) {
while ((match = itemRegex.exec(xml)) !== null) {
const block = match[1];
const title = getTagValue(block, 'title');
+ const description = getTagValue(block, 'description');
const link = canonicalizeLink(getTagValue(block, 'link'), feedUrl);
const publishedAt = toIsoDate(getTagValue(block, 'pubDate') || getTagValue(block, 'updated'));
- items.push({ title, link, publishedAt });
+ items.push({ title, description, link, publishedAt });
}
return items;
}
@@ -142,11 +145,12 @@ function parseAtomEntries(xml, feedUrl) {
while ((match = entryRegex.exec(xml)) !== null) {
const block = match[1];
const title = getTagValue(block, 'title');
+ const description = getTagValue(block, 'summary') || getTagValue(block, 'content');
const link = canonicalizeLink(extractAtomLink(block), feedUrl);
const publishedAt = toIsoDate(
getTagValue(block, 'updated') || getTagValue(block, 'published') || getTagValue(block, 'pubDate')
);
- entries.push({ title, link, publishedAt });
+ entries.push({ title, description, link, publishedAt });
}
return entries;
}
@@ -163,6 +167,7 @@ function normalizeFeedItems(items, agency) {
id: buildActionId(agency, item.title, item.publishedAt),
agency,
title: item.title,
+ description: item.description || '',
link: item.link,
publishedAt: item.publishedAt,
}));
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
index 40a5d1314..173b63b2a 100644
--- a/tests/regulatory-seed-unit.test.mjs
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -55,11 +55,12 @@ describe('stripHtml', () => {
});
describe('parseRssItems', () => {
- it('extracts RSS items with normalized links and pubDate', () => {
+ it('extracts RSS items with description, normalized links, and pubDate', () => {
const xml = `
-
Issuer]]>
+ fraud & disclosure failures]]>
/news/press-release/2026-10
Mon, 30 Mar 2026 18:00:00 GMT
@@ -67,6 +68,7 @@ describe('parseRssItems', () => {
assert.deepEqual(normalize(parseRssItems(xml, 'https://www.sec.gov/news/pressreleases.rss')), [{
title: 'SEC & Co. Charges Issuer',
+ description: 'Alleges fraud & disclosure failures',
link: 'https://www.sec.gov/news/press-release/2026-10',
publishedAt: '2026-03-30T18:00:00.000Z',
}]);
@@ -74,11 +76,12 @@ describe('parseRssItems', () => {
});
describe('extractAtomLink + parseAtomEntries', () => {
- it('prefers alternate href and normalizes publishedAt from updated', () => {
+ it('prefers alternate href and extracts summary/content with normalized publishedAt', () => {
const xml = `
Fed issues notice
+ policy summary]]>
2026-03-29T12:30:00Z
@@ -92,9 +95,27 @@ describe('extractAtomLink + parseAtomEntries', () => {
assert.deepEqual(normalize(parseAtomEntries(xml, 'https://www.federalreserve.gov/feeds/press_all.xml')), [{
title: 'Fed issues notice',
+ description: 'Detailed policy summary',
link: 'https://www.federalreserve.gov/press/notice-a',
publishedAt: '2026-03-29T12:30:00.000Z',
}]);
+
+ const contentXml = `
+
+
+ FDIC update
+ Formal administrative note
]]>
+
+ 2026-03-28T09:15:00Z
+
+ `;
+
+ assert.deepEqual(normalize(parseAtomEntries(contentXml, 'https://www.fdic.gov/feed')), [{
+ title: 'FDIC update',
+ description: 'Formal administrative note',
+ link: 'https://fdic.example.test/a',
+ publishedAt: '2026-03-28T09:15:00.000Z',
+ }]);
});
});
@@ -111,11 +132,14 @@ describe('normalizeFeedItems', () => {
it('skips incomplete entries and generates deterministic ids', () => {
const normalized = normalize(normalizeFeedItems([
{ title: 'SEC Charges XYZ Corp', link: 'https://example.test/sec', publishedAt: '2026-03-29T14:00:00.000Z' },
+ { title: 'SEC Summary', description: 'extra context', link: 'https://example.test/sec-2', publishedAt: '2026-03-29T14:30:00.000Z' },
{ title: '', link: 'https://example.test/missing', publishedAt: '2026-03-29T14:00:00.000Z' },
], 'SEC'));
- assert.equal(normalized.length, 1);
+ assert.equal(normalized.length, 2);
assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329-140000');
+ assert.equal(normalized[0].description, '');
+ assert.equal(normalized[1].description, 'extra context');
});
});