From cc5903d007cdd2d5299837859eed1b3bac2546bc Mon Sep 17 00:00:00 2001
From: lspassos1 <lspassos@icloud.com>
Date: Tue, 31 Mar 2026 00:34:02 +0100
Subject: [PATCH 1/3] feat(regulatory): add regulatory RSS fetch seeder

Add a standalone seeder that fetches and normalizes SEC, CFTC, Federal Reserve, FDIC, and FINRA regulatory feeds without introducing new dependencies.

The script stays import-safe, tolerates partial feed failure, and emits JSON for the fetch/parse-only phase of the pipeline. Unit tests cover RSS/Atom parsing, deduplication, ordering, and degraded-feed behavior.

Refs #2492
Refs #2493
Refs #2494
Refs #2495
---
 scripts/seed-regulatory-actions.mjs | 257 ++++++++++++++++++++++++++++
 tests/regulatory-seed-unit.test.mjs | 185 ++++++++++++++++++++
 2 files changed, 442 insertions(+)
 create mode 100644 scripts/seed-regulatory-actions.mjs
 create mode 100644 tests/regulatory-seed-unit.test.mjs

diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
new file mode 100644
index 000000000..569de5366
--- /dev/null
+++ b/scripts/seed-regulatory-actions.mjs
@@ -0,0 +1,257 @@
+#!/usr/bin/env node
+
+import { pathToFileURL } from 'node:url';
+import { CHROME_UA } from './_seed-utils.mjs';
+
+const FEED_TIMEOUT_MS = 15_000;
+const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
+const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
+
+const REGULATORY_FEEDS = [
+  { agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
+  { agency: 'CFTC', url: 'https://www.cftc.gov/RSS/RSSENF/rssenf.xml' },
+  { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' },
+  { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' },
+  { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' },
+];
+
+function decodeEntities(input) {
+  if (!input) return '';
+  const named = input
+    .replace(/&amp;/gi, '&')
+    .replace(/&lt;/gi, '<')
+    .replace(/&gt;/gi, '>')
+    .replace(/&quot;/gi, '"')
+    .replace(/&apos;/gi, "'")
+    .replace(/&nbsp;/gi, ' ');
+
+  return named
+    .replace(/&#(\d+);/g, (_, code) => String.fromCodePoint(Number(code)))
+    .replace(/&#x([0-9a-f]+);/gi, (_, code) => String.fromCodePoint(parseInt(code, 16)));
+}
+
+function stripHtml(input) {
+  return decodeEntities(
+    String(input || '')
+      .replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1')
+      .replace(/<[^>]+>/g, ' ')
+  ).replace(/\s+/g, ' ').trim();
+}
+
+function getTagValue(block, tagName) {
+  const match = block.match(new RegExp(`<${tagName}[^>]*>([\\s\\S]*?)<\\/${tagName}>`, 'i'));
+  return stripHtml(match?.[1] || '');
+}
+
+function extractAtomLink(block) {
+  const linkTags = [...block.matchAll(/<link\b([^>]*)\/?>/gi)];
+  if (linkTags.length === 0) return '';
+
+  for (const [, attrs] of linkTags) {
+    const href = attrs.match(/\bhref=["']([^"']+)["']/i)?.[1];
+    const rel = attrs.match(/\brel=["']([^"']+)["']/i)?.[1]?.toLowerCase() || '';
+    if (href && (!rel || rel === 'alternate')) return decodeEntities(href.trim());
+  }
+
+  for (const [, attrs] of linkTags) {
+    const href = attrs.match(/\bhref=["']([^"']+)["']/i)?.[1];
+    if (href) return decodeEntities(href.trim());
+  }
+
+  return '';
+}
+
+function resolveFeedLink(link, feedUrl) {
+  if (!link) return '';
+  try {
+    return new URL(link).href;
+  } catch {}
+  try {
+    return new URL(link, feedUrl).href;
+  } catch {
+    return '';
+  }
+}
+
+function canonicalizeLink(link, feedUrl = '') {
+  const resolved = resolveFeedLink(link, feedUrl);
+  if (!resolved) return '';
+  try {
+    const url = new URL(resolved);
+    url.hash = '';
+    return url.href;
+  } catch {
+    return '';
+  }
+}
+
+function toIsoDate(rawDate) {
+  const value = stripHtml(rawDate);
+  if (!value) return '';
+  const ts = Date.parse(value);
+  return Number.isFinite(ts) ? new Date(ts).toISOString() : '';
+}
+
+function slugifyTitle(title) {
+  return stripHtml(title)
+    .normalize('NFKD')
+    .replace(/[\u0300-\u036f]/g, '')
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, '-')
+    .replace(/^-+|-+$/g, '')
+    .slice(0, 80);
+}
+
+function yyyymmdd(isoDate) {
+  return String(isoDate || '').slice(0, 10).replace(/-/g, '');
+}
+
+function buildActionId(agency, title, publishedAt) {
+  const agencySlug = slugifyTitle(agency) || 'agency';
+  const titleSlug = slugifyTitle(title) || 'untitled';
+  const datePart = yyyymmdd(publishedAt) || 'undated';
+  return `${agencySlug}-${titleSlug}-${datePart}`;
+}
+
+function parseRssItems(xml, feedUrl) {
+  const items = [];
+  const itemRegex = /<item\b[^>]*>([\s\S]*?)<\/item>/gi;
+  let match;
+  while ((match = itemRegex.exec(xml)) !== null) {
+    const block = match[1];
+    const title = getTagValue(block, 'title');
+    const link = canonicalizeLink(getTagValue(block, 'link'), feedUrl);
+    const publishedAt = toIsoDate(getTagValue(block, 'pubDate') || getTagValue(block, 'updated'));
+    items.push({ title, link, publishedAt });
+  }
+  return items;
+}
+
+function parseAtomEntries(xml, feedUrl) {
+  const entries = [];
+  const entryRegex = /<entry\b[^>]*>([\s\S]*?)<\/entry>/gi;
+  let match;
+  while ((match = entryRegex.exec(xml)) !== null) {
+    const block = match[1];
+    const title = getTagValue(block, 'title');
+    const link = canonicalizeLink(extractAtomLink(block), feedUrl);
+    const publishedAt = toIsoDate(
+      getTagValue(block, 'updated') || getTagValue(block, 'published') || getTagValue(block, 'pubDate')
+    );
+    entries.push({ title, link, publishedAt });
+  }
+  return entries;
+}
+
+function parseFeed(xml, feedUrl) {
+  if (/<entry\b/i.test(xml)) return parseAtomEntries(xml, feedUrl);
+  return parseRssItems(xml, feedUrl);
+}
+
+function normalizeFeedItems(items, agency) {
+  return items
+    .filter((item) => item.title && item.link && item.publishedAt)
+    .map((item) => ({
+      id: buildActionId(agency, item.title, item.publishedAt),
+      agency,
+      title: item.title,
+      link: item.link,
+      publishedAt: item.publishedAt,
+    }));
+}
+
+function dedupeAndSortActions(actions) {
+  const seen = new Set();
+  const deduped = [];
+  for (const action of actions) {
+    const key = canonicalizeLink(action.link);
+    if (!key || seen.has(key)) continue;
+    seen.add(key);
+    deduped.push({ ...action, link: key });
+  }
+
+  deduped.sort((a, b) => Date.parse(b.publishedAt) - Date.parse(a.publishedAt));
+  return deduped;
+}
+
+async function fetchFeed(feed, fetchImpl = globalThis.fetch) {
+  const headers = {
+    Accept: XML_ACCEPT,
+    'User-Agent': feed.userAgent || CHROME_UA,
+  };
+
+  const response = await fetchImpl(feed.url, {
+    headers,
+    signal: AbortSignal.timeout(FEED_TIMEOUT_MS),
+  });
+
+  if (!response.ok) {
+    throw new Error(`${feed.agency}: HTTP ${response.status}`);
+  }
+
+  const xml = await response.text();
+  const parsed = parseFeed(xml, feed.url);
+  return normalizeFeedItems(parsed, feed.agency);
+}
+
+async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FEEDS) {
+  const results = await Promise.allSettled(feeds.map((feed) => fetchFeed(feed, fetchImpl)));
+  const actions = [];
+  let successCount = 0;
+
+  for (let index = 0; index < results.length; index += 1) {
+    const result = results[index];
+    const feed = feeds[index];
+    if (result.status === 'fulfilled') {
+      successCount += 1;
+      actions.push(...result.value);
+      continue;
+    }
+    console.error(`[regulatory] ${feed.agency}: ${result.reason?.message || result.reason}`);
+  }
+
+  if (successCount === 0) {
+    throw new Error('All regulatory feeds failed');
+  }
+
+  return dedupeAndSortActions(actions);
+}
+
+async function main(fetchImpl = globalThis.fetch) {
+  const actions = await fetchAllFeeds(fetchImpl);
+  process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`);
+  return actions;
+}
+
+const isDirectRun = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href;
+
+if (isDirectRun) {
+  main().catch((err) => {
+    console.error(`FETCH FAILED: ${err.message || err}`);
+    process.exit(1);
+  });
+}
+
+export {
+  CHROME_UA,
+  FEED_TIMEOUT_MS,
+  REGULATORY_FEEDS,
+  SEC_USER_AGENT,
+  buildActionId,
+  canonicalizeLink,
+  decodeEntities,
+  dedupeAndSortActions,
+  extractAtomLink,
+  fetchAllFeeds,
+  fetchFeed,
+  getTagValue,
+  main,
+  normalizeFeedItems,
+  parseAtomEntries,
+  parseFeed,
+  parseRssItems,
+  resolveFeedLink,
+  slugifyTitle,
+  stripHtml,
+  toIsoDate,
+};
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
new file mode 100644
index 000000000..852d1ddb3
--- /dev/null
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -0,0 +1,185 @@
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { readFileSync } from 'node:fs';
+import vm from 'node:vm';
+
+function normalize(value) {
+  return JSON.parse(JSON.stringify(value));
+}
+
+const seedSrc = readFileSync('scripts/seed-regulatory-actions.mjs', 'utf8');
+
+const pureSrc = seedSrc
+  .replace(/^import\s.*$/gm, '')
+  .replace(/const isDirectRun[\s\S]*?}\n\nexport\s*{[\s\S]*?};?\s*$/m, '');
+
+const ctx = vm.createContext({
+  console,
+  Date,
+  Math,
+  Number,
+  Array,
+  Set,
+  String,
+  RegExp,
+  URL,
+  URLSearchParams,
+  AbortSignal,
+  CHROME_UA: 'Mozilla/5.0 (test)',
+});
+
+vm.runInContext(pureSrc, ctx);
+
+const {
+  decodeEntities,
+  stripHtml,
+  extractAtomLink,
+  parseRssItems,
+  parseAtomEntries,
+  parseFeed,
+  normalizeFeedItems,
+  dedupeAndSortActions,
+  fetchAllFeeds,
+} = ctx;
+
+describe('decodeEntities', () => {
+  it('decodes named and numeric entities', () => {
+    assert.equal(decodeEntities('Tom &amp; Jerry &#38; &#x26;'), 'Tom & Jerry & &');
+  });
+});
+
+describe('stripHtml', () => {
+  it('removes tags and CDATA while preserving text', () => {
+    assert.equal(stripHtml('<![CDATA[Hello <strong>world</strong>]]>'), 'Hello world');
+  });
+});
+
+describe('parseRssItems', () => {
+  it('extracts RSS items with normalized links and pubDate', () => {
+    const xml = `<?xml version="1.0"?>
+      <rss><channel>
+        <item>
+          <title><![CDATA[SEC &amp; Co. Charges <b>Issuer</b>]]></title>
+          <link>/news/press-release/2026-10</link>
+          <pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate>
+        </item>
+      </channel></rss>`;
+
+    assert.deepEqual(normalize(parseRssItems(xml, 'https://www.sec.gov/news/pressreleases.rss')), [{
+      title: 'SEC & Co. Charges Issuer',
+      link: 'https://www.sec.gov/news/press-release/2026-10',
+      publishedAt: '2026-03-30T18:00:00.000Z',
+    }]);
+  });
+});
+
+describe('extractAtomLink + parseAtomEntries', () => {
+  it('prefers alternate href and normalizes publishedAt from updated', () => {
+    const xml = `<?xml version="1.0"?>
+      <feed xmlns="http://www.w3.org/2005/Atom">
+        <entry>
+          <title>Fed issues notice</title>
+          <link rel="self" href="https://example.test/self" />
+          <link rel="alternate" href="/press/notice-a" />
+          <updated>2026-03-29T12:30:00Z</updated>
+        </entry>
+      </feed>`;
+
+    assert.equal(
+      extractAtomLink('<entry><link rel="self" href="https://example.test/self" /><link rel="alternate" href="/press/notice-a" /></entry>'),
+      '/press/notice-a'
+    );
+
+    assert.deepEqual(normalize(parseAtomEntries(xml, 'https://www.federalreserve.gov/feeds/press_all.xml')), [{
+      title: 'Fed issues notice',
+      link: 'https://www.federalreserve.gov/press/notice-a',
+      publishedAt: '2026-03-29T12:30:00.000Z',
+    }]);
+  });
+});
+
+describe('parseFeed', () => {
+  it('detects Atom feeds automatically', () => {
+    const atom = '<feed><entry><title>A</title><link href="https://example.test/a" /><updated>2026-03-28T00:00:00Z</updated></entry></feed>';
+    const parsed = normalize(parseFeed(atom, 'https://example.test/feed'));
+    assert.equal(parsed.length, 1);
+    assert.equal(parsed[0].link, 'https://example.test/a');
+  });
+});
+
+describe('normalizeFeedItems', () => {
+  it('skips incomplete entries and generates deterministic ids', () => {
+    const normalized = normalize(normalizeFeedItems([
+      { title: 'SEC Charges XYZ Corp', link: 'https://example.test/sec', publishedAt: '2026-03-29T14:00:00.000Z' },
+      { title: '', link: 'https://example.test/missing', publishedAt: '2026-03-29T14:00:00.000Z' },
+    ], 'SEC'));
+
+    assert.equal(normalized.length, 1);
+    assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329');
+  });
+});
+
+describe('dedupeAndSortActions', () => {
+  it('deduplicates by canonical link and sorts newest first', () => {
+    const actions = normalize(dedupeAndSortActions([
+      {
+        id: 'older',
+        agency: 'SEC',
+        title: 'Older',
+        link: 'https://example.test/path#frag',
+        publishedAt: '2026-03-28T10:00:00.000Z',
+      },
+      {
+        id: 'newer',
+        agency: 'FDIC',
+        title: 'Newer',
+        link: 'https://example.test/new',
+        publishedAt: '2026-03-30T10:00:00.000Z',
+      },
+      {
+        id: 'duplicate',
+        agency: 'SEC',
+        title: 'Duplicate',
+        link: 'https://example.test/path',
+        publishedAt: '2026-03-29T10:00:00.000Z',
+      },
+    ]));
+
+    assert.deepEqual(actions.map((item) => item.id), ['newer', 'older']);
+    assert.equal(actions[1].link, 'https://example.test/path');
+  });
+});
+
+describe('fetchAllFeeds', () => {
+  const feeds = [
+    { agency: 'SEC', url: 'https://feeds.test/sec', userAgent: 'Custom-SEC-UA' },
+    { agency: 'FDIC', url: 'https://feeds.test/fdic' },
+  ];
+
+  it('returns normalized aggregate when at least one feed succeeds', async () => {
+    const requests = [];
+    const fetchStub = async (url, options) => {
+      requests.push({ url, options });
+      if (url.endsWith('/sec')) {
+        return {
+          ok: true,
+          text: async () => `<rss><channel><item><title>SEC Charges Bank</title><link>https://sec.test/a</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`,
+        };
+      }
+      throw new Error('FDIC timeout');
+    };
+
+    const result = normalize(await fetchAllFeeds(fetchStub, feeds));
+    assert.equal(result.length, 1);
+    assert.equal(result[0].agency, 'SEC');
+    assert.equal(requests[0].options.headers['User-Agent'], 'Custom-SEC-UA');
+    assert.equal(requests[1].options.headers['User-Agent'], ctx.CHROME_UA);
+  });
+
+  it('throws when all feeds fail', async () => {
+    await assert.rejects(
+      fetchAllFeeds(async () => { throw new Error('nope'); }, feeds),
+      /All regulatory feeds failed/
+    );
+  });
+});

From f117d7bc8a5f6eaa49a8cd5158fa94fe1a4e44c8 Mon Sep 17 00:00:00 2001
From: lspassos1 <lspassos@icloud.com>
Date: Tue, 31 Mar 2026 01:20:29 +0100
Subject: [PATCH 2/3] fix(regulatory): harden feed fetch defaults and action
 ids

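Use the repository-standard fetch wrapper pattern in the seeder defaults (a `DEFAULT_FETCH` function that resolves `globalThis.fetch` at call time), keep the FINRA plain-HTTP feed URL and document why it is an exception, and append the UTC publish time (HHMMSS) to generated action ids so that same-titled items from one agency published on the same day no longer collide.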

Validated with: node --test tests/regulatory-seed-unit.test.mjs; node scripts/seed-regulatory-actions.mjs | head -n 20
---
 scripts/seed-regulatory-actions.mjs | 16 ++++++++++++----
 tests/regulatory-seed-unit.test.mjs |  2 +-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
index 569de5366..646e705cf 100644
--- a/scripts/seed-regulatory-actions.mjs
+++ b/scripts/seed-regulatory-actions.mjs
@@ -6,12 +6,15 @@ import { CHROME_UA } from './_seed-utils.mjs';
 const FEED_TIMEOUT_MS = 15_000;
 const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
 const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
+const DEFAULT_FETCH = (...args) => globalThis.fetch(...args);
 
 const REGULATORY_FEEDS = [
   { agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
   { agency: 'CFTC', url: 'https://www.cftc.gov/RSS/RSSENF/rssenf.xml' },
   { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' },
   { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' },
+  // FINRA still publishes this RSS endpoint over plain HTTP; HTTPS requests fail
+  // from both Node fetch and curl in validation, so keep the official feed URL.
   { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' },
 ];
 
@@ -106,11 +109,16 @@ function yyyymmdd(isoDate) {
   return String(isoDate || '').slice(0, 10).replace(/-/g, '');
 }
 
+function hhmmss(isoDate) {
+  return String(isoDate || '').slice(11, 19).replace(/:/g, '');
+}
+
 function buildActionId(agency, title, publishedAt) {
   const agencySlug = slugifyTitle(agency) || 'agency';
   const titleSlug = slugifyTitle(title) || 'untitled';
   const datePart = yyyymmdd(publishedAt) || 'undated';
-  return `${agencySlug}-${titleSlug}-${datePart}`;
+  const timePart = hhmmss(publishedAt) || '000000';
+  return `${agencySlug}-${titleSlug}-${datePart}-${timePart}`;
 }
 
 function parseRssItems(xml, feedUrl) {
@@ -174,7 +182,7 @@ function dedupeAndSortActions(actions) {
   return deduped;
 }
 
-async function fetchFeed(feed, fetchImpl = globalThis.fetch) {
+async function fetchFeed(feed, fetchImpl = DEFAULT_FETCH) {
   const headers = {
     Accept: XML_ACCEPT,
     'User-Agent': feed.userAgent || CHROME_UA,
@@ -194,7 +202,7 @@ async function fetchFeed(feed, fetchImpl = globalThis.fetch) {
   return normalizeFeedItems(parsed, feed.agency);
 }
 
-async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FEEDS) {
+async function fetchAllFeeds(fetchImpl = DEFAULT_FETCH, feeds = REGULATORY_FEEDS) {
   const results = await Promise.allSettled(feeds.map((feed) => fetchFeed(feed, fetchImpl)));
   const actions = [];
   let successCount = 0;
@@ -217,7 +225,7 @@ async function fetchAllFeeds(fetchImpl = globalThis.fetch, feeds = REGULATORY_FE
   return dedupeAndSortActions(actions);
 }
 
-async function main(fetchImpl = globalThis.fetch) {
+async function main(fetchImpl = DEFAULT_FETCH) {
   const actions = await fetchAllFeeds(fetchImpl);
   process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`);
   return actions;
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
index 852d1ddb3..40a5d1314 100644
--- a/tests/regulatory-seed-unit.test.mjs
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -115,7 +115,7 @@ describe('normalizeFeedItems', () => {
     ], 'SEC'));
 
     assert.equal(normalized.length, 1);
-    assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329');
+    assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329-140000');
   });
 });
 

From 52eb8e900503be6117c3174464b2c29a9f8e4f93 Mon Sep 17 00:00:00 2001
From: lspassos1 <lspassos@icloud.com>
Date: Wed, 1 Apr 2026 22:41:48 +0100
Subject: [PATCH 3/3] feat(regulatory): capture feed descriptions in action
 records

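Extract RSS `<description>` and Atom `<summary>` (falling back to `<content>`) text into the normalized action payload as a `description` field (empty string when absent) so later classifier work can use the same parsed feed output. Also add `// @ts-check` and extend the FINRA HTTP feed comment with a reminder to periodically recheck whether HTTPS works.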
---
 scripts/seed-regulatory-actions.mjs | 11 ++++++++---
 tests/regulatory-seed-unit.test.mjs | 30 ++++++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
index 646e705cf..14b955902 100644
--- a/scripts/seed-regulatory-actions.mjs
+++ b/scripts/seed-regulatory-actions.mjs
@@ -1,4 +1,5 @@
 #!/usr/bin/env node
+// @ts-check
 
 import { pathToFileURL } from 'node:url';
 import { CHROME_UA } from './_seed-utils.mjs';
@@ -14,7 +15,8 @@ const REGULATORY_FEEDS = [
   { agency: 'Federal Reserve', url: 'https://www.federalreserve.gov/feeds/press_all.xml' },
   { agency: 'FDIC', url: 'https://public.govdelivery.com/topics/USFDIC_26/feed.rss' },
   // FINRA still publishes this RSS endpoint over plain HTTP; HTTPS requests fail
-  // from both Node fetch and curl in validation, so keep the official feed URL.
+  // from both Node fetch and curl in validation, so keep the official feed URL
+  // and periodically recheck whether HTTPS starts working.
   { agency: 'FINRA', url: 'http://feeds.finra.org/FINRANotices' },
 ];
 
@@ -128,9 +130,10 @@ function parseRssItems(xml, feedUrl) {
   while ((match = itemRegex.exec(xml)) !== null) {
     const block = match[1];
     const title = getTagValue(block, 'title');
+    const description = getTagValue(block, 'description');
     const link = canonicalizeLink(getTagValue(block, 'link'), feedUrl);
     const publishedAt = toIsoDate(getTagValue(block, 'pubDate') || getTagValue(block, 'updated'));
-    items.push({ title, link, publishedAt });
+    items.push({ title, description, link, publishedAt });
   }
   return items;
 }
@@ -142,11 +145,12 @@ function parseAtomEntries(xml, feedUrl) {
   while ((match = entryRegex.exec(xml)) !== null) {
     const block = match[1];
     const title = getTagValue(block, 'title');
+    const description = getTagValue(block, 'summary') || getTagValue(block, 'content');
     const link = canonicalizeLink(extractAtomLink(block), feedUrl);
     const publishedAt = toIsoDate(
       getTagValue(block, 'updated') || getTagValue(block, 'published') || getTagValue(block, 'pubDate')
     );
-    entries.push({ title, link, publishedAt });
+    entries.push({ title, description, link, publishedAt });
   }
   return entries;
 }
@@ -163,6 +167,7 @@ function normalizeFeedItems(items, agency) {
       id: buildActionId(agency, item.title, item.publishedAt),
       agency,
       title: item.title,
+      description: item.description || '',
       link: item.link,
       publishedAt: item.publishedAt,
     }));
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
index 40a5d1314..173b63b2a 100644
--- a/tests/regulatory-seed-unit.test.mjs
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -55,11 +55,12 @@ describe('stripHtml', () => {
 });
 
 describe('parseRssItems', () => {
-  it('extracts RSS items with normalized links and pubDate', () => {
+  it('extracts RSS items with description, normalized links, and pubDate', () => {
     const xml = `<?xml version="1.0"?>
       <rss><channel>
         <item>
           <title><![CDATA[SEC &amp; Co. Charges <b>Issuer</b>]]></title>
+          <description><![CDATA[Alleges <strong>fraud</strong> &amp; disclosure failures]]></description>
           <link>/news/press-release/2026-10</link>
           <pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate>
         </item>
@@ -67,6 +68,7 @@ describe('parseRssItems', () => {
 
     assert.deepEqual(normalize(parseRssItems(xml, 'https://www.sec.gov/news/pressreleases.rss')), [{
       title: 'SEC & Co. Charges Issuer',
+      description: 'Alleges fraud & disclosure failures',
       link: 'https://www.sec.gov/news/press-release/2026-10',
       publishedAt: '2026-03-30T18:00:00.000Z',
     }]);
@@ -74,11 +76,12 @@ describe('parseRssItems', () => {
 });
 
 describe('extractAtomLink + parseAtomEntries', () => {
-  it('prefers alternate href and normalizes publishedAt from updated', () => {
+  it('prefers alternate href and extracts summary/content with normalized publishedAt', () => {
     const xml = `<?xml version="1.0"?>
       <feed xmlns="http://www.w3.org/2005/Atom">
         <entry>
           <title>Fed issues notice</title>
+          <summary><![CDATA[Detailed <b>policy</b> summary]]></summary>
           <link rel="self" href="https://example.test/self" />
           <link rel="alternate" href="/press/notice-a" />
           <updated>2026-03-29T12:30:00Z</updated>
@@ -92,9 +95,27 @@ describe('extractAtomLink + parseAtomEntries', () => {
 
     assert.deepEqual(normalize(parseAtomEntries(xml, 'https://www.federalreserve.gov/feeds/press_all.xml')), [{
       title: 'Fed issues notice',
+      description: 'Detailed policy summary',
       link: 'https://www.federalreserve.gov/press/notice-a',
       publishedAt: '2026-03-29T12:30:00.000Z',
     }]);
+
+    const contentXml = `<?xml version="1.0"?>
+      <feed xmlns="http://www.w3.org/2005/Atom">
+        <entry>
+          <title>FDIC update</title>
+          <content type="html"><![CDATA[<p>Formal <strong>administrative</strong> note</p>]]></content>
+          <link href="https://fdic.example.test/a" />
+          <published>2026-03-28T09:15:00Z</published>
+        </entry>
+      </feed>`;
+
+    assert.deepEqual(normalize(parseAtomEntries(contentXml, 'https://www.fdic.gov/feed')), [{
+      title: 'FDIC update',
+      description: 'Formal administrative note',
+      link: 'https://fdic.example.test/a',
+      publishedAt: '2026-03-28T09:15:00.000Z',
+    }]);
   });
 });
 
@@ -111,11 +132,14 @@ describe('normalizeFeedItems', () => {
   it('skips incomplete entries and generates deterministic ids', () => {
     const normalized = normalize(normalizeFeedItems([
       { title: 'SEC Charges XYZ Corp', link: 'https://example.test/sec', publishedAt: '2026-03-29T14:00:00.000Z' },
+      { title: 'SEC Summary', description: 'extra context', link: 'https://example.test/sec-2', publishedAt: '2026-03-29T14:30:00.000Z' },
       { title: '', link: 'https://example.test/missing', publishedAt: '2026-03-29T14:00:00.000Z' },
     ], 'SEC'));
 
-    assert.equal(normalized.length, 1);
+    assert.equal(normalized.length, 2);
     assert.equal(normalized[0].id, 'sec-sec-charges-xyz-corp-20260329-140000');
+    assert.equal(normalized[0].description, '');
+    assert.equal(normalized[1].description, 'extra context');
   });
 });