diff --git a/crawler/collectors/bis.py b/crawler/collectors/bis.py
index 3acb6b6..c06fd16 100644
--- a/crawler/collectors/bis.py
+++ b/crawler/collectors/bis.py
@@ -31,7 +31,7 @@
 
 
 BASE_URL = "https://www.bis.gov"
-START_URL = "https://www.bis.gov/news-updates"
+START_URL = f"{BASE_URL}/news-updates"
 
 HEADLESS = True
 WAIT_SEC = 15
@@ -141,8 +141,6 @@ def extract_card_links_from_page(driver: webdriver.Chrome) -> List[Dict]:
         title_tag = a_tag.find("h3")
         if title_tag:
             title = clean_text(title_tag.get_text(" ", strip=True))
-        else:
-            title = clean_text(a_tag.get_text(" ", strip=True))
 
         if not title:
             continue
diff --git a/crawler/collectors/fed.py b/crawler/collectors/fed.py
index 26d0d99..831e710 100644
--- a/crawler/collectors/fed.py
+++ b/crawler/collectors/fed.py
@@ -17,41 +17,33 @@
 from crawler.support_legacy.data_paths import collected_csv_path
 
 BASE_URL = "https://www.federalreserve.gov"
-CALENDAR_URL = "https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm"
+CALENDAR_URL = f"{BASE_URL}/monetarypolicy/fomccalendars.htm"
 
 HEADERS = {
     "User-Agent": "Mozilla/5.0"
 }
 
-# FOMC 캘린더에서 회의 월로 사용되는 값들
-MONTHS = {
-    "January", "February", "March", "April", "May", "June",
-    "July", "August", "September", "October", "November", "December",
-    "Apr/May"
-}
-
 
 def crawl_implementation_note(url: str) -> dict:
     """
-    FOMC Implementation Note 페이지에서
-    날짜, 제목, 본문 텍스트를 추출한다.
+    Implementation Note 상세 페이지에서
+    공개일, 제목, 본문 텍스트를 추출한다.
     """
     response = requests.get(url, headers=HEADERS, timeout=20)
     response.raise_for_status()
 
     soup = BeautifulSoup(response.text, "html.parser")
 
-    # 페이지 상단에 표시된 공식 게시 날짜
+    # 페이지 상단에 노출된 공식 게시 날짜를 읽는다.
     date_tag = soup.find("p", class_="article__time")
     release_date = date_tag.get_text(" ", strip=True) if date_tag else ""
 
-    # 페이지 제목
+    # 제목은 보통 h3에 들어 있다.
     title_tag = soup.find("h3")
     title = title_tag.get_text(" ", strip=True) if title_tag else ""
 
-    # Implementation Note 본문이 들어 있는 div 탐색
-    # col-xs-12 클래스를 가진 div들 중에서 heading 영역은 제외하고
-    # 실제 본문이 들어 있는 첫 번째 div를 article로 사용
+    # 같은 폭의 div가 여러 개 있을 수 있으므로,
+    # heading 블록을 제외한 첫 번째 본문 컨테이너를 사용한다.
     divs = soup.find_all("div", class_="col-xs-12")
 
     article = None
@@ -64,19 +56,18 @@ def crawl_implementation_note(url: str) -> dict:
     contents = []
 
     if article:
-        # 본문 안에서 문단(p), 목록(li), 인용(blockquote)만 수집
+        # 문단, 목록, 인용문만 모아 본문으로 정리한다.
         for tag in article.find_all(["p", "li", "blockquote"]):
             text = tag.get_text(" ", strip=True)
             if not text:
                 continue
 
-            # 목록 항목은 나중에 구분하기 쉽도록 앞에 '-' 추가
+            # 목록 항목은 본문 안에서도 구분되도록 접두사를 붙인다.
             if tag.name == "li":
                 text = f"- {text}"
 
             contents.append(text)
 
-    # 줄바꿈 기준으로 하나의 긴 텍스트로 합침
     body_text = "\n".join(contents)
 
     return {
@@ -89,65 +80,62 @@ def crawl_implementation_note(url: str) -> dict:
 
 def crawl_fomc_statement(url: str) -> dict:
     """
-    FOMC Statement 페이지에서
-    날짜, 제목, 배포시각, 본문 텍스트를 추출한다.
+    FOMC statement 상세 페이지에서
+    공개일, 배포 시각, 제목, 본문 텍스트를 추출한다.
     """
     response = requests.get(url, headers=HEADERS, timeout=20)
     response.raise_for_status()
 
     soup = BeautifulSoup(response.text, "html.parser")
 
-    # 페이지 상단 날짜
+    # 페이지 상단 게시 날짜
     date_tag = soup.find("p", class_="article__time")
     release_date = date_tag.get_text(" ", strip=True) if date_tag else ""
 
-    # Statement 제목
+    # statement 제목
     title_tag = soup.find("h3")
     title = title_tag.get_text(" ", strip=True) if title_tag else ""
 
-    # 제목 바로 아래 p 태그에 배포 시각이 있는 경우가 많음
-    # 예: "For release at 2:00 p.m. EDT"
+    # 제목 바로 아래 p 태그에 배포 시각이 붙는 경우가 많다.
     release_time = ""
     if title_tag:
         next_p = title_tag.find_next("p")
         if next_p:
-            release_time = next_p.get_text(" ", strip=True)
+            release_time_text = next_p.get_text(" ", strip=True)
+            release_time = release_time_text.split(" Share", 1)[0].strip()
 
-    paragraphs = []
-
-    if title_tag:
-        # 제목 이후에 나오는 태그들을 순서대로 탐색
-        for tag in title_tag.find_all_next():
+    # statement 본문은 보통 col-sm-8 폭의 본문 영역에 들어 있다.
+    divs = soup.find_all("div", class_="col-sm-8")
 
-            # 다음 큰 섹션이 시작되면 본문 수집 종료
-            if tag.name in ["hr", "h3", "h4"]:
-                break
+    article = None
+    for div in divs:
+        classes = div.get("class", [])
+        if "heading" not in classes:
+            article = div
+            break
 
-            # Statement 본문은 주로 p 태그에 들어 있으므로 p만 수집
-            if tag.name != "p":
-                continue
+    contents = []
+    stop_texts = ("for media inquiries", "implementation note issued")
 
+    if article:
+        # 본문 뒤쪽의 연락처/관련 안내 구간이 나오기 전까지만 수집한다.
+        for tag in article.find_all(["p", "li", "blockquote"]):
             text = tag.get_text(" ", strip=True)
-
-            # 빈 문단은 제외
             if not text:
                 continue
 
-            # 배포 시각 문장은 본문이 아니므로 제외
+            if tag.name == "li":
+                text = f"- {text}"
+
             if text == release_time:
                 continue
 
-            lowered = text.lower()
-
-            # 하단 연락처나 관련 링크 영역이 시작되면 종료
-            if lowered.startswith("for media inquiries"):
-                break
-            if lowered.startswith("implementation note issued"):
+            if text.lower().startswith(stop_texts):
                 break
 
-            paragraphs.append(text)
+            contents.append(text)
 
-    body_text = "\n".join(paragraphs)
+    body_text = "\n".join(contents)
 
     return {
         "release_date": release_date,
@@ -159,37 +147,46 @@ def crawl_fomc_statement(url: str) -> dict:
 
 def crawl_minutes(url: str) -> dict:
     """
-    FOMC Minutes 페이지에서
+    FOMC minutes 상세 페이지에서
     제목과 본문 텍스트를 추출한다.
-    release_date는 캘린더 페이지의 '(Released ...)' 문구에서 별도로 추출한다.
+
+    release_date는 상세 페이지가 아니라 캘린더 페이지의
+    '(Released ...)' 문구에서 따로 채운다.
     """
     response = requests.get(url, headers=HEADERS, timeout=20)
     response.raise_for_status()
 
     soup = BeautifulSoup(response.text, "html.parser")
 
-    # Minutes 전체 본문 영역
+    # minutes 전체 본문이 들어 있는 컨테이너
     article = soup.find("div", id="article")
 
-    # Minutes 제목
-    title_tag = soup.find("h3")
+    # 제목은 본문 컨테이너 안의 h3에서 읽는다.
+    title_tag = article.find("h3") if article else None
     title = title_tag.get_text(" ", strip=True) if title_tag else ""
 
     contents = []
 
     if article:
-        # 본문 안의 문단, 목록, 인용문 수집
-        for tag in article.find_all(["p", "li", "blockquote"]):
+        # minutes는 뒤쪽에 Notation Vote, Attendance 같은 부록 구간이 붙기 때문에
+        # 핵심 본문으로 볼 수 있는 문단과 목록까지만 수집한다.
+        for tag in article.find_all(["p", "li"]):
             text = tag.get_text(" ", strip=True)
 
             if not text:
                 continue
 
-            # 숫자만 있는 경우는 각주 번호일 가능성이 높으므로 제거
+            # 숫자만 있는 줄은 각주 번호일 가능성이 높아서 제외한다.
             if text.isdigit():
                 continue
 
-            # 목록 항목 구분용 기호 추가
+            # 본문 이후의 부록/참석자 구간이 시작되면 수집을 멈춘다.
+            lowered = text.lower()
+            if lowered.startswith("notation vote"):
+                break
+            if lowered.startswith("attendance"):
+                break
+
             if tag.name == "li":
                 text = f"- {text}"
 
@@ -206,115 +203,100 @@ def crawl_minutes(url: str) -> dict:
 
 
 def main() -> None:
-    # FOMC 캘린더 페이지 요청
+    # FOMC 연간 캘린더 페이지를 가져온다.
     response = requests.get(CALENDAR_URL, headers=HEADERS, timeout=20)
     response.raise_for_status()
 
-    # 캘린더 페이지 HTML 파싱
+    # 캘린더 HTML을 파싱한다.
     soup = BeautifulSoup(response.text, "html.parser")
 
     results = []
 
-    # 연도별 FOMC 섹션 탐색
-    # 예: "2025 FOMC Meetings", "2024 FOMC Meetings"
-    year_sections = soup.find_all("div", attrs={"class": "panel-heading"})
+    # 연도별 패널을 순회하며 "2025 FOMC Meetings" 같은 섹션만 고른다.
+    sections = soup.find_all("div", class_="panel-default")
 
-    for section in year_sections:
+    for section in sections:
         heading = section.find("h4")
         if heading is None:
             continue
 
-        # 연도 헤더에서 실제 FOMC 회의 연도인지 확인
         heading_text = heading.get_text(" ", strip=True)
         match = re.match(r"(\d{4}) FOMC Meetings", heading_text)
         if not match:
             continue
 
-        # 현재 연도 섹션 아래의 형제 노드들을 순서대로 확인
-        node = section.find_next_sibling()
-
-        while node:
-
-            # strong 태그가 있는 노드만 회의 정보 블록으로 간주
-            if node.find("strong"):
-
-                # 현재 노드의 첫 부분에서 월 이름 추출
-                # 현재 사이트 구조상 node.contents[1]에 월 텍스트가 들어 있음
-                text = node.contents[1].get_text(" ", strip=True)
-
-                # 회의가 SEP인지 여부(월 매칭이 실패해도 기본값은 안전하게 유지)
-                is_sep = False
-                if text in MONTHS:
-
-                    # 현재 회의 날짜 범위 추출
-                    # 예: "27-28", "17-18*"
-                    date_node = node.contents[3]
-                    meeting_period = date_node.get_text(" ", strip=True)
-
-                    # 별표(*)가 붙은 회의는 SEP 회의로 처리
-                    is_sep = "*" in meeting_period
-
-                # 현재 회의 블록 안의 모든 링크 순회
-                for link in node.find_all("a", href=True):
-
-                    label = link.get_text(" ", strip=True).lower()
-                    url = urljoin(BASE_URL, link["href"])
-
-                    doc_type = None
-                    article = None
-
-                    # Implementation Note 링크인 경우
-                    if "implementation note" in label:
-                        doc_type = "implementation_note"
-                        article = crawl_implementation_note(url)
-
-                    # HTML 링크인 경우 부모 strong 텍스트를 보고
-                    # Statement인지 Minutes인지 구분
-                    elif label == "html":
-
-                        parent_title = link.parent.strong.get_text(" ", strip=True).lower()
-
-                        if "statement:" in parent_title:
-                            doc_type = "statement"
-                            article = crawl_fomc_statement(url)
-
-                        elif "minutes:" in parent_title:
-                            doc_type = "minutes"
-                            article = crawl_minutes(url)
-
-                            # Minutes release 날짜는 개별 minutes 페이지가 아니라
-                            # 캘린더 페이지의 '(Released ...)' 문구에서 추출
-                            release_match = re.search(
-                                r"Released ([A-Za-z]+ \d{1,2}, \d{4})",
-                                node.get_text(" ", strip=True)
-                            )
-
-                            release_date = release_match.group(1) if release_match else None
-                            article["release_date"] = release_date
-
-                    # 정상적으로 문서 정보를 추출한 경우 결과 저장
-                    if doc_type and article:
-                        results.append({
-                            "release_date": article["release_date"],
-                            "release_time": article["release_time"],
-                            "is_sep": is_sep,
-                            "category": "FOMC",
-                            "doc_type": doc_type,
-                            "url": url,
-                            "title": article["title"],
-                            "body": article["body"]
-                        })
-
-            # 다음 회의 블록으로 이동
-            time.sleep(0.5)
-            node = node.find_next_sibling()
-
-    # 결과를 데이터프레임으로 변환하고 중복 제거
+        # 각 연도 섹션 안에서 개별 회의 블록을 찾는다.
+        meetings = section.find_all("div", class_="fomc-meeting")
+
+        for meeting in meetings:
+            date_tag = meeting.find("strong", class_="fomc-meeting__date")
+            date = date_tag.get_text(" ", strip=True) if date_tag else ""
+
+            # 날짜 문자열에 별표가 붙은 경우 SEP 회의로 간주한다.
+            is_sep = "*" in date
+
+            # 회의 블록 안의 문서 링크를 순회한다.
+            for link in meeting.find_all("a", href=True):
+                label = link.get_text(" ", strip=True).lower()
+                url = urljoin(BASE_URL, link["href"])
+
+                doc_type = None
+                article = None
+
+                # Implementation Note 링크는 전용 파서로 처리한다.
+                if "implementation note" in label:
+                    doc_type = "implementation_note"
+                    article = crawl_implementation_note(url)
+
+                # HTML 링크는 부모 strong 텍스트를 보고
+                # statement인지 minutes인지 구분한다.
+                elif label == "html":
+                    parent_strong = link.parent.find("strong") if link.parent else None
+                    if parent_strong is None:
+                        continue
+
+                    parent_title = parent_strong.get_text(" ", strip=True).lower()
+
+                    if "statement:" in parent_title:
+                        doc_type = "statement"
+                        article = crawl_fomc_statement(url)
+
+                    elif "minutes:" in parent_title:
+                        doc_type = "minutes"
+                        article = crawl_minutes(url)
+
+                        # minutes 공개일은 상세 페이지보다 캘린더 문구가 더 명확해서
+                        # 현재 링크가 속한 블록의 Released 문구에서 추출한다.
+                        release_match = re.search(
+                            r"Released ([A-Za-z]+ \d{1,2}, \d{4})",
+                            link.parent.get_text(" ", strip=True)
+                        )
+
+                        release_date = release_match.group(1) if release_match else None
+                        article["release_date"] = release_date
+
+                # 문서 파싱이 성공한 경우 표준 레코드로 저장한다.
+                if doc_type and article:
+                    results.append({
+                        "release_date": article["release_date"],
+                        "release_time": article["release_time"],
+                        "is_sep": is_sep,
+                        "category": "FOMC",
+                        "doc_type": doc_type,
+                        "url": url,
+                        "title": article["title"],
+                        "body": article["body"]
+                    })
+
+                # 연속 요청 부담을 줄이기 위해 짧게 쉰다.
+                time.sleep(0.5)
+
+    # 결과를 DataFrame으로 정리하고 중복을 제거한다.
     df = pd.DataFrame(results).drop_duplicates()
 
     print(df.head(20))
 
-    # CSV 파일로 저장
+    # 수집 결과를 CSV로 저장한다.
     df.to_csv(collected_csv_path("fed_fomc_links.csv"), index=False, encoding="utf-8-sig")
 
 
diff --git a/crawler/collectors/ucsb.py b/crawler/collectors/ucsb.py
new file mode 100644
index 0000000..f2066a5
--- /dev/null
+++ b/crawler/collectors/ucsb.py
@@ -0,0 +1,480 @@
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import sys
+import time
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Mapping, Sequence
+from urllib.parse import urljoin
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+
+PROJECT_ROOT = Path(__file__).resolve().parents[2]
+PROJECT_ROOT_STR = str(PROJECT_ROOT)
+
+if PROJECT_ROOT_STR not in sys.path:
+    sys.path.insert(0, PROJECT_ROOT_STR)
+
+from crawler.support_legacy.data_paths import collected_csv_path
+
+BASE_URL = "https://www.presidency.ucsb.edu"
+DEFAULT_OUTPUT_CSV = collected_csv_path("ucsb_presidential_documents.csv")
+DEFAULT_KEYWORD_CONFIG_PATH = Path(__file__).with_name("ucsb_keywords.json")
+ITEMS_PER_PAGE = 20
+
+HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+        "AppleWebKit/537.36 (KHTML, like Gecko) "
+        "Chrome/123.0.0.0 Safari/537.36"
+    )
+}
+
+# Listing-page URLs of the UCSB document categories currently being collected.
+DOC_TYPE_URLS = {
+    "Executive Orders": (
+        f"{BASE_URL}/documents/app-categories/"
+        "written-presidential-orders/presidential/executive-orders"
+    ),
+    "Press Conferences": f"{BASE_URL}/documents/app-categories/presidential/news-conferences",
+    "Fact Sheets": f"{BASE_URL}/documents/app-attributes/fact-sheets",
+}
+
+# Section headings that mark the start of the trailing metadata/navigation area.
+# Article body collection stops when one of these is reached.
+STOP_SECTION_TITLES = {
+    "filed under",
+    "categories",
+    "simple search of our archives",
+}
+
+# Example of a date heading on a UCSB listing page: "April 5, 2026"
+DATE_PATTERN = re.compile(r"^[A-Z][a-z]+ \d{1,2}, \d{4}$")
+
+
+def clean_text(text: str) -> str:
+    """
+    Collapse runs of whitespace (including newlines) into one space and trim.
+
+    Used to normalize scraped UCSB text before most comparisons and before
+    storing it.
+    """
+    return re.sub(r"\s+", " ", text).strip()
+
+
+def parse_published_date(raw_value: str) -> str:
+    """
+    Convert a "Month D, YYYY" date string to YYYY-MM-DD (ValueError if malformed).
+    """
+    parsed = datetime.strptime(raw_value, "%B %d, %Y")
+    return parsed.strftime("%Y-%m-%d")
+
+
+def normalize_keyword_dictionary(
+    keyword_dictionary: Mapping[str, Sequence[str]],
+) -> dict[str, list[str]]:
+    """
+    Normalize a keyword mapping loaded from JSON for internal use.
+
+    - Whitespace in group names and keywords is collapsed and trimmed.
+    - Empty keywords are dropped, as are groups left without a name or keywords.
+    - Keywords are deduplicated and sorted case-insensitively.
+    - Raises ValueError when no usable group remains.
+    """
+    normalized: dict[str, list[str]] = {}
+
+    for group_name, raw_keywords in keyword_dictionary.items():
+        group = clean_text(str(group_name))
+        keywords = [
+            clean_text(str(keyword))
+            for keyword in raw_keywords
+            if clean_text(str(keyword))
+        ]
+        if group and keywords:
+            normalized[group] = sorted(set(keywords), key=str.lower)
+
+    if not normalized:
+        raise ValueError("Keyword dictionary must include at least one non-empty group.")
+
+    return normalized
+
+
+def load_keyword_dictionary(keyword_config_path: str | Path | None) -> dict[str, list[str]]:
+    """
+    Read the keyword config JSON and return it as a normalized dictionary.
+
+    Falls back to DEFAULT_KEYWORD_CONFIG_PATH when no path is given.
+    """
+    config_path = (
+        Path(keyword_config_path)
+        if keyword_config_path is not None
+        else DEFAULT_KEYWORD_CONFIG_PATH
+    )
+
+    with config_path.open("r", encoding="utf-8") as file:
+        payload = json.load(file)
+
+    if not isinstance(payload, dict):
+        raise ValueError("Keyword config must be a JSON object of group -> keyword list.")
+
+    return normalize_keyword_dictionary(payload)
+
+
+def fetch_page(url: str) -> BeautifulSoup:
+    """
+    Fetch `url` and return the parsed page; raises on HTTP error statuses.
+    """
+    response = requests.get(url, headers=HEADERS, timeout=30)
+    response.raise_for_status()
+    return BeautifulSoup(response.text, "html.parser")
+
+
+def build_listing_url(base_url: str, page_number: int) -> str:
+    """
+    Build the listing URL for 1-based `page_number` of a UCSB category.
+
+    The site's pager query parameter is zero-based (`page=1` is the second
+    page), so page N is requested as `page=N-1`; page 1 omits the parameter.
+    """
+    if page_number <= 1:
+        return f"{base_url}?items_per_page={ITEMS_PER_PAGE}"
+    return f"{base_url}?page={page_number - 1}&items_per_page={ITEMS_PER_PAGE}"
+
+
+def parse_listing_page(soup: BeautifulSoup, doc_type: str) -> list[dict[str, str]]:
+    """
+    Extract document metadata from a UCSB listing page.
+
+    Each returned dict has these keys:
+    - title
+    - url
+    - published_date
+    - doc_type
+    - category
+
+    Date headings (h4) and document links (a) are interleaved in the listing,
+    so each link is dated with the most recently seen date heading.
+    """
+    items: list[dict[str, str]] = []
+    current_date = ""
+
+    # The listing itself lives inside the view-content container.
+    listing_container = soup.find("div", class_="view-content")
+    if listing_container is None:
+        return items
+
+    for tag in listing_container.find_all(["h4", "a"]):
+        if tag.name == "h4":
+            heading_text = clean_text(tag.get_text(" ", strip=True))
+            if DATE_PATTERN.match(heading_text):
+                current_date = heading_text
+            continue
+
+        href = tag.get("href")
+        title = clean_text(tag.get_text(" ", strip=True))
+
+        if not href or not title:
+            continue
+
+        # Keep only links that point at documents.
+        if not href.startswith("/documents/"):
+            continue
+
+        # Pager link text can come through garbled depending on encoding, so
+        # exclude it by its leading word rather than by exact match.
+        lowered_title = title.lower()
+        if lowered_title.startswith("next") or lowered_title.startswith("last"):
+            continue
+
+        # A link seen before any date heading has no known date; skip it.
+        if not current_date:
+            continue
+
+        items.append(
+            {
+                "title": title,
+                "url": urljoin(BASE_URL, href),
+                "published_date": parse_published_date(current_date),
+                "doc_type": doc_type,
+                "category": "UCSB Presidency Project",
+            }
+        )
+
+    # The same document can appear more than once on a page; dedupe by URL.
+    deduped: list[dict[str, str]] = []
+    seen_urls: set[str] = set()
+    for item in items:
+        if item["url"] in seen_urls:
+            continue
+        seen_urls.add(item["url"])
+        deduped.append(item)
+
+    return deduped
+
+
+def crawl_listing(
+    base_url: str,
+    doc_type: str,
+    max_pages: int,
+    sleep_sec: float,
+) -> list[dict[str, str]]:
+    """
+    Walk up to `max_pages` listing pages of one document type.
+
+    - Each page is parsed with parse_listing_page().
+    - URLs already seen on earlier pages are skipped.
+    - A page that fails to fetch is skipped; a page with no items ends the walk.
+    """
+    items: list[dict[str, str]] = []
+    seen_urls: set[str] = set()
+
+    for page_number in range(1, max_pages + 1):
+        page_url = build_listing_url(base_url, page_number)
+        print(f"[INDEX] {doc_type}: {page_url}")
+
+        try:
+            soup = fetch_page(page_url)
+        except Exception as exc:
+            print(f"  -> failed to fetch listing page: {exc}")
+            continue
+
+        page_items = parse_listing_page(soup, doc_type)
+        if not page_items:
+            print("  -> no listing items found, stopping")
+            break
+
+        added_count = 0
+        for item in page_items:
+            if item["url"] in seen_urls:
+                continue
+            seen_urls.add(item["url"])
+            items.append(item)
+            added_count += 1
+
+        print(f"  -> found {added_count} new items")
+        time.sleep(sleep_sec)
+
+    return items
+
+
+def extract_article_body(soup: BeautifulSoup) -> str:
+    """
+    Extract the body text of a document detail page.
+
+    Reads paragraphs, list items, blockquotes and h2-h4 headings inside
+    `div.field-docs-content` (returns "" when that container is missing) and
+    stops at the first heading that opens a metadata section.
+    """
+    content_container = soup.find("div", class_="field-docs-content")
+    if content_container is None:
+        return ""
+
+    body_parts: list[str] = []
+
+    for node in content_container.find_all(["p", "li", "blockquote", "h2", "h3", "h4"]):
+        heading_text = clean_text(node.get_text(" ", strip=True)).lower()
+
+        # Stop once a metadata heading such as "Filed Under" or "Categories" begins.
+        if node.name in {"h2", "h3", "h4"} and heading_text.startswith(tuple(STOP_SECTION_TITLES)):
+            break
+
+        text = clean_text(node.get_text("\n", strip=True))
+
+        if not text:
+            continue
+
+        # A line that is only a date is not body text.
+        if DATE_PATTERN.match(text):
+            continue
+
+        if node.name == "li":
+            text = f"- {text}"
+
+        body_parts.append(text)
+
+    # Identical text fragments can repeat, so drop later duplicates.
+    deduped_parts: list[str] = []
+    seen_parts: set[str] = set()
+    for part in body_parts:
+        if part in seen_parts:
+            continue
+        seen_parts.add(part)
+        deduped_parts.append(part)
+
+    return "\n\n".join(deduped_parts).strip()
+
+
+def match_keywords(
+    text: str,
+    keyword_dictionary: Mapping[str, Sequence[str]],
+) -> dict[str, list[str]]:
+    """
+    Return {group_name: [matched keywords]} for keywords found in `text`.
+
+    Matching is case-insensitive and whole-word (a plural "s"/"es" suffix is
+    tolerated), so a keyword like "AI" no longer matches inside "said".
+    Groups without a match are omitted; lists are deduplicated and sorted.
+    """
+    matches: dict[str, list[str]] = {}
+    for group_name, keywords in keyword_dictionary.items():
+        group_matches = [
+            keyword
+            for keyword in keywords
+            if re.search(rf"(?<!\w){re.escape(keyword)}(?:e?s)?(?!\w)", text, re.IGNORECASE)
+        ]
+        if group_matches:
+            matches[group_name] = sorted(set(group_matches), key=str.lower)
+    return matches
+
+
+def parse_article(
+    metadata: Mapping[str, str],
+    keyword_dictionary: Mapping[str, Sequence[str]],
+) -> dict[str, Any] | None:
+    """
+    Fetch one document page and build its title/body/keyword-match record.
+
+    Returns None when the fetch fails or when no keyword matches.
+    """
+    url = metadata["url"]
+    print(f"[ARTICLE] {url}")
+
+    try:
+        soup = fetch_page(url)
+    except Exception as exc:
+        print(f"  -> failed to fetch article: {exc}")
+        return None
+
+    title_tag = soup.find("h1")
+    title = clean_text(title_tag.get_text(" ", strip=True)) if title_tag else metadata["title"]
+    body = extract_article_body(soup)
+    combined_text = f"{title}\n{body}"
+    matches = match_keywords(combined_text, keyword_dictionary)
+
+    if not matches:
+        print("  -> skipped: no keyword match")
+        return None
+
+    matched_keywords = sorted(
+        {keyword for keywords in matches.values() for keyword in keywords},
+        key=str.lower,
+    )
+    matched_groups = sorted(matches.keys(), key=str.lower)
+
+    print(f"  -> kept: {', '.join(matched_groups)}")
+    return {
+        **metadata,
+        "title": title,
+        "body": body,
+        "matched_keyword_groups": ", ".join(matched_groups),
+        "matched_keywords": ", ".join(matched_keywords),
+        "keyword_matches_json": json.dumps(matches, ensure_ascii=False),
+    }
+
+
+def crawl_ucsb_documents(
+    keyword_dictionary: Mapping[str, Sequence[str]],
+    doc_types: Sequence[str] | None = None,
+    max_pages: int = 5,
+    sleep_sec: float = 0.5,
+    output_csv: str = DEFAULT_OUTPUT_CSV,
+) -> pd.DataFrame:
+    """
+    Run the UCSB crawl end to end and return the saved rows.
+
+    Steps:
+    1. Walk the listing pages of each document type to collect links.
+    2. Fetch each document page and extract its title and body.
+    3. Keep only documents that match at least one keyword.
+    4. Write the result to `output_csv` (ValueError on unknown doc types).
+    """
+    selected_doc_types = list(doc_types or DOC_TYPE_URLS.keys())
+    invalid_doc_types = [doc_type for doc_type in selected_doc_types if doc_type not in DOC_TYPE_URLS]
+    if invalid_doc_types:
+        raise ValueError(f"Unsupported doc types: {invalid_doc_types}")
+
+    results: list[dict[str, Any]] = []
+
+    for doc_type in selected_doc_types:
+        listing_items = crawl_listing(
+            base_url=DOC_TYPE_URLS[doc_type],
+            doc_type=doc_type,
+            max_pages=max_pages,
+            sleep_sec=sleep_sec,
+        )
+        print(f"[SUMMARY] {doc_type}: {len(listing_items)} listing items")
+
+        for item in listing_items:
+            article = parse_article(item, keyword_dictionary=keyword_dictionary)
+            if article is not None:
+                results.append(article)
+            time.sleep(sleep_sec)
+
+    # Naming the columns up front fixes their order for downstream use and
+    # keeps the header row even when nothing matched; an empty frame built
+    # without columns would be written as a header-less CSV.
+    columns = [
+        "published_date",
+        "category",
+        "doc_type",
+        "title",
+        "url",
+        "matched_keyword_groups",
+        "matched_keywords",
+        "keyword_matches_json",
+        "body",
+    ]
+    df = pd.DataFrame(results, columns=columns).sort_values(
+        by=["published_date", "doc_type"], ascending=[True, True]
+    )
+
+    df.to_csv(output_csv, index=False, encoding="utf-8-sig")
+    print(f"[DONE] saved {len(df)} rows to {output_csv}")
+    return df
+
+
+def parse_args() -> argparse.Namespace:
+    """
+    Define and parse the command-line arguments for the crawler.
+    """
+    parser = argparse.ArgumentParser(description="UCSB Presidency Project crawler")
+    parser.add_argument(
+        "--keyword-config",
+        default=str(DEFAULT_KEYWORD_CONFIG_PATH),
+        help="Path to a JSON file with {group_name: [keywords...]}",
+    )
+    parser.add_argument(
+        "--doc-types",
+        nargs="+",
+        default=list(DOC_TYPE_URLS.keys()),
+        choices=list(DOC_TYPE_URLS.keys()),
+        help="Subset of document types to crawl",
+    )
+    parser.add_argument("--max-pages", type=int, default=5)
+    parser.add_argument("--sleep-sec", type=float, default=0.5)
+    parser.add_argument("--output-csv", default=DEFAULT_OUTPUT_CSV)
+    return parser.parse_args()
+
+
+def main() -> None:
+    """
+    Parse the CLI arguments and run the UCSB crawler.
+    """
+    args = parse_args()
+    keyword_dictionary = load_keyword_dictionary(args.keyword_config)
+    crawl_ucsb_documents(
+        keyword_dictionary=keyword_dictionary,
+        doc_types=args.doc_types,
+        max_pages=args.max_pages,
+        sleep_sec=args.sleep_sec,
+        output_csv=args.output_csv,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/crawler/collectors/ucsb_keywords.json b/crawler/collectors/ucsb_keywords.json
new file mode 100644
index 0000000..63c5d6c
--- /dev/null
+++ b/crawler/collectors/ucsb_keywords.json
@@ -0,0 +1,48 @@
+{
+  "group_a_direct_qqq_index": [
+    "QQQ",
+    "Invesco QQQ",
+    "Nasdaq-100",
+    "Nasdaq 100",
+    "NDX"
+  ],
+  "group_b_big_tech_holdings": [
+    "Apple",
+    "Microsoft",
+    "Nvidia",
+    "Amazon",
+    "Meta",
+    "Alphabet",
+    "Tesla",
+    "Broadcom"
+  ],
+  "group_c_core_industries": [
+    "semiconductor",
+    "chip",
+    "AI",
+    "cloud",
+    "software",
+    "datacenter",
+    "advanced computing"
+  ],
+  "group_d_macro_variables": [
+    "inflation",
+    "interest rates",
+    "labor market",
+    "liquidity",
+    "financial conditions",
+    "yields",
+    "recession",
+    "growth outlook"
+  ],
+  "group_e_policy_regulation": [
+    "export controls",
+    "tariffs",
+    "sanctions",
+    "antitrust",
+    "tax credits",
+    "industrial policy",
+    "regulation",
+    "restrictions"
+  ]
+}
diff --git a/crawler/collectors/whitehouse.py b/crawler/collectors/whitehouse.py
deleted file mode 100644
index 0ab3fee..0000000
--- a/crawler/collectors/whitehouse.py
+++ /dev/null
@@ -1,303 +0,0 @@
-import time
-import re
-import sys
-from typing import List, Dict, Optional
-from urllib.parse import urljoin
-from datetime import datetime
-from pathlib import Path
-
-import requests
-import pandas as pd
-from bs4 import BeautifulSoup
-
-PROJECT_ROOT = Path(__file__).resolve().parents[2]
-PROJECT_ROOT_STR = str(PROJECT_ROOT)
-
-if PROJECT_ROOT_STR not in sys.path:
-    sys.path.insert(0, PROJECT_ROOT_STR)
-
-from crawler.support_legacy.data_paths import collected_csv_path
-
-BASE_URL = "https://www.whitehouse.gov"
-NEWS_URL = f"{BASE_URL}/news/"
-
-HEADERS = {
-    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
-                  "(KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
-}
-
-# QQQ에 영향 줄 가능성이 큰 정책 키워드
-QQQ_KEYWORDS = [
-    "artificial intelligence", "ai", "semiconductor", "chip", "chips",
-    "nvidia", "amd", "intel", "data center", "cloud", "cybersecurity",
-    "antitrust", "competition", "big tech", "technology", "export control",
-    "tariff", "trade", "china", "advanced computing", "software",
-    "digital", "broadband", "quantum", "5g", "6g"
-]
-
-# 카테고리 추출용 후보
-DOC_TYPE_CANDIDATES = {
-    "Articles",
-    "Briefings & Statements",
-    "Fact Sheets",
-    "Executive Orders",
-    "Presidential Memoranda",
-    "Proclamations",
-    "Remarks",
-    "Research",
-    "Presidential Actions",
-}
-
-
-def clean_text(text: str) -> str:
-    """공백 정리"""
-    return re.sub(r"\s+", " ", text).strip()
-
-
-def contains_qqq_keyword(text: str, keywords: List[str]) -> bool:
-    """본문/제목에 QQQ 관련 키워드가 있는지 확인"""
-    text_lower = text.lower()
-    return any(keyword.lower() in text_lower for keyword in keywords)
-
-
-def find_news_cards(soup: BeautifulSoup) -> List[BeautifulSoup]:
-    """
-    /news/ 페이지에서 개별 글 카드(목록 항목)를 찾는다.
-    현재 화이트하우스 뉴스 페이지는 <h2> 안의 링크 형태가 많으므로,
-    제목 링크를 기준으로 역추적한다.
-    """
-    cards = []
-
-    # 일반적으로 목록 제목은 h2 / h3 / h4 내부 a 태그
-    for tag in soup.find_all(["h2"]):
-        a = tag.find("a", href=True)
-        if not a:
-            continue
-
-        href = a["href"]
-
-        if "whitehouse.gov" not in href:
-            continue
-
-        # 뉴스/발표성 문서만 남김
-        if not any(part in href for part in ["/articles/", "/briefings-statements/", "/presidential-actions/"]):
-            continue
-
-        cards.append(tag)
-
-    return cards
-
-
-def parse_listing_item(tag: BeautifulSoup) -> Optional[Dict]:
-    """
-    목록 페이지의 카드에서
-    title / url / doc_type / published_date 를 최대한 추출
-    """
-    a = tag.find("a", href=True)
-    if not a:
-        return None
-
-    title = clean_text(a.get_text(" ", strip=True))
-    url = a["href"]
-
-    parent = tag.parent
-
-    date_tag = parent.find("time")
-    date = clean_text(date_tag.get_text(" ", strip=True)) if date_tag else ""
-
-    type_tag = tag.parent.find("a", rel="tag")
-    type = clean_text(type_tag.get_text(" ", strip=True)) if type_tag else ""
-
-    # 날짜 추정
-    date_match = re.search(
-        r"(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},\s+\d{4}",
-        date
-    )
-    published_date = date_match.group(0) if date_match else None
-    print
-    try:
-        dt_obj = datetime.strptime(published_date, "%B %d, %Y")
-        published_date = dt_obj.strftime("%Y-%m-%d")
-    except ValueError as e:
-        print(f"날짜 변환 실패: {published_date} -> {e}")
-        published_date = None
-
-    # 카테고리 추정
-    doc_type = None
-    for candidate in DOC_TYPE_CANDIDATES:
-        if candidate.lower() in type.lower():
-            doc_type = candidate
-            break
-
-    return {
-        "title": title,
-        "url": url,
-        "category": "White House",
-        "doc_type": doc_type,
-        "published_date": published_date,
-    }
-
-
-def fetch_page(url: str) -> BeautifulSoup:
-    response = requests.get(url, headers=HEADERS, timeout=30)
-    response.raise_for_status()
-    return BeautifulSoup(response.text, "html.parser")
-
-
-def crawl_news_index(max_pages: int = 5, sleep_sec: float = 1.0) -> List[Dict]:
-    """
-    /news/ 와 /news/page/{n}/ 를 순회하면서
-    글 목록 URL을 수집
-    """
-    items = []
-    seen_urls = set()
-
-    for page_num in range(1, max_pages + 1):
-        if page_num == 1:
-            url = NEWS_URL
-        else:
-            url = f"{NEWS_URL}page/{page_num}/"
-
-        print(f"[INDEX] {url}")
-
-        try:
-            soup = fetch_page(url)
-        except Exception as e:
-            print(f"  -> 페이지 수집 실패: {e}")
-            continue
-
-        cards = find_news_cards(soup)
-        print(f"  -> 발견 카드 수: {len(cards)}")
-
-        for card in cards:
-            item = parse_listing_item(card)
-            if not item:
-                continue
-            if item["url"] in seen_urls:
-                continue
-
-            seen_urls.add(item["url"])
-            items.append(item)
-
-        time.sleep(sleep_sec)
-
-    return items
-
-
-def extract_article_body(soup: BeautifulSoup) -> str:
-    """
-    본문 추출
-    - 기사형 페이지는 h1/title/date 아래에 p 태그가 이어지는 경우가 많음
-    - footer, related, subscribe 영역 제외
-    """
-    # 너무 광범위하게 잡히는 걸 막기 위해 긴 p 태그만 우선 수집
-    paragraphs = []
-
-    for p in soup.find_all("p"):
-        text = clean_text(p.get_text(" ", strip=True))
-
-        if len(text) < 20:
-            continue
-
-        # 하단 공유/구독/저작권성 문구 제거
-        lowered = text.lower()
-        if any(bad in lowered for bad in [
-            "subscribe", "click here", "follow on social media",
-            "notifications", "privacy policy"
-        ]):
-            continue
-
-        paragraphs.append(text)
-
-    # 중복 제거
-    deduped = []
-    seen = set()
-    for p in paragraphs:
-        if p not in seen:
-            seen.add(p)
-            deduped.append(p)
-
-    return "\n".join(deduped)
-
-
-def parse_article(metadata: Optional[Dict]) -> Optional[Dict]:
-    new_data = metadata.copy()
-    
-    url = metadata["url"]
-    print(f"[ARTICLE] {url}")
-
-    try:
-        soup = fetch_page(url)
-    except Exception as e:
-        print(f"  -> 본문 수집 실패: {e}")
-        return None
-    
-    body = extract_article_body(soup)
-    new_data["body"] = body
-
-    return new_data
-
-
-def crawl_whitehouse_qqq_policy(
-    max_pages: int = 10,
-    sleep_sec: float = 1.0,
-    output_csv: str = collected_csv_path("whitehouse_qqq_policy.csv")
-) -> pd.DataFrame:
-    """
-    1) 뉴스 인덱스 수집
-    2) 개별 문서 본문 수집
-    3) QQQ 관련 키워드 필터링
-    4) CSV 저장
-    """
-    listing_items = crawl_news_index(max_pages=max_pages, sleep_sec=sleep_sec)
-
-    print(f"\n총 목록 수집 개수: {len(listing_items)}")
-
-    results = []
-
-    for item in listing_items:
-        article = parse_article(item)
-
-        if not article:
-            time.sleep(sleep_sec)
-            continue
-
-        combined_text = f"{article['title'] or ''}\n{article['body'] or ''}"
-
-        if contains_qqq_keyword(combined_text, QQQ_KEYWORDS):
-            matched_keywords = [
-                kw for kw in QQQ_KEYWORDS
-                if kw.lower() in combined_text.lower()
-            ]
-
-            article["matched_keywords"] = ", ".join(sorted(set(matched_keywords)))
-            results.append(article)
-            print(f"  -> QQQ 관련 문서 저장: {article['title']}")
-        else:
-            print("  -> 비관련 문서")
-
-        time.sleep(sleep_sec)
-
-    df = pd.DataFrame(results)
-
-    if not df.empty:
-        # 본문이 너무 길면 필요에 따라 일부 컬럼만 저장 가능
-        df = df[[
-            "published_date", "category", "doc_type", "title", "url",
-            "matched_keywords", "body"
-        ]].sort_values(by="published_date", ascending=False, na_position="last")
-
-    df.to_csv(output_csv, index=False, encoding="utf-8-sig")
-    print(f"\n저장 완료: {output_csv} / 건수: {len(df)}")
-
-    return df
-
-
-if __name__ == "__main__":
-    df = crawl_whitehouse_qqq_policy(
-        max_pages=160,       # 처음엔 3~5페이지 정도로 테스트
-        sleep_sec=1.2,
-        output_csv=collected_csv_path("whitehouse_qqq_policy.csv")
-    )
-
-    print(df.head(10))
diff --git a/crawler/postprocessing/proprocessing.py b/crawler/postprocessing/proprocessing.py
index 2fbc19d..ed39f5a 100644
--- a/crawler/postprocessing/proprocessing.py
+++ b/crawler/postprocessing/proprocessing.py
@@ -24,6 +24,10 @@
 BODY_LENGTH_COL = "body_original_length"
 
 
+def _existing_csv_paths(csv_paths: Iterable[str]) -> list[str]:
+    return [path for path in csv_paths if Path(path).exists()]
+
+
 def _pick_first_existing(df: pd.DataFrame, candidates: Iterable[str]) -> Optional[str]:
     for c in candidates:
         if c in df.columns:
@@ -210,11 +214,18 @@ def read_csv_and_add_cyclical_time_features(
 
 
 def main() -> None:
-    csv_paths = [
+    csv_candidates = [
         summarized_csv_path("fed_fomc_links_summarized.csv"),
-        summarized_csv_path("whitehouse_qqq_policy_summarized.csv"),
+        summarized_csv_path("ucsb_presidential_documents_summarized.csv"),
         summarized_csv_path("bis_press_releases_summarized.csv"),
     ]
+    csv_paths = _existing_csv_paths(csv_candidates)
+
+    if not csv_paths:
+        raise FileNotFoundError(
+            "No summarized crawler outputs were found. "
+            f"Checked: {csv_candidates}"
+        )
 
     merged = merge_csvs_to_table(csv_paths)
     print("[INFO] merged_rows=", len(merged))
diff --git a/crawler/support_legacy/crawling_test.py b/crawler/support_legacy/crawling_test.py
index 7158ba7..383b5a2 100644
--- a/crawler/support_legacy/crawling_test.py
+++ b/crawler/support_legacy/crawling_test.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import sys
 from pathlib import Path
 
@@ -9,16 +11,22 @@
 if PROJECT_ROOT_STR not in sys.path:
     sys.path.insert(0, PROJECT_ROOT_STR)
 
-from crawler.support_legacy.data_paths import feature_csv_path
+from crawler.support_legacy.data_paths import collected_csv_path
+
+
+def main() -> None:
+    csv_path = collected_csv_path("ucsb_presidential_documents.csv")
+    df = pd.read_csv(csv_path)
+
+    keyword_columns = [
+        "published_date",
+        "matched_keyword_groups",
+        "matched_keywords",
+    ]
 
-merged = pd.read_csv(feature_csv_path("merged_finbert.csv"))
-print(merged.columns)
-print(merged[merged["body_sentiment_score"]>0.5])
+    available_columns = [column for column in keyword_columns if column in df.columns]
+    print(df[available_columns].to_string(index=False))
 
-# dataset2 = pd.read_csv(collected_csv_path("whitehouse_qqq_policy.csv"))
-# text_lengths2 = dataset2["body"].fillna("").astype(str).str.len()
-# print(text_lengths2.to_string())
 
-# dataset3 = pd.read_csv(collected_csv_path("bis_press_releases.csv"))
-# text_lengths3 = dataset3["body"].fillna("").astype(str).str.len()
-# print(text_lengths3.to_string())
+if __name__ == "__main__":
+    main()
diff --git a/crawler/support_legacy/data_paths.py b/crawler/support_legacy/data_paths.py
index 1a236d3..a16eb93 100644
--- a/crawler/support_legacy/data_paths.py
+++ b/crawler/support_legacy/data_paths.py
@@ -50,13 +50,13 @@ def csv_path(filename: str) -> str:
     """
     collected_files = {
         "fed_fomc_links.csv",
-        "whitehouse_qqq_policy.csv",
         "bis_press_releases.csv",
+        "ucsb_presidential_documents.csv",
     }
     summarized_files = {
         "fed_fomc_links_summarized.csv",
-        "whitehouse_qqq_policy_summarized.csv",
         "bis_press_releases_summarized.csv",
+        "ucsb_presidential_documents_summarized.csv",
     }
     feature_files = {
         "merged_table_sorted.csv",