diff --git a/.claude/tools/openmc_mcp_server.py b/.claude/tools/openmc_mcp_server.py
new file mode 100644
index 00000000000..37917abc188
--- /dev/null
+++ b/.claude/tools/openmc_mcp_server.py
@@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+"""MCP server that exposes OpenMC's RAG semantic search to AI coding agents.
+
+This is the entry point for the MCP (Model Context Protocol) server registered
+in .mcp.json at the repo root. When an MCP-capable agent (e.g. Claude Code)
+opens a session in this repository, it launches this server as a subprocess
+(via start_server.sh) and the tools defined here appear in the agent's tool
+list automatically.
+
+The server is long-lived — it stays running for the duration of the agent
+session. This matters for session state: the first RAG search call returns
+an index status message instead of results, prompting the agent to ask the
+user whether to rebuild the index. That first-call flag resets each session.
+
+Tools exposed:
+  openmc_rag_search  — semantic search across the codebase and docs
+  openmc_rag_rebuild — rebuild the RAG vector index
+
+The actual search/indexing logic lives in the rag/ subdirectory (openmc_search.py,
+indexer.py, chunker.py, embeddings.py). This file is just the MCP interface
+layer and session state management.
+"""
+
+from mcp.server.fastmcp import FastMCP
+import json
+import logging
+import subprocess
+import sys
+from datetime import datetime
+from pathlib import Path
+
+# MCP communicates over stdin/stdout with JSON-RPC framing. Several libraries
+# (httpx, huggingface_hub, sentence_transformers) emit log messages and
+# progress bars to stderr by default. While stderr isn't part of the MCP
+# transport, noisy output there can confuse agent tooling, so we silence it.
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
+logging.getLogger("sentence_transformers").setLevel(logging.WARNING)
+
+# Path constants. This file lives at .claude/tools/openmc_mcp_server.py,
+# so parents[2] is the OpenMC repo root.
+OPENMC_ROOT = Path(__file__).resolve().parents[2]
+CACHE_DIR = OPENMC_ROOT / ".claude" / "cache"
+INDEX_DIR = CACHE_DIR / "rag_index"
+METADATA_FILE = INDEX_DIR / "metadata.json"
+
+# The RAG modules (openmc_search, indexer, etc.) live in .claude/tools/rag/.
+# We add that directory to sys.path so we can import them directly.
+TOOLS_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(TOOLS_DIR / "rag"))
+
+mcp = FastMCP("openmc-code-tools")
+
+# First-call flag: the first openmc_rag_search call of each session returns
+# index status info instead of search results, so the agent can ask the user
+# whether to rebuild. This resets when the server process restarts (i.e. each
+# new agent session).
+_rag_first_call = True
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _get_current_branch():
+    """Return the name of the currently checked-out git branch.
+
+    Runs ``git rev-parse --abbrev-ref HEAD`` with the repo root as the
+    working directory.
+
+    Returns:
+        The stripped command output, or the string "unknown" when the command
+        cannot be run, exits non-zero, or prints nothing.
+    """
+    command = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
+    try:
+        proc = subprocess.run(
+            command, capture_output=True, text=True, cwd=str(OPENMC_ROOT),
+        )
+    except Exception:
+        # Best effort only: the branch name is informational, so any failure
+        # to launch git is reported as a placeholder rather than raised.
+        return "unknown"
+
+    branch = proc.stdout.strip()
+    if proc.returncode == 0 and branch:
+        return branch
+    return "unknown"
+
+
+def _get_index_metadata():
+    """Load the index build metadata written by _save_index_metadata().
+
+    Returns:
+        The parsed JSON content of METADATA_FILE, or None when the file is
+        missing, unreadable, or not valid JSON.
+    """
+    try:
+        # A missing file raises here as well, which maps to the same None
+        # result as an explicit existence check.
+        raw = METADATA_FILE.read_text()
+        return json.loads(raw)
+    except Exception:
+        return None
+
+
+def _save_index_metadata():
+    """Record when, and on which branch, the index was built.
+
+    Writes a small JSON document with the keys "built_at" (local time,
+    minute resolution) and "branch" to METADATA_FILE, overwriting any
+    previous content.
+    """
+    built_at = datetime.now().strftime("%Y-%m-%d %H:%M")
+    branch = _get_current_branch()
+    payload = json.dumps({"built_at": built_at, "branch": branch}, indent=2)
+    METADATA_FILE.write_text(payload)
+
+
+def _check_index_first_call():
+    """Build the index status message returned on the first search of a session.
+
+    Three situations are distinguished: no index directory, an index with
+    readable build metadata, and an index without metadata. Every path
+    returns a non-empty string instructing the agent to ask the user how
+    to proceed.
+    """
+    # Queried up front (even when no index exists), as the original did.
+    current_branch = _get_current_branch()
+
+    if not INDEX_DIR.exists():
+        return (
+            "No RAG index found. Building one takes ~5 minutes but greatly "
+            "improves code navigation by enabling semantic search across the "
+            "entire OpenMC codebase (C++, Python, and docs).\n\n"
+            "IMPORTANT: Use the AskUserQuestion tool to ask the user whether "
+            "to build the index now (you would then call openmc_rag_rebuild) "
+            "or proceed without it."
+        )
+
+    # Instruction shared by both "index exists" messages; only the closing
+    # sentence differs between them.
+    ask_user = (
+        "REQUIRED: You must use the AskUserQuestion tool now to ask the "
+        "user whether to rebuild the index (you would then call "
+        "openmc_rag_rebuild) or use the existing one. Do not skip this "
+    )
+
+    meta = _get_index_metadata()
+    if not meta:
+        return (
+            "RAG index found but has no build metadata. "
+            f"Current branch is '{current_branch}'.\n\n"
+            + ask_user
+            + "step. Do not decide on their behalf."
+        )
+
+    built_at = meta.get("built_at", "unknown time")
+    built_branch = meta.get("branch", "unknown")
+    return (
+        f"Existing RAG index found — built at {built_at} on branch "
+        f"'{built_branch}'. Current branch is '{current_branch}'.\n\n"
+        + ask_user
+        + "step — the user may have uncommitted changes. Do not decide "
+        "on their behalf."
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tools
+# ---------------------------------------------------------------------------
+
+@mcp.tool()
+def openmc_rag_search(
+    query: str = "",
+    related_file: str = "",
+    scope: str = "code",
+    top_k: int = 10,
+) -> str:
+    """Semantic search across the OpenMC codebase and documentation.
+
+    Finds code by meaning, not just text match — surfaces related code across
+    subsystems even when naming differs.  Use for discovery and exploration
+    before reaching for grep.  Covers C++, Python, and RST docs.
+
+    Args:
+        query: Search query (e.g. "particle weight adjustment variance reduction")
+        related_file: Instead of a text query, find code related to this file
+        scope: "code" (default), "docs", or "all"
+        top_k: Number of results to return (default 10)
+    """
+    # NOTE(review): the docstring above is left verbatim on purpose; it is
+    # presumably what the MCP framework shows the agent as the tool
+    # description, so treat it as runtime text rather than a comment.
+    global _rag_first_call
+
+    # The first call of a session answers with index status, not results.
+    if _rag_first_call:
+        _rag_first_call = False
+        first_call_notice = _check_index_first_call()
+        if first_call_notice:
+            return first_call_notice
+
+    if not INDEX_DIR.exists():
+        return (
+            "No RAG index available. Call openmc_rag_rebuild() to build one "
+            "(takes ~5 minutes)."
+        )
+
+    # Argument validation: exactly one of query/related_file, a known scope,
+    # and a positive result count.
+    if not (query or related_file):
+        return "Error: provide either 'query' or 'related_file'."
+    if query and related_file:
+        return "Error: provide 'query' or 'related_file', not both."
+    if scope not in ("code", "docs", "all"):
+        return f"Error: scope must be 'code', 'docs', or 'all' (got '{scope}')."
+    if top_k < 1:
+        return f"Error: top_k must be at least 1 (got {top_k})."
+
+    # (table, heading) pairs to search for each scope, in output order.
+    code_section = ("code", "Code")
+    docs_section = ("docs", "Documentation")
+    sections_by_scope = {
+        "code": (code_section,),
+        "docs": (docs_section,),
+        "all": (code_section, docs_section),
+    }
+
+    try:
+        # Imported lazily so the heavy search stack loads only when used.
+        from openmc_search import (
+            get_db_and_embedder, search_table, format_results, search_related,
+        )
+
+        db, embedder = get_db_and_embedder()
+
+        if related_file:
+            hits = search_related(db, embedder, related_file, top_k)
+            return format_results(hits, f"Code related to {related_file}")
+
+        rendered = [
+            format_results(
+                search_table(db, embedder, table, query, top_k), heading)
+            for table, heading in sections_by_scope[scope]
+        ]
+        return "\n".join(rendered)
+    except Exception as e:
+        # Tool errors are reported to the agent as text, never raised.
+        return f"Error during search: {e}"
+
+
+@mcp.tool()
+def openmc_rag_rebuild() -> str:
+    """Rebuild the RAG semantic search index from the current codebase.
+
+    Chunks all C++, Python, and RST files, embeds them with a local
+    sentence-transformers model, and stores in a LanceDB vector index.
+    Takes ~5 minutes on 10 CPU cores.  Call this after pulling new code
+    or switching branches.
+    """
+    # NOTE(review): the docstring above is left verbatim; it is presumably
+    # published to the agent as the tool description.
+    global _rag_first_call
+    _rag_first_call = False  # no need to prompt after an explicit rebuild
+
+    try:
+        import contextlib
+        import io
+        from indexer import build_index
+
+        # build_index() reports progress with print(). Capture it so it is
+        # returned to the agent instead of being written to this process's
+        # stdout, which carries the MCP JSON-RPC stream.
+        captured = io.StringIO()
+        try:
+            with contextlib.redirect_stdout(captured):
+                build_index()
+        except SystemExit as exc:
+            # build_index() may call sys.exit() (e.g. when no chunks are
+            # collected). SystemExit is not an Exception subclass, so without
+            # this it would escape the handler below and terminate the
+            # long-lived server process.
+            raise RuntimeError(
+                f"index build aborted (exit status {exc.code})"
+            ) from exc
+
+        _save_index_metadata()
+
+        branch = _get_current_branch()
+        build_output = captured.getvalue()
+        return (
+            f"Index rebuilt successfully on branch '{branch}'.\n\n"
+            f"{build_output}"
+        )
+    except Exception as e:
+        # Tool errors are reported to the agent as text, never raised.
+        return f"Error rebuilding index: {e}"
+
+
+# Script entry point: start the MCP server when this file is executed directly
+# (start_server.sh runs it this way).
+if __name__ == "__main__":
+    mcp.run()
diff --git a/.claude/tools/rag/chunker.py b/.claude/tools/rag/chunker.py
new file mode 100644
index 00000000000..b28ddb0f8a2
--- /dev/null
+++ b/.claude/tools/rag/chunker.py
@@ -0,0 +1,105 @@
+"""Split source files into overlapping text chunks for vector embedding.
+
+The indexer (indexer.py) calls chunk_file() on every C++, Python, and RST file
+in the repo. Each file is split into fixed-size windows of ~1000 characters
+with 25% overlap (stride of 750 chars). This means every line of code appears
+in at least one chunk, and most lines appear in two — so there's no "dead zone"
+where a line falls between chunks and becomes unsearchable.
+
+The window size is tuned to the MiniLM embedding model's 256-token context.
+Code averages ~4 characters per token, so 1000 chars ≈ 250 tokens — just
+under the model's limit. Chunks are snapped to line boundaries to avoid
+splitting mid-line.
+
+Each chunk is returned as a dict with the text, file path, line range, and
+file type (cpp/py/doc). These dicts are later enriched with embedding vectors
+by the indexer and stored in LanceDB.
+"""
+
+from pathlib import Path
+
+# ~256 tokens for MiniLM. 1 token ≈ 4 chars for code.
+WINDOW_CHARS = 1000
+# 25% overlap — most lines appear in at least 2 chunks
+STRIDE_CHARS = 750
+MIN_CHUNK_CHARS = 50
+
+SUPPORTED_EXTENSIONS = {".cpp", ".h", ".py", ".rst"}
+
+
+def chunk_file(filepath, openmc_root):
+    """Chunk a single file into overlapping, line-aligned windows.
+
+    Args:
+        filepath: Path of the file to chunk (str or Path).
+        openmc_root: Repo root; chunk paths are stored relative to it.
+
+    Returns:
+        A list of dicts with keys "text", "filepath", "kind", "symbol",
+        "start_line" and "end_line" (1-based, inclusive, describing the
+        stripped chunk text). Returns [] for unsupported extensions,
+        unreadable files, and files shorter than MIN_CHUNK_CHARS.
+
+    Raises:
+        ValueError: if filepath is not located under openmc_root.
+    """
+    filepath = Path(filepath)
+    if filepath.suffix not in SUPPORTED_EXTENSIONS:
+        return []
+
+    # Forward slashes regardless of platform, so stored paths compare equal
+    # to the repo-relative paths callers pass at query time.
+    rel = filepath.relative_to(openmc_root).as_posix()
+    try:
+        # Explicit UTF-8: the platform default encoding would otherwise make
+        # the chunk text depend on the machine that built the index.
+        content = filepath.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return []
+
+    total = len(content)
+    if total < MIN_CHUNK_CHARS:
+        return []
+
+    kind = _file_kind(filepath)
+
+    # Character offset at which each line starts (index i -> line i + 1).
+    line_starts = []
+    offset = 0
+    for line in content.split("\n"):
+        line_starts.append(offset)
+        offset += len(line) + 1  # +1 for newline
+
+    chunks = []
+    start = 0
+    while start < total:
+        end = min(start + WINDOW_CHARS, total)
+
+        # Snap end to a line boundary to avoid splitting mid-line
+        if end < total:
+            newline_pos = content.rfind("\n", start, end)
+            if newline_pos > start:
+                end = newline_pos + 1
+
+        window = content[start:end]
+        text = window.strip()
+        if len(text) >= MIN_CHUNK_CHARS:
+            # Report the lines of the stripped text, not of the raw window,
+            # so leading/trailing blank lines do not widen the range.
+            first = start + (len(window) - len(window.lstrip()))
+            last = start + len(window.rstrip()) - 1
+            chunks.append({
+                "text": text,
+                "filepath": rel,
+                "kind": kind,
+                "symbol": "",
+                "start_line": _offset_to_line(line_starts, first),
+                "end_line": _offset_to_line(line_starts, last),
+            })
+
+        # The window reached the end of the file: any further window would be
+        # a suffix of this one and add only a duplicate chunk.
+        if end >= total:
+            break
+
+        # Advance by the stride, but never beyond the (possibly snapped) end
+        # of this window; otherwise the text between the snapped end and the
+        # next start would not appear in any chunk. end > start always holds,
+        # so this makes progress.
+        start = min(start + STRIDE_CHARS, end)
+
+    return chunks
+
+
+def _file_kind(filepath):
+    """Return the kind label for a path: "cpp", "py", "doc", or "other".
+
+    The label is chosen from the file extension alone (``.cpp``/``.h``,
+    ``.py``, ``.rst``); anything else is "other".
+    """
+    kinds_by_suffix = {
+        ".cpp": "cpp",
+        ".h": "cpp",
+        ".py": "py",
+        ".rst": "doc",
+    }
+    return kinds_by_suffix.get(filepath.suffix, "other")
+
+
+def _offset_to_line(line_starts, offset):
+    """Convert a character offset to a 1-based line number.
+
+    Args:
+        line_starts: Ascending start offsets of each line.
+        offset: Character offset to locate.
+
+    Returns:
+        The number of line starts that are <= offset, but never less than 1
+        (so an empty list or an offset before the first start yields 1).
+    """
+    # Upper-bound binary search: `low` ends as the count of starts <= offset.
+    low, high = 0, len(line_starts)
+    while low < high:
+        middle = (low + high) // 2
+        if line_starts[middle] > offset:
+            high = middle
+        else:
+            low = middle + 1
+    return max(low, 1)
diff --git a/.claude/tools/rag/embeddings.py b/.claude/tools/rag/embeddings.py
new file mode 100644
index 00000000000..1fe85b50d9e
--- /dev/null
+++ b/.claude/tools/rag/embeddings.py
@@ -0,0 +1,120 @@
+"""Thin wrapper around sentence-transformers for embedding text into vectors.
+
+Uses the all-MiniLM-L6-v2 model — a small (22M param, 384-dim) model that
+runs on CPU with no GPU or API key required.
+
+Network behavior and privacy
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+No user code, queries, or file contents are EVER sent to HuggingFace or any
+external service. All embedding computation happens locally. The only network
+activity is the one-time model download on first use:
+
+  First run (model not yet cached, ~80MB download):
+    - Downloads model weight files from huggingface.co. This is a standard
+      HTTP file download, similar to pip installing a package.
+    - The only metadata sent in these requests is an HTTP user-agent header
+      containing library version numbers (e.g. "hf_hub/1.6.0;
+      python/3.12.3; torch/2.10.0"). No filenames, file contents, queries,
+      or any user-identifiable information is sent.
+    - The huggingface_hub library has an optional feature where it can report
+      anonymous library usage statistics (just version numbers, not user
+      data) back to HuggingFace. We disable this by setting
+      HF_HUB_DISABLE_TELEMETRY=1.
+
+  Subsequent runs (model already cached):
+    - We set HF_HUB_OFFLINE=1 automatically (see _set_offline_if_cached()
+      below), which prevents ALL network calls. The model loads entirely
+      from the local cache at ~/.cache/huggingface/hub/. Zero bytes leave
+      the machine.
+
+How the model is downloaded
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The SentenceTransformer() constructor (called in __init__ below) handles
+the download automatically on first use. It calls into the huggingface_hub
+library, which downloads the model files from:
+
+    https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
+
+The files are saved to ~/.cache/huggingface/hub/ and reused on subsequent
+runs. We pass token=False to ensure no authentication token is sent.
+
+This module is imported by both the MCP server (for search queries) and the
+indexer (for bulk embedding of code chunks). The bulk embed() call shows a
+progress bar; the single-query embed_query() does not.
+
+The env vars below must be set before importing transformers or
+sentence_transformers. They suppress warnings and progress bars that these
+libraries emit by default. That output goes to stderr, which is not part of
+the MCP JSON-RPC transport (stdin/stdout), but it clutters agent-side logs.
+"""
+
+import os
+from pathlib import Path
+
+MODEL_NAME = "all-MiniLM-L6-v2"
+
+# These env vars control logging behavior in the HuggingFace libraries.
+# They must be set before the libraries are imported.
+os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")   # suppress warnings
+os.environ.setdefault("HF_HUB_VERBOSITY", "error")        # suppress warnings
+os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")   # suppress threading warning
+# Disable anonymous library usage statistics (version numbers only, not user
+# data — but we disable it anyway as a matter of policy).
+os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
+
+
+def _set_offline_if_cached():
+    """If the model has already been downloaded, tell huggingface_hub to
+    skip all network calls by setting HF_HUB_OFFLINE=1.
+
+    Without this, huggingface_hub makes an HTTP request to huggingface.co
+    on every load to check if the cached model is still up to date — even
+    though the model never changes. Setting HF_HUB_OFFLINE=1 prevents this.
+
+    This must run before sentence_transformers is imported, because the
+    library reads the env var at import time.
+
+    Offline mode is only enabled when the cache looks complete. A model
+    directory left behind by an interrupted first download must not force
+    offline mode, or the download could never be finished. A value the
+    user has already set for HF_HUB_OFFLINE is never overridden.
+    """
+    # Cache location, in the order huggingface_hub documents it:
+    # HF_HUB_CACHE (or the older HUGGINGFACE_HUB_CACHE), else $HF_HOME/hub,
+    # where HF_HOME defaults to $XDG_CACHE_HOME/huggingface or
+    # ~/.cache/huggingface.
+    hub_cache = (os.environ.get("HF_HUB_CACHE")
+                 or os.environ.get("HUGGINGFACE_HUB_CACHE"))
+    if hub_cache:
+        cache_dir = Path(hub_cache).expanduser()
+    else:
+        hf_home = os.environ.get("HF_HOME")
+        if hf_home:
+            base_dir = Path(hf_home).expanduser()
+        else:
+            xdg_cache = os.environ.get("XDG_CACHE_HOME")
+            cache_root = (Path(xdg_cache).expanduser() if xdg_cache
+                          else Path.home() / ".cache")
+            base_dir = cache_root / "huggingface"
+        cache_dir = base_dir / "hub"
+
+    # Downloaded models live in directories named like
+    # "models--sentence-transformers--all-MiniLM-L6-v2".
+    model_dir = cache_dir / f"models--sentence-transformers--{MODEL_NAME}"
+
+    # Heuristic completeness check: at least one snapshot directory exists
+    # and no partially downloaded blob is left over.
+    # NOTE(review): assumes huggingface_hub names in-progress downloads
+    # "<blob>.incomplete" inside "blobs/" — confirm against the pinned version.
+    snapshots_dir = model_dir / "snapshots"
+    has_snapshot = snapshots_dir.is_dir() and any(
+        entry.is_dir() for entry in snapshots_dir.iterdir())
+    has_partial_blob = any((model_dir / "blobs").glob("*.incomplete"))
+
+    if has_snapshot and not has_partial_blob:
+        os.environ.setdefault("HF_HUB_OFFLINE", "1")
+
+
+_set_offline_if_cached()
+
+# This import must come after the env vars above are set, because the
+# transformers library reads them at import time.
+import transformers
+transformers.logging.disable_progress_bar()
+
+
+class EmbeddingProvider:
+    """Local text embedder backed by a sentence-transformers model.
+
+    The default model is MODEL_NAME (all-MiniLM-L6-v2). After construction,
+    ``model`` holds the loaded SentenceTransformer and ``dim`` the embedding
+    dimension it reports.
+    """
+
+    def __init__(self, model_name: str = MODEL_NAME):
+        """Load ``model_name``, downloading it on first use if not cached."""
+        # Deferred import: the library is only loaded when an embedder is
+        # actually created, after the env vars at module top are in place.
+        from sentence_transformers import SentenceTransformer
+
+        # token=False ensures no auth token is sent with any download
+        # (~80MB, one-time, from huggingface.co).
+        loaded = SentenceTransformer(model_name, token=False)
+        self.model = loaded
+        self.dim = loaded.get_sentence_embedding_dimension()
+
+    def embed(self, texts: list[str]) -> list[list[float]]:
+        """Embed many texts at once (batches of 64, with a progress bar).
+
+        Returns one vector per input text, as plain Python lists.
+        """
+        vectors = self.model.encode(
+            texts, batch_size=64, show_progress_bar=True)
+        return vectors.tolist()
+
+    def embed_query(self, text: str) -> list[float]:
+        """Embed a single query string and return its vector as a list."""
+        batch = self.model.encode([text])
+        return batch[0].tolist()
diff --git a/.claude/tools/rag/indexer.py b/.claude/tools/rag/indexer.py
new file mode 100644
index 00000000000..34613092bb1
--- /dev/null
+++ b/.claude/tools/rag/indexer.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+"""Build the RAG vector index for the OpenMC codebase.
+
+This is the index-building half of the RAG pipeline. All operations are local
+once the embedding model has been downloaded and cached (see embeddings.py for
+details on model download, caching, and network behavior). It walks the repo,
+chunks every C++/Python/RST file (via chunker.py), embeds all chunks into
+384-dim vectors (via embeddings.py), and stores them in a local LanceDB
+database on disk. The result is a .claude/cache/rag_index/ directory
+containing two tables — "code" and "docs" — that openmc_search.py queries at
+search time.
+
+Building the full index takes ~5 minutes on a 10-core machine. The bottleneck
+is the embedding step (running all chunks through the MiniLM model on CPU).
+
+Can be run standalone:  python indexer.py
+Or called programmatically:  from indexer import build_index; build_index()
+The MCP server (openmc_mcp_server.py) uses the latter when the agent calls
+openmc_rag_rebuild.
+"""
+
+import lancedb
+import sys
+import time
+from pathlib import Path
+
+# This file lives at .claude/tools/rag/indexer.py. The sys.path insert lets
+# us import sibling modules (embeddings, chunker) when run as a standalone
+# script. When imported from the MCP server, the server has already done this.
+TOOLS_DIR = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(TOOLS_DIR / "rag"))
+
+from embeddings import EmbeddingProvider
+from chunker import chunk_file
+
+
+OPENMC_ROOT = Path(__file__).resolve().parents[3]
+CACHE_DIR = OPENMC_ROOT / ".claude" / "cache"
+INDEX_DIR = CACHE_DIR / "rag_index"
+
+CODE_PATTERNS = [
+    "src/**/*.cpp",
+    "include/openmc/**/*.h",
+    "openmc/**/*.py",
+    "tests/**/*.py",
+    "examples/**/*.py",
+]
+
+DOC_PATTERNS = [
+    "docs/**/*.rst",
+]
+
+
+def collect_chunks(patterns, openmc_root):
+    """Chunk every file under openmc_root that matches any glob pattern.
+
+    Patterns are processed in the given order and the matches of each
+    pattern in sorted path order. Paths containing "__pycache__" are
+    skipped.
+
+    Returns:
+        A flat list of the chunk dicts produced by chunk_file().
+    """
+    collected = []
+    for glob_pattern in patterns:
+        matches = sorted(openmc_root.glob(glob_pattern))
+        for path in matches:
+            if "__pycache__" not in str(path):
+                collected.extend(chunk_file(path, openmc_root))
+    return collected
+
+
+def build_index():
+    """Build or rebuild the complete vector index.
+
+    Collects chunks for CODE_PATTERNS and DOC_PATTERNS, embeds them in one
+    batch, and replaces the "code" and "docs" tables in the LanceDB
+    database at INDEX_DIR. Progress is reported with print().
+
+    Raises:
+        RuntimeError: if no chunks were collected. Raised instead of calling
+            sys.exit() so that a caller importing this function (the MCP
+            server) can report the failure without its process exiting.
+    """
+    start = time.time()
+
+    # Collect all chunks
+    print("Collecting code chunks...")
+    code_chunks = collect_chunks(CODE_PATTERNS, OPENMC_ROOT)
+    print(f"  {len(code_chunks)} code chunks")
+
+    print("Collecting doc chunks...")
+    doc_chunks = collect_chunks(DOC_PATTERNS, OPENMC_ROOT)
+    print(f"  {len(doc_chunks)} doc chunks")
+
+    all_chunks = code_chunks + doc_chunks
+    if not all_chunks:
+        raise RuntimeError("No chunks collected!")
+
+    # Create embeddings
+    print("Creating embedding provider...")
+    embedder = EmbeddingProvider()
+    print(f"  dim={embedder.dim}")
+
+    print("Embedding chunks...")
+    all_embeddings = embedder.embed([c["text"] for c in all_chunks])
+
+    # One record per chunk, in the same order as all_chunks.
+    records = [
+        {
+            "text": chunk["text"],
+            "filepath": chunk["filepath"],
+            "kind": chunk["kind"],
+            "symbol": chunk.get("symbol", ""),
+            "start_line": chunk.get("start_line", 0),
+            "end_line": chunk.get("end_line", 0),
+            "vector": emb,
+        }
+        for chunk, emb in zip(all_chunks, all_embeddings)
+    ]
+
+    # code_chunks come first in all_chunks, so a single split separates them.
+    n_code = len(code_chunks)
+    rows_by_table = {"code": records[:n_code], "docs": records[n_code:]}
+
+    # Build LanceDB tables
+    INDEX_DIR.mkdir(parents=True, exist_ok=True)
+    db = lancedb.connect(str(INDEX_DIR))
+
+    # Table listing differs between LanceDB versions (method name and
+    # return shape), hence the hasattr checks.
+    result = db.table_names() if hasattr(db, "table_names") else db.list_tables()
+    existing = result.tables if hasattr(result, "tables") else list(result)
+
+    # Replace each table: drop a stale one even when there is nothing new to
+    # store in it, and create it only when it has rows.
+    for table_name, rows in rows_by_table.items():
+        if table_name in existing:
+            db.drop_table(table_name)
+        if rows:
+            db.create_table(table_name, rows)
+            print(f"  Created '{table_name}' table: {len(rows)} rows")
+
+    elapsed = time.time() - start
+    print(f"Done in {elapsed:.1f}s")
+
+
+if __name__ == "__main__":
+    # Standalone use keeps the previous behavior: message on stderr and
+    # exit status 1 when nothing could be indexed.
+    try:
+        build_index()
+    except RuntimeError as err:
+        print(f"ERROR: {err}", file=sys.stderr)
+        sys.exit(1)
diff --git a/.claude/tools/rag/openmc_search.py b/.claude/tools/rag/openmc_search.py
new file mode 100644
index 00000000000..4125ee96607
--- /dev/null
+++ b/.claude/tools/rag/openmc_search.py
@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+"""Query the RAG vector index to find semantically related code and docs.
+
+This is the query-time half of the RAG pipeline (the counterpart to indexer.py,
+which builds the index). All operations are local — no network calls are made
+once the embedding model has been downloaded (see embeddings.py for details on
+model download and caching). Given a natural-language query, it embeds the query
+with the same MiniLM model used at index time, then finds the closest chunks in
+the local LanceDB vector database by vector distance (no metric is specified,
+so LanceDB's default applies).
+
+The core functions (get_db_and_embedder, search_table, format_results,
+search_related) are imported by the MCP server for tool calls. The script
+can also be run standalone from the command line.
+
+The "related file" mode works differently from a text query: it reads the
+target file's chunks from the index, combines them into a synthetic query
+vector, and searches for the nearest chunks from *other* files. This surfaces
+files that are semantically similar to the target file.
+
+Usage:
+    openmc_search.py "query"                    # Search code (default)
+    openmc_search.py "query" --docs             # Search documentation
+    openmc_search.py "query" --all              # Search both code and docs
+    openmc_search.py --related src/particle.cpp # Find related code
+    openmc_search.py "query" --top-k 20         # Return more results
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+# Same sys.path setup as indexer.py — needed for standalone CLI use.
+TOOLS_DIR = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(TOOLS_DIR / "rag"))
+
+OPENMC_ROOT = Path(__file__).resolve().parents[3]
+CACHE_DIR = OPENMC_ROOT / ".claude" / "cache"
+INDEX_DIR = CACHE_DIR / "rag_index"
+
+
+def get_db_and_embedder():
+    """Open the index database and create an embedder for queries.
+
+    Returns:
+        A ``(db, embedder)`` tuple: the LanceDB connection for INDEX_DIR and
+        a new EmbeddingProvider.
+
+    Raises:
+        FileNotFoundError: if INDEX_DIR does not exist.
+    """
+    # Heavy dependencies are imported lazily, on first use.
+    import lancedb
+    from embeddings import EmbeddingProvider
+
+    if INDEX_DIR.exists():
+        connection = lancedb.connect(str(INDEX_DIR))
+        return connection, EmbeddingProvider()
+
+    raise FileNotFoundError(
+        "No RAG index found. Call openmc_rag_rebuild() to build one."
+    )
+
+
+def _table_names(db):
+    """Return the database's table names, across LanceDB API variants.
+
+    Uses ``db.table_names()`` when the connection has it, otherwise
+    ``db.list_tables()``. If the returned object has a ``tables``
+    attribute that is returned; otherwise the result is copied to a list.
+    """
+    if hasattr(db, "table_names"):
+        listing = db.table_names()
+    else:
+        listing = db.list_tables()
+
+    if hasattr(listing, "tables"):
+        return listing.tables
+    return list(listing)
+
+
+def search_table(db, embedder, table_name, query, top_k):
+    """Run a text query against one table of the index.
+
+    Args:
+        db: LanceDB connection.
+        embedder: Object whose ``embed_query(text)`` returns the query vector.
+        table_name: Table to search (e.g. "code" or "docs").
+        query: Natural-language query text.
+        top_k: Maximum number of rows to return.
+
+    Returns:
+        The matching rows as a list of dicts, or [] (with a note on stderr)
+        when the table does not exist.
+    """
+    if table_name in _table_names(db):
+        table = db.open_table(table_name)
+        vector = embedder.embed_query(query)
+        return table.search(vector).limit(top_k).to_list()
+
+    print(f"Table '{table_name}' not found in index.", file=sys.stderr)
+    return []
+
+
+def format_results(results, label=""):
+    """Render search result rows as indented plain text.
+
+    Args:
+        results: Row dicts with "filepath", "start_line", "end_line", "kind"
+            and "text"; "_distance" is optional and shown as 0 if absent.
+        label: Optional heading printed as ``=== label ===``.
+
+    Returns:
+        "No results found.\\n" for an empty list; otherwise one numbered
+        header line per row, followed by up to 500 characters of its text
+        indented by two spaces, and a blank line.
+    """
+    if not results:
+        return "No results found.\n"
+
+    lines = [f"=== {label} ===\n"] if label else []
+
+    for rank, row in enumerate(results, 1):
+        location = f"{row['filepath']}:{row['start_line']}-{row['end_line']}"
+        kind = row["kind"]
+        distance = row.get("_distance", 0)
+        lines.append(f"[{rank}] {location} ({kind}, dist={distance:.3f})")
+
+        # Preview only the first 500 characters; mark truncation with "...".
+        body = row["text"]
+        preview = body[:500] + ("\n  ..." if len(body) > 500 else "")
+        lines.extend(f"  {piece}" for piece in preview.split("\n"))
+        lines.append("")
+
+    return "\n".join(lines)
+
+
+def search_related(db, embedder, filepath, top_k):
+    """Find code chunks from other files that are similar to a given file.
+
+    The first chunks of the target file (as stored in the index) are
+    concatenated into a synthetic query, which is then searched against the
+    "code" table with the target file itself excluded.
+
+    Args:
+        db: LanceDB connection.
+        embedder: Object whose ``embed_query(text)`` returns the query vector.
+        filepath: Target file, absolute or relative to the repo root.
+        top_k: Maximum number of rows to return.
+
+    Returns:
+        Up to top_k row dicts, or [] (with a note on stderr) when there is
+        no "code" table or the file has no chunks in the index.
+    """
+    if "code" not in _table_names(db):
+        print("No 'code' table in index.", file=sys.stderr)
+        return []
+
+    table = db.open_table("code")
+
+    # Normalize to the repo-relative form stored in the index. as_posix()
+    # also drops a leading "./", so "./src/x.cpp" matches "src/x.cpp".
+    target = Path(filepath)
+    if target.is_absolute():
+        try:
+            fp = str(target.relative_to(OPENMC_ROOT))
+        except ValueError:
+            # Outside the repo: keep the caller's path and let the lookup
+            # below report that nothing was found.
+            fp = filepath
+    else:
+        fp = target.as_posix()
+
+    # Single quotes are doubled so the path is a valid SQL string literal.
+    safe_fp = fp.replace("'", "''")
+
+    # Get chunks from target file
+    try:
+        target_chunks = table.search().where(
+            f"filepath = '{safe_fp}'"
+        ).limit(50).to_list()
+    except Exception:
+        # LanceDB where clause might not work in all versions. Fall back to
+        # scanning the table through Arrow (already a LanceDB dependency)
+        # rather than pandas, which requirements.txt does not install. Only
+        # the columns needed below are read, not the vectors.
+        rows = table.to_arrow().select(
+            ["filepath", "text", "start_line"]).to_pylist()
+        target_chunks = [row for row in rows if row["filepath"] == fp][:50]
+
+    if not target_chunks:
+        print(f"No chunks found for '{fp}'", file=sys.stderr)
+        return []
+
+    # Filtered lookups return rows in no guaranteed order; sort so the query
+    # is always built from the head of the file.
+    target_chunks.sort(key=lambda chunk: chunk.get("start_line", 0))
+    combined_text = " ".join(c["text"][:200] for c in target_chunks[:5])
+    query_vec = embedder.embed_query(combined_text)
+
+    # The query is built from the target file's own text, so its own chunks
+    # are the nearest neighbours. Exclude them inside the search (pre-filter)
+    # so that top_k results from other files are actually returned.
+    try:
+        results = table.search(query_vec).where(
+            f"filepath != '{safe_fp}'", prefilter=True
+        ).limit(top_k).to_list()
+    except Exception:
+        # Filtering unsupported: over-fetch by the number of target chunks
+        # found above and rely on the post-filter below.
+        results = table.search(query_vec).limit(
+            top_k + len(target_chunks)).to_list()
+
+    # Post-filter kept as a safety net for either path.
+    return [r for r in results if r["filepath"] != fp][:top_k]
+
+
+def main():
+    """Command-line entry point: parse arguments, run one search, print it.
+
+    Exits with status 1 after printing the help text when neither a query
+    nor --related is given.
+    """
+    cli = argparse.ArgumentParser(
+        description="Semantic search across OpenMC codebase and docs",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""examples:
+  %(prog)s "particle random number seed initialization"
+  %(prog)s "how to define tallies" --docs
+  %(prog)s "weight window variance reduction" --all
+  %(prog)s "where is cross section data loaded" --top-k 15
+  %(prog)s --related src/simulation.cpp
+  %(prog)s --related src/particle_restart.cpp --top-k 5""",
+    )
+    cli.add_argument("query", nargs="?", help="Search query")
+    cli.add_argument("--docs", action="store_true",
+                     help="Search documentation instead of code")
+    cli.add_argument("--all", action="store_true",
+                     help="Search both code and documentation")
+    cli.add_argument("--related", metavar="FILE",
+                     help="Find code related to a given file")
+    cli.add_argument("--top-k", type=int, default=10,
+                     help="Number of results (default: 10)")
+    opts = cli.parse_args()
+
+    if not (opts.query or opts.related):
+        cli.print_help()
+        sys.exit(1)
+
+    db, embedder = get_db_and_embedder()
+
+    # --related takes precedence over every query mode.
+    if opts.related:
+        related = search_related(db, embedder, opts.related, opts.top_k)
+        print(format_results(related, f"Code related to {opts.related}"))
+        return
+
+    # (table, heading) pairs to search, in output order; --all beats --docs.
+    if opts.all:
+        sections = [("code", "Code"), ("docs", "Documentation")]
+    elif opts.docs:
+        sections = [("docs", "Documentation")]
+    else:
+        sections = [("code", "Code")]
+
+    # Run every search before printing anything, as the original did.
+    found = [
+        (search_table(db, embedder, table, opts.query, opts.top_k), heading)
+        for table, heading in sections
+    ]
+    for rows, heading in found:
+        print(format_results(rows, heading))
+
+
+# Script entry point for standalone command-line use (see "Usage" in the
+# module docstring).
+if __name__ == "__main__":
+    main()
diff --git a/.claude/tools/requirements.txt b/.claude/tools/requirements.txt
new file mode 100644
index 00000000000..bd5d38d6c50
--- /dev/null
+++ b/.claude/tools/requirements.txt
@@ -0,0 +1,8 @@
+# MCP server
+mcp>=1.0.0
+
+# Vector database
+lancedb>=0.15.0
+
+# Embeddings (local, no API key)
+sentence-transformers>=2.7.0
diff --git a/.claude/tools/start_server.sh b/.claude/tools/start_server.sh
new file mode 100755
index 00000000000..c111dd73e88
--- /dev/null
+++ b/.claude/tools/start_server.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Bootstrap the Python venv (if needed) and start the OpenMC MCP server.
+#
+# The MCP client talks to the server over this process's stdin/stdout, so
+# every bootstrap step below sends its own output to stderr; only the server
+# itself may write to stdout.
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+CACHE_DIR="$(dirname "$SCRIPT_DIR")/cache"
+VENV_DIR="$CACHE_DIR/.venv"
+# Touched only after a fully successful install; its mtime records which
+# version of requirements.txt the venv was built from.
+SENTINEL="$VENV_DIR/.installed"
+REQUIREMENTS="$SCRIPT_DIR/requirements.txt"
+
+if ! command -v python3 >/dev/null 2>&1; then
+    echo "Error: python3 not found on PATH." >&2
+    exit 1
+fi
+
+if ! python3 -c 'import sys; assert sys.version_info >= (3,12)' 2>/dev/null; then
+    echo "Error: Python 3.12+ is required." >&2
+    exit 1
+fi
+
+# (Re)build the venv when it was never completed or when requirements.txt
+# has been modified since the last successful install.
+if [ ! -f "$SENTINEL" ] || [ "$REQUIREMENTS" -nt "$SENTINEL" ]; then
+    rm -rf "$VENV_DIR"
+    mkdir -p "$CACHE_DIR"
+    python3 -m venv "$VENV_DIR" >&2
+
+    # stdout is redirected so pip can never corrupt the JSON-RPC stream.
+    if ! "$VENV_DIR/bin/pip" install -q -r "$REQUIREMENTS" >&2; then
+        # Drop the half-built venv so the next launch starts from scratch.
+        rm -rf "$VENV_DIR"
+        echo "Error: pip install failed; removed $VENV_DIR. Fix the cause and retry." >&2
+        exit 1
+    fi
+
+    touch "$SENTINEL"
+fi
+
+# exec replaces this shell so the server inherits stdin/stdout directly.
+exec "$VENV_DIR/bin/python" "$SCRIPT_DIR/openmc_mcp_server.py"
diff --git a/.gitignore b/.gitignore
index 780059f3072..dd8dfb14a96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,5 +104,8 @@ CMakeSettings.json
 # Visual Studio Code configuration files
 .vscode/
 
+# Claude Code agent tools (cached/generated artifacts)
+.claude/cache/
+
 # Python pickle files
 *.pkl
diff --git a/.mcp.json b/.mcp.json
new file mode 100644
index 00000000000..bdfaa538e1a
--- /dev/null
+++ b/.mcp.json
@@ -0,0 +1,9 @@
+{
+  "mcpServers": {
+    "openmc-code-tools": {
+      "type": "stdio",
+      "command": "bash",
+      "args": [".claude/tools/start_server.sh"]
+    }
+  }
+}
diff --git a/AGENTS.md b/AGENTS.md
index dce32d0e476..19abba7d97b 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -42,6 +42,56 @@ OpenMC uses a git flow branching model with two primary branches:
 
 When reviewing code changes in this repository, use the `reviewing-openmc-code` skill.
 
+## Codebase Navigation Tools
+
+Two MCP tools are registered in `.mcp.json` at the repo root and appear
+automatically in any MCP-capable agent session.
+
+**`openmc_rag_search`** — Semantic search across the codebase (C++, Python, RST
+docs). Finds code by meaning, not just text match. Surfaces related code across
+subsystems even when naming differs (e.g., "particle RNG seeding" finds code
+across transport, restart, and random ray modes — files you would never find
+with `grep "particle seed"`). The index uses a small 22M-param embedding model
+(384-dim). Phrase-level natural-language queries work much better than single
+keywords or symbol names.
+
+**`openmc_rag_rebuild`** — Rebuild the RAG vector index. Call after pulling new
+code or switching branches. The first RAG search of each session will report
+the index status and ask whether to rebuild — you can also call this explicitly.
+
+### Why RAG matters
+
+OpenMC is large enough that changes in one subsystem can silently break
+invariants that distant subsystems depend on — and those distant files often
+use different naming, so grep won't find them. The RAG search finds code by
+meaning, surfacing files you wouldn't have thought to open.
+
+An agent reviewed a large OpenMC PR without RAG. It found 1 of 11 serious
+bugs. Its post-mortem:
+
+> **I treated the diff as a closed system.** I verified internal consistency of
+> the changed code obsessively, but never built a global understanding of how
+> the changed code fits into the wider codebase. The diff altered assumptions
+> that code elsewhere silently relied on — but I couldn't see that because I
+> never looked beyond the diff. I couldn't see the forest for the trees.
+>
+> **Why I resisted RAG:** Overconfidence. My internal model was "I can see the
+> diff, I understand the data structures, I can trace the logic." The diff felt
+> self-contained. RAG felt like it would return noisy results about tangentially
+> related code. But in a codebase this large, changes in one subsystem can
+> quietly break invariants that distant subsystems depend on — and you need
+> global awareness to foresee that.
+>
+> **In the post-mortem**, I re-ran the RAG queries I should have run during the
+> review. They directly surfaced the files containing the bugs I missed — files
+> I never thought to open because they weren't in the diff.
+
+The takeaway: when reviewing or modifying code, ask yourself "what else in this
+codebase might depend on the behavior I'm changing?" If you aren't sure, that's
+a good time for a RAG query. It won't replace the grep-based investigation you
+should already be doing — but it can surface files you wouldn't have thought to
+open.
+
 ### Workflow for contributors
 
 1. Create a feature/bugfix branch off `develop`
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 00000000000..9538b5ddc4a
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,14 @@
+## OpenMC Codebase Tools
+
+Read the FULL `AGENTS.md` in this directory before starting work. It contains
+project context, coding conventions, and documentation of the RAG search tools
+registered in `.mcp.json`.
+
+### Claude Code-specific: first-call behavior
+
+The first `openmc_rag_search` call of each session returns an index status
+message instead of search results. When this happens, you MUST use the
+`AskUserQuestion` tool to present the rebuild/use-existing choice to the user.
+Do not ask conversationally — always use the widget. Do not skip this step even
+if the index looks current — the user may have uncommitted changes that warrant
+a rebuild.
diff --git a/docs/source/devguide/agentic-tools.rst b/docs/source/devguide/agentic-tools.rst
new file mode 100644
index 00000000000..fed377cc075
--- /dev/null
+++ b/docs/source/devguide/agentic-tools.rst
@@ -0,0 +1,104 @@
+.. _devguide_agentic_tools:
+
+===========================
+Agentic Development Tools
+===========================
+
+OpenMC ships a set of tools designed for AI coding agents (such as
+`Claude Code`_) that agents can use to navigate and understand the codebase.
+
+.. _Claude Code: https://claude.ai/code
+
+Motivation
+----------
+
+Agentic tools like Claude Code are skilled at using grep to navigate and
+understand large code bases. However, grep can only find exact text matches —
+it cannot discover code that is *conceptually* related but uses different
+naming. Without the "global view" of the codebase that a human developer
+builds up over time, the agent is generally blind to any file it hasn't
+tokenized fully. While it can grep to see who else calls a function, it
+remains blind to other areas that are related but do not share identical
+naming conventions.
+
+This problem is mitigated somewhat by using a model with a longer context
+window. OpenMC has roughly 1 million tokens of C++ and roughly 1 million
+tokens of Python. While Claude Code in early 2026 only has a context window
+of 200k tokens, beta versions have extended context windows of 1M tokens,
+and it's not unreasonable to assume that models may be available in the near
+future that greatly exceed these limits.
+
+However, even assuming the entire repository can be fit within a context
+window, there are several downsides to doing this.
+`Model performance degrades significantly as context size increases`_.
+Benchmark results are greatly improved if the model has less
+garbage to pick through. Additionally, API usage is typically
+billed as tokens in/out per turn, so as the context grows these
+costs become much larger. As such, there is still significant
+motivation to solve the above problem: drawing only relevant
+information into context maximizes model performance and
+minimizes costs.
+
+Setup
+-----
+
+The tools are registered as an `MCP (Model Context Protocol)`_ server in
+``.mcp.json`` at the repository root. AI agents that support MCP (such as
+Claude Code) discover them automatically on session start. The underlying
+Python scripts can also be run directly from the command line.
+
+All tools run entirely locally — no API keys or external service accounts are
+required. Python dependencies are installed automatically into an isolated
+virtual environment at ``.claude/cache/.venv/`` on first use.
+
+.. _Model performance degrades significantly as context size increases: https://www.anthropic.com/news/claude-opus-4-6
+.. _MCP (Model Context Protocol): https://modelcontextprotocol.io
+
+RAG Semantic Search
+-------------------
+
+The RAG (Retrieval-Augmented Generation) semantic search addresses this
+problem — it finds code by meaning, not just text match, surfacing related code
+across subsystems that ``grep`` would miss entirely. Two MCP tools are provided:
+
+- **openmc_rag_search** — Given a natural-language query, returns the most
+  relevant code chunks with file paths, line numbers, and a preview. Can search
+  code, documentation, or both. Can also find code related to a given file.
+- **openmc_rag_rebuild** — Rebuilds the search index. Should be called after
+  pulling new code or switching branches.
+
+How it works
+^^^^^^^^^^^^
+
+The search pipeline runs entirely on your local CPU:
+
+1. **Chunking.** All C++, Python, and RST files are split into overlapping
+   fixed-size windows (~1000 characters, 25% overlap). This ensures every line
+   of code appears in at least one chunk and lines near a chunk boundary in two.
+
+2. **Embedding.** Each chunk is embedded into a 384-dimensional vector using
+   the `all-MiniLM-L6-v2`_ sentence-transformer model (22 million parameters).
+   This model runs on CPU with no GPU required. No API key is needed — the
+   model weights are downloaded once from Hugging Face and cached locally.
+
+3. **Indexing.** The vectors are stored in a local LanceDB_ database on disk.
+   Building the full index takes approximately 5 minutes on a machine with
+   10 CPU cores. The index is stored in ``.claude/cache/rag_index/`` and
+   persists across sessions.
+
+4. **Searching.** Your query is embedded using the same model, and the closest
+   chunks are retrieved by vector similarity. Results include the file path,
+   line range, file type, similarity distance, and a text preview.
+
+.. _all-MiniLM-L6-v2: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
+.. _LanceDB: https://lancedb.com
+
+Requirements
+^^^^^^^^^^^^
+
+No system dependencies beyond **Python 3.12+** with ``pip``. An internet
+connection is required on first use to download the Python packages and
+embedding model weights; subsequent runs are fully offline. The Python packages
+(``sentence-transformers``, ``lancedb``) and their dependencies (including
+PyTorch, ~2GB) are installed automatically into an isolated virtual environment
+on first use.
diff --git a/docs/source/devguide/index.rst b/docs/source/devguide/index.rst
index 2e131e09490..53b9f585385 100644
--- a/docs/source/devguide/index.rst
+++ b/docs/source/devguide/index.rst
@@ -14,6 +14,7 @@ other related topics.
 
     contributing
     workflow
+    agentic-tools
     styleguide
     policies
     tests