From fed44e71fe16aba06055929ae47ed69e65535c9f Mon Sep 17 00:00:00 2001
From: PizzaSober <junxingmeng@gmail.com>
Date: Tue, 27 Jan 2026 19:16:04 +0800
Subject: [PATCH] sync: paper-tool

---
 service/app/mcp/literature.py                 | 463 ------------------
 service/app/tools/builtin/__init__.py         |   4 +
 service/app/tools/builtin/literature.py       | 390 +++++++++++++++
 service/app/tools/capabilities.py             |   1 +
 service/app/tools/prepare.py                  |   6 +
 service/app/tools/registry.py                 |  14 +
 .../components/ChatToolbar/ToolSelector.tsx   |  33 +-
 web/src/core/agent/toolConfig.ts              |  19 +
 web/src/i18n/locales/en/app.json              |   2 +
 web/src/i18n/locales/zh/app.json              |   2 +
 10 files changed, 461 insertions(+), 473 deletions(-)
 delete mode 100644 service/app/mcp/literature.py
 create mode 100644 service/app/tools/builtin/literature.py

diff --git a/service/app/mcp/literature.py b/service/app/mcp/literature.py
deleted file mode 100644
index 5c5e55ca..00000000
--- a/service/app/mcp/literature.py
+++ /dev/null
@@ -1,463 +0,0 @@
-"""
-Literature MCP Server - Multi-source academic literature search
-
-Provides tools for searching academic literature from multiple data sources
-(OpenAlex, Semantic Scholar, PubMed, etc.) with unified interface.
-"""
-
-import json
-import logging
-from datetime import datetime
-from typing import Any
-
-import httpx
-from fastmcp import FastMCP
-
-from app.utils.literature import SearchRequest, WorkDistributor
-
-logger = logging.getLogger(__name__)
-
-TRUE_VALUES = frozenset({"true", "1", "yes"})
-FALSE_VALUES = frozenset({"false", "0", "no"})
-
-# Create FastMCP instance
-mcp = FastMCP("literature")
-
-# Metadata for MCP server
-__mcp_metadata__ = {
-    "name": "Literature Search",
-    "description": "Search academic literature from multiple sources with advanced filtering",
-    "version": "1.0.0",
-}
-
-
-@mcp.tool()
-async def search_literature(
-    query: str,
-    mailto: str | None = None,
-    author: str | None = None,
-    institution: str | None = None,
-    source: str | None = None,
-    year_from: str | None = None,
-    year_to: str | None = None,
-    is_oa: str | None = None,
-    work_type: str | None = None,
-    language: str | None = None,
-    is_retracted: str | None = None,
-    has_abstract: str | None = None,
-    has_fulltext: str | None = None,
-    sort_by: str = "relevance",
-    max_results: str | int = 50,
-    data_sources: list[str] | None = None,
-    include_abstract: str | bool = False,
-) -> str:
-    """
-    Search academic literature from multiple data sources (OpenAlex, Semantic Scholar, PubMed, etc.)
-
-    🔑 STRONGLY RECOMMENDED: Always provide a valid email address (mailto parameter)
-    ═════════════════════════════════════════════════════════════════════════════════
-
-    📊 Performance Difference:
-    - WITH email (mailto):      10 requests/second (fast, ideal for large searches)
-    - WITHOUT email (mailto):   1 request/second  (slow, sequential processing)
-
-    ⚠️ Impact: Omitting email can cause 10x slowdown or timeouts for large result sets.
-    Production research should ALWAYS include email. Example: "researcher@university.edu"
-
-    Response Format Overview
-    ════════════════════════
-    The tool returns TWO sections automatically:
-
-    1️⃣ EXECUTIVE SUMMARY
-       - Key statistics (total found, unique count, sources)
-       - Average citations and open access rate
-       - Publication year range
-       - Warning/issue resolution status
-
-    2️⃣ DETAILED RESULTS (Complete JSON with URLs)
-       - Each paper includes:
-         • ✅ Valid URLs (access_url; doi is a raw identifier)
-         • Title, Authors (first 5), Publication Year
-         • Citation Count, Journal, Open Access Status
-         • Abstract (only if include_abstract=True)
-       - Format: JSON array for easy parsing/import
-       - All URLs are validated and functional
-
-    Args:
-        query: Search keywords (e.g., "machine learning", "CRISPR", "cancer immunotherapy")
-               [REQUIRED] Most important parameter for accurate results
-
-        mailto: Email address to enable fast API pool at OpenAlex
-               [⭐ STRONGLY RECOMMENDED - includes your email]
-               Examples: "researcher@mit.edu", "student@university.edu", "name@company.com"
-               Impact: 10x faster searches. Production users MUST provide this.
-               Note: Email is private, only used for API identification.
-
-        author: OPTIONAL - Filter by author name (e.g., "Albert Einstein", "Jennifer Doudna")
-                Will auto-correct common misspellings if not found exactly
-
-        institution: OPTIONAL - Filter by affiliation (e.g., "MIT", "Harvard", "Stanford University")
-                    Partial name matching supported
-
-        source: OPTIONAL - Filter by journal/venue (e.g., "Nature", "Science", "JAMA")
-                Matches both journal names and abbreviated titles
-
-        year_from: OPTIONAL - Start year (e.g., "2020" or 2020)
-                  Accepts string or integer, will auto-clamp to valid range (1700-2026)
-
-        year_to: OPTIONAL - End year (e.g., "2024" or 2024)
-                Accepts string or integer, will auto-clamp to valid range (1700-2026)
-                If year_from > year_to, they will be automatically swapped
-
-        is_oa: OPTIONAL - Open access filter ("true"/"false"/"yes"/"no")
-               "true" returns ONLY open access papers with direct links
-
-        work_type: OPTIONAL - Filter by publication type
-                  Options: "article", "review", "preprint", "book", "dissertation", "dataset", etc.
-
-        language: OPTIONAL - Filter by publication language (e.g., "en", "zh", "ja", "fr", "de")
-                 "en" = English only, "zh" = Chinese only, etc.
-
-        is_retracted: OPTIONAL - Retracted paper filter ("true"/"false")
-                     "false" excludes retracted works (recommended for research)
-                     "true" shows ONLY retracted papers (for auditing)
-
-        has_abstract: OPTIONAL - Require abstract ("true"/"false")
-                     "true" returns only papers with abstracts
-
-        has_fulltext: OPTIONAL - Require full text access ("true"/"false")
-                     "true" returns only papers with available full text
-
-        sort_by: Sort results - "relevance" (default), "cited_by_count", "publication_date"
-                 "cited_by_count" useful for influential papers
-                 "publication_date" shows most recent first
-
-        max_results: Result limit (default: 50, range: 1-1000, accepts string or int)
-                    More results = slower query. Recommended: 50-200 for research
-
-        data_sources: Advanced - Sources to query (default: ["openalex"])
-                     Can include: ["openalex", "semantic_scholar", "pubmed"]
-
-        include_abstract: Include full abstracts in JSON output? (default: False)
-                 True = include full abstracts for detailed review
-                 False = save token budget by excluding abstracts
-
-    Returns:
-        Markdown report with two sections:
-
-        📋 Section 1: EXECUTIVE SUMMARY
-           └─ Search conditions recap
-           └─ Total results found & unique count
-           └─ Statistics: avg citations, OA rate, year range
-           └─ ⚠️ Any warnings/filter issues & resolutions
-
-        📊 Section 2: COMPLETE RESULTS (JSON Array)
-           └─ Each paper object contains:
-             • "doi": Raw DOI string (not a URL)
-             • "title": Paper title
-             • "authors": Author names [first 5 only to save tokens]
-             • "publication_year": Publication date
-             • "cited_by_count": Citation impact metric
-             • "journal": Journal/venue name
-             • "description": Short description about the paper
-           └─ access_url is validated and immediately accessible
-           └─ Copy JSON directly into spreadsheet, database, or reference manager
-
-    Usage Tips (READ THIS!)
-    ══════════════════════
-    ✅ DO:
-       - Always provide mailto (10x faster searches)
-       - Start simple: query + mailto first
-       - Review results before refining search
-       - Use filters incrementally to narrow down
-       - Set include_abstract=True only for final review (saves API calls)
-
-    ❌ DON'T:
-       - Make multiple searches without reviewing first results
-       - Use vague keywords like "research" or "analysis"
-       - Search without mailto unless doing quick test
-       - Ignore the "Next Steps Guide" section
-       - Omit email for production/important research
-    """
-    try:
-        # Validate query early to avoid accidental broad searches
-        if not query or not str(query).strip():
-            return "❌ Invalid input: query cannot be empty."
-        if len(str(query).strip()) < 3:
-            return "❌ Invalid input: query is too short (minimum 3 characters)."
-
-        # Convert string parameters to proper types
-        year_from_int = int(year_from) if year_from and str(year_from).strip() else None
-        year_to_int = int(year_to) if year_to and str(year_to).strip() else None
-
-        # Clamp year ranges (warn but don't block search)
-        max_year = datetime.now().year + 1
-        year_warning = ""
-        if year_from_int is not None and year_from_int > max_year:
-            year_warning += f"year_from {year_from_int}→{max_year}. "
-            year_from_int = max_year
-        if year_to_int is not None and year_to_int < 1700:
-            year_warning += f"year_to {year_to_int}→1700. "
-            year_to_int = 1700
-
-        # Ensure year_from <= year_to when both are set
-        if year_from_int is not None and year_to_int is not None and year_from_int > year_to_int:
-            year_warning += f"year_from {year_from_int} and year_to {year_to_int} swapped to maintain a valid range. "
-            year_from_int, year_to_int = year_to_int, year_from_int
-
-        # Convert is_oa to boolean
-        bool_warning_parts: list[str] = []
-
-        def _parse_bool_field(raw: str | bool | None, field_name: str) -> bool | None:
-            if raw is None:
-                return None
-            if isinstance(raw, bool):
-                return raw
-            val = str(raw).strip().lower()
-            if val in TRUE_VALUES:
-                return True
-            if val in FALSE_VALUES:
-                return False
-            bool_warning_parts.append(f"{field_name}={raw!r} not recognized; ignoring this filter.")
-            return None
-
-        # Convert bool-like fields
-        is_oa_bool = _parse_bool_field(is_oa, "is_oa")
-        is_retracted_bool = _parse_bool_field(is_retracted, "is_retracted")
-        has_abstract_bool = _parse_bool_field(has_abstract, "has_abstract")
-        has_fulltext_bool = _parse_bool_field(has_fulltext, "has_fulltext")
-
-        # Convert max_results to int with early clamping
-        max_results_warning = ""
-        try:
-            max_results_int = int(max_results) if max_results else 50
-        except (TypeError, ValueError):
-            max_results_warning = "⚠️ max_results is not a valid integer; using default 50. "
-            max_results_int = 50
-
-        if max_results_int < 1:
-            max_results_warning += f"max_results {max_results_int}→50 (minimum is 1). "
-            max_results_int = 50
-        elif max_results_int > 1000:
-            max_results_warning += f"max_results {max_results_int}→1000 (maximum is 1000). "
-            max_results_int = 1000
-
-        # Convert include_abstract to bool
-        include_abstract_bool = str(include_abstract).lower() in {"true", "1", "yes"} if include_abstract else False
-
-        openalex_email = mailto.strip() if mailto and str(mailto).strip() else None
-
-        logger.info(
-            "Literature search requested: query=%r, mailto=%s, max_results=%d",
-            query,
-            "<redacted>" if openalex_email else None,
-            max_results_int,
-        )
-
-        # Create search request with converted types
-        request = SearchRequest(
-            query=query,
-            author=author,
-            institution=institution,
-            source=source,
-            year_from=year_from_int,
-            year_to=year_to_int,
-            is_oa=is_oa_bool,
-            work_type=work_type,
-            language=language,
-            is_retracted=is_retracted_bool,
-            has_abstract=has_abstract_bool,
-            has_fulltext=has_fulltext_bool,
-            sort_by=sort_by,
-            max_results=max_results_int,
-            data_sources=data_sources,
-        )
-
-        # Execute search
-        async with WorkDistributor(openalex_email=openalex_email) as distributor:
-            result = await distributor.search(request)
-
-        if year_warning:
-            result.setdefault("warnings", []).append(f"⚠️ Year adjusted: {year_warning.strip()}")
-        if bool_warning_parts:
-            result.setdefault("warnings", []).append("⚠️ Boolean filter issues: " + " ".join(bool_warning_parts))
-        if max_results_warning:
-            result.setdefault("warnings", []).append(max_results_warning.strip())
-
-        # Format output
-        return _format_search_result(request, result, include_abstract_bool)
-
-    except ValueError as e:
-        logger.warning(f"Literature search validation error: {e}")
-        return f"❌ Invalid input: {str(e)}"
-    except httpx.HTTPError as e:
-        logger.error(f"Literature search network error: {e}", exc_info=True)
-        return "❌ Network error while contacting literature sources. Please try again later."
-    except Exception as e:
-        logger.error(f"Literature search failed: {e}", exc_info=True)
-        return "❌ Unexpected error during search. Please retry or contact support."
-
-
-def _format_search_result(request: SearchRequest, result: dict[str, Any], include_abstract: bool = False) -> str:
-    """
-    Format search results into human-readable report + JSON data
-
-    Args:
-        request: Original search request
-        result: Search result from WorkDistributor
-        include_abstract: Whether to include abstracts in JSON (default: False to save tokens)
-
-    Returns:
-        Formatted markdown report with embedded JSON
-    """
-    works = result["works"]
-
-    # Build report sections
-    sections: list[str] = ["# Literature Search Report\n"]
-
-    # Warnings and resolution status (if any)
-    if warnings := result.get("warnings", []):
-        sections.extend(["## ⚠️ Warnings and Resolution Status\n", *warnings, ""])
-
-    # Search conditions
-    conditions: list[str] = [
-        f"- **Query**: {request.query}",
-        *([f"- **Author**: {request.author}"] if request.author else []),
-        *([f"- **Institution**: {request.institution}"] if request.institution else []),
-        *([f"- **Source**: {request.source}"] if request.source else []),
-        *(
-            [f"- **Year Range**: {request.year_from or '...'} - {request.year_to or '...'}"]
-            if request.year_from or request.year_to
-            else []
-        ),
-        *([f"- **Open Access Only**: {'Yes' if request.is_oa else 'No'}"] if request.is_oa is not None else []),
-        *([f"- **Work Type**: {request.work_type}"] if request.work_type else []),
-        *([f"- **Language**: {request.language}"] if request.language else []),
-        *(
-            [f"- **Exclude Retracted**: {'No' if request.is_retracted else 'Yes'}"]
-            if request.is_retracted is not None
-            else []
-        ),
-        *(
-            [f"- **Require Abstract**: {'Yes' if request.has_abstract else 'No'}"]
-            if request.has_abstract is not None
-            else []
-        ),
-        *(
-            [f"- **Require Full Text**: {'Yes' if request.has_fulltext else 'No'}"]
-            if request.has_fulltext is not None
-            else []
-        ),
-        f"- **Sort By**: {request.sort_by}",
-        f"- **Max Results**: {request.max_results}",
-    ]
-    sections.extend(["## Search Conditions\n", "\n".join(conditions), ""])
-
-    # Check if no results
-    if not works:
-        sections.extend(["## ❌ No Results Found\n", "**Suggestions to improve your search:**\n"])
-        suggestions: list[str] = [
-            "1. **Simplify keywords**: Try broader or different terms",
-            *(["2. **Remove author filter**: Author name may not be recognized"] if request.author else []),
-            *(["3. **Remove institution filter**: Try without institution constraint"] if request.institution else []),
-            *(["4. **Remove source filter**: Try without journal constraint"] if request.source else []),
-            *(
-                ["5. **Expand year range**: Current range may be too narrow"]
-                if request.year_from or request.year_to
-                else []
-            ),
-            *(["6. **Remove open access filter**: Include non-OA papers"] if request.is_oa else []),
-            "7. **Check spelling**: Verify all terms are spelled correctly",
-        ]
-        sections.extend(["\n".join(suggestions), ""])
-        return "\n".join(sections)
-
-    # Statistics and overall insights
-    total_count = result["total_count"]
-    unique_count = result["unique_count"]
-    sources = result["sources"]
-
-    stats: list[str] = [
-        f"- **Total Found**: {total_count} works",
-        f"- **After Deduplication**: {unique_count} works",
-    ]
-    source_info = ", ".join(f"{name}: {count}" for name, count in sources.items())
-    stats.append(f"- **Data Sources**: {source_info}")
-
-    # Add insights
-    avg_citations = sum(w.cited_by_count for w in works) / len(works)
-    stats.append(f"- **Average Citations**: {avg_citations:.1f}")
-
-    oa_count = sum(w.is_oa for w in works)
-    oa_ratio = (oa_count / len(works)) * 100
-    stats.append(f"- **Open Access Rate**: {oa_ratio:.1f}% ({oa_count}/{len(works)})")
-
-    if years := [w.publication_year for w in works if w.publication_year]:
-        stats.append(f"- **Year Range**: {min(years)} - {max(years)}")
-
-    sections.extend(["## Search Statistics\n", "\n".join(stats), ""])
-
-    # Complete JSON list
-    sections.extend(
-        [
-            "## Complete Works List (JSON)\n",
-            "The following JSON contains all works with full abstracts:\n"
-            if include_abstract
-            else "The following JSON contains all works (abstracts excluded to save tokens):\n",
-            "```json",
-        ]
-    )
-
-    # Convert works to dict for JSON serialization
-    works_dict = []
-    for work in works:
-        work_data = {
-            "id": work.id,
-            "doi": work.doi,
-            "title": work.title,
-            "authors": work.authors[:5],  # Limit to first 5 authors
-            "publication_year": work.publication_year,
-            "cited_by_count": work.cited_by_count,
-            "journal": work.journal,
-            "primary_institution": work.primary_institution,
-            "is_oa": work.is_oa,
-            "access_url": work.access_url,
-            "source": work.source,
-        }
-        # Only include abstract if requested
-        if include_abstract and work.abstract:
-            work_data["abstract"] = work.abstract
-        works_dict.append(work_data)
-
-    sections.extend([json.dumps(works_dict, indent=2, ensure_ascii=False), "```", ""])
-
-    # Next steps guidance - prevent infinite loops
-    sections.extend(["---", "## 🎯 Next Steps Guide\n", "**Before making another search, consider:**\n"])
-    next_steps: list[str] = [
-        *(["✓ **Results found** - Review the JSON data above for your analysis"] if unique_count > 0 else []),
-        *(
-            [
-                f"⚠️ **Result limit reached** ({request.max_results}) - "
-                "Consider narrowing filters (author, year, journal) for more targeted results"
-            ]
-            if unique_count >= request.max_results
-            else []
-        ),
-        *(
-            ["💡 **Few results** - Consider broadening your search by removing some filters"]
-            if 0 < unique_count < 10
-            else []
-        ),
-        "",
-        "**To refine your search:**",
-        "- If too many results → Add more specific filters (author, institution, journal, year)",
-        "- If too few results → Remove filters or use broader keywords",
-        "- If wrong results → Check filter spelling and try variations",
-        "",
-        "⚠️ **Important**: Avoid making multiple similar searches without reviewing results first!",
-        "Each search consumes API quota and context window. Make targeted, deliberate queries.",
-    ]
-
-    sections.append("\n".join(next_steps))
-
-    return "\n".join(sections)
diff --git a/service/app/tools/builtin/__init__.py b/service/app/tools/builtin/__init__.py
index 0b2c48e0..2ccc256e 100644
--- a/service/app/tools/builtin/__init__.py
+++ b/service/app/tools/builtin/__init__.py
@@ -11,11 +11,13 @@
 - image: Image generation and analysis
 - memory: Conversation history search (disabled)
 - research: Deep research workflow tools (component-internal, not exported here)
+- literature: Literature search and normalization
 """
 
 from app.tools.builtin.fetch import create_web_fetch_tool
 from app.tools.builtin.image import create_image_tools, create_image_tools_for_agent
 from app.tools.builtin.knowledge import create_knowledge_tools, create_knowledge_tools_for_agent
+from app.tools.builtin.literature import create_literature_search_tool
 from app.tools.builtin.memory import create_memory_tools, create_memory_tools_for_agent
 from app.tools.builtin.search import create_web_search_tool
 
@@ -24,6 +26,8 @@
     "create_web_search_tool",
     # Fetch
     "create_web_fetch_tool",
+    # Literature
+    "create_literature_search_tool",
     # Knowledge
     "create_knowledge_tools",
     "create_knowledge_tools_for_agent",
diff --git a/service/app/tools/builtin/literature.py b/service/app/tools/builtin/literature.py
new file mode 100644
index 00000000..fdb0ee27
--- /dev/null
+++ b/service/app/tools/builtin/literature.py
@@ -0,0 +1,390 @@
+"""
+Literature Search Tool
+
+LangChain tool for searching academic literature from multiple data sources
+(OpenAlex, Semantic Scholar, PubMed, etc.) with unified interface.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from datetime import datetime
+from typing import Any, Literal
+
+import httpx
+from langchain_core.tools import BaseTool, StructuredTool
+from pydantic import BaseModel, Field
+
+from app.utils.literature import SearchRequest, WorkDistributor
+
+logger = logging.getLogger(__name__)
+
+TRUE_VALUES = frozenset({"true", "1", "yes"})  # NOTE(review): no visible use in this module — confirm still needed
+FALSE_VALUES = frozenset({"false", "0", "no"})  # NOTE(review): no visible use in this module — confirm still needed
+
+
+class LiteratureSearchInput(BaseModel):
+    """Pydantic argument schema for the literature search tool; only `query` is required."""
+
+    query: str = Field(
+        description="Search keywords (e.g., 'machine learning', 'CRISPR', 'cancer immunotherapy'). "
+        "Most important parameter for accurate results."
+    )
+    mailto: str | None = Field(
+        default=None,
+        description="Email address to enable fast API pool at OpenAlex. "
+        "STRONGLY RECOMMENDED - provides 10x faster searches. "
+        "Example: 'researcher@university.edu'",
+    )
+    author: str | None = Field(
+        default=None,
+        description="Filter by author name (e.g., 'Albert Einstein', 'Jennifer Doudna'). "
+        "Will auto-correct common misspellings.",
+    )
+    institution: str | None = Field(
+        default=None,
+        description="Filter by affiliation (e.g., 'MIT', 'Harvard', 'Stanford University'). "
+        "Partial name matching supported.",
+    )
+    source: str | None = Field(
+        default=None,
+        description="Filter by journal/venue (e.g., 'Nature', 'Science', 'JAMA'). "
+        "Matches both journal names and abbreviated titles.",
+    )
+    year_from: int | None = Field(  # NOTE(review): _search_literature caps at current year + 1 — confirm wording
+        default=None,
+        description="Start year (e.g., 2020). Will auto-clamp to valid range (1700-current).",
+    )
+    year_to: int | None = Field(
+        default=None,
+        description="End year (e.g., 2024). Will auto-clamp to valid range (1700-current).",
+    )
+    is_oa: bool | None = Field(
+        default=None,
+        description="Open access filter. True returns ONLY open access papers with direct links.",
+    )
+    work_type: str | None = Field(
+        default=None,
+        description="Filter by publication type. "
+        "Options: 'article', 'review', 'preprint', 'book', 'dissertation', 'dataset', etc.",
+    )
+    language: str | None = Field(
+        default=None,
+        description="Filter by publication language (e.g., 'en', 'zh', 'ja', 'fr', 'de').",
+    )
+    is_retracted: bool | None = Field(
+        default=None,
+        description="Retracted paper filter. False excludes retracted works (recommended). "
+        "True shows ONLY retracted papers (for auditing).",
+    )
+    has_abstract: bool | None = Field(
+        default=None,
+        description="Require abstract. True returns only papers with abstracts.",
+    )
+    has_fulltext: bool | None = Field(
+        default=None,
+        description="Require full text access. True returns only papers with available full text.",
+    )
+    sort_by: Literal["relevance", "cited_by_count", "publication_date"] = Field(
+        default="relevance",
+        description="Sort results. 'cited_by_count' for influential papers, 'publication_date' for most recent first.",
+    )
+    data_sources: list[str] | None = Field(  # NOTE(review): ['openalex'] default is not set here — verify
+        default=None,
+        description="Data sources to query. Options: ['openalex', 'semantic_scholar', 'pubmed']. "
+        "Default: ['openalex'].",
+    )
+
+
+async def _search_literature(
+    query: str,
+    mailto: str | None = None,
+    author: str | None = None,
+    institution: str | None = None,
+    source: str | None = None,
+    year_from: int | None = None,
+    year_to: int | None = None,
+    is_oa: bool | None = None,
+    work_type: str | None = None,
+    language: str | None = None,
+    is_retracted: bool | None = None,
+    has_abstract: bool | None = None,
+    has_fulltext: bool | None = None,
+    sort_by: str = "relevance",
+    data_sources: list[str] | None = None,
+) -> str:
+    """
+    Search academic literature and return a markdown report (summary + JSON results).
+
+    Both year bounds are clamped into [1700, next calendar year] and swapped if reversed;
+    each adjustment is appended to the result's "warnings". Results are capped at 10 and
+    abstracts are omitted. Errors are returned as message strings instead of being raised.
+    """
+    # Hard-coded: at most 10 results, abstracts excluded to save tokens
+    max_results = 10
+    include_abstract = False
+
+    try:
+        # Validate query
+        stripped_query = str(query).strip() if query else ""
+        if not stripped_query:
+            return "Invalid input: query cannot be empty."
+        if len(stripped_query) < 3:
+            return "Invalid input: query is too short (minimum 3 characters)."
+
+        # Clamp each bound on both sides, recording a note for every adjustment
+        min_year, max_year = 1700, datetime.now().year + 1
+        year_notes: list[str] = []
+
+        def _clamp_year(value: int | None, field_name: str) -> int | None:
+            bounded = None if value is None else min(max(value, min_year), max_year)
+            if bounded != value:
+                year_notes.append(f"{field_name} {value} clamped to {bounded}.")
+            return bounded
+
+        year_from_clamped = _clamp_year(year_from, "year_from")
+        year_to_clamped = _clamp_year(year_to, "year_to")
+
+        # Ensure year_from <= year_to when both are set
+        if year_from_clamped is not None and year_to_clamped is not None and year_from_clamped > year_to_clamped:
+            year_notes.append(f"year_from {year_from_clamped} and year_to {year_to_clamped} swapped.")
+            year_from_clamped, year_to_clamped = year_to_clamped, year_from_clamped
+
+        openalex_email = mailto.strip() if mailto and str(mailto).strip() else None
+        # Log only whether an email was supplied, never the address itself
+        logger.info(
+            "Literature search requested: query=%r, mailto=%s, max_results=%d",
+            query,
+            "<redacted>" if openalex_email else None,
+            max_results,
+        )
+
+        # Create search request
+        request = SearchRequest(
+            query=query,
+            author=author,
+            institution=institution,
+            source=source,
+            year_from=year_from_clamped,
+            year_to=year_to_clamped,
+            is_oa=is_oa,
+            work_type=work_type,
+            language=language,
+            is_retracted=is_retracted,
+            has_abstract=has_abstract,
+            has_fulltext=has_fulltext,
+            sort_by=sort_by,
+            max_results=max_results,
+            data_sources=data_sources,
+        )
+
+        # Execute search
+        async with WorkDistributor(openalex_email=openalex_email) as distributor:
+            result = await distributor.search(request)
+
+        if year_notes:
+            result.setdefault("warnings", []).append("Year adjusted: " + " ".join(year_notes))
+
+        return _format_search_result(request, result, include_abstract)
+
+    except ValueError as e:
+        logger.warning("Literature search validation error: %s", e)
+        return f"Invalid input: {e!s}"
+    except httpx.HTTPError as e:
+        logger.error("Literature search network error: %s", e, exc_info=True)
+        return "Network error while contacting literature sources. Please try again later."
+    except Exception as e:
+        logger.error("Literature search failed: %s", e, exc_info=True)
+        return "Unexpected error during search. Please retry or contact support."
+
+
+def _format_search_result(request: SearchRequest, result: dict[str, Any], include_abstract: bool = False) -> str:
+    """
+    Format search results into human-readable report + JSON data.
+
+    Args:
+        request: Original search request
+        result: Search result from WorkDistributor
+        include_abstract: Whether to include abstracts in JSON (default: False to save tokens)
+
+    Returns:
+        Formatted markdown report with embedded JSON
+    """
+    works = result["works"]
+
+    # Build report sections
+    sections: list[str] = ["# Literature Search Report\n"]
+
+    # Warnings and resolution status (if any)
+    if warnings := result.get("warnings", []):
+        sections.extend(["## Warnings and Resolution Status\n", *warnings, ""])
+
+    # Search conditions
+    conditions: list[str] = [
+        f"- **Query**: {request.query}",
+        *([f"- **Author**: {request.author}"] if request.author else []),
+        *([f"- **Institution**: {request.institution}"] if request.institution else []),
+        *([f"- **Source**: {request.source}"] if request.source else []),
+        *(
+            [f"- **Year Range**: {request.year_from or '...'} - {request.year_to or '...'}"]
+            if request.year_from or request.year_to
+            else []
+        ),
+        *([f"- **Open Access Only**: {'Yes' if request.is_oa else 'No'}"] if request.is_oa is not None else []),
+        *([f"- **Work Type**: {request.work_type}"] if request.work_type else []),
+        *([f"- **Language**: {request.language}"] if request.language else []),
+        *(
+            [f"- **Exclude Retracted**: {'No' if request.is_retracted else 'Yes'}"]
+            if request.is_retracted is not None
+            else []
+        ),
+        *(
+            [f"- **Require Abstract**: {'Yes' if request.has_abstract else 'No'}"]
+            if request.has_abstract is not None
+            else []
+        ),
+        *(
+            [f"- **Require Full Text**: {'Yes' if request.has_fulltext else 'No'}"]
+            if request.has_fulltext is not None
+            else []
+        ),
+        f"- **Sort By**: {request.sort_by}",
+        f"- **Max Results**: {request.max_results}",
+    ]
+    sections.extend(["## Search Conditions\n", "\n".join(conditions), ""])
+
+    # Check if no results
+    if not works:
+        sections.extend(["## No Results Found\n", "**Suggestions to improve your search:**\n"])
+        suggestions: list[str] = [
+            "1. **Simplify keywords**: Try broader or different terms",
+            *(["2. **Remove author filter**: Author name may not be recognized"] if request.author else []),
+            *(["3. **Remove institution filter**: Try without institution constraint"] if request.institution else []),
+            *(["4. **Remove source filter**: Try without journal constraint"] if request.source else []),
+            *(
+                ["5. **Expand year range**: Current range may be too narrow"]
+                if request.year_from or request.year_to
+                else []
+            ),
+            *(["6. **Remove open access filter**: Include non-OA papers"] if request.is_oa else []),
+            "7. **Check spelling**: Verify all terms are spelled correctly",
+        ]
+        sections.extend(["\n".join(suggestions), ""])
+        return "\n".join(sections)
+
+    # Statistics and overall insights
+    total_count = result["total_count"]
+    unique_count = result["unique_count"]
+    sources = result["sources"]
+
+    stats: list[str] = [
+        f"- **Total Found**: {total_count} works",
+        f"- **After Deduplication**: {unique_count} works",
+    ]
+    source_info = ", ".join(f"{name}: {count}" for name, count in sources.items())
+    stats.append(f"- **Data Sources**: {source_info}")
+
+    # Add insights
+    avg_citations = sum(w.cited_by_count for w in works) / len(works)
+    stats.append(f"- **Average Citations**: {avg_citations:.1f}")
+
+    oa_count = sum(w.is_oa for w in works)
+    oa_ratio = (oa_count / len(works)) * 100
+    stats.append(f"- **Open Access Rate**: {oa_ratio:.1f}% ({oa_count}/{len(works)})")
+
+    if years := [w.publication_year for w in works if w.publication_year]:
+        stats.append(f"- **Year Range**: {min(years)} - {max(years)}")
+
+    sections.extend(["## Search Statistics\n", "\n".join(stats), ""])
+
+    # Complete JSON list
+    sections.extend(
+        [
+            "## Complete Works List (JSON)\n",
+            "The following JSON contains all works with full abstracts:\n"
+            if include_abstract
+            else "The following JSON contains all works (abstracts excluded to save tokens):\n",
+            "```json",
+        ]
+    )
+
+    # Convert works to dict for JSON serialization
+    works_dict = []
+    for work in works:
+        work_data = {
+            "id": work.id,
+            "doi": work.doi,
+            "title": work.title,
+            "authors": work.authors,
+            "publication_year": work.publication_year,
+            "cited_by_count": work.cited_by_count,
+            "journal": work.journal,
+            "primary_institution": work.primary_institution,
+            "is_oa": work.is_oa,
+            "access_url": work.access_url,
+            "source": work.source,
+        }
+        # Only include abstract if requested
+        if include_abstract and work.abstract:
+            work_data["abstract"] = work.abstract
+        works_dict.append(work_data)
+
+    sections.extend([json.dumps(works_dict, indent=2, ensure_ascii=False), "```", ""])
+
+    # Next steps guidance
+    sections.extend(["---", "## Next Steps Guide\n", "**Before making another search, consider:**\n"])
+    next_steps: list[str] = [
+        *(["- **Results found** - Review the JSON data above for your analysis"] if unique_count > 0 else []),
+        *(
+            [
+                f"- **Result limit reached** ({request.max_results}) - "
+                "Consider narrowing filters (author, year, journal) for more targeted results"
+            ]
+            if unique_count >= request.max_results
+            else []
+        ),
+        *(
+            ["- **Few results** - Consider broadening your search by removing some filters"]
+            if 0 < unique_count < 10
+            else []
+        ),
+        "",
+        "**To refine your search:**",
+        "- If too many results: Add more specific filters (author, institution, journal, year)",
+        "- If too few results: Remove filters or use broader keywords",
+        "- If wrong results: Check filter spelling and try variations",
+        "",
+        "**Important**: Avoid making multiple similar searches without reviewing results first!",
+        "Each search consumes API quota and context window. Make targeted, deliberate queries.",
+    ]
+
+    sections.append("\n".join(next_steps))
+
+    return "\n".join(sections)
+
+
+def create_literature_search_tool() -> BaseTool:
+    """
+    Create the literature search tool.
+
+    Returns:
+        StructuredTool for literature search.
+    """
+    return StructuredTool(
+        name="literature_search",
+        description=(
+            "Search academic literature from multiple data sources (OpenAlex, Semantic Scholar, PubMed). "
+            "Returns up to 10 papers with detailed information. "
+            "IMPORTANT: When presenting results to users, always include: "
+            "1) Paper title, 2) Authors, 3) Publication year, 4) Journal name, "
+            "5) Citation count, 6) access_url (clickable link to read the paper). "
+            "The access_url is critical - users need it to access the full paper. "
+            "Supports filtering by author, institution, journal, year range, open access status, and more."
+        ),
+        args_schema=LiteratureSearchInput,
+        coroutine=_search_literature,
+    )
+
+
+__all__ = ["create_literature_search_tool", "LiteratureSearchInput"]
diff --git a/service/app/tools/capabilities.py b/service/app/tools/capabilities.py
index 1b4deecf..c471c3fe 100644
--- a/service/app/tools/capabilities.py
+++ b/service/app/tools/capabilities.py
@@ -54,6 +54,7 @@ class ToolCapability(StrEnum):
     "bing_search": [ToolCapability.WEB_SEARCH],
     "tavily_search": [ToolCapability.WEB_SEARCH],
     "web_fetch": [ToolCapability.WEB_SEARCH],
+    "literature_search": [ToolCapability.WEB_SEARCH],
     # Knowledge tools
     "knowledge_list": [ToolCapability.KNOWLEDGE_RETRIEVAL],
     "knowledge_read": [ToolCapability.KNOWLEDGE_RETRIEVAL, ToolCapability.FILE_OPERATIONS],
diff --git a/service/app/tools/prepare.py b/service/app/tools/prepare.py
index 81b4a6df..a3dedabb 100644
--- a/service/app/tools/prepare.py
+++ b/service/app/tools/prepare.py
@@ -82,6 +82,7 @@ def _load_all_builtin_tools(
     Load all available builtin tools.
 
     - Web search + fetch: loaded if SearXNG is enabled
+    - Literature search: always loaded
     - Knowledge tools: loaded if effective knowledge_set_id exists and user_id is available
     - Image tools: loaded if image generation is enabled and user_id is available
     - Memory tools: loaded if agent and user_id are available (currently disabled)
@@ -110,6 +111,11 @@ def _load_all_builtin_tools(
         if web_fetch:
             tools.append(web_fetch)
 
+    # Load literature search tool if available
+    literature_search = BuiltinToolRegistry.get("literature_search")
+    if literature_search:
+        tools.append(literature_search)
+
     # Determine effective knowledge_set_id
     # Priority: session override > agent config
     effective_knowledge_set_id = session_knowledge_set_id or (agent.knowledge_set_id if agent else None)
diff --git a/service/app/tools/registry.py b/service/app/tools/registry.py
index 66d27420..8e95f4c3 100644
--- a/service/app/tools/registry.py
+++ b/service/app/tools/registry.py
@@ -175,6 +175,7 @@ def register_builtin_tools() -> None:
     """
     from app.tools.builtin.fetch import create_web_fetch_tool
     from app.tools.builtin.knowledge import create_knowledge_tools
+    from app.tools.builtin.literature import create_literature_search_tool
     from app.tools.builtin.search import create_web_search_tool
 
     # Register web search tool
@@ -204,6 +205,19 @@ def register_builtin_tools() -> None:
         cost=ToolCostConfig(base_cost=1),
     )
 
+    # Register literature search tool
+    literature_tool = create_literature_search_tool()
+    BuiltinToolRegistry.register(
+        tool_id="literature_search",
+        tool=literature_tool,
+        category="search",
+        display_name="Literature Search",
+        ui_toggleable=True,
+        default_enabled=False,
+        requires_context=[],
+        cost=ToolCostConfig(base_cost=1),
+    )
+
     # Tool cost configs for knowledge tools
     knowledge_tool_costs = {
         "knowledge_list": ToolCostConfig(),  # Free
diff --git a/web/src/components/layouts/components/ChatToolbar/ToolSelector.tsx b/web/src/components/layouts/components/ChatToolbar/ToolSelector.tsx
index 285d00b0..6312bac3 100644
--- a/web/src/components/layouts/components/ChatToolbar/ToolSelector.tsx
+++ b/web/src/components/layouts/components/ChatToolbar/ToolSelector.tsx
@@ -9,9 +9,11 @@ import {
 import {
   isImageEnabled,
   isKnowledgeEnabled,
+  isLiteratureSearchEnabled,
   isWebSearchEnabled,
   updateImageEnabled,
   updateKnowledgeEnabled,
+  updateLiteratureSearchEnabled,
   // updateMemoryEnabled,
   updateWebSearchEnabled,
 } from "@/core/agent/toolConfig";
@@ -22,6 +24,7 @@ import {
 } from "@/service/knowledgeSetService";
 import type { Agent } from "@/types/agents";
 import {
+  AcademicCapIcon,
   BookOpenIcon,
   CheckIcon,
   ChevronDownIcon,
@@ -59,6 +62,7 @@ export function ToolSelector({
   const webSearchEnabled = isWebSearchEnabled(agent);
   const knowledgeEnabled = isKnowledgeEnabled(agent);
   const imageEnabled = isImageEnabled(agent);
+  const literatureSearchEnabled = isLiteratureSearchEnabled(agent);
   // const memoryEnabled = isMemoryEnabled(agent);  // Disabled: pending RAG/pgvector implementation
   // const memoryEnabled = false; // Hardcoded off until RAG is implemented
 
@@ -70,6 +74,7 @@ export function ToolSelector({
     webSearchEnabled,
     effectiveKnowledgeSetId && knowledgeEnabled,
     imageEnabled,
+    literatureSearchEnabled,
     // memoryEnabled,  // Disabled: pending RAG/pgvector implementation
   ].filter(Boolean).length;
 
@@ -115,6 +120,12 @@ export function ToolSelector({
     await onUpdateAgent({ ...agent, graph_config: newGraphConfig });
   };
 
+  const handleToggleLiteratureSearch = async () => {
+    if (!agent) return; // without an agent there is nothing to update
+    const newGraphConfig = updateLiteratureSearchEnabled(agent, !literatureSearchEnabled);
+    await onUpdateAgent({ ...agent, graph_config: newGraphConfig }); // agent copy carrying the flipped setting
+  };
+
   // const handleToggleMemory = async () => {
   //   if (!agent) return;
   //   const newGraphConfig = updateMemoryEnabled(agent, !memoryEnabled);
@@ -328,33 +339,35 @@ export function ToolSelector({
             {imageEnabled && <CheckIcon className="h-4 w-4 text-green-500" />}
           </button>
 
-          {/* Memory Search - Disabled: pending RAG/pgvector implementation */}
-          {/* <button
-            onClick={handleToggleMemory}
+          {/* Literature Search */}
+          <button
+            onClick={handleToggleLiteratureSearch}
             className={cn(
               "w-full flex items-center justify-between px-2 py-2 rounded-md transition-colors",
               "hover:bg-neutral-100 dark:hover:bg-neutral-800",
-              memoryEnabled && "bg-amber-50 dark:bg-amber-900/20",
+              literatureSearchEnabled && "bg-amber-50 dark:bg-amber-900/20",
             )}
           >
             <div className="flex items-center gap-2">
-              <ClockIcon
+              <AcademicCapIcon
                 className={cn(
                   "h-4 w-4",
-                  memoryEnabled ? "text-amber-500" : "text-neutral-400",
+                  literatureSearchEnabled ? "text-amber-500" : "text-neutral-400",
                 )}
               />
               <div className="text-left">
                 <div className="text-sm font-medium">
-                  {t("app.toolbar.memory", "Memory")}
+                  {t("app.toolbar.literatureSearch", "Literature Search")}
                 </div>
                 <div className="text-xs text-neutral-500 dark:text-neutral-400">
-                  {t("app.toolbar.memoryDesc", "Search conversation history")}
+                  {t("app.toolbar.literatureSearchDesc", "Search academic papers")}
                 </div>
               </div>
             </div>
-            {memoryEnabled && <CheckIcon className="h-4 w-4 text-amber-500" />}
-          </button> */}
+            {literatureSearchEnabled && <CheckIcon className="h-4 w-4 text-amber-500" />}
+          </button>
+
+          {/* Memory Search - Disabled: pending RAG/pgvector implementation */}
         </div>
       </PopoverContent>
     </Popover>
diff --git a/web/src/core/agent/toolConfig.ts b/web/src/core/agent/toolConfig.ts
index d36e1da9..1adef4df 100644
--- a/web/src/core/agent/toolConfig.ts
+++ b/web/src/core/agent/toolConfig.ts
@@ -20,6 +20,7 @@ export const BUILTIN_TOOLS = {
   GENERATE_IMAGE: "generate_image",
   READ_IMAGE: "read_image",
   MEMORY_SEARCH: "memory_search",
+  LITERATURE_SEARCH: "literature_search",
 } as const;
 
 // Web search tools as a group (search + fetch always together)
@@ -43,6 +44,7 @@ export const ALL_BUILTIN_TOOL_IDS = [
   BUILTIN_TOOLS.GENERATE_IMAGE,
   BUILTIN_TOOLS.READ_IMAGE,
   BUILTIN_TOOLS.MEMORY_SEARCH,
+  BUILTIN_TOOLS.LITERATURE_SEARCH,
 ];
 
 // Image tools as a group
@@ -310,3 +312,20 @@ export function updateMemoryEnabled(
 ): Record<string, unknown> {
   return updateToolFilter(agent, BUILTIN_TOOLS.MEMORY_SEARCH, enabled);
 }
+
+/**
+ * Report whether the `literature_search` builtin tool is enabled for `agent` (delegates to `isToolEnabled`).
+ */
+export function isLiteratureSearchEnabled(agent: Agent | null): boolean {
+  return isToolEnabled(agent, BUILTIN_TOOLS.LITERATURE_SEARCH);
+}
+
+/**
+ * Return the result of `updateToolFilter` for the `literature_search` tool with the given `enabled` state.
+ */
+export function updateLiteratureSearchEnabled(
+  agent: Agent,
+  enabled: boolean,
+): Record<string, unknown> {
+  return updateToolFilter(agent, BUILTIN_TOOLS.LITERATURE_SEARCH, enabled);
+}
diff --git a/web/src/i18n/locales/en/app.json b/web/src/i18n/locales/en/app.json
index beea8f01..84e27e08 100644
--- a/web/src/i18n/locales/en/app.json
+++ b/web/src/i18n/locales/en/app.json
@@ -60,6 +60,8 @@
     "noKnowledge": "None",
     "image": "Image",
     "imageDesc": "Generate and read images",
+    "literatureSearch": "Literature",
+    "literatureSearchDesc": "Search academic papers",
     "memory": "Memory",
     "memoryDesc": "Search conversation history"
   },
diff --git a/web/src/i18n/locales/zh/app.json b/web/src/i18n/locales/zh/app.json
index 9e689381..b1fa8987 100644
--- a/web/src/i18n/locales/zh/app.json
+++ b/web/src/i18n/locales/zh/app.json
@@ -60,6 +60,8 @@
     "noKnowledge": "无",
     "image": "图片",
     "imageDesc": "生成和读取图片",
+    "literatureSearch": "文献搜索",
+    "literatureSearchDesc": "搜索学术文献和论文",
     "memory": "记忆",
     "memoryDesc": "搜索对话历史"
   },