Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,12 @@ class ReflectRequest(BaseModel):
default=None,
description="Exclude specific mental models by ID from the reflect loop.",
)
facts_only: bool = Field(
default=False,
description="When true, only extracted facts are used during reflect. "
"Raw source chunks and document context are excluded. "
"The expand tool is disabled.",
)

@field_validator("fact_types")
@classmethod
Expand Down Expand Up @@ -1526,6 +1532,12 @@ class MentalModelTrigger(BaseModel):
"Supports nested and/or/not expressions for complex tag-based scoping."
),
)
facts_only: bool = Field(
default=False,
description="When true, only extracted facts are used during mental model refresh. "
"Raw source chunks and document context are excluded. "
"The expand tool is disabled.",
)

@field_validator("fact_types")
@classmethod
Expand Down Expand Up @@ -3100,6 +3112,7 @@ async def api_reflect(
fact_types=request.fact_types,
exclude_mental_models=request.exclude_mental_models,
exclude_mental_model_ids=request.exclude_mental_model_ids,
facts_only=request.facts_only,
)

# Build based_on (memories + mental_models + directives) if facts are requested
Expand Down
5 changes: 5 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,7 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
fact_types = trigger_data.get("fact_types")
exclude_mental_models = trigger_data.get("exclude_mental_models", False)
stored_exclude_ids: list[str] = trigger_data.get("exclude_mental_model_ids") or []
facts_only = trigger_data.get("facts_only", False)

tag_filtering = _resolve_refresh_tag_filtering(mental_model.get("tags"), trigger_data)

Expand All @@ -967,6 +968,7 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
fact_types=fact_types,
exclude_mental_models=exclude_mental_models,
exclude_mental_model_ids=list({*stored_exclude_ids, mental_model_id}),
facts_only=facts_only,
)

generated_content = reflect_result.text or "No content generated"
Expand Down Expand Up @@ -5399,6 +5401,7 @@ async def reflect_async(
exclude_mental_model_ids: list[str] | None = None,
fact_types: list[str] | None = None,
exclude_mental_models: bool = False,
facts_only: bool = False,
_skip_span: bool = False,
) -> ReflectResult:
"""
Expand Down Expand Up @@ -5553,6 +5556,7 @@ async def recall_fn(q: str, max_tokens: int = 4096, max_chunk_tokens: int = 1000
tag_groups=tag_groups,
max_chunk_tokens=max_chunk_tokens,
fact_types=recall_fact_types if fact_types is not None else None,
include_chunks=not facts_only,
)

async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
Expand Down Expand Up @@ -5616,6 +5620,7 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
include_recall=include_recall,
budget=effective_budget,
max_context_tokens=max_context_tokens,
facts_only=facts_only,
),
timeout=wall_timeout,
)
Expand Down
4 changes: 3 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ async def run_reflect_agent(
include_recall: bool = True,
budget: str | None = None,
max_context_tokens: int = 100_000,
facts_only: bool = False,
) -> ReflectAgentResult:
"""
Execute the reflect agent loop using native tool calling.
Expand Down Expand Up @@ -362,6 +363,7 @@ async def run_reflect_agent(
include_mental_models=has_mental_models,
include_observations=include_observations,
include_recall=include_recall,
facts_only=facts_only,
)
# Build set of enabled tool names to guard against LLM hallucinating disabled tool calls
enabled_tools: frozenset[str] = frozenset(t["function"]["name"] for t in tools if t.get("type") == "function")
Expand Down Expand Up @@ -782,7 +784,7 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
hallucinated_tools = []
for tc in other_tools:
norm = _normalize_tool_name(tc.name)
if enabled_tools is not None and norm not in enabled_tools and norm not in ("done", "expand"):
if enabled_tools is not None and norm not in enabled_tools and norm not in ("done",):
hallucinated_tools.append(tc)
else:
allowed_tools.append(tc)
Expand Down
10 changes: 7 additions & 3 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ async def tool_recall(
connection_budget: int = 1,
max_chunk_tokens: int = 1000,
fact_types: list[str] | None = None,
include_chunks: bool = True,
) -> dict[str, Any]:
"""
Search memories using TEMPR retrieval.
Expand All @@ -232,13 +233,14 @@ async def tool_recall(
connection_budget: Max DB connections for this recall (default 1 for internal ops)
max_chunk_tokens: Maximum tokens for raw source chunk text (default 1000, always included)
fact_types: Optional filter for fact types to retrieve. Defaults to ["experience", "world"].
include_chunks: Whether to include raw chunk text in results (default True).
When False, only extracted facts are returned (facts_only mode).

Returns:
Dict with list of matching memories including raw chunk text
"""
# Only world/experience are valid for raw recall (observation is handled by search_observations)
recall_fact_type = [ft for ft in (fact_types or ["experience", "world"]) if ft in ("world", "experience")]
include_chunks = True
internal_ctx = replace(request_context, internal=True)
result = await memory_engine.recall_async(
bank_id=bank_id,
Expand All @@ -256,11 +258,13 @@ async def tool_recall(
max_chunk_tokens=max_chunk_tokens,
)

return {
response: dict[str, Any] = {
"query": query,
"memories": [m.model_dump() for m in result.results],
"chunks": {k: v.model_dump() for k, v in (result.chunks or {}).items()},
}
if include_chunks:
response["chunks"] = {k: v.model_dump() for k, v in (result.chunks or {}).items()}
return response


async def tool_expand(
Expand Down
48 changes: 46 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,48 @@ def _build_done_tool_with_directives(directive_rules: list[str]) -> dict:
}


def _build_facts_only_recall_tool() -> dict:
"""Build a recall tool schema without chunk-related parameters for facts_only mode."""
return {
"type": "function",
"function": {
"name": "recall",
"description": (
"Search raw memories (facts and experiences). This is the ground truth data. "
"Use when: (1) no reflections/mental models exist, (2) mental models are stale, "
"(3) you need specific details not in synthesized knowledge. "
"Returns individual memory facts with their timestamps. "
"Note: only extracted facts are returned; raw source chunks are not available."
),
"parameters": {
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": "Brief explanation of why you're making this search (for debugging)",
},
"query": {
"type": "string",
"description": "Search query string",
},
"max_tokens": {
"type": "integer",
"description": "Optional limit on result size (default 2048). Use higher values for broader searches.",
},
},
"required": ["reason", "query"],
},
},
}


def get_reflect_tools(
directive_rules: list[str] | None = None,
include_mental_models: bool = True,
include_observations: bool = True,
include_recall: bool = True,
include_expand: bool = True,
facts_only: bool = False,
) -> list[dict]:
"""
Get the list of tools for the reflect agent.
Expand All @@ -247,6 +284,9 @@ def get_reflect_tools(
include_mental_models: Whether to include the search_mental_models tool.
include_observations: Whether to include the search_observations tool.
include_recall: Whether to include the recall tool.
include_expand: Whether to include the expand tool.
facts_only: When True, strips chunk-related parameters from the recall tool
schema and excludes expand. Only extracted facts are available.

Returns:
List of tool definitions in OpenAI format
Expand All @@ -258,9 +298,13 @@ def get_reflect_tools(
if include_observations:
tools.append(TOOL_SEARCH_OBSERVATIONS)
if include_recall:
tools.append(TOOL_RECALL)
if facts_only:
tools.append(_build_facts_only_recall_tool())
else:
tools.append(TOOL_RECALL)

tools.append(TOOL_EXPAND)
if include_expand and not facts_only:
tools.append(TOOL_EXPAND)

# Use directive-aware done tool if directives are present
if directive_rules:
Expand Down
169 changes: 169 additions & 0 deletions hindsight-api-slim/tests/test_reflect_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,172 @@ def __getattr__(self, name: str):

finally:
await memory.delete_bank(bank_id, request_context=request_context)


class TestFactsOnlyMode:
    """Test that facts_only mode excludes chunks and the expand tool.

    Each test drives run_reflect_agent with a mocked LLM whose
    call_with_tools side_effect scripts an exact sequence of tool calls,
    then inspects either the tool schemas passed to the LLM or which
    tool callbacks were actually invoked.
    """

    @pytest.fixture
    def mock_llm(self):
        # LLM double: call_with_tools is scripted per-test via side_effect;
        # call covers any forced-completion fallback path in the agent.
        llm = MagicMock()
        llm.call_with_tools = AsyncMock()
        llm.call = AsyncMock(
            return_value=("Facts only answer", TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150))
        )
        return llm

    @pytest.fixture
    def mock_functions(self):
        # Tool callbacks expected by run_reflect_agent, each returning a
        # minimal well-formed payload so the agent loop can proceed.
        return {
            "search_mental_models_fn": AsyncMock(return_value={"mental_models": []}),
            "search_observations_fn": AsyncMock(return_value={"observations": []}),
            "recall_fn": AsyncMock(return_value={"memories": [{"id": "mem-1", "text": "test fact"}]}),
            "expand_fn": AsyncMock(return_value={"results": []}),
        }

    @pytest.mark.asyncio
    async def test_facts_only_excludes_expand_from_tools(self, mock_llm, mock_functions):
        """When facts_only=True, expand tool should not be in the tool list sent to the LLM."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="recall", arguments={"reason": "test", "query": "test"})],
                finish_reason="tool_calls",
            ),
            LLMToolCallResult(
                tool_calls=[
                    LLMToolCall(id="2", name="done", arguments={"answer": "Facts only answer", "memory_ids": ["mem-1"]})
                ],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            facts_only=True,
            **mock_functions,
        )

        # Check the tools passed to LLM - expand should not be present
        first_call_args = mock_llm.call_with_tools.call_args_list[0]
        tools_arg = first_call_args.kwargs.get("tools") or first_call_args[1].get("tools")
        tool_names = [t["function"]["name"] for t in tools_arg]
        assert "expand" not in tool_names
        assert "recall" in tool_names
        assert "done" in tool_names

    @pytest.mark.asyncio
    async def test_facts_only_recall_tool_has_no_chunk_param(self, mock_llm, mock_functions):
        """When facts_only=True, recall tool schema should not have max_chunk_tokens param."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="done", arguments={"answer": "Answer", "memory_ids": []})],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            facts_only=True,
            **mock_functions,
        )

        first_call_args = mock_llm.call_with_tools.call_args_list[0]
        tools_arg = first_call_args.kwargs.get("tools") or first_call_args[1].get("tools")
        recall_tools = [t for t in tools_arg if t["function"]["name"] == "recall"]
        assert len(recall_tools) == 1
        recall_props = recall_tools[0]["function"]["parameters"]["properties"]
        assert "max_chunk_tokens" not in recall_props

    @pytest.mark.asyncio
    async def test_facts_only_rejects_expand_calls(self, mock_llm, mock_functions):
        """When facts_only=True, if LLM hallucinates an expand call, it should be rejected."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="expand", arguments={"reason": "need context", "memory_ids": ["mem-1"], "depth": "chunk"})],
                finish_reason="tool_calls",
            ),
            LLMToolCallResult(
                tool_calls=[
                    LLMToolCall(id="2", name="done", arguments={"answer": "Answer without expand", "memory_ids": []})
                ],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            facts_only=True,
            **mock_functions,
        )

        # expand_fn should never be called since the tool is not available
        mock_functions["expand_fn"].assert_not_called()

    @pytest.mark.asyncio
    async def test_default_mode_includes_expand(self, mock_llm, mock_functions):
        """When facts_only is not set (default), expand tool should be available."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="done", arguments={"answer": "Answer", "memory_ids": []})],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            **mock_functions,
        )

        first_call_args = mock_llm.call_with_tools.call_args_list[0]
        tools_arg = first_call_args.kwargs.get("tools") or first_call_args[1].get("tools")
        tool_names = [t["function"]["name"] for t in tools_arg]
        assert "expand" in tool_names


class TestFactsOnlyToolSchema:
    """Test get_reflect_tools with facts_only flag."""

    def test_facts_only_excludes_expand(self):
        from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

        # Collect the tool names exposed in facts_only mode.
        names = {tool["function"]["name"] for tool in get_reflect_tools(facts_only=True)}
        assert "expand" not in names

    def test_facts_only_strips_chunk_param_from_recall(self):
        from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

        recall_schemas = [
            tool for tool in get_reflect_tools(facts_only=True)
            if tool["function"]["name"] == "recall"
        ]
        assert len(recall_schemas) == 1
        # The facts_only recall variant must not advertise chunk controls.
        schema_props = recall_schemas[0]["function"]["parameters"]["properties"]
        assert "max_chunk_tokens" not in schema_props

    def test_default_includes_expand_and_chunk_param(self):
        from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

        default_tools = get_reflect_tools()
        assert "expand" in [tool["function"]["name"] for tool in default_tools]
        recall_schema = [tool for tool in default_tools if tool["function"]["name"] == "recall"][0]
        assert "max_chunk_tokens" in recall_schema["function"]["parameters"]["properties"]

    def test_facts_only_does_not_mutate_original_tool_recall(self):
        from hindsight_api.engine.reflect.tools_schema import TOOL_RECALL, get_reflect_tools

        get_reflect_tools(facts_only=True)
        # Original TOOL_RECALL should still have max_chunk_tokens
        assert "max_chunk_tokens" in TOOL_RECALL["function"]["parameters"]["properties"]
Loading