Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,12 @@ class ReflectRequest(BaseModel):
default=None,
description="Exclude specific mental models by ID from the reflect loop.",
)
facts_only: bool = Field(
default=False,
description="When true, only extracted facts are used during reflect. "
"Raw source chunks and document context are excluded. "
"The expand tool is disabled.",
)

@field_validator("fact_types")
@classmethod
Expand Down Expand Up @@ -1526,6 +1532,12 @@ class MentalModelTrigger(BaseModel):
"Supports nested and/or/not expressions for complex tag-based scoping."
),
)
facts_only: bool = Field(
default=False,
description="When true, only extracted facts are used during mental model refresh. "
"Raw source chunks and document context are excluded. "
"The expand tool is disabled.",
)

@field_validator("fact_types")
@classmethod
Expand Down Expand Up @@ -3100,6 +3112,7 @@ async def api_reflect(
fact_types=request.fact_types,
exclude_mental_models=request.exclude_mental_models,
exclude_mental_model_ids=request.exclude_mental_model_ids,
facts_only=request.facts_only,
)

# Build based_on (memories + mental_models + directives) if facts are requested
Expand Down
5 changes: 5 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,7 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
fact_types = trigger_data.get("fact_types")
exclude_mental_models = trigger_data.get("exclude_mental_models", False)
stored_exclude_ids: list[str] = trigger_data.get("exclude_mental_model_ids") or []
facts_only = trigger_data.get("facts_only", False)

tag_filtering = _resolve_refresh_tag_filtering(mental_model.get("tags"), trigger_data)

Expand All @@ -967,6 +968,7 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
fact_types=fact_types,
exclude_mental_models=exclude_mental_models,
exclude_mental_model_ids=list({*stored_exclude_ids, mental_model_id}),
facts_only=facts_only,
)

generated_content = reflect_result.text or "No content generated"
Expand Down Expand Up @@ -5399,6 +5401,7 @@ async def reflect_async(
exclude_mental_model_ids: list[str] | None = None,
fact_types: list[str] | None = None,
exclude_mental_models: bool = False,
facts_only: bool = False,
_skip_span: bool = False,
) -> ReflectResult:
"""
Expand Down Expand Up @@ -5553,6 +5556,7 @@ async def recall_fn(q: str, max_tokens: int = 4096, max_chunk_tokens: int = 1000
tag_groups=tag_groups,
max_chunk_tokens=max_chunk_tokens,
fact_types=recall_fact_types if fact_types is not None else None,
include_chunks=not facts_only,
)

async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
Expand Down Expand Up @@ -5616,6 +5620,7 @@ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
include_recall=include_recall,
budget=effective_budget,
max_context_tokens=max_context_tokens,
facts_only=facts_only,
),
timeout=wall_timeout,
)
Expand Down
4 changes: 3 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ async def run_reflect_agent(
include_recall: bool = True,
budget: str | None = None,
max_context_tokens: int = 100_000,
facts_only: bool = False,
) -> ReflectAgentResult:
"""
Execute the reflect agent loop using native tool calling.
Expand Down Expand Up @@ -362,6 +363,7 @@ async def run_reflect_agent(
include_mental_models=has_mental_models,
include_observations=include_observations,
include_recall=include_recall,
facts_only=facts_only,
)
# Build set of enabled tool names to guard against LLM hallucinating disabled tool calls
enabled_tools: frozenset[str] = frozenset(t["function"]["name"] for t in tools if t.get("type") == "function")
Expand Down Expand Up @@ -782,7 +784,7 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
hallucinated_tools = []
for tc in other_tools:
norm = _normalize_tool_name(tc.name)
if enabled_tools is not None and norm not in enabled_tools and norm not in ("done", "expand"):
if enabled_tools is not None and norm not in enabled_tools and norm not in ("done",):
hallucinated_tools.append(tc)
else:
allowed_tools.append(tc)
Expand Down
10 changes: 7 additions & 3 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ async def tool_recall(
connection_budget: int = 1,
max_chunk_tokens: int = 1000,
fact_types: list[str] | None = None,
include_chunks: bool = True,
) -> dict[str, Any]:
"""
Search memories using TEMPR retrieval.
Expand All @@ -232,13 +233,14 @@ async def tool_recall(
connection_budget: Max DB connections for this recall (default 1 for internal ops)
max_chunk_tokens: Maximum tokens for raw source chunk text (default 1000, always included)
fact_types: Optional filter for fact types to retrieve. Defaults to ["experience", "world"].
include_chunks: Whether to include raw chunk text in results (default True).
When False, only extracted facts are returned (facts_only mode).

Returns:
Dict with list of matching memories including raw chunk text
"""
# Only world/experience are valid for raw recall (observation is handled by search_observations)
recall_fact_type = [ft for ft in (fact_types or ["experience", "world"]) if ft in ("world", "experience")]
include_chunks = True
internal_ctx = replace(request_context, internal=True)
result = await memory_engine.recall_async(
bank_id=bank_id,
Expand All @@ -256,11 +258,13 @@ async def tool_recall(
max_chunk_tokens=max_chunk_tokens,
)

return {
response: dict[str, Any] = {
"query": query,
"memories": [m.model_dump() for m in result.results],
"chunks": {k: v.model_dump() for k, v in (result.chunks or {}).items()},
}
if include_chunks:
response["chunks"] = {k: v.model_dump() for k, v in (result.chunks or {}).items()}
return response


async def tool_expand(
Expand Down
48 changes: 46 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,48 @@ def _build_done_tool_with_directives(directive_rules: list[str]) -> dict:
}


def _build_facts_only_recall_tool() -> dict:
"""Build a recall tool schema without chunk-related parameters for facts_only mode."""
return {
"type": "function",
"function": {
"name": "recall",
"description": (
"Search raw memories (facts and experiences). This is the ground truth data. "
"Use when: (1) no reflections/mental models exist, (2) mental models are stale, "
"(3) you need specific details not in synthesized knowledge. "
"Returns individual memory facts with their timestamps. "
"Note: only extracted facts are returned; raw source chunks are not available."
),
"parameters": {
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": "Brief explanation of why you're making this search (for debugging)",
},
"query": {
"type": "string",
"description": "Search query string",
},
"max_tokens": {
"type": "integer",
"description": "Optional limit on result size (default 2048). Use higher values for broader searches.",
},
},
"required": ["reason", "query"],
},
},
}


def get_reflect_tools(
directive_rules: list[str] | None = None,
include_mental_models: bool = True,
include_observations: bool = True,
include_recall: bool = True,
include_expand: bool = True,
facts_only: bool = False,
) -> list[dict]:
"""
Get the list of tools for the reflect agent.
Expand All @@ -247,6 +284,9 @@ def get_reflect_tools(
include_mental_models: Whether to include the search_mental_models tool.
include_observations: Whether to include the search_observations tool.
include_recall: Whether to include the recall tool.
include_expand: Whether to include the expand tool.
facts_only: When True, strips chunk-related parameters from the recall tool
schema and excludes expand. Only extracted facts are available.

Returns:
List of tool definitions in OpenAI format
Expand All @@ -258,9 +298,13 @@ def get_reflect_tools(
if include_observations:
tools.append(TOOL_SEARCH_OBSERVATIONS)
if include_recall:
tools.append(TOOL_RECALL)
if facts_only:
tools.append(_build_facts_only_recall_tool())
else:
tools.append(TOOL_RECALL)

tools.append(TOOL_EXPAND)
if include_expand and not facts_only:
tools.append(TOOL_EXPAND)

# Use directive-aware done tool if directives are present
if directive_rules:
Expand Down
169 changes: 169 additions & 0 deletions hindsight-api-slim/tests/test_reflect_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,3 +633,172 @@ def __getattr__(self, name: str):

finally:
await memory.delete_bank(bank_id, request_context=request_context)


class TestFactsOnlyMode:
    """Test that facts_only mode excludes chunks and the expand tool.

    Each test drives run_reflect_agent with a mocked LLM whose
    call_with_tools side_effect scripts an exact sequence of tool calls,
    then inspects either the tool schemas passed to the LLM or which
    tool callbacks were actually invoked.
    """

    @pytest.fixture
    def mock_llm(self):
        # LLM double: call_with_tools is scripted per-test via side_effect;
        # call covers any forced-completion fallback path in the agent.
        llm = MagicMock()
        llm.call_with_tools = AsyncMock()
        llm.call = AsyncMock(
            return_value=("Facts only answer", TokenUsage(input_tokens=100, output_tokens=50, total_tokens=150))
        )
        return llm

    @pytest.fixture
    def mock_functions(self):
        # Tool callbacks expected by run_reflect_agent, each returning a
        # minimal well-formed payload so the agent loop can proceed.
        return {
            "search_mental_models_fn": AsyncMock(return_value={"mental_models": []}),
            "search_observations_fn": AsyncMock(return_value={"observations": []}),
            "recall_fn": AsyncMock(return_value={"memories": [{"id": "mem-1", "text": "test fact"}]}),
            "expand_fn": AsyncMock(return_value={"results": []}),
        }

    @pytest.mark.asyncio
    async def test_facts_only_excludes_expand_from_tools(self, mock_llm, mock_functions):
        """When facts_only=True, expand tool should not be in the tool list sent to the LLM."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="recall", arguments={"reason": "test", "query": "test"})],
                finish_reason="tool_calls",
            ),
            LLMToolCallResult(
                tool_calls=[
                    LLMToolCall(id="2", name="done", arguments={"answer": "Facts only answer", "memory_ids": ["mem-1"]})
                ],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            facts_only=True,
            **mock_functions,
        )

        # Check the tools passed to LLM - expand should not be present
        first_call_args = mock_llm.call_with_tools.call_args_list[0]
        tools_arg = first_call_args.kwargs.get("tools") or first_call_args[1].get("tools")
        tool_names = [t["function"]["name"] for t in tools_arg]
        assert "expand" not in tool_names
        assert "recall" in tool_names
        assert "done" in tool_names

    @pytest.mark.asyncio
    async def test_facts_only_recall_tool_has_no_chunk_param(self, mock_llm, mock_functions):
        """When facts_only=True, recall tool schema should not have max_chunk_tokens param."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="done", arguments={"answer": "Answer", "memory_ids": []})],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            facts_only=True,
            **mock_functions,
        )

        first_call_args = mock_llm.call_with_tools.call_args_list[0]
        tools_arg = first_call_args.kwargs.get("tools") or first_call_args[1].get("tools")
        recall_tools = [t for t in tools_arg if t["function"]["name"] == "recall"]
        assert len(recall_tools) == 1
        recall_props = recall_tools[0]["function"]["parameters"]["properties"]
        assert "max_chunk_tokens" not in recall_props

    @pytest.mark.asyncio
    async def test_facts_only_rejects_expand_calls(self, mock_llm, mock_functions):
        """When facts_only=True, if LLM hallucinates an expand call, it should be rejected."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="expand", arguments={"reason": "need context", "memory_ids": ["mem-1"], "depth": "chunk"})],
                finish_reason="tool_calls",
            ),
            LLMToolCallResult(
                tool_calls=[
                    LLMToolCall(id="2", name="done", arguments={"answer": "Answer without expand", "memory_ids": []})
                ],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            facts_only=True,
            **mock_functions,
        )

        # expand_fn should never be called since the tool is not available
        mock_functions["expand_fn"].assert_not_called()

    @pytest.mark.asyncio
    async def test_default_mode_includes_expand(self, mock_llm, mock_functions):
        """When facts_only is not set (default), expand tool should be available."""
        mock_llm.call_with_tools.side_effect = [
            LLMToolCallResult(
                tool_calls=[LLMToolCall(id="1", name="done", arguments={"answer": "Answer", "memory_ids": []})],
                finish_reason="tool_calls",
            ),
        ]

        await run_reflect_agent(
            llm_config=mock_llm,
            bank_id="test-bank",
            query="test query",
            bank_profile={"name": "Test", "mission": "Testing"},
            **mock_functions,
        )

        first_call_args = mock_llm.call_with_tools.call_args_list[0]
        tools_arg = first_call_args.kwargs.get("tools") or first_call_args[1].get("tools")
        tool_names = [t["function"]["name"] for t in tools_arg]
        assert "expand" in tool_names


class TestFactsOnlyToolSchema:
    """Test get_reflect_tools with facts_only flag."""

    def test_facts_only_excludes_expand(self):
        from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

        # Collect the tool names exposed in facts_only mode.
        names = {tool["function"]["name"] for tool in get_reflect_tools(facts_only=True)}
        assert "expand" not in names

    def test_facts_only_strips_chunk_param_from_recall(self):
        from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

        recall_schemas = [
            tool for tool in get_reflect_tools(facts_only=True)
            if tool["function"]["name"] == "recall"
        ]
        assert len(recall_schemas) == 1
        # The facts_only recall variant must not advertise chunk controls.
        schema_props = recall_schemas[0]["function"]["parameters"]["properties"]
        assert "max_chunk_tokens" not in schema_props

    def test_default_includes_expand_and_chunk_param(self):
        from hindsight_api.engine.reflect.tools_schema import get_reflect_tools

        default_tools = get_reflect_tools()
        assert "expand" in [tool["function"]["name"] for tool in default_tools]
        recall_schema = [tool for tool in default_tools if tool["function"]["name"] == "recall"][0]
        assert "max_chunk_tokens" in recall_schema["function"]["parameters"]["properties"]

    def test_facts_only_does_not_mutate_original_tool_recall(self):
        from hindsight_api.engine.reflect.tools_schema import TOOL_RECALL, get_reflect_tools

        get_reflect_tools(facts_only=True)
        # Original TOOL_RECALL should still have max_chunk_tokens
        assert "max_chunk_tokens" in TOOL_RECALL["function"]["parameters"]["properties"]
Loading