Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions hindsight-api-slim/hindsight_api/api/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -1526,6 +1526,27 @@ class MentalModelTrigger(BaseModel):
"Supports nested and/or/not expressions for complex tag-based scoping."
),
)
include_chunks: bool | None = Field(
default=None,
description=(
"Override whether the internal recall used during refresh returns raw chunk text. "
"None means use the bank/global config default (recall_include_chunks)."
),
)
recall_max_tokens: int | None = Field(
default=None,
description=(
"Override the token budget for facts returned by the internal recall during refresh. "
"None means use the bank/global config default (recall_max_tokens)."
),
)
recall_chunks_max_tokens: int | None = Field(
default=None,
description=(
"Override the token budget for raw chunks returned by the internal recall during refresh. "
"None means use the bank/global config default (recall_chunks_max_tokens)."
),
)

@field_validator("fact_types")
@classmethod
Expand Down
21 changes: 21 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_REFLECT_WALL_TIMEOUT = "HINDSIGHT_API_REFLECT_WALL_TIMEOUT"
ENV_REFLECT_MISSION = "HINDSIGHT_API_REFLECT_MISSION"
ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS = "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS"
# Recall settings (consumed by internal recall, e.g. during mental model refresh)
ENV_RECALL_INCLUDE_CHUNKS = "HINDSIGHT_API_RECALL_INCLUDE_CHUNKS"
ENV_RECALL_MAX_TOKENS = "HINDSIGHT_API_RECALL_MAX_TOKENS"
ENV_RECALL_CHUNKS_MAX_TOKENS = "HINDSIGHT_API_RECALL_CHUNKS_MAX_TOKENS"

# Audit log settings
ENV_AUDIT_LOG_ENABLED = "HINDSIGHT_API_AUDIT_LOG_ENABLED"
Expand Down Expand Up @@ -587,6 +590,9 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
DEFAULT_REFLECT_MAX_CONTEXT_TOKENS = 100_000  # Max accumulated context tokens before forcing final prompt
DEFAULT_REFLECT_WALL_TIMEOUT = 300  # Wall-clock timeout in seconds for the entire reflect operation (5 minutes)
DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS = -1  # Token budget for source facts in search_observations (-1 = disabled)
# Recall defaults for internal recall (e.g. mental model refresh).
# Resolution order at call time: explicit trigger/caller override -> bank config -> these env defaults.
DEFAULT_RECALL_INCLUDE_CHUNKS = True  # Whether internal recall (e.g. mental model refresh) returns raw chunks
DEFAULT_RECALL_MAX_TOKENS = 2048  # Token budget for facts returned by internal recall
DEFAULT_RECALL_CHUNKS_MAX_TOKENS = 1000  # Token budget for raw chunks returned by internal recall

# Disposition defaults (None = not set, fall back to bank DB value or 3)
DEFAULT_DISPOSITION_SKEPTICISM = None
Expand Down Expand Up @@ -925,6 +931,11 @@ class HindsightConfig:
reflect_mission: str | None
reflect_source_facts_max_tokens: int

# Recall settings (used by internal recall, e.g. during mental model refresh)
recall_include_chunks: bool
recall_max_tokens: int
recall_chunks_max_tokens: int

# Disposition settings (hierarchical - can be overridden per bank; None = fall back to DB)
disposition_skepticism: int | None
disposition_literalism: int | None
Expand Down Expand Up @@ -1038,6 +1049,10 @@ class HindsightConfig:
# Reflect settings
"reflect_mission",
"reflect_source_facts_max_tokens",
# Recall settings (used by internal recall, e.g. mental model refresh)
"recall_include_chunks",
"recall_max_tokens",
"recall_chunks_max_tokens",
# Disposition settings
"disposition_skepticism",
"disposition_literalism",
Expand Down Expand Up @@ -1523,6 +1538,12 @@ def from_env(cls) -> "HindsightConfig":
reflect_source_facts_max_tokens=int(
os.getenv(ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS, str(DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS))
),
recall_include_chunks=os.getenv(ENV_RECALL_INCLUDE_CHUNKS, str(DEFAULT_RECALL_INCLUDE_CHUNKS)).lower()
in ("true", "1", "yes"),
recall_max_tokens=int(os.getenv(ENV_RECALL_MAX_TOKENS, str(DEFAULT_RECALL_MAX_TOKENS))),
recall_chunks_max_tokens=int(
os.getenv(ENV_RECALL_CHUNKS_MAX_TOKENS, str(DEFAULT_RECALL_CHUNKS_MAX_TOKENS))
),
# Disposition settings (None = fall back to DB value)
disposition_skepticism=int(os.getenv(ENV_DISPOSITION_SKEPTICISM))
if os.getenv(ENV_DISPOSITION_SKEPTICISM)
Expand Down
50 changes: 48 additions & 2 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@
import httpx
import tiktoken

from ..config import DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS, get_config
from ..config import (
DEFAULT_RECALL_CHUNKS_MAX_TOKENS,
DEFAULT_RECALL_INCLUDE_CHUNKS,
DEFAULT_RECALL_MAX_TOKENS,
DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS,
get_config,
)
from ..metrics import get_metrics_collector
from ..tracing import create_operation_span
from ..utils import mask_network_location
Expand Down Expand Up @@ -952,6 +958,9 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
fact_types = trigger_data.get("fact_types")
exclude_mental_models = trigger_data.get("exclude_mental_models", False)
stored_exclude_ids: list[str] = trigger_data.get("exclude_mental_model_ids") or []
recall_include_chunks_override = trigger_data.get("include_chunks")
recall_max_tokens_override = trigger_data.get("recall_max_tokens")
recall_chunks_max_tokens_override = trigger_data.get("recall_chunks_max_tokens")

tag_filtering = _resolve_refresh_tag_filtering(mental_model.get("tags"), trigger_data)

Expand All @@ -967,6 +976,9 @@ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
fact_types=fact_types,
exclude_mental_models=exclude_mental_models,
exclude_mental_model_ids=list({*stored_exclude_ids, mental_model_id}),
recall_include_chunks=recall_include_chunks_override,
recall_max_tokens_override=recall_max_tokens_override,
recall_chunks_max_tokens_override=recall_chunks_max_tokens_override,
)

generated_content = reflect_result.text or "No content generated"
Expand Down Expand Up @@ -5399,6 +5411,9 @@ async def reflect_async(
exclude_mental_model_ids: list[str] | None = None,
fact_types: list[str] | None = None,
exclude_mental_models: bool = False,
recall_include_chunks: bool | None = None,
recall_max_tokens_override: int | None = None,
recall_chunks_max_tokens_override: int | None = None,
_skip_span: bool = False,
) -> ReflectResult:
"""
Expand Down Expand Up @@ -5521,6 +5536,23 @@ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any
"reflect_source_facts_max_tokens", DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS
)

# Resolve recall overrides: caller arg (e.g. mental model trigger) → bank config → env default
effective_recall_include_chunks = (
recall_include_chunks
if recall_include_chunks is not None
else config_dict.get("recall_include_chunks", DEFAULT_RECALL_INCLUDE_CHUNKS)
)
effective_recall_max_tokens = (
recall_max_tokens_override
if recall_max_tokens_override is not None
else config_dict.get("recall_max_tokens", DEFAULT_RECALL_MAX_TOKENS)
)
effective_recall_chunks_max_tokens = (
recall_chunks_max_tokens_override
if recall_chunks_max_tokens_override is not None
else config_dict.get("recall_chunks_max_tokens", DEFAULT_RECALL_CHUNKS_MAX_TOKENS)
)

async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]:
return await tool_search_observations(
self,
Expand All @@ -5541,7 +5573,14 @@ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, An
recall_fact_types = [ft for ft in (fact_types or ["world", "experience"]) if ft in ("world", "experience")]
include_recall = bool(recall_fact_types)

async def recall_fn(q: str, max_tokens: int = 4096, max_chunk_tokens: int = 1000) -> dict[str, Any]:
# Defaults are bound at closure-definition time (re-evaluated on each
# reflect_async call), so per-bank/per-trigger overrides apply when the
# agent invokes recall without explicit token args.
async def recall_fn(
q: str,
max_tokens: int = effective_recall_max_tokens,
max_chunk_tokens: int = effective_recall_chunks_max_tokens,
) -> dict[str, Any]:
return await tool_recall(
self,
bank_id,
Expand All @@ -5553,6 +5592,7 @@ async def recall_fn(q: str, max_tokens: int = 4096, max_chunk_tokens: int = 1000
tag_groups=tag_groups,
max_chunk_tokens=max_chunk_tokens,
fact_types=recall_fact_types if fact_types is not None else None,
include_chunks=effective_recall_include_chunks,
)

async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
Expand Down Expand Up @@ -6770,6 +6810,9 @@ async def refresh_mental_model(
fact_types = trigger_data.get("fact_types")
exclude_mental_models = trigger_data.get("exclude_mental_models", False)
stored_exclude_ids: list[str] = trigger_data.get("exclude_mental_model_ids") or []
recall_include_chunks_override = trigger_data.get("include_chunks")
recall_max_tokens_override = trigger_data.get("recall_max_tokens")
recall_chunks_max_tokens_override = trigger_data.get("recall_chunks_max_tokens")

tag_filtering = _resolve_refresh_tag_filtering(mental_model.get("tags"), trigger_data)

Expand All @@ -6785,6 +6828,9 @@ async def refresh_mental_model(
fact_types=fact_types,
exclude_mental_models=exclude_mental_models,
exclude_mental_model_ids=list({*stored_exclude_ids, mental_model_id}),
recall_include_chunks=recall_include_chunks_override,
recall_max_tokens_override=recall_max_tokens_override,
recall_chunks_max_tokens_override=recall_chunks_max_tokens_override,
_skip_span=True,
)

Expand Down
7 changes: 4 additions & 3 deletions hindsight-api-slim/hindsight_api/engine/reflect/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ async def tool_recall(
connection_budget: int = 1,
max_chunk_tokens: int = 1000,
fact_types: list[str] | None = None,
include_chunks: bool = True,
) -> dict[str, Any]:
"""
Search memories using TEMPR retrieval.
Expand All @@ -230,15 +231,15 @@ async def tool_recall(
tags: Filter by tags (includes untagged memories)
tags_match: How to match tags - "any" (OR), "all" (AND), or "exact"
connection_budget: Max DB connections for this recall (default 1 for internal ops)
max_chunk_tokens: Maximum tokens for raw source chunk text (default 1000, always included)
max_chunk_tokens: Maximum tokens for raw source chunk text (default 1000)
fact_types: Optional filter for fact types to retrieve. Defaults to ["experience", "world"].
include_chunks: Whether to fetch raw chunk text alongside facts (default True).

Returns:
Dict with list of matching memories including raw chunk text
Dict with list of matching memories including raw chunk text (when include_chunks)
"""
# Only world/experience are valid for raw recall (observation is handled by search_observations)
recall_fact_type = [ft for ft in (fact_types or ["experience", "world"]) if ft in ("world", "experience")]
include_chunks = True
internal_ctx = replace(request_context, internal=True)
result = await memory_engine.recall_async(
bank_id=bank_id,
Expand Down
4 changes: 2 additions & 2 deletions hindsight-api-slim/tests/test_hierarchical_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ async def test_hierarchical_fields_categorization():
assert "retain_chunk_batch_size" in configurable

# Verify count is correct
assert len(configurable) == 22
assert len(configurable) == 25

# Verify credential fields (NEVER exposed)
assert "llm_api_key" in credentials
Expand Down Expand Up @@ -458,7 +458,7 @@ async def test_config_get_bank_config_no_static_or_credential_fields_leak(memory
assert field in config, f"Expected configurable field '{field}' missing from config"

# Should have a small number of configurable fields (not hundreds)
assert len(config) < 25, f"Too many fields returned: {len(config)}"
assert len(config) < 30, f"Too many fields returned: {len(config)}"

finally:
await memory.delete_bank(bank_id, request_context=request_context)
Expand Down
Loading
Loading