Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions secator/ai/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ class ChatHistory:

messages: List[Dict[str, str]] = field(default_factory=list)
model: Optional[str] = None
# Billed token/cost usage accrued by LLM calls this object makes internally
# (history summarization/compaction). The owning `ai` task drains these into
# context.ai_tokens so summarization is billed alongside the main loop.
billed_tokens: int = 0
billed_prompt_tokens: int = 0
billed_completion_tokens: int = 0
billed_cost: float = 0.0

def add_system(self, content: str) -> None:
self.messages.append({"role": "system", "content": content})
Expand Down Expand Up @@ -390,6 +397,26 @@ def compact(self, model: str, api_base: Optional[str] = None,
with console.status(f"[bold orange3]Compacting chat history...[/] [gray42] • {token_str}[/]", spinner="dots"):
result = call_llm([{"role": "user", "content": prompt}], model, 0.3, api_base, api_key)

# Record billed usage of the summarization call so the owning task can
# roll it into context.ai_tokens. Missing usage counts as 0.
usage = result.get("usage") or {}
try:
self.billed_tokens += int(usage.get("tokens") or 0)
except (TypeError, ValueError):
pass
try:
self.billed_prompt_tokens += int(usage.get("prompt_tokens") or 0)
except (TypeError, ValueError):
pass
try:
self.billed_completion_tokens += int(usage.get("completion_tokens") or 0)
except (TypeError, ValueError):
pass
try:
self.billed_cost += float(usage.get("cost") or 0)
except (TypeError, ValueError):
pass

self.messages = []
if initial_system:
self.messages.append(initial_system)
Expand Down
2 changes: 2 additions & 0 deletions secator/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ def call_llm(

usage = {
"tokens": response.usage.total_tokens,
"prompt_tokens": getattr(response.usage, "prompt_tokens", None),
"completion_tokens": getattr(response.usage, "completion_tokens", None),
"cost": cost,
}

Expand Down
86 changes: 86 additions & 0 deletions secator/tasks/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ def _run_loop(self) -> Generator:
# Prompt user when context is filling up (local only)
yield from self._summarize_user()

# Roll any billed summarization usage into context.ai_tokens
self._drain_history_usage()

# Subagent token usage (for batch progress tracking)
if self.is_subagent:
by_role = self.history.count_tokens_by_role(self.model)
Expand All @@ -278,6 +281,11 @@ def _run_loop(self) -> Generator:
tool_calls = result.get("tool_calls", [])
usage = result.get("usage", {})

# Accumulate billed tokens for this run (read by the billing chore
# as context.ai_tokens). Done here, before any empty-response
# `continue`, so every billed call is counted exactly once.
self._account_usage(usage)

self.debug(f'content: {content[:200] if content else "(empty)"}', sub='llm')

# Empty response
Expand Down Expand Up @@ -455,6 +463,23 @@ def _init_options(self):
workspace=self.reports_folder or ""
)

# Per-run billed-token accounting. The platform billing chore reads
# `context.ai_tokens` (cumulative billed tokens) — the AI analog of
# `context.scan_hours`. Initialize on the runner context so it is
# persisted onto the task doc even if the run makes zero LLM calls.
self.context.setdefault("ai_tokens", 0)
self.context.setdefault("ai_prompt_tokens", 0)
self.context.setdefault("ai_completion_tokens", 0)
self.context.setdefault("ai_cost", 0.0)

# Record the resolved model id used for this run so the platform metering
# chore can price the consumed tokens against the model registry (free
# vs paid, per-million in/out/cached rates). This is the *configured*
# model for the run; if the user switches model mid-session that change
# is out of scope (the configured model is recorded). Set unconditionally
# (not setdefault) so it reflects the option resolved in this _init.
self.context["ai_model"] = self.model

# Create interactivity backend
self.session_id = self.session_name or str(self.id)
self.backend = create_backend(self.interactive, timeout=CONFIG.addons.ai.user_response_timeout)
Expand Down Expand Up @@ -514,6 +539,7 @@ def _detect_mode(self, force=False):
messages = [{"role": "user", "content": f"{selection_prompt}\n{self.prompt}"}]
with maybe_status("[bold orange3]Detecting intent...[/]", spinner="dots"):
result = call_llm(messages, self.intent_model, temperature=0.3, api_base=self.api_base, api_key=self.api_key)
self._account_usage(result.get("usage"))
mode = result["content"].strip().lower()
if mode in ("attack", "chat"):
console.print(rf"[bold green]\[INF][/] Detected intent: [bold]{mode}[/]")
Expand Down Expand Up @@ -760,6 +786,66 @@ def _dispatch_and_collect(self, actions, ctx):
# History helpers
# -------------------------------------------------------------------------

def _account_usage(self, usage):
"""Accumulate billed token/cost usage from a single LLM call onto the runner context.

`usage` is the dict returned by `call_llm`
(`{"tokens", "prompt_tokens", "completion_tokens", "cost"}`) or None.
Missing/None usage counts as 0 so accounting never crashes the run. The
running total lives on `self.context["ai_tokens"]` (int, cumulative) which
is persisted onto the task doc and read by the platform billing chore.
`context["ai_prompt_tokens"]`/`["ai_completion_tokens"]` carry the split.
"""
if not usage:
return
try:
tokens = usage.get("tokens") or 0
self.context["ai_tokens"] = int(self.context.get("ai_tokens", 0) or 0) + int(tokens)
except (TypeError, ValueError):
pass
try:
prompt_tokens = usage.get("prompt_tokens") or 0
self.context["ai_prompt_tokens"] = \
int(self.context.get("ai_prompt_tokens", 0) or 0) + int(prompt_tokens)
except (TypeError, ValueError):
pass
try:
completion_tokens = usage.get("completion_tokens") or 0
self.context["ai_completion_tokens"] = \
int(self.context.get("ai_completion_tokens", 0) or 0) + int(completion_tokens)
except (TypeError, ValueError):
pass
try:
cost = usage.get("cost") or 0
self.context["ai_cost"] = float(self.context.get("ai_cost", 0.0) or 0.0) + float(cost)
except (TypeError, ValueError):
pass

def _drain_history_usage(self):
"""Roll billed usage accrued by history summarization into context.ai_tokens.

`ChatHistory.compact` makes its own LLM calls and stashes their billed
usage on the history object; drain it here so it is counted exactly once.
"""
history = getattr(self, "history", None)
if history is None:
return
tokens = getattr(history, "billed_tokens", 0) or 0
prompt_tokens = getattr(history, "billed_prompt_tokens", 0) or 0
completion_tokens = getattr(history, "billed_completion_tokens", 0) or 0
cost = getattr(history, "billed_cost", 0.0) or 0.0
if tokens:
self._account_usage({
"tokens": tokens,
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
"cost": cost,
})
history.billed_tokens = 0
history.billed_prompt_tokens = 0
history.billed_completion_tokens = 0
history.billed_cost = 0.0
Comment on lines +824 to +847

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟡 Minor | ⚡ Quick win

Cost can be silently dropped when billed_tokens is 0 but billed_cost is non-zero.

The drain only fires (and resets the counters) when tokens is truthy. If a summarization call reports a cost with zero/missing tokens, that cost is neither accounted nor reset on this iteration. It would only be picked up on a later drain that happens to have non-zero tokens, and is lost entirely if that never occurs. Gate on either value.

🛠️ Proposed fix
 		tokens = getattr(history, "billed_tokens", 0) or 0
 		cost = getattr(history, "billed_cost", 0.0) or 0.0
-		if tokens:
+		if tokens or cost:
 			self._account_usage({"tokens": tokens, "cost": cost})
 			history.billed_tokens = 0
 			history.billed_cost = 0.0
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def _drain_history_usage(self):
"""Roll billed usage accrued by history summarization into context.ai_tokens.
`ChatHistory.compact` makes its own LLM calls and stashes their billed
usage on the history object; drain it here so it is counted exactly once.
"""
history = getattr(self, "history", None)
if history is None:
return
tokens = getattr(history, "billed_tokens", 0) or 0
cost = getattr(history, "billed_cost", 0.0) or 0.0
if tokens:
self._account_usage({"tokens": tokens, "cost": cost})
history.billed_tokens = 0
history.billed_cost = 0.0
def _drain_history_usage(self):
"""Roll billed usage accrued by history summarization into context.ai_tokens.
`ChatHistory.compact` makes its own LLM calls and stashes their billed
usage on the history object; drain it here so it is counted exactly once.
"""
history = getattr(self, "history", None)
if history is None:
return
tokens = getattr(history, "billed_tokens", 0) or 0
cost = getattr(history, "billed_cost", 0.0) or 0.0
if tokens or cost:
self._account_usage({"tokens": tokens, "cost": cost})
history.billed_tokens = 0
history.billed_cost = 0.0
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@secator/tasks/ai.py` around lines 800 - 814, The usage drain in
_drain_history_usage only accounts for and clears the history.billed_* counters
(billed_tokens, billed_prompt_tokens, billed_completion_tokens, billed_cost)
when billed_tokens is truthy, which can drop cost-only usage. Update the
condition so the drain runs when either billed_tokens or billed_cost is
present, and make sure every history.billed_* counter is reset after calling
self._account_usage, even when billed_tokens is zero.


def _add_assistant_to_history(self, content, tool_calls):
"""Add assistant message (with optional tool calls) to chat history."""
if tool_calls:
Expand Down
Loading
Loading