Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions secator/ai/history.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ class ChatHistory:

messages: List[Dict[str, str]] = field(default_factory=list)
model: Optional[str] = None
# Billed token/cost usage accrued by LLM calls this object makes internally
# (history summarization/compaction). The owning `ai` task drains these into
# context.ai_tokens so summarization is billed alongside the main loop.
billed_tokens: int = 0
billed_cost: float = 0.0

def add_system(self, content: str) -> None:
	"""Append a system-role message to the history.

	Args:
		content: Text stored under the message's ``"content"`` key.
	"""
	system_message = {"role": "system", "content": content}
	self.messages.append(system_message)
Expand Down Expand Up @@ -390,6 +395,18 @@ def compact(self, model: str, api_base: Optional[str] = None,
with console.status(f"[bold orange3]Compacting chat history...[/] [gray42] • {token_str}[/]", spinner="dots"):
result = call_llm([{"role": "user", "content": prompt}], model, 0.3, api_base, api_key)

# Record billed usage of the summarization call so the owning task can
# roll it into context.ai_tokens. Missing usage counts as 0.
usage = result.get("usage") or {}
try:
self.billed_tokens += int(usage.get("tokens") or 0)
except (TypeError, ValueError):
pass
try:
self.billed_cost += float(usage.get("cost") or 0)
except (TypeError, ValueError):
pass

self.messages = []
if initial_system:
self.messages.append(initial_system)
Expand Down
53 changes: 53 additions & 0 deletions secator/tasks/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@ def _run_loop(self) -> Generator:
# Prompt user when context is filling up (local only)
yield from self._summarize_user()

# Roll any billed summarization usage into context.ai_tokens
self._drain_history_usage()

# Subagent token usage (for batch progress tracking)
if self.is_subagent:
by_role = self.history.count_tokens_by_role(self.model)
Expand All @@ -278,6 +281,11 @@ def _run_loop(self) -> Generator:
tool_calls = result.get("tool_calls", [])
usage = result.get("usage", {})

# Accumulate billed tokens for this run (read by the billing chore
# as context.ai_tokens). Done here, before any empty-response
# `continue`, so every billed call is counted exactly once.
self._account_usage(usage)

self.debug(f'content: {content[:200] if content else "(empty)"}', sub='llm')

# Empty response
Expand Down Expand Up @@ -455,6 +463,13 @@ def _init_options(self):
workspace=self.reports_folder or ""
)

# Per-run billed-token accounting. The platform billing chore reads
# `context.ai_tokens` (cumulative billed tokens) — the AI analog of
# `context.scan_hours`. Initialize on the runner context so it is
# persisted onto the task doc even if the run makes zero LLM calls.
self.context.setdefault("ai_tokens", 0)
self.context.setdefault("ai_cost", 0.0)

# Create interactivity backend
self.session_id = self.session_name or str(self.id)
self.backend = create_backend(self.interactive, timeout=CONFIG.addons.ai.user_response_timeout)
Expand Down Expand Up @@ -514,6 +529,7 @@ def _detect_mode(self, force=False):
messages = [{"role": "user", "content": f"{selection_prompt}\n{self.prompt}"}]
with maybe_status("[bold orange3]Detecting intent...[/]", spinner="dots"):
result = call_llm(messages, self.intent_model, temperature=0.3, api_base=self.api_base, api_key=self.api_key)
self._account_usage(result.get("usage"))
mode = result["content"].strip().lower()
if mode in ("attack", "chat"):
console.print(rf"[bold green]\[INF][/] Detected intent: [bold]{mode}[/]")
Expand Down Expand Up @@ -760,6 +776,43 @@ def _dispatch_and_collect(self, actions, ctx):
# History helpers
# -------------------------------------------------------------------------

def _account_usage(self, usage):
	"""Add the billed tokens/cost of one LLM call to the runner context.

	The running totals are kept in ``self.context["ai_tokens"]`` (int) and
	``self.context["ai_cost"]`` (float). Accounting is strictly best-effort:
	no input may raise out of this method, and a bad value must never
	corrupt a total that was accumulated from earlier, valid calls.

	Args:
		usage: The ``usage`` entry of a ``call_llm`` result, expected to be a
			mapping with optional ``"tokens"`` and ``"cost"`` keys. ``None``,
			an empty mapping, or any object without a ``.get`` method is
			treated as "no usage reported" and ignored.

	Returns:
		None. ``self.context`` is updated in place.
	"""
	# Function-scope import: the module's import block is not visible here.
	import math

	# A truthy non-mapping (str, list, provider object, ...) has no `.get`;
	# treat it like missing usage instead of raising AttributeError.
	lookup = getattr(usage, "get", None)
	if not usage or not callable(lookup):
		return

	# Tokens and cost are handled independently so that one malformed field
	# does not prevent the other from being recorded.
	try:
		tokens = int(lookup("tokens") or 0)
		self.context["ai_tokens"] = int(self.context.get("ai_tokens", 0) or 0) + tokens
	except (TypeError, ValueError, OverflowError):
		# OverflowError: int(float("inf")) -- ignore like any other bad value.
		pass

	try:
		cost = float(lookup("cost") or 0)
		# NaN/inf would permanently poison the running total; skip them.
		if math.isfinite(cost):
			self.context["ai_cost"] = float(self.context.get("ai_cost", 0.0) or 0.0) + cost
	except (TypeError, ValueError, OverflowError):
		# OverflowError: float() of an int too large to represent.
		pass

def _drain_history_usage(self):
	"""Move usage accrued on the chat history into the runner's totals.

	The history object accumulates ``billed_tokens`` / ``billed_cost`` for
	LLM calls it makes itself. This method forwards whatever has accrued to
	``self._account_usage`` and then zeroes both counters, so calling it
	repeatedly never bills the same usage twice.

	The drain fires when *either* counter is non-zero: gating on tokens alone
	would leave a cost-only accrual unbilled and un-reset.

	Returns:
		None. A runner without a ``history`` attribute is a no-op.
	"""
	history = getattr(self, "history", None)
	if history is None:
		return

	tokens = getattr(history, "billed_tokens", 0) or 0
	cost = getattr(history, "billed_cost", 0.0) or 0.0
	if not (tokens or cost):
		# Nothing accrued since the last drain.
		return

	self._account_usage({"tokens": tokens, "cost": cost})
	# Reset both counters together so the next drain starts from zero.
	history.billed_tokens = 0
	history.billed_cost = 0.0
Comment on lines +824 to +847

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟡 Minor | ⚡ Quick win

Cost can be silently dropped when billed_tokens is 0 but billed_cost is non-zero.

The drain only fires (and resets the counters) when tokens is truthy. If a summarization call reports a cost with zero/missing tokens, that cost is neither accounted nor reset on this iteration. It would only be picked up on a later drain that happens to have non-zero tokens, and is lost entirely if that never occurs. Gate on either value.

🛠️ Proposed fix
 		tokens = getattr(history, "billed_tokens", 0) or 0
 		cost = getattr(history, "billed_cost", 0.0) or 0.0
-		if tokens:
+		if tokens or cost:
 			self._account_usage({"tokens": tokens, "cost": cost})
 			history.billed_tokens = 0
 			history.billed_cost = 0.0
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def _drain_history_usage(self):
"""Roll billed usage accrued by history summarization into context.ai_tokens.
`ChatHistory.compact` makes its own LLM calls and stashes their billed
usage on the history object; drain it here so it is counted exactly once.
"""
history = getattr(self, "history", None)
if history is None:
return
tokens = getattr(history, "billed_tokens", 0) or 0
cost = getattr(history, "billed_cost", 0.0) or 0.0
if tokens:
self._account_usage({"tokens": tokens, "cost": cost})
history.billed_tokens = 0
history.billed_cost = 0.0
def _drain_history_usage(self):
"""Roll billed usage accrued by history summarization into context.ai_tokens.
`ChatHistory.compact` makes its own LLM calls and stashes their billed
usage on the history object; drain it here so it is counted exactly once.
"""
history = getattr(self, "history", None)
if history is None:
return
tokens = getattr(history, "billed_tokens", 0) or 0
cost = getattr(history, "billed_cost", 0.0) or 0.0
if tokens or cost:
self._account_usage({"tokens": tokens, "cost": cost})
history.billed_tokens = 0
history.billed_cost = 0.0
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@secator/tasks/ai.py` around lines 800 - 814, The usage drain in
_drain_history_usage only accounts and clears history.billed_tokens and
history.billed_cost when tokens is truthy, which can drop cost-only usage.
Update the condition so the drain runs when either billed_tokens or billed_cost
is present, and make sure both history.billed_tokens and history.billed_cost are
reset after calling self._account_usage, even when tokens is zero.


def _add_assistant_to_history(self, content, tool_calls):
"""Add assistant message (with optional tool calls) to chat history."""
if tool_calls:
Expand Down
225 changes: 225 additions & 0 deletions tests/unit/test_ai_tokens.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
"""Tests for per-run billed AI token accounting.

The `ai` task accumulates billed tokens from every LLM call it makes into
`context.ai_tokens` (and cost into `context.ai_cost`). The platform billing
chore reads `context.ai_tokens` — the AI analog of `context.scan_hours`.

These tests verify:
- N calls with known token counts sum onto `context.ai_tokens`.
- Missing/None usage counts as 0 and never crashes the run.
- History summarization usage is rolled in exactly once.
"""
import contextlib
import unittest
from unittest.mock import patch

from secator.definitions import ADDONS_ENABLED

HAS_AI = ADDONS_ENABLED.get('ai', False)

if HAS_AI:
from secator.tasks.ai import ai
from secator.ai.history import ChatHistory


def _make_task():
	"""Return a bare `ai` task wired with only a context dict and a chat history.

	`__init__` is bypassed via `__new__` so no workspace, backend, or other
	runner machinery is needed; the helpers under test only read and write
	`task.context` and `task.history`.
	"""
	bare_task = ai.__new__(ai)
	# Same seed values the comments in this file attribute to _init_options.
	bare_task.context = {"ai_tokens": 0, "ai_cost": 0.0}
	bare_task.history = ChatHistory()
	return bare_task


@unittest.skipUnless(HAS_AI, 'ai addon required')
class TestAiTokenAccounting(unittest.TestCase):
	"""Unit tests for `_account_usage`, `_drain_history_usage` and `ChatHistory.compact`.

	Every test that feeds a cost through the helpers also asserts the cost
	total, so a regression on the `ai_cost` path cannot hide behind a passing
	`ai_tokens` assertion.
	"""

	@staticmethod
	def _history_with_three_turns():
		"""Build a history of one system message plus three user/assistant turns."""
		history = ChatHistory(model="test-model")
		history.add_system("system")
		for turn in (1, 2, 3):
			history.add_user(f"u{turn}")
			history.add_assistant(f"a{turn}")
		return history

	def _compact_with_response(self, llm_result):
		"""Run `compact` on a fresh three-turn history with `call_llm` stubbed to return `llm_result`."""
		history = self._history_with_three_turns()
		with patch('secator.ai.utils.call_llm', return_value=llm_result):
			with patch('secator.ai.history.get_context_window', return_value=8000):
				history.compact("test-model", keep_last=2)
		return history

	def test_sum_over_n_calls(self):
		"""N call_llm usages sum onto context.ai_tokens (and ai_cost)."""
		task = _make_task()
		usages = [
			{"tokens": 100, "cost": 0.001},
			{"tokens": 250, "cost": 0.002},
			{"tokens": 50, "cost": 0.0005},
		]
		for usage in usages:
			task._account_usage(usage)
		self.assertEqual(task.context["ai_tokens"], 400)
		self.assertAlmostEqual(task.context["ai_cost"], 0.0035)

	def test_missing_usage_counts_as_zero(self):
		"""None / empty / missing-key usage never crashes and adds 0 tokens."""
		task = _make_task()
		task._account_usage(None)
		task._account_usage({})
		task._account_usage({"tokens": None, "cost": None})
		task._account_usage({"cost": 0.5})  # no tokens key
		self.assertEqual(task.context["ai_tokens"], 0)
		# The cost of the tokens-less usage is still recorded.
		self.assertAlmostEqual(task.context["ai_cost"], 0.5)

	def test_malformed_usage_does_not_crash(self):
		"""Non-numeric token/cost values are ignored, not raised."""
		task = _make_task()
		task._account_usage({"tokens": "abc", "cost": "xyz"})
		task._account_usage({"tokens": 42, "cost": 0.01})
		self.assertEqual(task.context["ai_tokens"], 42)
		self.assertAlmostEqual(task.context["ai_cost"], 0.01)

	def test_field_persisted_on_context(self):
		"""The platform reads context.ai_tokens — confirm that exact key."""
		task = _make_task()
		task._account_usage({"tokens": 123, "cost": 0.0})
		self.assertIn("ai_tokens", task.context)
		self.assertEqual(task.context["ai_tokens"], 123)
		self.assertIsInstance(task.context["ai_tokens"], int)

	def test_history_summarization_usage_drained_once(self):
		"""Billed tokens accrued by history compaction roll in exactly once."""
		task = _make_task()
		# Simulate ChatHistory.compact stashing summarization usage.
		task.history.billed_tokens = 500
		task.history.billed_cost = 0.004
		task._drain_history_usage()
		self.assertEqual(task.context["ai_tokens"], 500)
		self.assertAlmostEqual(task.context["ai_cost"], 0.004)
		# The drain must leave the history counters at zero...
		self.assertEqual(task.history.billed_tokens, 0)
		self.assertEqual(task.history.billed_cost, 0.0)
		# ...so draining again must not double-count tokens or cost.
		task._drain_history_usage()
		self.assertEqual(task.context["ai_tokens"], 500)
		self.assertAlmostEqual(task.context["ai_cost"], 0.004)

	def test_history_compact_records_billed_usage(self):
		"""ChatHistory.compact accrues the summarization call's billed tokens."""
		fake = {"content": "summary", "usage": {"tokens": 321, "cost": 0.003}}
		history = self._compact_with_response(fake)
		self.assertEqual(history.billed_tokens, 321)
		self.assertAlmostEqual(history.billed_cost, 0.003)

	def test_history_compact_missing_usage_is_zero(self):
		"""compact() with no usage on the response adds 0 billed tokens and cost."""
		fake = {"content": "summary", "usage": None}
		history = self._compact_with_response(fake)
		self.assertEqual(history.billed_tokens, 0)
		self.assertEqual(history.billed_cost, 0.0)


@contextlib.contextmanager
def _loop_patches(task, responses):
	"""Context manager that stubs out the collaborators `_run_loop` depends on.

	`call_llm` (as imported by `secator.tasks.ai`) yields `responses` one per
	call; the context-window lookups return 8000; `save_history`, the
	`reports_folder` property and both summarize hooks become inert. The
	accounting helpers themselves are left unpatched, since they are what the
	end-to-end tests exercise.

	Yields the `ExitStack` holding all active patches.
	"""
	patchers = (
		patch('secator.tasks.ai.call_llm', side_effect=responses),
		patch('secator.ai.history.get_context_window', return_value=8000),
		patch('secator.tasks.ai.get_context_window', return_value=8000),
		patch('secator.tasks.ai.save_history'),
		patch.object(type(task), 'reports_folder', property(lambda self: None)),
		patch.object(ai, '_summarize_auto', return_value=iter(())),
		patch.object(ai, '_summarize_user', return_value=iter(())),
	)
	with contextlib.ExitStack() as stack:
		# Entered in declaration order; ExitStack unwinds them in reverse.
		for patcher in patchers:
			stack.enter_context(patcher)
		yield stack


@unittest.skipUnless(HAS_AI, 'ai addon required')
class TestAiTokenAccountingEndToEnd(unittest.TestCase):
	"""Run the real `_run_loop` against a stubbed `call_llm` and check the totals."""

	def _make_loop_task(self):
		"""Return a bare task carrying the attributes this test sets up for `_run_loop`."""
		task = _make_task()
		# Minimal state _run_loop reads.
		loop_state = {
			"inputs": [],
			"model": "test-model",
			"intent_model": "test-model",
			"temp": 0.7,
			"api_base": None,
			"api_key": "key",
			"max_iterations": 3,
			"max_tokens_total": 100000,
			"max_workers": 1,
			"is_subagent": True,
			"verbose": False,
			"dry_run": False,
			"mode": "chat",
			"scope": "workspace",
			"results": [],
			"encryptor": None,
			"tool_schemas": [],
			"permission_engine": None,
			"dangerous": True,
			"interactive": "auto",
			"_sync": True,
			"session_id": "s",
			"_reports_folder": None,
			# Logging / result sinks are replaced with no-ops.
			"debug": lambda *a, **k: None,
			"add_result": lambda *a, **k: None,
		}
		for attr_name, attr_value in loop_state.items():
			setattr(task, attr_name, attr_value)
		from secator.ai.interactivity import create_backend
		task.backend = create_backend("auto")
		return task

	def test_loop_sums_token_usage(self):
		"""Three content responses with known tokens sum onto context.ai_tokens."""
		task = self._make_loop_task()
		responses = [
			{"content": text, "tool_calls": [], "usage": {"tokens": tokens, "cost": cost}}
			for text, tokens, cost in (
				("r1", 100, 0.001),
				("r2", 200, 0.002),
				("r3", 300, 0.003),
			)
		]
		# Per the original author's note, the auto backend would end the loop
		# after the first content-only response. The stubbed prompt below adds
		# a user turn on its first two calls to keep the loop going, then
		# returns None on the third so the loop exits.
		prompts_seen = 0

		def fake_prompt(choices):
			nonlocal prompts_seen
			prompts_seen += 1
			if prompts_seen < 3:
				task.history.add_user("continue")
				return []
			return None  # exit

		with _loop_patches(task, responses), \
				patch.object(ai, '_prompt_and_redetect', side_effect=fake_prompt):
			list(task._run_loop())

		self.assertEqual(task.context["ai_tokens"], 600)
		self.assertAlmostEqual(task.context["ai_cost"], 0.006)

	def test_loop_with_no_usage_is_zero(self):
		"""Responses without usage leave context.ai_tokens at 0 (no crash)."""
		task = self._make_loop_task()
		responses = [{"content": "r1", "tool_calls": [], "usage": None}]

		with _loop_patches(task, responses), \
				patch.object(ai, '_prompt_and_redetect', return_value=None):
			list(task._run_loop())

		self.assertEqual(task.context["ai_tokens"], 0)

if __name__ == '__main__':
unittest.main()
Loading