freelabz · ocervell · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · coderabbitai
diff --git a/secator/ai/history.py b/secator/ai/history.py
@@ -124,6 +124,11 @@ class ChatHistory:
 
     messages: List[Dict[str, str]] = field(default_factory=list)
     model: Optional[str] = None
+    # Billed token/cost usage accrued by LLM calls this object makes internally
+    # (history summarization/compaction). The owning `ai` task drains these into
+    # context.ai_tokens so summarization is billed alongside the main loop.
+    billed_tokens: int = 0
+    billed_cost: float = 0.0
 
     def add_system(self, content: str) -> None:
         self.messages.append({"role": "system", "content": content})
@@ -390,6 +395,18 @@ def compact(self, model: str, api_base: Optional[str] = None,
         with console.status(f"[bold orange3]Compacting chat history...[/] [gray42] • {token_str}[/]", spinner="dots"):
             result = call_llm([{"role": "user", "content": prompt}], model, 0.3, api_base, api_key)
 
+        # Record billed usage of the summarization call so the owning task can
+        # roll it into context.ai_tokens. Missing usage counts as 0.
+        usage = result.get("usage") or {}
+        try:
+            self.billed_tokens += int(usage.get("tokens") or 0)
+        except (TypeError, ValueError):
+            pass
+        try:
+            self.billed_cost += float(usage.get("cost") or 0)
+        except (TypeError, ValueError):
+            pass
+
         self.messages = []
         if initial_system:
             self.messages.append(initial_system)

diff --git a/secator/tasks/ai.py b/secator/tasks/ai.py
@@ -255,6 +255,9 @@ def _run_loop(self) -> Generator:
 				# Prompt user when context is filling up (local only)
 				yield from self._summarize_user()
 
+				# Roll any billed summarization usage into context.ai_tokens
+				self._drain_history_usage()
+
 				# Subagent token usage (for batch progress tracking)
 				if self.is_subagent:
 					by_role = self.history.count_tokens_by_role(self.model)
@@ -278,6 +281,11 @@ def _run_loop(self) -> Generator:
 				tool_calls = result.get("tool_calls", [])
 				usage = result.get("usage", {})
 
+				# Accumulate billed tokens for this run (read by the billing chore
+				# as context.ai_tokens). Done here, before any empty-response
+				# `continue`, so every billed call is counted exactly once.
+				self._account_usage(usage)
+
 				self.debug(f'content: {content[:200] if content else "(empty)"}', sub='llm')
 
 				# Empty response
@@ -455,6 +463,13 @@ def _init_options(self):
 			workspace=self.reports_folder or ""
 		)
 
+		# Per-run billed-token accounting. The platform billing chore reads
+		# `context.ai_tokens` (cumulative billed tokens) — the AI analog of
+		# `context.scan_hours`. Initialize on the runner context so it is
+		# persisted onto the task doc even if the run makes zero LLM calls.
+		self.context.setdefault("ai_tokens", 0)
+		self.context.setdefault("ai_cost", 0.0)
+
 		# Create interactivity backend
 		self.session_id = self.session_name or str(self.id)
 		self.backend = create_backend(self.interactive, timeout=CONFIG.addons.ai.user_response_timeout)
@@ -514,6 +529,7 @@ def _detect_mode(self, force=False):
 			messages = [{"role": "user", "content": f"{selection_prompt}\n{self.prompt}"}]
 			with maybe_status("[bold orange3]Detecting intent...[/]", spinner="dots"):
 				result = call_llm(messages, self.intent_model, temperature=0.3, api_base=self.api_base, api_key=self.api_key)
+			self._account_usage(result.get("usage"))
 			mode = result["content"].strip().lower()
 			if mode in ("attack", "chat"):
 				console.print(rf"[bold green]\[INF][/] Detected intent: [bold]{mode}[/]")
@@ -760,6 +776,43 @@ def _dispatch_and_collect(self, actions, ctx):
 	# History helpers
 	# -------------------------------------------------------------------------
 
+	def _account_usage(self, usage):
+		"""Accumulate billed token/cost usage from a single LLM call onto the runner context.
+
+		Tokens and cost are accounted independently, so a malformed value in one
+		field does not stop the other from being recorded. Missing, None,
+		non-numeric, non-finite or non-dict-like usage counts as 0 so accounting
+		never crashes the run.
+
+		Args:
+			usage: The dict returned by `call_llm` (`{"tokens", "cost"}`) or None.
+
+		Side effects:
+			Adds to `self.context["ai_tokens"]` (int, cumulative; persisted onto the
+			task doc and read by the platform billing chore) and to
+			`self.context["ai_cost"]` (float, cumulative).
+		"""
+		if not usage:
+			return
+		# Anything without a `.get` (a string, a provider response object, ...) is
+		# treated like missing usage instead of raising AttributeError mid-run.
+		lookup = getattr(usage, "get", None)
+		if not callable(lookup):
+			return
+		# (context key, usage key, coercion): each field is best-effort on its own.
+		for ctx_key, usage_key, cast in (("ai_tokens", "tokens", int), ("ai_cost", "cost", float)):
+			try:
+				running = cast(self.context.get(ctx_key) or 0)
+				self.context[ctx_key] = running + cast(lookup(usage_key) or 0)
+			except (TypeError, ValueError, OverflowError):
+				# OverflowError: int() of an infinite token count.
+				continue
+
+	def _drain_history_usage(self):
+		"""Roll billed usage accrued by history summarization into the runner context.
+
+		`ChatHistory.compact` makes its own LLM calls and stashes their billed
+		usage on the history object (`billed_tokens` / `billed_cost`). This hands
+		those counters to `_account_usage` and resets them, so each summarization
+		call is counted exactly once.
+
+		Draining triggers when either counter is non-zero, so a cost reported
+		without a token count is still billed instead of lingering on the history.
+		No-op when the task has no `history` attribute.
+		"""
+		history = getattr(self, "history", None)
+		if history is None:
+			return
+		tokens = getattr(history, "billed_tokens", 0) or 0
+		cost = getattr(history, "billed_cost", 0.0) or 0.0
+		if not (tokens or cost):
+			return
+		self._account_usage({"tokens": tokens, "cost": cost})
+		# Reset both counters so a later drain cannot double-count.
+		history.billed_tokens = 0
+		history.billed_cost = 0.0
+
 	def _add_assistant_to_history(self, content, tool_calls):
 		"""Add assistant message (with optional tool calls) to chat history."""
 		if tool_calls:

diff --git a/tests/unit/test_ai_tokens.py b/tests/unit/test_ai_tokens.py
@@ -0,0 +1,225 @@
+"""Tests for per-run billed AI token accounting.
+
+The `ai` task accumulates billed tokens from every LLM call it makes into
+`context.ai_tokens` (and cost into `context.ai_cost`). The platform billing
+chore reads `context.ai_tokens` — the AI analog of `context.scan_hours`.
+
+These tests verify:
+- N calls with known token counts sum onto `context.ai_tokens`.
+- Missing/None usage counts as 0 and never crashes the run.
+- History summarization usage is rolled in exactly once.
+"""
+import contextlib
+import unittest
+from unittest.mock import patch
+
+from secator.definitions import ADDONS_ENABLED
+
+HAS_AI = ADDONS_ENABLED.get('ai', False)
+
+if HAS_AI:
+	from secator.tasks.ai import ai
+	from secator.ai.history import ChatHistory
+
+
+def _make_task():
+	"""Build a minimal `ai` task without running its constructor.
+
+	Full runner construction needs a workspace, backend, etc.; the accounting
+	helpers under test only touch `self.context` and `self.history`, so those
+	are the only attributes set here.
+	"""
+	bare = ai.__new__(ai)
+	# Same seed values _init_options puts on the context.
+	bare.context = {"ai_tokens": 0, "ai_cost": 0.0}
+	bare.history = ChatHistory()
+	return bare
+
+
+@unittest.skipUnless(HAS_AI, 'ai addon required')
+class TestAiTokenAccounting(unittest.TestCase):
+	"""Unit-level checks of billed token/cost accounting on the task and its history."""
+
+	@staticmethod
+	def _seeded_history():
+		"""Return a ChatHistory holding one system message and three user/assistant turns."""
+		history = ChatHistory(model="test-model")
+		history.add_system("system")
+		for turn in (1, 2, 3):
+			history.add_user(f"u{turn}")
+			history.add_assistant(f"a{turn}")
+		return history
+
+	def test_sum_over_n_calls(self):
+		"""N call_llm usages sum onto context.ai_tokens (and ai_cost)."""
+		task = _make_task()
+		usages = [
+			{"tokens": 100, "cost": 0.001},
+			{"tokens": 250, "cost": 0.002},
+			{"tokens": 50, "cost": 0.0005},
+		]
+		for u in usages:
+			task._account_usage(u)
+		self.assertEqual(task.context["ai_tokens"], 400)
+		self.assertAlmostEqual(task.context["ai_cost"], 0.0035)
+
+	def test_missing_usage_counts_as_zero(self):
+		"""None / empty / missing-key usage never crashes and adds 0."""
+		task = _make_task()
+		task._account_usage(None)
+		task._account_usage({})
+		task._account_usage({"tokens": None, "cost": None})
+		task._account_usage({"cost": 0.5})  # no tokens key
+		self.assertEqual(task.context["ai_tokens"], 0)
+		# Cost is tracked independently: only the last call carried one.
+		self.assertAlmostEqual(task.context["ai_cost"], 0.5)
+
+	def test_malformed_usage_does_not_crash(self):
+		"""Non-numeric token/cost values are ignored, not raised."""
+		task = _make_task()
+		task._account_usage({"tokens": "abc", "cost": "xyz"})
+		task._account_usage({"tokens": 42, "cost": 0.01})
+		self.assertEqual(task.context["ai_tokens"], 42)
+		self.assertAlmostEqual(task.context["ai_cost"], 0.01)
+
+	def test_field_persisted_on_context(self):
+		"""The platform reads context.ai_tokens — confirm that exact key."""
+		task = _make_task()
+		task._account_usage({"tokens": 123, "cost": 0.0})
+		self.assertIn("ai_tokens", task.context)
+		self.assertEqual(task.context["ai_tokens"], 123)
+		self.assertIsInstance(task.context["ai_tokens"], int)
+
+	def test_history_summarization_usage_drained_once(self):
+		"""Billed tokens accrued by history compaction roll in exactly once."""
+		task = _make_task()
+		# Simulate ChatHistory.compact stashing summarization usage.
+		task.history.billed_tokens = 500
+		task.history.billed_cost = 0.004
+		task._drain_history_usage()
+		self.assertEqual(task.context["ai_tokens"], 500)
+		self.assertAlmostEqual(task.context["ai_cost"], 0.004)
+		# The drain must leave nothing behind on the history object.
+		self.assertEqual(task.history.billed_tokens, 0)
+		self.assertEqual(task.history.billed_cost, 0.0)
+		# Draining again must not double-count.
+		task._drain_history_usage()
+		self.assertEqual(task.context["ai_tokens"], 500)
+		self.assertAlmostEqual(task.context["ai_cost"], 0.004)
+
+	def test_history_compact_records_billed_usage(self):
+		"""ChatHistory.compact accrues the summarization call's billed tokens."""
+		history = self._seeded_history()
+
+		fake = {"content": "summary", "usage": {"tokens": 321, "cost": 0.003}}
+		with patch('secator.ai.utils.call_llm', return_value=fake):
+			with patch('secator.ai.history.get_context_window', return_value=8000):
+				history.compact("test-model", keep_last=2)
+
+		self.assertEqual(history.billed_tokens, 321)
+		self.assertAlmostEqual(history.billed_cost, 0.003)
+
+	def test_history_compact_missing_usage_is_zero(self):
+		"""compact() with no usage on the response adds 0 billed tokens."""
+		history = self._seeded_history()
+
+		fake = {"content": "summary", "usage": None}
+		with patch('secator.ai.utils.call_llm', return_value=fake):
+			with patch('secator.ai.history.get_context_window', return_value=8000):
+				history.compact("test-model", keep_last=2)
+
+		self.assertEqual(history.billed_tokens, 0)
+		self.assertEqual(history.billed_cost, 0.0)
+
+
+@contextlib.contextmanager
+def _loop_patches(task, responses):
+	"""Stub out the heavy collaborators of _run_loop so it can run on a bare task.
+
+	`call_llm` is replaced by a stub driven by `responses`; the accounting
+	code that consumes each response's `usage` is left unpatched, since that is
+	what the tests exercise. Yields the ExitStack holding the active patches.
+	"""
+	patchers = (
+		patch('secator.tasks.ai.call_llm', side_effect=responses),
+		patch('secator.ai.history.get_context_window', return_value=8000),
+		patch('secator.tasks.ai.get_context_window', return_value=8000),
+		patch('secator.tasks.ai.save_history'),
+		patch.object(type(task), 'reports_folder', property(lambda self: None)),
+		patch.object(ai, '_summarize_auto', return_value=iter(())),
+		patch.object(ai, '_summarize_user', return_value=iter(())),
+	)
+	with contextlib.ExitStack() as active:
+		for patcher in patchers:
+			active.enter_context(patcher)
+		yield active
+
+
+@unittest.skipUnless(HAS_AI, 'ai addon required')
+class TestAiTokenAccountingEndToEnd(unittest.TestCase):
+	"""Drive the real _run_loop with mocked call_llm and assert the sum lands."""
+
+	def _make_loop_task(self):
+		"""Return a bare task (see `_make_task`) with the attributes set that this test needs for `_run_loop`.
+
+		NOTE(review): the attribute list mirrors what `_run_loop` reads at the time
+		of writing; `_run_loop` is not fully visible here, so confirm against it
+		when the loop gains new state.
+		"""
+		task = _make_task()
+		# Minimal state _run_loop reads.
+		task.inputs = []
+		task.model = "test-model"
+		task.intent_model = "test-model"
+		task.temp = 0.7
+		task.api_base = None
+		task.api_key = "key"
+		task.max_iterations = 3
+		task.max_tokens_total = 100000
+		task.max_workers = 1
+		task.is_subagent = True
+		task.verbose = False
+		task.dry_run = False
+		task.mode = "chat"
+		task.scope = "workspace"
+		task.results = []
+		task.encryptor = None
+		task.tool_schemas = []
+		task.permission_engine = None
+		task.dangerous = True
+		task.interactive = "auto"
+		task._sync = True
+		task.session_id = "s"
+		task._reports_folder = None
+		# Silence logging/result plumbing that would need a fully built runner.
+		task.debug = lambda *a, **k: None
+		task.add_result = lambda *a, **k: None
+		from secator.ai.interactivity import create_backend
+		task.backend = create_backend("auto")
+		return task
+
+	def test_loop_sums_token_usage(self):
+		"""Three content responses with known tokens sum onto context.ai_tokens."""
+		task = self._make_loop_task()
+		responses = [
+			{"content": "r1", "tool_calls": [], "usage": {"tokens": 100, "cost": 0.001}},
+			{"content": "r2", "tool_calls": [], "usage": {"tokens": 200, "cost": 0.002}},
+			{"content": "r3", "tool_calls": [], "usage": {"tokens": 300, "cost": 0.003}},
+		]
+		# auto backend returns None on follow-up prompt -> loop exits after first
+		# content-only response. Force it to keep going by mocking the prompt to
+		# add a user turn for the first two, then exit.
+		prompt_calls = {"n": 0}
+
+		# NOTE(review): takes a single positional argument; confirm this matches
+		# how _run_loop invokes _prompt_and_redetect.
+		def fake_prompt(choices):
+			prompt_calls["n"] += 1
+			if prompt_calls["n"] >= 3:
+				return None  # exit
+			task.history.add_user("continue")
+			return []
+
+		with _loop_patches(task, responses):
+			with patch.object(ai, '_prompt_and_redetect', side_effect=fake_prompt):
+				list(task._run_loop())
+
+		# 100 + 200 + 300 tokens and 0.001 + 0.002 + 0.003 cost from the stubbed responses.
+		self.assertEqual(task.context["ai_tokens"], 600)
+		self.assertAlmostEqual(task.context["ai_cost"], 0.006)
+
+	def test_loop_with_no_usage_is_zero(self):
+		"""Responses without usage leave context.ai_tokens at 0 (no crash)."""
+		task = self._make_loop_task()
+		responses = [
+			{"content": "r1", "tool_calls": [], "usage": None},
+		]
+		with _loop_patches(task, responses):
+			# Returning None from the follow-up prompt ends the loop after one response.
+			with patch.object(ai, '_prompt_and_redetect', return_value=None):
+				list(task._run_loop())
+
+		self.assertEqual(task.context["ai_tokens"], 0)
+
+
+if __name__ == '__main__':
+	unittest.main()