freelabz · ocervell · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026 · coderabbitai
diff --git a/secator/ai/history.py b/secator/ai/history.py
@@ -124,6 +124,13 @@ class ChatHistory:
 
     messages: List[Dict[str, str]] = field(default_factory=list)
     model: Optional[str] = None
+    # Billed token/cost usage accrued by LLM calls this object makes internally
+    # (history summarization/compaction). The owning `ai` task drains these into
+    # context.ai_tokens so summarization is billed alongside the main loop.
+    billed_tokens: int = 0
+    billed_prompt_tokens: int = 0
+    billed_completion_tokens: int = 0
+    billed_cost: float = 0.0
 
     def add_system(self, content: str) -> None:
         self.messages.append({"role": "system", "content": content})
@@ -390,6 +397,26 @@ def compact(self, model: str, api_base: Optional[str] = None,
         with console.status(f"[bold orange3]Compacting chat history...[/] [gray42] • {token_str}[/]", spinner="dots"):
             result = call_llm([{"role": "user", "content": prompt}], model, 0.3, api_base, api_key)
 
+        # Record billed usage of the summarization call so the owning task can
+        # roll it into context.ai_tokens. Missing usage counts as 0.
+        usage = result.get("usage") or {}
+        try:
+            self.billed_tokens += int(usage.get("tokens") or 0)
+        except (TypeError, ValueError):
+            pass
+        try:
+            self.billed_prompt_tokens += int(usage.get("prompt_tokens") or 0)
+        except (TypeError, ValueError):
+            pass
+        try:
+            self.billed_completion_tokens += int(usage.get("completion_tokens") or 0)
+        except (TypeError, ValueError):
+            pass
+        try:
+            self.billed_cost += float(usage.get("cost") or 0)
+        except (TypeError, ValueError):
+            pass
+
         self.messages = []
         if initial_system:
             self.messages.append(initial_system)

diff --git a/secator/ai/utils.py b/secator/ai/utils.py
@@ -275,6 +275,8 @@ def call_llm(
 
 		usage = {
 			"tokens": response.usage.total_tokens,
+			"prompt_tokens": getattr(response.usage, "prompt_tokens", None),
+			"completion_tokens": getattr(response.usage, "completion_tokens", None),
 			"cost": cost,
 		}
 

diff --git a/secator/tasks/ai.py b/secator/tasks/ai.py
@@ -255,6 +255,9 @@ def _run_loop(self) -> Generator:
 				# Prompt user when context is filling up (local only)
 				yield from self._summarize_user()
 
+				# Roll any billed summarization usage into context.ai_tokens
+				self._drain_history_usage()
+
 				# Subagent token usage (for batch progress tracking)
 				if self.is_subagent:
 					by_role = self.history.count_tokens_by_role(self.model)
@@ -278,6 +281,11 @@ def _run_loop(self) -> Generator:
 				tool_calls = result.get("tool_calls", [])
 				usage = result.get("usage", {})
 
+				# Accumulate billed tokens for this run (read by the billing chore
+				# as context.ai_tokens). Done here, before any empty-response
+				# `continue`, so every billed call is counted exactly once.
+				self._account_usage(usage)
+
 				self.debug(f'content: {content[:200] if content else "(empty)"}', sub='llm')
 
 				# Empty response
@@ -455,6 +463,23 @@ def _init_options(self):
 			workspace=self.reports_folder or ""
 		)
 
+		# Per-run billed-token accounting. The platform billing chore reads
+		# `context.ai_tokens` (cumulative billed tokens) — the AI analog of
+		# `context.scan_hours`. Initialize on the runner context so it is
+		# persisted onto the task doc even if the run makes zero LLM calls.
+		self.context.setdefault("ai_tokens", 0)
+		self.context.setdefault("ai_prompt_tokens", 0)
+		self.context.setdefault("ai_completion_tokens", 0)
+		self.context.setdefault("ai_cost", 0.0)
+
+		# Record the resolved model id used for this run so the platform metering
+		# chore can price the consumed tokens against the model registry (free
+		# vs paid, per-million in/out/cached rates). This is the *configured*
+		# model for the run; if the user switches model mid-session that change
+		# is out of scope (the configured model is recorded). Set unconditionally
+		# (not setdefault) so it reflects the option resolved in this _init.
+		self.context["ai_model"] = self.model
+
 		# Create interactivity backend
 		self.session_id = self.session_name or str(self.id)
 		self.backend = create_backend(self.interactive, timeout=CONFIG.addons.ai.user_response_timeout)
@@ -514,6 +539,7 @@ def _detect_mode(self, force=False):
 			messages = [{"role": "user", "content": f"{selection_prompt}\n{self.prompt}"}]
 			with maybe_status("[bold orange3]Detecting intent...[/]", spinner="dots"):
 				result = call_llm(messages, self.intent_model, temperature=0.3, api_base=self.api_base, api_key=self.api_key)
+			self._account_usage(result.get("usage"))
 			mode = result["content"].strip().lower()
 			if mode in ("attack", "chat"):
 				console.print(rf"[bold green]\[INF][/] Detected intent: [bold]{mode}[/]")
@@ -760,6 +786,66 @@ def _dispatch_and_collect(self, actions, ctx):
 	# History helpers
 	# -------------------------------------------------------------------------
 
+	def _account_usage(self, usage):
+		"""Add one LLM call's billed usage to the running totals on the runner context.
+
+		Args:
+			usage: Dict shaped like the `usage` entry of a `call_llm` result
+				(`tokens`, `prompt_tokens`, `completion_tokens`, `cost`), or a
+				falsy value (None / empty dict), in which case nothing happens.
+
+		Each field is folded into its own context key: `ai_tokens`,
+		`ai_prompt_tokens` and `ai_completion_tokens` as int, `ai_cost` as float.
+		Missing or None values count as 0. A value that cannot be converted is
+		skipped on its own, so one bad field neither blocks the other counters
+		nor crashes the run.
+		"""
+		if not usage:
+			return
+		# (usage key, context key, numeric caster) for every tracked counter.
+		counters = (
+			("tokens", "ai_tokens", int),
+			("prompt_tokens", "ai_prompt_tokens", int),
+			("completion_tokens", "ai_completion_tokens", int),
+			("cost", "ai_cost", float),
+		)
+		for usage_key, context_key, cast in counters:
+			try:
+				increment = usage.get(usage_key) or 0
+				running = self.context.get(context_key, 0) or 0
+				self.context[context_key] = cast(running) + cast(increment)
+			except (TypeError, ValueError):
+				# Best-effort accounting: leave this counter untouched.
+				continue
+
+	def _drain_history_usage(self):
+		"""Move usage billed by history summarization onto the runner context.
+
+		`ChatHistory.compact` makes its own LLM call and accumulates the billed
+		usage on the history object (`billed_tokens`, `billed_prompt_tokens`,
+		`billed_completion_tokens`, `billed_cost`). This forwards those pending
+		amounts to `_account_usage` and then zeroes them, so each summarization
+		call is counted exactly once.
+
+		Does nothing when the task has no `history` attribute yet or when every
+		pending counter is zero.
+		"""
+		history = getattr(self, "history", None)
+		if history is None:
+			return
+		pending = {
+			"tokens": getattr(history, "billed_tokens", 0) or 0,
+			"prompt_tokens": getattr(history, "billed_prompt_tokens", 0) or 0,
+			"completion_tokens": getattr(history, "billed_completion_tokens", 0) or 0,
+			"cost": getattr(history, "billed_cost", 0.0) or 0.0,
+		}
+		# Drain when ANY counter is non-zero: gating on the total alone would
+		# strand a recorded cost (or prompt/completion split) whenever the
+		# provider reports a zero or missing total token count.
+		if not any(pending.values()):
+			return
+		self._account_usage(pending)
+		history.billed_tokens = 0
+		history.billed_prompt_tokens = 0
+		history.billed_completion_tokens = 0
+		history.billed_cost = 0.0
-	def _drain_history_usage(self):
-		"""Roll billed usage accrued by history summarization into context.ai_tokens.
-
-		`ChatHistory.compact` makes its own LLM calls and stashes their billed
-		usage on the history object; drain it here so it is counted exactly once.
-		"""
-		history = getattr(self, "history", None)
-		if history is None:
-			return
-		tokens = getattr(history, "billed_tokens", 0) or 0
-		cost = getattr(history, "billed_cost", 0.0) or 0.0
-		if tokens:
-			self._account_usage({"tokens": tokens, "cost": cost})
-			history.billed_tokens = 0
-			history.billed_cost = 0.0
+	def _drain_history_usage(self):
+		"""Move usage billed by history summarization onto the runner context.
+
+		`ChatHistory.compact` makes its own LLM call and accumulates the billed
+		usage on the history object (`billed_tokens`, `billed_prompt_tokens`,
+		`billed_completion_tokens`, `billed_cost`). This forwards those pending
+		amounts to `_account_usage` and then zeroes them, so each summarization
+		call is counted exactly once.
+
+		Does nothing when the task has no `history` attribute yet or when every
+		pending counter is zero.
+		"""
+		history = getattr(self, "history", None)
+		if history is None:
+			return
+		# Forward the prompt/completion split as well as the total and cost;
+		# otherwise the split counters on the history are never accounted for
+		# and never reset.
+		pending = {
+			"tokens": getattr(history, "billed_tokens", 0) or 0,
+			"prompt_tokens": getattr(history, "billed_prompt_tokens", 0) or 0,
+			"completion_tokens": getattr(history, "billed_completion_tokens", 0) or 0,
+			"cost": getattr(history, "billed_cost", 0.0) or 0.0,
+		}
+		# Drain when ANY counter is non-zero, not only the total.
+		if not any(pending.values()):
+			return
+		self._account_usage(pending)
+		history.billed_tokens = 0
+		history.billed_prompt_tokens = 0
+		history.billed_completion_tokens = 0
+		history.billed_cost = 0.0
-	def _drain_history_usage(self):
-		"""Roll billed usage accrued by history summarization into context.ai_tokens.
-
-		`ChatHistory.compact` makes its own LLM calls and stashes their billed
-		usage on the history object; drain it here so it is counted exactly once.
-		"""
-		history = getattr(self, "history", None)
-		if history is None:
-			return
-		tokens = getattr(history, "billed_tokens", 0) or 0
-		cost = getattr(history, "billed_cost", 0.0) or 0.0
-		if tokens:
-			self._account_usage({"tokens": tokens, "cost": cost})
-			history.billed_tokens = 0
-			history.billed_cost = 0.0
+	def _drain_history_usage(self):
+		"""Move usage billed by history summarization onto the runner context.
+
+		`ChatHistory.compact` makes its own LLM call and accumulates the billed
+		usage on the history object (`billed_tokens`, `billed_prompt_tokens`,
+		`billed_completion_tokens`, `billed_cost`). This forwards those pending
+		amounts to `_account_usage` and then zeroes them, so each summarization
+		call is counted exactly once.
+
+		Does nothing when the task has no `history` attribute yet or when every
+		pending counter is zero.
+		"""
+		history = getattr(self, "history", None)
+		if history is None:
+			return
+		# Forward the prompt/completion split as well as the total and cost;
+		# otherwise the split counters on the history are never accounted for
+		# and never reset.
+		pending = {
+			"tokens": getattr(history, "billed_tokens", 0) or 0,
+			"prompt_tokens": getattr(history, "billed_prompt_tokens", 0) or 0,
+			"completion_tokens": getattr(history, "billed_completion_tokens", 0) or 0,
+			"cost": getattr(history, "billed_cost", 0.0) or 0.0,
+		}
+		# Drain when ANY counter is non-zero, not only the total.
+		if not any(pending.values()):
+			return
+		self._account_usage(pending)
+		history.billed_tokens = 0
+		history.billed_prompt_tokens = 0
+		history.billed_completion_tokens = 0
+		history.billed_cost = 0.0
+
 	def _add_assistant_to_history(self, content, tool_calls):
 		"""Add assistant message (with optional tool calls) to chat history."""
 		if tool_calls: