Skip to content

Commit 219804f

Browse files
committed
fix: use backend thread token usage for header total
1 parent 2b1fcb3 commit 219804f

14 files changed

Lines changed: 480 additions & 35 deletions

File tree

backend/app/gateway/routers/thread_runs.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,27 @@ class RunResponse(BaseModel):
6868
updated_at: str = ""
6969

7070

71+
class ThreadTokenUsageModelBreakdown(BaseModel):
72+
tokens: int = 0
73+
runs: int = 0
74+
75+
76+
class ThreadTokenUsageCallerBreakdown(BaseModel):
77+
lead_agent: int = 0
78+
subagent: int = 0
79+
middleware: int = 0
80+
81+
82+
class ThreadTokenUsageResponse(BaseModel):
83+
thread_id: str
84+
total_tokens: int = 0
85+
total_input_tokens: int = 0
86+
total_output_tokens: int = 0
87+
total_runs: int = 0
88+
by_model: dict[str, ThreadTokenUsageModelBreakdown] = Field(default_factory=dict)
89+
by_caller: ThreadTokenUsageCallerBreakdown = Field(default_factory=ThreadTokenUsageCallerBreakdown)
90+
91+
7192
# ---------------------------------------------------------------------------
7293
# Helpers
7394
# ---------------------------------------------------------------------------
@@ -368,10 +389,10 @@ async def list_run_events(
368389
return await event_store.list_events(thread_id, run_id, event_types=types, limit=limit)
369390

370391

371-
@router.get("/{thread_id}/token-usage")
392+
@router.get("/{thread_id}/token-usage", response_model=ThreadTokenUsageResponse)
372393
@require_permission("threads", "read", owner_check=True)
373-
async def thread_token_usage(thread_id: str, request: Request) -> dict:
394+
async def thread_token_usage(thread_id: str, request: Request) -> ThreadTokenUsageResponse:
374395
"""Thread-level token usage aggregation."""
375396
run_store = get_run_store(request)
376397
agg = await run_store.aggregate_tokens_by_thread(thread_id)
377-
return {"thread_id": thread_id, **agg}
398+
return ThreadTokenUsageResponse(thread_id=thread_id, **agg)

backend/tests/test_run_repository.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,61 @@ async def test_update_run_completion_preserves_existing_fields(self, tmp_path):
166166
assert row["total_tokens"] == 100
167167
await _cleanup()
168168

169+
@pytest.mark.anyio
170+
async def test_aggregate_tokens_by_thread_counts_completed_runs_only(self, tmp_path):
171+
repo = await _make_repo(tmp_path)
172+
await repo.put("success-run", thread_id="t1", status="running")
173+
await repo.update_run_completion(
174+
"success-run",
175+
status="success",
176+
total_input_tokens=70,
177+
total_output_tokens=30,
178+
total_tokens=100,
179+
lead_agent_tokens=80,
180+
subagent_tokens=15,
181+
middleware_tokens=5,
182+
)
183+
await repo.put("error-run", thread_id="t1", status="running")
184+
await repo.update_run_completion(
185+
"error-run",
186+
status="error",
187+
total_input_tokens=20,
188+
total_output_tokens=30,
189+
total_tokens=50,
190+
lead_agent_tokens=40,
191+
subagent_tokens=10,
192+
)
193+
await repo.put("running-run", thread_id="t1", status="running")
194+
await repo.update_run_completion(
195+
"running-run",
196+
status="running",
197+
total_input_tokens=900,
198+
total_output_tokens=99,
199+
total_tokens=999,
200+
lead_agent_tokens=999,
201+
)
202+
await repo.put("other-thread-run", thread_id="t2", status="running")
203+
await repo.update_run_completion(
204+
"other-thread-run",
205+
status="success",
206+
total_tokens=888,
207+
lead_agent_tokens=888,
208+
)
209+
210+
agg = await repo.aggregate_tokens_by_thread("t1")
211+
212+
assert agg["total_tokens"] == 150
213+
assert agg["total_input_tokens"] == 90
214+
assert agg["total_output_tokens"] == 60
215+
assert agg["total_runs"] == 2
216+
assert agg["by_model"] == {"unknown": {"tokens": 150, "runs": 2}}
217+
assert agg["by_caller"] == {
218+
"lead_agent": 120,
219+
"subagent": 25,
220+
"middleware": 5,
221+
}
222+
await _cleanup()
223+
169224
@pytest.mark.anyio
170225
async def test_list_by_thread_ordered_desc(self, tmp_path):
171226
"""list_by_thread returns newest first."""
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
"""Tests for thread-level token usage aggregation API."""
2+
3+
from __future__ import annotations
4+
5+
from unittest.mock import AsyncMock, MagicMock
6+
7+
from _router_auth_helpers import make_authed_test_app
8+
from fastapi.testclient import TestClient
9+
10+
from app.gateway.routers import thread_runs
11+
12+
13+
def _make_app(run_store: MagicMock):
14+
app = make_authed_test_app()
15+
app.include_router(thread_runs.router)
16+
app.state.run_store = run_store
17+
return app
18+
19+
20+
def test_thread_token_usage_returns_stable_shape():
21+
run_store = MagicMock()
22+
run_store.aggregate_tokens_by_thread = AsyncMock(
23+
return_value={
24+
"total_tokens": 150,
25+
"total_input_tokens": 90,
26+
"total_output_tokens": 60,
27+
"total_runs": 2,
28+
"by_model": {"unknown": {"tokens": 150, "runs": 2}},
29+
"by_caller": {
30+
"lead_agent": 120,
31+
"subagent": 25,
32+
"middleware": 5,
33+
},
34+
},
35+
)
36+
app = _make_app(run_store)
37+
38+
with TestClient(app) as client:
39+
response = client.get("/api/threads/thread-1/token-usage")
40+
41+
assert response.status_code == 200
42+
assert response.json() == {
43+
"thread_id": "thread-1",
44+
"total_tokens": 150,
45+
"total_input_tokens": 90,
46+
"total_output_tokens": 60,
47+
"total_runs": 2,
48+
"by_model": {"unknown": {"tokens": 150, "runs": 2}},
49+
"by_caller": {
50+
"lead_agent": 120,
51+
"subagent": 25,
52+
"middleware": 5,
53+
},
54+
}
55+
run_store.aggregate_tokens_by_thread.assert_awaited_once_with("thread-1")

frontend/src/app/workspace/agents/[agent_name]/chats/[thread_id]/page.tsx

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ import { useI18n } from "@/core/i18n/hooks";
2626
import { useModels } from "@/core/models/hooks";
2727
import { useNotification } from "@/core/notification/hooks";
2828
import { useLocalSettings, useThreadSettings } from "@/core/settings";
29-
import { useThreadStream } from "@/core/threads/hooks";
29+
import { useThreadStream, useThreadTokenUsage } from "@/core/threads/hooks";
30+
import { threadTokenUsageToTokenUsage } from "@/core/threads/token-usage";
3031
import { textOfMessage } from "@/core/threads/utils";
3132
import { env } from "@/env";
3233
import { cn } from "@/lib/utils";
@@ -42,22 +43,29 @@ export default function AgentChatPage() {
4243

4344
const { agent } = useAgent(agent_name);
4445

45-
const { threadId, setThreadId, isNewThread, setIsNewThread } =
46+
const { threadId, setThreadId, isNewThread, setIsNewThread, isMock } =
4647
useThreadChat();
4748
const [settings, setSettings] = useThreadSettings(threadId);
4849
const [localSettings, setLocalSettings] = useLocalSettings();
4950
const { tokenUsageEnabled } = useModels();
51+
const threadTokenUsage = useThreadTokenUsage(
52+
isNewThread || isMock ? undefined : threadId,
53+
{ enabled: tokenUsageEnabled && !isMock },
54+
);
55+
const backendTokenUsage = threadTokenUsageToTokenUsage(threadTokenUsage.data);
5056

5157
const { showNotification } = useNotification();
5258
const {
5359
thread,
60+
pendingUsageMessages,
5461
sendMessage,
5562
isHistoryLoading,
5663
hasMoreHistory,
5764
loadMoreHistory,
5865
} = useThreadStream({
5966
threadId: isNewThread ? undefined : threadId,
6067
context: { ...settings.context, agent_name: agent_name },
68+
isMock,
6169
onStart: (createdThreadId) => {
6270
setThreadId(createdThreadId);
6371
setIsNewThread(false);
@@ -141,8 +149,11 @@ export default function AgentChatPage() {
141149
</Button>
142150
</Tooltip>
143151
<TokenUsageIndicator
152+
threadId={isNewThread ? undefined : threadId}
153+
backendUsage={backendTokenUsage}
144154
enabled={tokenUsageEnabled}
145155
messages={thread.messages}
156+
pendingMessages={pendingUsageMessages}
146157
preferences={localSettings.tokenUsage}
147158
onPreferencesChange={(preferences) =>
148159
setLocalSettings("tokenUsage", preferences)

frontend/src/app/workspace/chats/[thread_id]/page.tsx

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ import { useI18n } from "@/core/i18n/hooks";
2525
import { useModels } from "@/core/models/hooks";
2626
import { useNotification } from "@/core/notification/hooks";
2727
import { useLocalSettings, useThreadSettings } from "@/core/settings";
28-
import { useThreadStream } from "@/core/threads/hooks";
28+
import { useThreadStream, useThreadTokenUsage } from "@/core/threads/hooks";
29+
import { threadTokenUsageToTokenUsage } from "@/core/threads/token-usage";
2930
import { textOfMessage } from "@/core/threads/utils";
3031
import { env } from "@/env";
3132
import { cn } from "@/lib/utils";
@@ -44,6 +45,11 @@ export default function ChatPage() {
4445
const [settings, setSettings] = useThreadSettings(threadId);
4546
const [localSettings, setLocalSettings] = useLocalSettings();
4647
const { tokenUsageEnabled } = useModels();
48+
const threadTokenUsage = useThreadTokenUsage(
49+
isNewThread || isMock ? undefined : threadId,
50+
{ enabled: tokenUsageEnabled && !isMock },
51+
);
52+
const backendTokenUsage = threadTokenUsageToTokenUsage(threadTokenUsage.data);
4753
const mountedRef = useRef(false);
4854
useSpecificChatMode();
4955

@@ -63,6 +69,7 @@ export default function ChatPage() {
6369

6470
const {
6571
thread,
72+
pendingUsageMessages,
6673
sendMessage,
6774
isUploading,
6875
isHistoryLoading,
@@ -137,8 +144,11 @@ export default function ChatPage() {
137144
</div>
138145
<div className="flex items-center gap-2">
139146
<TokenUsageIndicator
147+
threadId={isNewThread ? undefined : threadId}
148+
backendUsage={backendTokenUsage}
140149
enabled={tokenUsageEnabled}
141150
messages={thread.messages}
151+
pendingMessages={pendingUsageMessages}
142152
preferences={localSettings.tokenUsage}
143153
onPreferencesChange={(preferences) =>
144154
setLocalSettings("tokenUsage", preferences)

frontend/src/components/workspace/token-usage-indicator.tsx

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,11 @@ import {
1515
DropdownMenuTrigger,
1616
} from "@/components/ui/dropdown-menu";
1717
import { useI18n } from "@/core/i18n/hooks";
18-
import { accumulateUsage, formatTokenCount } from "@/core/messages/usage";
18+
import {
19+
formatTokenCount,
20+
selectHeaderTokenUsage,
21+
type TokenUsage,
22+
} from "@/core/messages/usage";
1923
import {
2024
getTokenUsageViewPreset,
2125
tokenUsagePreferencesFromPreset,
@@ -25,23 +29,37 @@ import {
2529
import { cn } from "@/lib/utils";
2630

2731
interface TokenUsageIndicatorProps {
32+
threadId?: string;
2833
messages: Message[];
34+
pendingMessages?: Message[];
35+
backendUsage?: TokenUsage | null;
2936
enabled?: boolean;
3037
preferences: TokenUsagePreferences;
3138
onPreferencesChange: (preferences: TokenUsagePreferences) => void;
3239
className?: string;
3340
}
3441

3542
export function TokenUsageIndicator({
43+
threadId,
3644
messages,
45+
pendingMessages,
46+
backendUsage,
3747
enabled = false,
3848
preferences,
3949
onPreferencesChange,
4050
className,
4151
}: TokenUsageIndicatorProps) {
4252
const { t } = useI18n();
4353

44-
const usage = useMemo(() => accumulateUsage(messages), [messages]);
54+
const usage = useMemo(
55+
() =>
56+
selectHeaderTokenUsage({
57+
backendUsage: threadId ? backendUsage : null,
58+
messages,
59+
pendingMessages,
60+
}),
61+
[backendUsage, messages, pendingMessages, threadId],
62+
);
4563
const preset = getTokenUsageViewPreset(preferences);
4664

4765
if (!enabled) {

frontend/src/core/i18n/locales/en-US.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,7 @@ export const enUS: Translations = {
310310
unavailable:
311311
"No token usage yet. Usage appears only after a successful model response when the provider returns usage_metadata.",
312312
unavailableShort: "No usage returned",
313-
note: "Shown from provider-returned usage_metadata. Totals are best-effort conversation totals and may differ from provider billing pages.",
313+
note: "Header totals use persisted thread usage when available. Per-turn and debug usage come from visible messages. Totals may differ from provider billing pages.",
314314
presets: {
315315
off: "Off",
316316
summary: "Summary",

frontend/src/core/i18n/locales/zh-CN.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ export const zhCN: Translations = {
296296
unavailable:
297297
"暂无 Token 用量。只有模型成功返回且供应商提供 usage_metadata 时才会显示。",
298298
unavailableShort: "未返回用量",
299-
note: "基于供应商返回的 usage_metadata 展示。当前总量是 best-effort 的会话参考值,可能与平台账单页不完全一致。",
299+
note: "顶部总量优先使用后端持久化的线程用量。每轮和调试用量来自当前可见消息,可能与平台账单页不完全一致。",
300300
presets: {
301301
off: "关闭",
302302
summary: "总览",

frontend/src/core/messages/usage.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,40 @@ export function accumulateUsage(messages: Message[]): TokenUsage | null {
6565
return hasUsage ? cumulative : null;
6666
}
6767

68+
function hasNonZeroUsage(
69+
usage: TokenUsage | null | undefined,
70+
): usage is TokenUsage {
71+
return (
72+
usage !== null &&
73+
usage !== undefined &&
74+
(usage.inputTokens > 0 || usage.outputTokens > 0 || usage.totalTokens > 0)
75+
);
76+
}
77+
78+
function addUsage(base: TokenUsage, delta: TokenUsage): TokenUsage {
79+
return {
80+
inputTokens: base.inputTokens + delta.inputTokens,
81+
outputTokens: base.outputTokens + delta.outputTokens,
82+
totalTokens: base.totalTokens + delta.totalTokens,
83+
};
84+
}
85+
86+
export function selectHeaderTokenUsage({
87+
backendUsage,
88+
messages,
89+
pendingMessages = [],
90+
}: {
91+
backendUsage?: TokenUsage | null;
92+
messages: Message[];
93+
pendingMessages?: Message[];
94+
}): TokenUsage | null {
95+
if (hasNonZeroUsage(backendUsage)) {
96+
const pendingUsage = accumulateUsage(pendingMessages);
97+
return pendingUsage ? addUsage(backendUsage, pendingUsage) : backendUsage;
98+
}
99+
return accumulateUsage(messages);
100+
}
101+
68102
/**
69103
* Format a token count for display: 1234 -> "1,234", 12345 -> "12.3K"
70104
*/

0 commit comments

Comments
 (0)