11 changes: 11 additions & 0 deletions apps/example/src/app/api/chat/route.ts
@@ -62,5 +62,16 @@ export async function POST(request: NextRequest) {
}),
sendReasoning: true,
sendSources: true,
// Token usage tracking, available in the onFinish callback
onFinish: async (event) => {
if (event.usage) {
console.log("[Usage]", {
chatId: id,
inputTokens: event.usage.inputTokens,
outputTokens: event.usage.outputTokens,
totalTokens: event.usage.totalTokens,
});
}
},
});
}
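
Since onFinish receives the final token counts, the same callback can also derive a per-request cost estimate. A minimal sketch in TypeScript; the per-token rates are illustrative assumptions, not official pricing:

import type { LanguageModelUsage } from "ai";

// Assumed gpt-4o-mini rates (USD per token); adjust to your provider's pricing.
const INPUT_COST_PER_TOKEN = 0.15 / 1_000_000;
const OUTPUT_COST_PER_TOKEN = 0.6 / 1_000_000;

export function estimateCostUsd(usage: LanguageModelUsage): number {
  return (
    (usage.inputTokens ?? 0) * INPUT_COST_PER_TOKEN +
    (usage.outputTokens ?? 0) * OUTPUT_COST_PER_TOKEN
  );
}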
4 changes: 4 additions & 0 deletions apps/example/src/components/chat/chat-header.tsx
@@ -2,12 +2,16 @@

import { ChatNavigation } from "./chat-navigation";
import { ChatTitle } from "./chat-title";
import { TokenUsage } from "./token-usage";

export function ChatHeader() {
return (
<div className="flex items-center justify-center relative h-8">
<ChatNavigation />
<ChatTitle />
<div className="absolute right-0 flex items-center gap-2">
<TokenUsage />
</div>
</div>
);
}
1 change: 1 addition & 0 deletions apps/example/src/components/chat/index.ts
@@ -10,3 +10,4 @@ export { EmptyState } from "./empty-state";
export { RateLimitIndicator } from "./rate-limit-indicator";
export { SuggestedPrompts } from "./suggested-prompts";
export { SuggestionPills } from "./suggestion-pills";
export { TokenUsage } from "./token-usage";
61 changes: 61 additions & 0 deletions apps/example/src/components/chat/token-usage.tsx
@@ -0,0 +1,61 @@
"use client";

import { useDataPart } from "@ai-sdk-tools/store";
import type { LanguageModelUsage } from "ai";
import {
Context,
ContextContent,
ContextContentBody,
ContextContentFooter,
ContextContentHeader,
ContextInputUsage,
ContextOutputUsage,
ContextReasoningUsage,
ContextCacheUsage,
ContextTrigger,
} from "@/components/ai-elements/context";

// Model ID for cost calculation (GPT-4o-mini, the primary model used)
const MODEL_ID = "openai/gpt-4o-mini";
const MAX_TOKENS = 128_000; // GPT-4o-mini context window

interface UsageDataPart {
usage: LanguageModelUsage;
}

export function TokenUsage() {
const [usageData] = useDataPart<UsageDataPart>("usage");

if (!usageData?.usage) {
return null;
}

const { usage } = usageData;
const totalTokens = usage.totalTokens ?? 0;

if (totalTokens === 0) {
return null;
}

return (
<Context
usedTokens={totalTokens}
maxTokens={MAX_TOKENS}
usage={usage}
modelId={MODEL_ID}
>
<ContextTrigger className="h-7 px-2 text-xs" />
<ContextContent align="end" side="bottom">
<ContextContentHeader />
<ContextContentBody className="space-y-1.5">
<ContextInputUsage />
<ContextOutputUsage />
<ContextReasoningUsage />
<ContextCacheUsage />
</ContextContentBody>
<ContextContentFooter />
</ContextContent>
</Context>
);
}
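
For apps not using @ai-sdk-tools/store, the same transient part can be read straight from useChat. A hedged sketch, assuming AI SDK v5's onData callback and the "data-usage" part name emitted by writeUsage (see packages/agents/src/streaming.ts below):

"use client";

import { useChat } from "@ai-sdk/react";
import { useState } from "react";
import type { LanguageModelUsage } from "ai";

export function useUsagePart() {
  const [usage, setUsage] = useState<LanguageModelUsage | null>(null);

  const chat = useChat({
    // Transient parts never enter message history, so capture them as they stream.
    onData: (dataPart) => {
      if (dataPart.type === "data-usage") {
        setUsage((dataPart.data as { usage: LanguageModelUsage }).usage);
      }
    },
  });

  return { ...chat, usage };
}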

84 changes: 75 additions & 9 deletions packages/agents/src/agent.ts
@@ -13,13 +13,13 @@ import {
generateObject,
generateText,
type LanguageModel,
type LanguageModelUsage,
type ModelMessage,
type StepResult,
stepCountIs,
type Tool,
tool,
type UIMessage,
type UIMessageStreamOnFinishCallback,
type UIMessageStreamWriter,
} from "ai";
import { z } from "zod";
@@ -31,7 +31,7 @@ import {
} from "./handoff.js";
import { promptWithHandoffInstructions } from "./handoff-prompt.js";
import { AgentRunContext } from "./run-context.js";
import { writeAgentStatus, writeSuggestions } from "./streaming.js";
import { writeAgentStatus, writeSuggestions, writeUsage } from "./streaming.js";
import { createDefaultInputFilter } from "./tool-result-extractor.js";
import type {
AgentConfig,
@@ -188,6 +188,8 @@ export class Agent<
const toolChoice = (options as Record<string, unknown>).toolChoice as
| string
| undefined;
const experimentalTelemetry = (options as Record<string, unknown>)
.experimental_telemetry as Record<string, unknown> | undefined;

// Resolve instructions dynamically (static string or function)
const resolvedInstructions =
@@ -284,6 +286,8 @@

if (maxSteps) additionalOptions.maxSteps = maxSteps;
if (onStepFinish) additionalOptions.onStepFinish = onStepFinish;
if (experimentalTelemetry)
additionalOptions.experimental_telemetry = experimentalTelemetry;

// Handle simple { messages } format (like working code)
if ("messages" in options && !("prompt" in options) && options.messages) {
@@ -361,6 +365,8 @@
sendFinish,
sendStart,
messageMetadata,
// AI SDK telemetry
experimental_telemetry,
// Response options
status,
statusText,
@@ -370,10 +376,34 @@
// Declare variable to store chat metadata (will be loaded in execute block)
let existingChatForSave: any = null;

// Wrap onFinish to save messages after streaming
const wrappedOnFinish: UIMessageStreamOnFinishCallback<never> = async (
event,
) => {
// Accumulate token usage across all agent rounds
let accumulatedUsage: LanguageModelUsage = {
inputTokens: 0,
outputTokens: 0,
totalTokens: 0,
};

// Helper to add usage from a stream result
const addUsage = (usage: LanguageModelUsage | undefined) => {
if (usage) {
accumulatedUsage = {
inputTokens:
(accumulatedUsage.inputTokens ?? 0) + (usage.inputTokens ?? 0),
outputTokens:
(accumulatedUsage.outputTokens ?? 0) + (usage.outputTokens ?? 0),
totalTokens:
(accumulatedUsage.totalTokens ?? 0) + (usage.totalTokens ?? 0),
};
}
};

// Wrap onFinish to save messages after streaming and include usage
const wrappedOnFinish = async (event: {
isAborted: boolean;
isContinuation: boolean;
responseMessage: UIMessage;
messages: UIMessage[];
}): Promise<void> => {
// Save messages and update chat session after stream completes
if (this.memory?.history?.enabled && context) {
const { chatId, userId } = this.extractMemoryIdentifiers(
@@ -415,8 +445,12 @@
}
}

// Call user's onFinish
await onFinish?.(event);
// Call user's onFinish with usage data
const hasUsage = (accumulatedUsage.totalTokens ?? 0) > 0;
await onFinish?.({
...event,
usage: hasUsage ? accumulatedUsage : undefined,
});
};

const stream = createUIMessageStream({
Expand All @@ -434,7 +468,9 @@ export class Agent<
]);

// Load chat metadata once for the entire request (stored in closure for wrappedOnFinish)
const { chatId } = this.extractMemoryIdentifiers(context as TContext);
const { chatId } = context
? this.extractMemoryIdentifiers(context as TContext)
: { chatId: undefined };
if (this.memory?.chats?.enabled && chatId) {
existingChatForSave = await this.memory.provider?.getChat?.(chatId);
}
@@ -719,6 +755,7 @@
messages: messagesToSend,
executionContext: executionContext,
maxSteps, // Limit tool calls per round
experimental_telemetry,
onStepFinish: async (step: unknown) => {
if (onEvent) {
await onEvent({
@@ -847,6 +884,25 @@
}
}

// Capture usage from the stream result after consumption
try {
// Use totalUsage for multi-step generations; fall back to usage for a single step
const streamUsage =
(await result.totalUsage) || (await result.usage);
addUsage(streamUsage);
logger.debug("Captured usage from stream", {
agent: currentAgent.name,
round,
inputTokens: streamUsage?.inputTokens,
outputTokens: streamUsage?.outputTokens,
totalTokens: streamUsage?.totalTokens,
});
} catch (usageError) {
logger.debug("Could not capture usage from stream", {
error: usageError,
});
}

// Update conversation - only add text if it's a complete response
// Don't add intermediate text that was generated between tool calls
if (textAccumulated && !handoffData) {
@@ -1152,6 +1208,16 @@
);
}

// Stream usage data if available
if ((accumulatedUsage.totalTokens ?? 0) > 0) {
writeUsage(writer, accumulatedUsage);
logger.debug("Streamed usage data", {
inputTokens: accumulatedUsage.inputTokens,
outputTokens: accumulatedUsage.outputTokens,
totalTokens: accumulatedUsage.totalTokens,
});
}

writer.write({ type: "finish" });
} catch (error) {
logger.error("Error in toUIMessageStream", { error });
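The accumulation above treats every counter as optional, since providers differ in which fields they report. The same logic, distilled into a self-contained helper for reference:

import type { LanguageModelUsage } from "ai";

// Sum two usage reports, treating missing counters as zero. This mirrors the
// per-round accumulation in Agent.toUIMessageStream, including handoff rounds.
function addUsage(
  total: LanguageModelUsage,
  next: LanguageModelUsage | undefined,
): LanguageModelUsage {
  if (!next) return total;
  return {
    inputTokens: (total.inputTokens ?? 0) + (next.inputTokens ?? 0),
    outputTokens: (total.outputTokens ?? 0) + (next.outputTokens ?? 0),
    totalTokens: (total.totalTokens ?? 0) + (next.totalTokens ?? 0),
  };
}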
3 changes: 3 additions & 0 deletions packages/agents/src/index.ts
@@ -39,6 +39,7 @@ export {
writeAgentStatus,
writeDataPart,
writeRateLimit,
writeUsage,
} from "./streaming.js";
// Types
export type {
@@ -47,6 +48,8 @@ export type {
AgentEvent,
AgentGenerateOptions,
AgentGenerateResult,
AgentStreamOnFinishCallback,
AgentStreamOnFinishEvent,
AgentStreamOptions,
AgentStreamOptionsUI,
AgentStreamResult,
27 changes: 26 additions & 1 deletion packages/agents/src/streaming.ts
@@ -5,7 +5,7 @@
* to the UI message stream following the AI SDK's streaming data pattern.
*/

import type { UIMessageStreamWriter } from "ai";
import type { LanguageModelUsage, UIMessageStreamWriter } from "ai";
import type { AgentDataParts } from "./types.js";

/**
@@ -114,3 +114,28 @@ export function writeSuggestions(
): void {
writeDataPart(writer, "data-suggestions", { prompts }, { transient: true });
}

/**
* Write transient token usage information.
*
* Usage data is ephemeral and won't be added to message history.
* It's only available via the onData callback in useChat.
*
* @param writer - The UI message stream writer
* @param usage - Token usage data from the language model
*
* @example
* ```typescript
* writeUsage(writer, {
* inputTokens: 150,
* outputTokens: 200,
* totalTokens: 350
* });
* ```
*/
export function writeUsage(
writer: UIMessageStreamWriter,
usage: LanguageModelUsage,
): void {
writeDataPart(writer, "data-usage", { usage }, { transient: true });
}
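
Putting the pieces together on the server, writeUsage slots into a custom createUIMessageStream just before the finish event. A hedged end-to-end sketch; the @ai-sdk-tools/agents import path is assumed from this repo's package layout:

import { createUIMessageStream } from "ai";
import { writeUsage } from "@ai-sdk-tools/agents"; // assumed package name

const stream = createUIMessageStream({
  execute: async ({ writer }) => {
    // ... run the model and accumulate usage across steps ...
    writeUsage(writer, {
      inputTokens: 150,
      outputTokens: 200,
      totalTokens: 350,
    });
    writer.write({ type: "finish" });
  },
});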