diff --git a/apps/example/src/app/api/chat/route.ts b/apps/example/src/app/api/chat/route.ts
index b191353..2d7f02e 100644
--- a/apps/example/src/app/api/chat/route.ts
+++ b/apps/example/src/app/api/chat/route.ts
@@ -62,5 +62,16 @@ export async function POST(request: NextRequest) {
     }),
     sendReasoning: true,
     sendSources: true,
+    // Token usage tracking - available in onFinish callback
+    onFinish: async (event) => {
+      if (event.usage) {
+        console.log("[Usage]", {
+          chatId: id,
+          inputTokens: event.usage.inputTokens,
+          outputTokens: event.usage.outputTokens,
+          totalTokens: event.usage.totalTokens,
+        });
+      }
+    },
   });
 }
diff --git a/apps/example/src/components/chat/chat-header.tsx b/apps/example/src/components/chat/chat-header.tsx
index 7edb57a..8c495e8 100644
--- a/apps/example/src/components/chat/chat-header.tsx
+++ b/apps/example/src/components/chat/chat-header.tsx
@@ -2,12 +2,16 @@
 
 import { ChatNavigation } from "./chat-navigation";
 import { ChatTitle } from "./chat-title";
+import { TokenUsage } from "./token-usage";
 
 export function ChatHeader() {
   return (
     <div className="...">
       <ChatTitle />
+      <div className="...">
+        <TokenUsage />
         <ChatNavigation />
+      </div>
     </div>
   );
 }
diff --git a/apps/example/src/components/chat/index.ts b/apps/example/src/components/chat/index.ts
index 8420dbc..cacf4ce 100644
--- a/apps/example/src/components/chat/index.ts
+++ b/apps/example/src/components/chat/index.ts
@@ -10,3 +10,4 @@ export { EmptyState } from "./empty-state";
 export { RateLimitIndicator } from "./rate-limit-indicator";
 export { SuggestedPrompts } from "./suggested-prompts";
 export { SuggestionPills } from "./suggestion-pills";
+export { TokenUsage } from "./token-usage";
diff --git a/apps/example/src/components/chat/token-usage.tsx b/apps/example/src/components/chat/token-usage.tsx
new file mode 100644
index 0000000..c0841e2
--- /dev/null
+++ b/apps/example/src/components/chat/token-usage.tsx
@@ -0,0 +1,61 @@
+"use client";
+
+import { useDataPart } from "@ai-sdk-tools/store";
+import type { LanguageModelUsage } from "ai";
+import {
+  Context,
+  ContextContent,
+  ContextContentBody,
+  ContextContentFooter,
+  ContextContentHeader,
+  ContextInputUsage,
+  ContextOutputUsage,
+  ContextReasoningUsage,
+  ContextCacheUsage,
+  ContextTrigger,
+} from "@/components/ai-elements/context";
+
+// Model ID for cost calculation (GPT-4o-mini - primary model used)
+const MODEL_ID = "openai/gpt-4o-mini";
+const MAX_TOKENS = 128_000; // GPT-4o-mini context window
+
+interface UsageDataPart {
+  usage: LanguageModelUsage;
+}
+
+export function TokenUsage() {
+  const [usageData] = useDataPart<UsageDataPart>("usage");
+
+  if (!usageData?.usage) {
+    return null;
+  }
+
+  const { usage } = usageData;
+  const totalTokens = usage.totalTokens ?? 0;
+
+  if (totalTokens === 0) {
+    return null;
+  }
+
+  return (
+    <Context
+      maxTokens={MAX_TOKENS}
+      usedTokens={totalTokens}
+      usage={usage}
+      modelId={MODEL_ID}
+    >
+      <ContextTrigger />
+      <ContextContent>
+        <ContextContentHeader />
+        <ContextContentBody>
+          <ContextInputUsage />
+          <ContextOutputUsage />
+          <ContextReasoningUsage />
+          <ContextCacheUsage />
+        </ContextContentBody>
+        <ContextContentFooter />
+      </ContextContent>
+    </Context>
+  );
+}
diff --git a/packages/agents/src/agent.ts b/packages/agents/src/agent.ts
index e59e64d..175232f 100644
--- a/packages/agents/src/agent.ts
+++ b/packages/agents/src/agent.ts
@@ -13,13 +13,13 @@ import {
   generateObject,
   generateText,
   type LanguageModel,
+  type LanguageModelUsage,
   type ModelMessage,
   type StepResult,
   stepCountIs,
   type Tool,
   tool,
   type UIMessage,
-  type UIMessageStreamOnFinishCallback,
   type UIMessageStreamWriter,
 } from "ai";
 import { z } from "zod";
@@ -31,7 +31,7 @@
 } from "./handoff.js";
 import { promptWithHandoffInstructions } from "./handoff-prompt.js";
 import { AgentRunContext } from "./run-context.js";
-import { writeAgentStatus, writeSuggestions } from "./streaming.js";
+import { writeAgentStatus, writeSuggestions, writeUsage } from "./streaming.js";
 import { createDefaultInputFilter } from "./tool-result-extractor.js";
 import type {
   AgentConfig,
@@ -188,6 +188,8 @@
     const toolChoice = (options as Record<string, unknown>).toolChoice as
       | string
      | undefined;
+    const experimentalTelemetry = (options as Record<string, unknown>)
+      .experimental_telemetry as Record<string, unknown> | undefined;
 
     // Resolve instructions dynamically (static string or function)
     const resolvedInstructions =
@@ -284,6 +286,8 @@
 
     if (maxSteps) additionalOptions.maxSteps = maxSteps;
     if (onStepFinish) additionalOptions.onStepFinish = onStepFinish;
+    if (experimentalTelemetry)
+      additionalOptions.experimental_telemetry = experimentalTelemetry;
 
     // Handle simple { messages } format (like working code)
     if ("messages" in options && !("prompt" in options) && options.messages) {
@@ -361,6 +365,8 @@
       sendFinish,
       sendStart,
       messageMetadata,
+      // AI SDK telemetry
+      experimental_telemetry,
       // Response options
       status,
       statusText,
@@ -370,10 +376,34 @@
     // Declare variable to store chat metadata (will be loaded in execute block)
     let existingChatForSave: any = null;
 
-    // Wrap onFinish to save messages after streaming
-    const wrappedOnFinish: UIMessageStreamOnFinishCallback = async (
-      event,
-    ) => {
+    // Accumulate token usage across all agent rounds
+    let accumulatedUsage: LanguageModelUsage = {
+      inputTokens: 0,
+      outputTokens: 0,
+      totalTokens: 0,
+    };
+
+    // Helper to add usage from a stream result
+    const addUsage = (usage: LanguageModelUsage | undefined) => {
+      if (usage) {
+        accumulatedUsage = {
+          inputTokens:
+            (accumulatedUsage.inputTokens ?? 0) + (usage.inputTokens ?? 0),
+          outputTokens:
+            (accumulatedUsage.outputTokens ?? 0) + (usage.outputTokens ?? 0),
+          totalTokens:
+            (accumulatedUsage.totalTokens ?? 0) + (usage.totalTokens ?? 0),
+        };
+      }
+    };
+
+    // Wrap onFinish to save messages after streaming and include usage
+    const wrappedOnFinish = async (event: {
+      isAborted: boolean;
+      isContinuation: boolean;
+      responseMessage: UIMessage;
+      messages: UIMessage[];
+    }): Promise<void> => {
       // Save messages and update chat session after stream completes
       if (this.memory?.history?.enabled && context) {
         const { chatId, userId } = this.extractMemoryIdentifiers(
@@ -415,8 +445,12 @@
         }
       }
 
-      // Call user's onFinish
-      await onFinish?.(event);
+      // Call user's onFinish with usage data
+      const hasUsage = (accumulatedUsage.totalTokens ?? 0) > 0;
+      await onFinish?.({
+        ...event,
+        usage: hasUsage ? accumulatedUsage : undefined,
+      });
     };
 
     const stream = createUIMessageStream({
@@ -434,7 +468,9 @@
         ]);
 
         // Load chat metadata once for the entire request (stored in closure for wrappedOnFinish)
-        const { chatId } = this.extractMemoryIdentifiers(context as TContext);
+        const { chatId } = context
+          ? this.extractMemoryIdentifiers(context as TContext)
+          : { chatId: undefined };
         if (this.memory?.chats?.enabled && chatId) {
           existingChatForSave = await this.memory.provider?.getChat?.(chatId);
         }
@@ -719,6 +755,7 @@
               messages: messagesToSend,
               executionContext: executionContext,
               maxSteps, // Limit tool calls per round
+              experimental_telemetry,
               onStepFinish: async (step: unknown) => {
                 if (onEvent) {
                   await onEvent({
@@ -847,6 +884,25 @@
             }
           }
 
+          // Capture usage from the stream result after consumption
+          try {
+            // Use totalUsage for multi-step generations, fallback to usage for single step
+            const streamUsage =
+              (await result.totalUsage) || (await result.usage);
+            addUsage(streamUsage);
+            logger.debug("Captured usage from stream", {
+              agent: currentAgent.name,
+              round,
+              inputTokens: streamUsage?.inputTokens,
+              outputTokens: streamUsage?.outputTokens,
+              totalTokens: streamUsage?.totalTokens,
+            });
+          } catch (usageError) {
+            logger.debug("Could not capture usage from stream", {
+              error: usageError,
+            });
+          }
+
           // Update conversation - only add text if it's a complete response
           // Don't add intermediate text that was generated between tool calls
           if (textAccumulated && !handoffData) {
@@ -1152,6 +1208,16 @@
           );
         }
 
+        // Stream usage data if available
+        if ((accumulatedUsage.totalTokens ?? 0) > 0) {
+          writeUsage(writer, accumulatedUsage);
+          logger.debug("Streamed usage data", {
+            inputTokens: accumulatedUsage.inputTokens,
+            outputTokens: accumulatedUsage.outputTokens,
+            totalTokens: accumulatedUsage.totalTokens,
+          });
+        }
+
         writer.write({ type: "finish" });
       } catch (error) {
         logger.error("Error in toUIMessageStream", { error });
diff --git a/packages/agents/src/index.ts b/packages/agents/src/index.ts
index 21ac3ad..2955e94 100644
--- a/packages/agents/src/index.ts
+++ b/packages/agents/src/index.ts
@@ -39,6 +39,7 @@
   writeAgentStatus,
   writeDataPart,
   writeRateLimit,
+  writeUsage,
 } from "./streaming.js";
 // Types
 export type {
@@ -47,6 +48,8 @@ export type {
   AgentEvent,
   AgentGenerateOptions,
   AgentGenerateResult,
+  AgentStreamOnFinishCallback,
+  AgentStreamOnFinishEvent,
   AgentStreamOptions,
   AgentStreamOptionsUI,
   AgentStreamResult,
diff --git a/packages/agents/src/streaming.ts b/packages/agents/src/streaming.ts
index cc49d91..61b64dc 100644
--- a/packages/agents/src/streaming.ts
+++ b/packages/agents/src/streaming.ts
@@ -5,7 +5,7 @@
  * to the UI message stream following the AI SDK's streaming data pattern.
  */
 
-import type { UIMessageStreamWriter } from "ai";
+import type { LanguageModelUsage, UIMessageStreamWriter } from "ai";
 import type { AgentDataParts } from "./types.js";
 
 /**
@@ -114,3 +114,28 @@ export function writeSuggestions(
 ): void {
   writeDataPart(writer, "data-suggestions", { prompts }, { transient: true });
 }
+
+/**
+ * Write transient token usage information.
+ *
+ * Usage data is ephemeral and won't be added to message history.
+ * It's only available via the onData callback in useChat.
+ *
+ * @param writer - The UI message stream writer
+ * @param usage - Token usage data from the language model
+ *
+ * @example
+ * ```typescript
+ * writeUsage(writer, {
+ *   inputTokens: 150,
+ *   outputTokens: 200,
+ *   totalTokens: 350
+ * });
+ * ```
+ */
+export function writeUsage(
+  writer: UIMessageStreamWriter,
+  usage: LanguageModelUsage,
+): void {
+  writeDataPart(writer, "data-usage", { usage }, { transient: true });
+}
diff --git a/packages/agents/src/types.ts b/packages/agents/src/types.ts
index d208d20..5cae03d 100644
--- a/packages/agents/src/types.ts
+++ b/packages/agents/src/types.ts
@@ -6,13 +6,41 @@ import type {
   ModelMessage,
   StepResult,
   StreamTextResult,
+  TelemetrySettings,
   Tool,
   UIMessage,
-  UIMessageStreamOnFinishCallback,
   UIMessageStreamWriter,
 } from "ai";
 import type { AgentRunContext } from "./run-context.js";
 
+/**
+ * Extended onFinish event that includes token usage information.
+ * This extends the AI SDK's UIMessageStreamOnFinishCallback event with usage data.
+ */
+export interface AgentStreamOnFinishEvent {
+  /** Whether the stream was aborted */
+  isAborted: boolean;
+  /** Whether this is a continuation message */
+  isContinuation: boolean;
+  /** The response message generated by the agent */
+  responseMessage: UIMessage;
+  /** All messages including the response */
+  messages: UIMessage[];
+  /**
+   * Token usage information for the entire agent execution.
+   * Accumulated across all agent rounds and steps.
+   */
+  usage?: LanguageModelUsage;
+}
+
+/**
+ * Callback type for when the agent stream finishes.
+ * Extends the AI SDK's callback with token usage information.
+ */
+export type AgentStreamOnFinishCallback = (
+  event: AgentStreamOnFinishEvent,
+) => void | Promise<void>;
+
 /**
  * Interface for context objects that include memory identifiers
  */
@@ -297,8 +325,11 @@
   onEvent?: (event: AgentEvent) => void | Promise<void>;
 
   // AI SDK createUIMessageStream options
-  /** Callback when stream finishes with final messages */
-  onFinish?: UIMessageStreamOnFinishCallback;
+  /**
+   * Callback when stream finishes with final messages.
+   * Includes token usage information accumulated across all agent rounds.
+   */
+  onFinish?: AgentStreamOnFinishCallback;
   /** Process errors, e.g. to log them. Returns error message for data stream */
   onError?: (error: unknown) => string;
   /** Generate message ID for the response message */
@@ -321,6 +352,11 @@
   // AI SDK response options
   /** AI SDK transform - stream transform function */
   experimental_transform?: unknown;
+  /**
+   * Telemetry settings for OpenTelemetry tracing.
+   * Enable to track agent execution with your observability platform.
+   */
+  experimental_telemetry?: TelemetrySettings;
   /** HTTP status code */
   status?: number;
   /** HTTP status text */
@@ -369,6 +405,10 @@
   suggestions: {
     prompts: string[];
   };
+  /** Token usage information (transient) */
+  usage: {
+    usage: LanguageModelUsage;
+  };
   // Allow extension with custom data parts
   [key: string]: unknown;
 }
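A few usage sketches follow. First, the client side: the transient "data-usage" part written by writeUsage never enters message history, so besides useDataPart("usage") from @ai-sdk-tools/store (as in token-usage.tsx above) it can be observed through useChat's onData callback, per the writeUsage JSDoc. A minimal sketch assuming the AI SDK v5 onData signature; the UsageReadout component name is hypothetical:

```tsx
"use client";

import { useChat } from "@ai-sdk/react";
import { useState } from "react";
import type { LanguageModelUsage } from "ai";

// Hypothetical component: mirrors what useDataPart("usage") provides,
// using the lower-level onData callback instead of the store.
export function UsageReadout() {
  const [usage, setUsage] = useState<LanguageModelUsage>();

  useChat({
    // Transient parts are only delivered here; they never appear in messages.
    onData: (dataPart) => {
      if (dataPart.type === "data-usage") {
        setUsage((dataPart.data as { usage: LanguageModelUsage }).usage);
      }
    },
  });

  if (!usage?.totalTokens) return null;
  return <span>{usage.totalTokens.toLocaleString()} tokens</span>;
}
```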
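The new experimental_telemetry option is forwarded untouched to streamText, so the AI SDK's TelemetrySettings fields (isEnabled, functionId, metadata, recordInputs, recordOutputs) apply as-is. A sketch of enabling tracing for one run; the toUIMessageStream call shape is an assumption inferred from the agent.ts logger messages and AgentStreamOptionsUI, not a confirmed public signature:

```typescript
import type { UIMessage } from "ai";

// Assumed surface: an agent instance exposing toUIMessageStream
// (see the "Error in toUIMessageStream" log in agent.ts).
declare const agent: {
  toUIMessageStream(options: object): ReadableStream<unknown>;
};

export function streamWithTracing(messages: UIMessage[], chatId: string) {
  return agent.toUIMessageStream({
    messages,
    experimental_telemetry: {
      isEnabled: true,          // turn tracing on
      functionId: "chat-agent", // groups traces in your observability tool
      metadata: { chatId },     // arbitrary attributes attached to spans
    },
  });
}
```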
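Finally, cost: token-usage.tsx keeps MODEL_ID around "for cost calculation". A hedged helper that turns the accumulated LanguageModelUsage into a dollar estimate; the per-token rates below are placeholders, not authoritative GPT-4o-mini pricing:

```typescript
import type { LanguageModelUsage } from "ai";

// Placeholder USD-per-token rates - substitute your provider's current pricing.
const INPUT_RATE = 0.15 / 1_000_000;
const OUTPUT_RATE = 0.6 / 1_000_000;

export function estimateCostUSD(usage: LanguageModelUsage): number {
  return (
    (usage.inputTokens ?? 0) * INPUT_RATE +
    (usage.outputTokens ?? 0) * OUTPUT_RATE
  );
}
```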