11 changes: 11 additions & 0 deletions apps/example/src/app/api/chat/route.ts
@@ -62,5 +62,16 @@ export async function POST(request: NextRequest) {
}),
sendReasoning: true,
sendSources: true,
// Token usage tracking, available in the onFinish callback
onFinish: async (event) => {
if (event.usage) {
console.log("[Usage]", {
chatId: id,
inputTokens: event.usage.inputTokens,
outputTokens: event.usage.outputTokens,
totalTokens: event.usage.totalTokens,
});
}
},
});
}
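
Since onFinish receives the final token counts, the same callback can also derive a per-request cost estimate. A minimal sketch in TypeScript; the per-token rates are illustrative assumptions, not official pricing:

import type { LanguageModelUsage } from "ai";

// Assumed gpt-4o-mini rates (USD per token); adjust to your provider's pricing.
const INPUT_COST_PER_TOKEN = 0.15 / 1_000_000;
const OUTPUT_COST_PER_TOKEN = 0.6 / 1_000_000;

export function estimateCostUsd(usage: LanguageModelUsage): number {
  return (
    (usage.inputTokens ?? 0) * INPUT_COST_PER_TOKEN +
    (usage.outputTokens ?? 0) * OUTPUT_COST_PER_TOKEN
  );
}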
4 changes: 4 additions & 0 deletions apps/example/src/components/chat/chat-header.tsx
@@ -2,12 +2,16 @@

import { ChatNavigation } from "./chat-navigation";
import { ChatTitle } from "./chat-title";
import { TokenUsage } from "./token-usage";

export function ChatHeader() {
return (
<div className="flex items-center justify-center relative h-8">
<ChatNavigation />
<ChatTitle />
<div className="absolute right-0 flex items-center gap-2">
<TokenUsage />
</div>
</div>
);
}
1 change: 1 addition & 0 deletions apps/example/src/components/chat/index.ts
@@ -10,3 +10,4 @@ export { EmptyState } from "./empty-state";
export { RateLimitIndicator } from "./rate-limit-indicator";
export { SuggestedPrompts } from "./suggested-prompts";
export { SuggestionPills } from "./suggestion-pills";
export { TokenUsage } from "./token-usage";
61 changes: 61 additions & 0 deletions apps/example/src/components/chat/token-usage.tsx
@@ -0,0 +1,61 @@
"use client";

import { useDataPart } from "@ai-sdk-tools/store";
import type { LanguageModelUsage } from "ai";
import {
Context,
ContextContent,
ContextContentBody,
ContextContentFooter,
ContextContentHeader,
ContextInputUsage,
ContextOutputUsage,
ContextReasoningUsage,
ContextCacheUsage,
ContextTrigger,
} from "@/components/ai-elements/context";

// Model ID for cost calculation (GPT-4o-mini, the primary model used)
const MODEL_ID = "openai/gpt-4o-mini";
const MAX_TOKENS = 128_000; // GPT-4o-mini context window

interface UsageDataPart {
usage: LanguageModelUsage;
}

export function TokenUsage() {
const [usageData] = useDataPart<UsageDataPart>("usage");

if (!usageData?.usage) {
return null;
}

const { usage } = usageData;
const totalTokens = usage.totalTokens ?? 0;

if (totalTokens === 0) {
return null;
}

return (
<Context
usedTokens={totalTokens}
maxTokens={MAX_TOKENS}
usage={usage}
modelId={MODEL_ID}
>
<ContextTrigger className="h-7 px-2 text-xs" />
<ContextContent align="end" side="bottom">
<ContextContentHeader />
<ContextContentBody className="space-y-1.5">
<ContextInputUsage />
<ContextOutputUsage />
<ContextReasoningUsage />
<ContextCacheUsage />
</ContextContentBody>
<ContextContentFooter />
</ContextContent>
</Context>
);
}
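
For apps not using @ai-sdk-tools/store, the same transient part can be read straight from useChat. A hedged sketch, assuming AI SDK v5's onData callback and the "data-usage" part name emitted by writeUsage (see packages/agents/src/streaming.ts below):

"use client";

import { useChat } from "@ai-sdk/react";
import { useState } from "react";
import type { LanguageModelUsage } from "ai";

export function useUsagePart() {
  const [usage, setUsage] = useState<LanguageModelUsage | null>(null);

  const chat = useChat({
    // Transient parts never enter message history, so capture them as they stream.
    onData: (dataPart) => {
      if (dataPart.type === "data-usage") {
        setUsage((dataPart.data as { usage: LanguageModelUsage }).usage);
      }
    },
  });

  return { ...chat, usage };
}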

84 changes: 75 additions & 9 deletions packages/agents/src/agent.ts
@@ -13,13 +13,13 @@ import {
generateObject,
generateText,
type LanguageModel,
type LanguageModelUsage,
type ModelMessage,
type StepResult,
stepCountIs,
type Tool,
tool,
type UIMessage,
type UIMessageStreamOnFinishCallback,
type UIMessageStreamWriter,
} from "ai";
import { z } from "zod";
@@ -31,7 +31,7 @@ import {
} from "./handoff.js";
import { promptWithHandoffInstructions } from "./handoff-prompt.js";
import { AgentRunContext } from "./run-context.js";
import { writeAgentStatus, writeSuggestions } from "./streaming.js";
import { writeAgentStatus, writeSuggestions, writeUsage } from "./streaming.js";
import { createDefaultInputFilter } from "./tool-result-extractor.js";
import type {
AgentConfig,
@@ -188,6 +188,8 @@ export class Agent<
const toolChoice = (options as Record<string, unknown>).toolChoice as
| string
| undefined;
const experimentalTelemetry = (options as Record<string, unknown>)
.experimental_telemetry as Record<string, unknown> | undefined;

// Resolve instructions dynamically (static string or function)
const resolvedInstructions =
@@ -284,6 +286,8 @@

if (maxSteps) additionalOptions.maxSteps = maxSteps;
if (onStepFinish) additionalOptions.onStepFinish = onStepFinish;
if (experimentalTelemetry)
additionalOptions.experimental_telemetry = experimentalTelemetry;

// Handle simple { messages } format (like working code)
if ("messages" in options && !("prompt" in options) && options.messages) {
@@ -361,6 +365,8 @@
sendFinish,
sendStart,
messageMetadata,
// AI SDK telemetry
experimental_telemetry,
// Response options
status,
statusText,
@@ -370,10 +376,34 @@
// Declare variable to store chat metadata (will be loaded in execute block)
let existingChatForSave: any = null;

// Wrap onFinish to save messages after streaming
const wrappedOnFinish: UIMessageStreamOnFinishCallback<never> = async (
event,
) => {
// Accumulate token usage across all agent rounds
let accumulatedUsage: LanguageModelUsage = {
inputTokens: 0,
outputTokens: 0,
totalTokens: 0,
};

// Helper to add usage from a stream result
const addUsage = (usage: LanguageModelUsage | undefined) => {
if (usage) {
accumulatedUsage = {
inputTokens:
(accumulatedUsage.inputTokens ?? 0) + (usage.inputTokens ?? 0),
outputTokens:
(accumulatedUsage.outputTokens ?? 0) + (usage.outputTokens ?? 0),
totalTokens:
(accumulatedUsage.totalTokens ?? 0) + (usage.totalTokens ?? 0),
};
}
};

// Wrap onFinish to save messages after streaming and include usage
const wrappedOnFinish = async (event: {
isAborted: boolean;
isContinuation: boolean;
responseMessage: UIMessage;
messages: UIMessage[];
}): Promise<void> => {
// Save messages and update chat session after stream completes
if (this.memory?.history?.enabled && context) {
const { chatId, userId } = this.extractMemoryIdentifiers(
@@ -415,8 +445,12 @@
}
}

// Call user's onFinish
await onFinish?.(event);
// Call user's onFinish with usage data
const hasUsage = (accumulatedUsage.totalTokens ?? 0) > 0;
await onFinish?.({
...event,
usage: hasUsage ? accumulatedUsage : undefined,
});
};

const stream = createUIMessageStream({
Expand All @@ -434,7 +468,9 @@ export class Agent<
]);

// Load chat metadata once for the entire request (stored in closure for wrappedOnFinish)
const { chatId } = this.extractMemoryIdentifiers(context as TContext);
const { chatId } = context
? this.extractMemoryIdentifiers(context as TContext)
: { chatId: undefined };
if (this.memory?.chats?.enabled && chatId) {
existingChatForSave = await this.memory.provider?.getChat?.(chatId);
}
@@ -719,6 +755,7 @@
messages: messagesToSend,
executionContext: executionContext,
maxSteps, // Limit tool calls per round
experimental_telemetry,
onStepFinish: async (step: unknown) => {
if (onEvent) {
await onEvent({
@@ -847,6 +884,25 @@
}
}

// Capture usage from the stream result after consumption
try {
// Use totalUsage for multi-step generations; fall back to usage for a single step
const streamUsage =
(await result.totalUsage) || (await result.usage);
addUsage(streamUsage);
logger.debug("Captured usage from stream", {
agent: currentAgent.name,
round,
inputTokens: streamUsage?.inputTokens,
outputTokens: streamUsage?.outputTokens,
totalTokens: streamUsage?.totalTokens,
});
} catch (usageError) {
logger.debug("Could not capture usage from stream", {
error: usageError,
});
}

// Update conversation - only add text if it's a complete response
// Don't add intermediate text that was generated between tool calls
if (textAccumulated && !handoffData) {
@@ -1152,6 +1208,16 @@
);
}

// Stream usage data if available
if ((accumulatedUsage.totalTokens ?? 0) > 0) {
writeUsage(writer, accumulatedUsage);
logger.debug("Streamed usage data", {
inputTokens: accumulatedUsage.inputTokens,
outputTokens: accumulatedUsage.outputTokens,
totalTokens: accumulatedUsage.totalTokens,
});
}

writer.write({ type: "finish" });
} catch (error) {
logger.error("Error in toUIMessageStream", { error });
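The accumulation above treats every counter as optional, since providers differ in which fields they report. The same logic, distilled into a self-contained helper for reference:

import type { LanguageModelUsage } from "ai";

// Sum two usage reports, treating missing counters as zero. This mirrors the
// per-round accumulation in Agent.toUIMessageStream, including handoff rounds.
function addUsage(
  total: LanguageModelUsage,
  next: LanguageModelUsage | undefined,
): LanguageModelUsage {
  if (!next) return total;
  return {
    inputTokens: (total.inputTokens ?? 0) + (next.inputTokens ?? 0),
    outputTokens: (total.outputTokens ?? 0) + (next.outputTokens ?? 0),
    totalTokens: (total.totalTokens ?? 0) + (next.totalTokens ?? 0),
  };
}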
3 changes: 3 additions & 0 deletions packages/agents/src/index.ts
@@ -39,6 +39,7 @@ export {
writeAgentStatus,
writeDataPart,
writeRateLimit,
writeUsage,
} from "./streaming.js";
// Types
export type {
@@ -47,6 +48,8 @@ export type {
AgentEvent,
AgentGenerateOptions,
AgentGenerateResult,
AgentStreamOnFinishCallback,
AgentStreamOnFinishEvent,
AgentStreamOptions,
AgentStreamOptionsUI,
AgentStreamResult,
27 changes: 26 additions & 1 deletion packages/agents/src/streaming.ts
@@ -5,7 +5,7 @@
* to the UI message stream following the AI SDK's streaming data pattern.
*/

import type { UIMessageStreamWriter } from "ai";
import type { LanguageModelUsage, UIMessageStreamWriter } from "ai";
import type { AgentDataParts } from "./types.js";

/**
@@ -114,3 +114,28 @@ export function writeSuggestions(
): void {
writeDataPart(writer, "data-suggestions", { prompts }, { transient: true });
}

/**
* Write transient token usage information.
*
* Usage data is ephemeral and won't be added to message history.
* It's only available via the onData callback in useChat.
*
* @param writer - The UI message stream writer
* @param usage - Token usage data from the language model
*
* @example
* ```typescript
* writeUsage(writer, {
* inputTokens: 150,
* outputTokens: 200,
* totalTokens: 350
* });
* ```
*/
export function writeUsage(
writer: UIMessageStreamWriter,
usage: LanguageModelUsage,
): void {
writeDataPart(writer, "data-usage", { usage }, { transient: true });
}
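
Putting the pieces together on the server, writeUsage slots into a custom createUIMessageStream just before the finish event. A hedged end-to-end sketch; the @ai-sdk-tools/agents import path is assumed from this repo's package layout:

import { createUIMessageStream } from "ai";
import { writeUsage } from "@ai-sdk-tools/agents"; // assumed package name

const stream = createUIMessageStream({
  execute: async ({ writer }) => {
    // ... run the model and accumulate usage across steps ...
    writeUsage(writer, {
      inputTokens: 150,
      outputTokens: 200,
      totalTokens: 350,
    });
    writer.write({ type: "finish" });
  },
});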