From 6ee45c1ab4ef48eff6ef0d684f0e644899e30af6 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Thu, 2 Oct 2025 09:29:43 -0400 Subject: [PATCH 01/22] openai responses instrumentation --- .../datadog-instrumentations/src/openai.js | 43 +++++++++++++ packages/datadog-plugin-openai/src/tracing.js | 61 +++++++++++++++++++ .../dd-trace/src/llmobs/plugins/openai.js | 48 ++++++++++++++- 3 files changed, 150 insertions(+), 2 deletions(-) diff --git a/packages/datadog-instrumentations/src/openai.js b/packages/datadog-instrumentations/src/openai.js index 76aad798c82..c909164bb55 100644 --- a/packages/datadog-instrumentations/src/openai.js +++ b/packages/datadog-instrumentations/src/openai.js @@ -22,6 +22,21 @@ const V4_PACKAGE_SHIMS = [ methods: ['create'], streamedResponse: true }, + { + file: 'resources/responses', + targetClass: 'Responses', + baseResource: 'responses', + methods: ['create'], + streamedResponse: false + }, + { + file: 'resources/responses/responses', + targetClass: 'Responses', + baseResource: 'responses', + methods: ['create'], + streamedResponse: false, + versions: ['>=4.85.0'] + }, { file: 'resources/embeddings', targetClass: 'Embeddings', @@ -137,6 +152,24 @@ const V4_PACKAGE_SHIMS = [ } ] +// define and return function to patch over the original function +function wrapCreate (create) { + return function (request) { + if (!vertexaiTracingChannel.start.hasSubscribers) { + // calls the original function + return create.apply(this, arguments) + } + + const ctx = { + request, + instance: this, + resource: [this.constructor.name, create.name].join('.') + } + // am I using the right channel? tracingChannel vs diagnostics channel + return ch.tracePromise(create, ctx, this, ...arguments) + } +} + addHook({ name: 'openai', file: 'dist/api.js', versions: ['>=3.0.0 <4'] }, exports => { const methodNames = Object.getOwnPropertyNames(exports.OpenAIApi.prototype) methodNames.shift() // remove leading 'constructor' method @@ -160,6 +193,16 @@ addHook({ name: 'openai', file: 'dist/api.js', versions: ['>=3.0.0 <4'] }, expor return exports }) +//register patching hooks via addHook +addHook({ name: 'openai', file: 'resources/responses.js', versions: ['>=4.87.0'] }, exports => { + const Responses = exports.OpenAIApi.responses + // wrap functions on module exports with shimmer.wrap + shimmer.wrap(responses.prototype, 'responses.createResponse', wrapCreate) + return exports +}) + + + /** * For streamed responses, we need to accumulate all of the content in * the chunks, and let the combined content be the final response. 
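[Editor's note: the draft wrapCreate above copies naming from the vertexai instrumentation (vertexaiTracingChannel) and asks whether tracingChannel or a plain diagnostics channel is right. tracingChannel is itself part of node:diagnostics_channel, and the wrap pattern the V4 shims rely on looks roughly like the sketch below; the channel name and resource string are illustrative only, not dd-trace's actual identifiers.

'use strict'
const { tracingChannel } = require('node:diagnostics_channel')

// One TracingChannel fans out to start/end/asyncStart/asyncEnd/error channels
const ch = tracingChannel('apm:openai:request')

function wrapCreate (create) {
  return function (...args) {
    // Bail out fast when no tracer is subscribed
    if (!ch.start.hasSubscribers) return create.apply(this, args)

    const ctx = { args, instance: this, resource: 'responses.create' }
    // tracePromise publishes the lifecycle events around the promise create() returns
    return ch.tracePromise(create, ctx, this, ...args)
  }
}
]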
diff --git a/packages/datadog-plugin-openai/src/tracing.js b/packages/datadog-plugin-openai/src/tracing.js index cc9f0abe036..cdd6388d39b 100644 --- a/packages/datadog-plugin-openai/src/tracing.js +++ b/packages/datadog-plugin-openai/src/tracing.js @@ -136,6 +136,10 @@ class OpenAiTracingPlugin extends TracingPlugin { case 'createEdit': createEditRequestExtraction(tags, payload, openaiStore) break + + case 'createResponse': + createResponseRequestExtraction(tags, payload, openaiStore) + break } span.addTags(tags) @@ -315,6 +319,10 @@ function normalizeMethodName (methodName) { case 'embeddings.create': return 'createEmbedding' + // responses + case 'responses.create': + return 'createResponse' + // files case 'files.create': return 'createFile' @@ -378,6 +386,35 @@ function createEditRequestExtraction (tags, payload, openaiStore) { openaiStore.instruction = instruction } +function createResponseRequestExtraction (tags, payload, openaiStore) { + // Extract model information + if (payload.model) { + tags['openai.request.model'] = payload.model + } + + // Extract input information + if (payload.input) { + openaiStore.input = payload.input + tags['openai.request.input_length'] = payload.input.length + } + + // Extract reasoning configuration + if (payload.reasoning) { + if (payload.reasoning.effort) { + tags['openai.request.reasoning.effort'] = payload.reasoning.effort + } + openaiStore.reasoning = payload.reasoning + } + + // Extract background flag + if (payload.background !== undefined) { + tags['openai.request.background'] = payload.background + } + + // Store the full payload for response extraction + openaiStore.responseData = payload +} + function retrieveModelRequestExtraction (tags, payload) { tags['openai.request.id'] = payload.id } @@ -412,6 +449,10 @@ function responseDataExtractionByMethod (methodName, tags, body, openaiStore) { commonCreateResponseExtraction(tags, body, openaiStore, methodName) break + case 'createResponse': + createResponseResponseExtraction(tags, body, openaiStore) + break + case 'listFiles': case 'listFineTunes': case 'listFineTuneEvents': @@ -515,6 +556,26 @@ function commonCreateResponseExtraction (tags, body, openaiStore, methodName) { openaiStore.choices = body.choices } +function createResponseResponseExtraction (tags, body, openaiStore) { + // Extract response ID if available + if (body.id) { + tags['openai.response.id'] = body.id + } + + // Extract status if available + if (body.status) { + tags['openai.response.status'] = body.status + } + + // Extract model from response if available + if (body.model) { + tags['openai.response.model'] = body.model + } + + // Store the full response for potential future use + openaiStore.response = body +} + // The server almost always responds with JSON function coerceResponseBody (body, methodName) { switch (methodName) { diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 0f8a6a40e8f..f77ba682563 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -21,7 +21,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { const methodName = gateResource(normalizeOpenAIResourceName(resource)) if (!methodName) return // we will not trace all openai methods for llmobs - const inputs = ctx.args[0] // completion, chat completion, and embeddings take one argument + const inputs = ctx.args[0] // completion, chat completion, embeddings, and responses take one argument const operation = getOperation(methodName) 
const kind = operation === 'embedding' ? 'embedding' : 'llm' @@ -55,6 +55,8 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { this._tagChatCompletion(span, inputs, response, error) } else if (operation === 'embedding') { this._tagEmbedding(span, inputs, response, error) + } else if (operation === 'response') { + this._tagResponse(span, inputs, response, error) } if (!error) { @@ -187,6 +189,41 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { this._tagger.tagMetadata(span, metadata) } + + _tagResponse (span, inputs, response, error) { + const { input, model, reasoning, background, ...parameters } = inputs + + // Create input message format + const responseInput = [{ content: input }] + + if (error) { + this._tagger.tagLLMIO(span, responseInput, [{ content: '' }]) + return + } + + // Create output message format + const responseOutput = [{ content: response.output || '' }] + + this._tagger.tagLLMIO(span, responseInput, responseOutput) + console.log('hello params', parameters) + + // Tag metadata + const metadata = Object.entries(parameters).reduce((obj, [key, value]) => { + if (!['tools', 'functions'].includes(key)) { + obj[key] = value + } + return obj + }, {}) + + if (reasoning) { + metadata.reasoning = reasoning + } + if (background !== undefined) { + metadata.background = background + } + + this._tagger.tagMetadata(span, metadata) + } } // TODO: this will be moved to the APM integration @@ -203,13 +240,18 @@ function normalizeOpenAIResourceName (resource) { // embeddings case 'embeddings.create': return 'createEmbedding' + + // responses + case 'responses.create': + return 'createResponse' + default: return resource } } function gateResource (resource) { - return ['createCompletion', 'createChatCompletion', 'createEmbedding'].includes(resource) + return ['createCompletion', 'createChatCompletion', 'createEmbedding', 'createResponse'].includes(resource) ? 
resource : undefined } @@ -222,6 +264,8 @@ function getOperation (resource) { return 'chat' case 'createEmbedding': return 'embedding' + case 'createResponse': + return 'response' default: // should never happen return 'unknown' From 97308105336c6cc0a6142afc629cb491104fbce0 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 10 Oct 2025 00:25:17 -0400 Subject: [PATCH 02/22] clean up code --- .../datadog-instrumentations/src/openai.js | 39 +------------------ 1 file changed, 2 insertions(+), 37 deletions(-) diff --git a/packages/datadog-instrumentations/src/openai.js b/packages/datadog-instrumentations/src/openai.js index c909164bb55..4d1540b6193 100644 --- a/packages/datadog-instrumentations/src/openai.js +++ b/packages/datadog-instrumentations/src/openai.js @@ -22,20 +22,13 @@ const V4_PACKAGE_SHIMS = [ methods: ['create'], streamedResponse: true }, - { - file: 'resources/responses', - targetClass: 'Responses', - baseResource: 'responses', - methods: ['create'], - streamedResponse: false - }, { file: 'resources/responses/responses', targetClass: 'Responses', baseResource: 'responses', methods: ['create'], - streamedResponse: false, - versions: ['>=4.85.0'] + streamedResponse: true, + versions: ['>=4.87.0'] }, { file: 'resources/embeddings', @@ -152,24 +145,6 @@ const V4_PACKAGE_SHIMS = [ } ] -// define and return function to patch over the original function -function wrapCreate (create) { - return function (request) { - if (!vertexaiTracingChannel.start.hasSubscribers) { - // calls the original function - return create.apply(this, arguments) - } - - const ctx = { - request, - instance: this, - resource: [this.constructor.name, create.name].join('.') - } - // am I using the right channel? tracingChannel vs diagnostics channel - return ch.tracePromise(create, ctx, this, ...arguments) - } -} - addHook({ name: 'openai', file: 'dist/api.js', versions: ['>=3.0.0 <4'] }, exports => { const methodNames = Object.getOwnPropertyNames(exports.OpenAIApi.prototype) methodNames.shift() // remove leading 'constructor' method @@ -193,16 +168,6 @@ addHook({ name: 'openai', file: 'dist/api.js', versions: ['>=3.0.0 <4'] }, expor return exports }) -//register patching hooks via addHook -addHook({ name: 'openai', file: 'resources/responses.js', versions: ['>=4.87.0'] }, exports => { - const Responses = exports.OpenAIApi.responses - // wrap functions on module exports with shimmer.wrap - shimmer.wrap(responses.prototype, 'responses.createResponse', wrapCreate) - return exports -}) - - - /** * For streamed responses, we need to accumulate all of the content in * the chunks, and let the combined content be the final response. 
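[Editor's note, before patch 03 below, which adds stream handling for the responses API: the event names in this sketch come from the patch's own comments, while the payload values are illustrative only. A buffered responses-API stream looks roughly like this, and the terminal response.completed event already carries the full output array and usage, which is why scanning backwards for a terminal status is enough to reconstruct the response.

// Roughly what a streamed responses-API exchange yields once buffered:
const chunks = [
  { type: 'response.created', response: { id: 'resp_123', status: 'in_progress' } },
  { type: 'response.output_text.delta', delta: 'Par' },
  { type: 'response.output_text.delta', delta: 'is' },
  {
    type: 'response.completed',
    response: {
      id: 'resp_123',
      status: 'completed',
      output: [{ type: 'message', role: 'assistant', content: [{ type: 'output_text', text: 'Paris' }] }],
      usage: { input_tokens: 10, output_tokens: 1, total_tokens: 11 }
    }
  }
]

// Walking backwards and returning the first terminal response is the strategy
// the helper added below converges on over patches 03-10
function pickFinalResponse (chunks) {
  for (let i = chunks.length - 1; i >= 0; i--) {
    const { response } = chunks[i]
    if (response && ['done', 'incomplete', 'completed'].includes(response.status)) return response
  }
}

pickFinalResponse(chunks).usage.total_tokens // -> 11
]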
From 883feaeb687e3ce7168b86e62841758b9fc5d609 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 10 Oct 2025 16:59:22 -0400 Subject: [PATCH 03/22] add streaming processing, fix span tag adding --- .../src/stream-helpers.js | 97 +++++++++- packages/datadog-plugin-openai/src/tracing.js | 5 +- .../dd-trace/src/llmobs/plugins/openai.js | 183 ++++++++++++++++-- .../dd-trace/src/llmobs/span_processor.js | 3 +- packages/dd-trace/src/llmobs/tagger.js | 21 +- 5 files changed, 288 insertions(+), 21 deletions(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js b/packages/datadog-plugin-openai/src/stream-helpers.js index 0f27cc4d4e7..5d264d8e404 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -107,8 +107,103 @@ function constructChatCompletionResponseFromStreamedChunks (chunks, n) { }) } +/** + * Constructs the entire response from a stream of OpenAI responses chunks. + * The responses API uses event-based streaming with delta chunks. + * @param {Array<Record<string, any>>} chunks + * @param {number} n (not used for responses API, but kept for consistency) + * @returns {Record<string, any>} + */ +function constructResponseResponseFromStreamedChunks (chunks, n) { + if (chunks.length === 0) return {} + + // The responses API streams events with different types: + // - response.output_text.delta: incremental text deltas + // - response.output_text.done: complete text for a content part + // - response.output_item.done: complete output item with role + // - response.done/response.incomplete/response.completed: final response with output array and usage + + // Find the last chunk with a complete response object (status: done, incomplete, or completed) + let finalResponse = null + for (let i = chunks.length - 1; i >= 0; i--) { + const chunk = chunks[i] + if (chunk.response && ['done', 'incomplete', 'completed'].includes(chunk.response.status)) { + finalResponse = chunk.response + break + } + } + + // If we found a final response, we may need to add accumulated text + if (finalResponse) { + // For simple text responses, if output is empty or an empty array, accumulate from deltas + const outputIsEmpty = !finalResponse.output || + finalResponse.output === '' || + (Array.isArray(finalResponse.output) && finalResponse.output.length === 0) + + if (outputIsEmpty) { + const outputText = chunks + .filter(chunk => chunk.type === 'response.output_text.delta') + .map(chunk => chunk.delta) + .join('') + + if (outputText) { + return { + ...finalResponse, + output: outputText + } + } + } + return finalResponse + } + + // If no final response found, fall back to accumulating from deltas and items + const baseResponse = chunks[0]?.response || {} + + // Accumulate text from delta chunks + const outputText = chunks + .filter(chunk => chunk.type === 'response.output_text.delta') + .map(chunk => chunk.delta) + .join('') + + // Check for tool call chunks (output_item.added with function_call type) + const toolCallChunks = chunks.filter(chunk => + chunk.type === 'response.output_item.added' && + chunk.item?.type === 'function_call' + ) + + const result = { + ...baseResponse, + output: outputText || '', + usage: chunks[chunks.length - 1]?.response?.usage || null + } + + // If there are tool calls, structure them in the output array format + if (toolCallChunks.length > 0) { + result.output = [] + for (const toolCallChunk of toolCallChunks) { + const item = toolCallChunk.item + result.output.push({ + type: 'message', + role: 'assistant', + content: [], + tool_calls:
[{ + id: item.call_id, + type: 'function', + function: { + name: item.name, + arguments: item.arguments + } + }] + }) + } + } + + return result +} + module.exports = { convertBuffersToObjects, constructCompletionResponseFromStreamedChunks, - constructChatCompletionResponseFromStreamedChunks + constructChatCompletionResponseFromStreamedChunks, + constructResponseResponseFromStreamedChunks } diff --git a/packages/datadog-plugin-openai/src/tracing.js b/packages/datadog-plugin-openai/src/tracing.js index cdd6388d39b..dcff9faac8b 100644 --- a/packages/datadog-plugin-openai/src/tracing.js +++ b/packages/datadog-plugin-openai/src/tracing.js @@ -11,7 +11,8 @@ const { MEASURED } = require('../../../ext/tags') const { convertBuffersToObjects, constructCompletionResponseFromStreamedChunks, - constructChatCompletionResponseFromStreamedChunks + constructChatCompletionResponseFromStreamedChunks, + constructResponseResponseFromStreamedChunks } = require('./stream-helpers') const { DD_MAJOR } = require('../../../version') @@ -61,6 +62,8 @@ class OpenAiTracingPlugin extends TracingPlugin { response = constructCompletionResponseFromStreamedChunks(chunks, n) } else if (methodName === 'createChatCompletion') { response = constructChatCompletionResponseFromStreamedChunks(chunks, n) + } else if (methodName === 'createResponse') { + response = constructResponseResponseFromStreamedChunks(chunks, n) } ctx.result = { data: response } diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index f77ba682563..c1ef1da5f0c 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -79,14 +79,20 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { const tokenUsage = response.usage if (tokenUsage) { - const inputTokens = tokenUsage.prompt_tokens - if (inputTokens) metrics.inputTokens = inputTokens + // Responses API uses input_tokens, Chat/Completions use prompt_tokens + const inputTokens = tokenUsage.input_tokens ?? tokenUsage.prompt_tokens + if (inputTokens !== undefined) metrics.inputTokens = inputTokens - const outputTokens = tokenUsage.completion_tokens - if (outputTokens) metrics.outputTokens = outputTokens + // Responses API uses output_tokens, Chat/Completions use completion_tokens + const outputTokens = tokenUsage.output_tokens ?? 
tokenUsage.completion_tokens + if (outputTokens !== undefined) metrics.outputTokens = outputTokens - const totalTokens = tokenUsage.total_toksn || (inputTokens + outputTokens) - if (totalTokens) metrics.totalTokens = totalTokens + const totalTokens = tokenUsage.total_tokens || (inputTokens + outputTokens) + if (totalTokens !== undefined) metrics.totalTokens = totalTokens + + // Cache read tokens for responses API + const cachedTokens = tokenUsage.input_tokens_details?.cached_tokens + if (cachedTokens !== undefined) metrics.cache_read_input_tokens = cachedTokens } return metrics @@ -193,28 +199,175 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { _tagResponse (span, inputs, response, error) { const { input, model, reasoning, background, ...parameters } = inputs - // Create input message format - const responseInput = [{ content: input }] - + // Create input messages + const inputMessages = [] + + // Add system message if instructions exist + if (inputs.instructions) { + inputMessages.push({ role: 'system', content: inputs.instructions }) + } + + // Handle input - can be string or array of mixed messages + if (Array.isArray(input)) { + for (const item of input) { + if (item.type === 'function_call') { + // Function call: convert to message with tool_calls + // Parse arguments if it's a JSON string + let parsedArgs = item.arguments + if (typeof parsedArgs === 'string') { + try { + parsedArgs = JSON.parse(parsedArgs) + } catch (e) { + parsedArgs = {} + } + } + inputMessages.push({ + role: '', + toolCalls: [{ + toolId: item.call_id, + name: item.name, + arguments: parsedArgs, + type: 'function' + }] + }) + } else if (item.type === 'function_call_output') { + // Function output: convert to tool message + inputMessages.push({ + role: 'tool', + content: item.output + }) + } else if (item.role && item.content) { + // Regular message + inputMessages.push({ role: item.role, content: item.content }) + } + } + } else { + // Simple string input + inputMessages.push({ role: 'user', content: input }) + } + if (error) { - this._tagger.tagLLMIO(span, responseInput, [{ content: '' }]) + this._tagger.tagLLMIO(span, inputMessages, [{ content: '' }]) return } - // Create output message format - const responseOutput = [{ content: response.output || '' }] + // Create output messages + const outputMessages = [] + + // Handle output - can be string (streaming) or array of message objects (non-streaming) + if (typeof response.output === 'string') { + // Simple text output (streaming) + outputMessages.push({ role: 'assistant', content: response.output }) + } else if (Array.isArray(response.output)) { + // Array output - process all items to extract reasoning, messages, and tool calls + // Non-streaming: array of items (messages, function_calls, or reasoning) + for (const item of response.output) { + // Handle reasoning type (reasoning responses) + if (item.type === 'reasoning') { + // Extract reasoning text from summary + let reasoningText = '' + if (item.summary && Array.isArray(item.summary) && item.summary.length > 0) { + const summaryItem = item.summary[0] + if (summaryItem.type === 'summary_text' && summaryItem.text) { + reasoningText = summaryItem.text + } + } + outputMessages.push({ + role: 'reasoning', + content: reasoningText + }) + } else if (item.type === 'function_call') { + // Handle function_call type (responses API tool calls) + let args = item.arguments + // Parse arguments if it's a JSON string + if (typeof args === 'string') { + try { + args = JSON.parse(args) + } catch (e) { + args = {} + } + } + 
outputMessages.push({ + role: '', // Tool calls have empty role in LLMObs + toolCalls: [{ + toolId: item.call_id, + name: item.name, + arguments: args, + type: 'function' + }] + }) + } else { + // Handle regular message objects + const outputMsg = { role: item.role || 'assistant', content: '' } + + // Extract content from message + if (item.content && Array.isArray(item.content)) { + // Content is array of content parts + // For responses API, text content has type 'output_text', not 'text' + const textParts = item.content + .filter(c => c.type === 'text' || c.type === 'output_text') + .map(c => c.text) + outputMsg.content = textParts.join('') + } else if (typeof item.content === 'string') { + outputMsg.content = item.content + } + + // Extract tool calls if present in message.tool_calls + if (item.tool_calls && Array.isArray(item.tool_calls)) { + outputMsg.toolCalls = item.tool_calls.map(tc => { + let args = tc.function?.arguments || tc.arguments + // Parse arguments if it's a JSON string + if (typeof args === 'string') { + try { + args = JSON.parse(args) + } catch (e) { + args = {} + } + } + return { + toolId: tc.id, + name: tc.function?.name || tc.name, + arguments: args, + type: tc.type || 'function' + } + }) + } + + outputMessages.push(outputMsg) + } + } + } else if (response.output_text) { + // Fallback: use output_text if available (for simple non-streaming responses without reasoning/tools) + outputMessages.push({ role: 'assistant', content: response.output_text }) + } else { + // No output + outputMessages.push({ role: 'assistant', content: '' }) + } - this._tagger.tagLLMIO(span, responseInput, responseOutput) - console.log('hello params', parameters) + this._tagger.tagLLMIO(span, inputMessages, outputMessages) // Tag metadata const metadata = Object.entries(parameters).reduce((obj, [key, value]) => { - if (!['tools', 'functions'].includes(key)) { + if (!['tools', 'functions', 'instructions'].includes(key)) { obj[key] = value } return obj }, {}) + // Add fields from response + if (response.temperature !== undefined) metadata.temperature = Number(response.temperature) + if (response.top_p !== undefined) metadata.top_p = Number(response.top_p) + if (response.tools !== undefined) { + metadata.tools = Array.isArray(response.tools) ? [...response.tools] : response.tools + } + if (response.tool_choice !== undefined) metadata.tool_choice = response.tool_choice + if (response.truncation !== undefined) metadata.truncation = response.truncation + if (response.text !== undefined) metadata.text = response.text + if (response.usage?.output_tokens_details?.reasoning_tokens !== undefined) { + metadata.reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens + } + + // Add reasoning metadata from input parameters if (reasoning) { metadata.reasoning = reasoning } diff --git a/packages/dd-trace/src/llmobs/span_processor.js b/packages/dd-trace/src/llmobs/span_processor.js index da600ffdef0..804964313f0 100644 --- a/packages/dd-trace/src/llmobs/span_processor.js +++ b/packages/dd-trace/src/llmobs/span_processor.js @@ -166,7 +166,8 @@ class LLMObsSpanProcessor { continue } if (value !== null && typeof value === 'object') { - add(value, carrier[key] = {}) + carrier[key] = Array.isArray(value) ? 
[] : {} + add(value, carrier[key]) } else { carrier[key] = value } diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js index 7746dfaa7d1..ff88cdc99c9 100644 --- a/packages/dd-trace/src/llmobs/tagger.js +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -282,15 +282,13 @@ class LLMObsTagger { const { content = '', role } = message const toolCalls = message.toolCalls const toolId = message.toolId - const messageObj = { content } + const messageObj = {} const valid = typeof content === 'string' if (!valid) { this.#handleFailure('Message content must be a string.', 'invalid_io_messages') } - let condition = this.#tagConditionalString(role, 'Message role', messageObj, 'role') - if (toolCalls) { const filteredToolCalls = this.#filterToolCalls(toolCalls) @@ -299,6 +297,23 @@ class LLMObsTagger { } } + // Only include content if it's not empty OR if there are no tool calls + // (For responses API, tool call messages should not have content field) + if (content !== '' || !messageObj.tool_calls) { + messageObj.content = content + } + + // For role, always include it (even if empty string) when there are tool calls + // Otherwise use conditional tagging which skips empty values + let condition + if (messageObj.tool_calls && messageObj.tool_calls.length > 0) { + // For tool call messages, always include role even if empty + messageObj.role = role || '' + condition = true + } else { + condition = this.#tagConditionalString(role, 'Message role', messageObj, 'role') + } + if (toolId) { if (role === 'tool') { condition = this.#tagConditionalString(toolId, 'Tool ID', messageObj, 'tool_id') From 9b90cdc9aad245611c0217e875f11405836005a5 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Wed, 15 Oct 2025 13:49:27 -0400 Subject: [PATCH 04/22] make changes to conform with updated tool call tagging behavior tests --- .../dd-trace/src/llmobs/plugins/openai.js | 28 ++++++----- packages/dd-trace/src/llmobs/tagger.js | 49 ++++++++++++++++--- 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index c1ef1da5f0c..9299b65c6b7 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -222,19 +222,24 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } } inputMessages.push({ - role: '', + role: 'assistant', toolCalls: [{ toolId: item.call_id, name: item.name, arguments: parsedArgs, - type: 'function' + type: 'function_call' }] }) } else if (item.type === 'function_call_output') { - // Function output: convert to tool message + // Function output: convert to user message with tool_results inputMessages.push({ - role: 'tool', - content: item.output + role: 'user', + toolResults: [{ + toolId: item.call_id, + result: item.output, + name: item.name || '', + type: 'function_call_output' + }] }) } else if (item.role && item.content) { // Regular message @@ -288,12 +293,12 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } } outputMessages.push({ - role: '', // Tool calls have empty role in LLMObs + role: 'assistant', toolCalls: [{ toolId: item.call_id, name: item.name, arguments: args, - type: 'function' + type: 'function_call' }] }) } else { @@ -328,7 +333,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { toolId: tc.id, name: tc.function?.name || tc.name, arguments: args, - type: tc.type || 'function' + type: tc.type || 'function_call' } }) } @@ -355,9 +360,10 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { }, {}) // Add 
fields from response - if (response.temperature !== undefined) metadata.temperature = Number(response.temperature) - if (response.top_p !== undefined) metadata.top_p = Number(response.top_p) - if (response.tools !== undefined) { + if (response.temperature !== undefined) metadata.temperature = parseFloat(response.temperature) + if (response.top_p !== undefined) metadata.top_p = parseFloat(response.top_p) + // Only include tools if it's not an empty array + if (response.tools !== undefined && !(Array.isArray(response.tools) && response.tools.length === 0)) { metadata.tools = Array.isArray(response.tools) ? [...response.tools] : response.tools } if (response.tool_choice !== undefined) metadata.tool_choice = response.tool_choice diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js index ff88cdc99c9..c8bd4614120 100644 --- a/packages/dd-trace/src/llmobs/tagger.js +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -259,6 +259,34 @@ class LLMObsTagger { return filteredToolCalls } + #filterToolResults (toolResults) { + if (!Array.isArray(toolResults)) { + toolResults = [toolResults] + } + + const filteredToolResults = [] + for (const toolResult of toolResults) { + if (typeof toolResult !== 'object') { + this.#handleFailure('Tool result must be an object.', 'invalid_io_messages') + continue + } + + const { toolId, result, name = '', type } = toolResult + const toolResultObj = {} + + const condition1 = this.#tagConditionalString(toolId, 'Tool result ID', toolResultObj, 'tool_id') + const condition2 = this.#tagConditionalString(result, 'Tool result', toolResultObj, 'result') + // name can be empty string, so always include it + toolResultObj.name = name + const condition3 = this.#tagConditionalString(type, 'Tool result type', toolResultObj, 'type') + + if (condition1 && condition2 && condition3) { + filteredToolResults.push(toolResultObj) + } + } + return filteredToolResults + } + #tagMessages (span, data, key) { if (!data) { return @@ -281,6 +309,7 @@ class LLMObsTagger { const { content = '', role } = message const toolCalls = message.toolCalls + const toolResults = message.toolResults const toolId = message.toolId const messageObj = {} @@ -297,17 +326,25 @@ class LLMObsTagger { } } - // Only include content if it's not empty OR if there are no tool calls - // (For responses API, tool call messages should not have content field) - if (content !== '' || !messageObj.tool_calls) { + if (toolResults) { + const filteredToolResults = this.#filterToolResults(toolResults) + + if (filteredToolResults.length) { + messageObj.tool_results = filteredToolResults + } + } + + // Include content if not empty, no tool calls/results, or explicitly provided + if (content !== '' || (!messageObj.tool_calls && !messageObj.tool_results) || ('content' in message)) { messageObj.content = content } - // For role, always include it (even if empty string) when there are tool calls + // For role, always include it when there are tool calls or tool results // Otherwise use conditional tagging which skips empty values let condition - if (messageObj.tool_calls && messageObj.tool_calls.length > 0) { - // For tool call messages, always include role even if empty + if ((messageObj.tool_calls && messageObj.tool_calls.length > 0) || + (messageObj.tool_results && messageObj.tool_results.length > 0)) { + // For tool call/result messages, always include role messageObj.role = role || '' condition = true } else { From 566c6deabd107139971013e1fbf4d42973c09ef0 Mon Sep 17 00:00:00 2001 From: Jordan Wong 
Date: Wed, 15 Oct 2025 14:56:17 -0400 Subject: [PATCH 05/22] fix tags after merge master --- .../dd-trace/src/llmobs/plugins/openai.js | 21 +++++++++++++------ packages/dd-trace/src/llmobs/tagger.js | 12 +++-------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 4802add6389..63b1e3c259d 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -86,12 +86,21 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { if (outputTokens !== undefined) metrics.outputTokens = outputTokens const totalTokens = tokenUsage.total_tokens || (inputTokens + outputTokens) - if (totalTokens) metrics.totalTokens = totalTokens - - const promptTokensDetails = tokenUsage.prompt_tokens_details - if (promptTokensDetails) { - const cacheReadTokens = promptTokensDetails.cached_tokens - if (cacheReadTokens) metrics.cacheReadTokens = cacheReadTokens + if (totalTokens !== undefined) metrics.totalTokens = totalTokens + + // Cache tokens - Responses API uses input_tokens_details, Chat/Completions use prompt_tokens_details + // For Responses API, always include cache tokens (even if 0) + // For Chat API, only include if > 0 + if (tokenUsage.input_tokens_details) { + // Responses API - always include + const cacheReadTokens = tokenUsage.input_tokens_details.cached_tokens + if (cacheReadTokens !== undefined) metrics.cacheReadTokens = cacheReadTokens + } else if (tokenUsage.prompt_tokens_details) { + // Chat/Completions API - only include if > 0 + const cacheReadTokens = tokenUsage.prompt_tokens_details.cached_tokens + if (cacheReadTokens !== undefined && cacheReadTokens > 0) { + metrics.cacheReadTokens = cacheReadTokens + } } } diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js index b1d95dea2e9..ff229c46850 100644 --- a/packages/dd-trace/src/llmobs/tagger.js +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -280,11 +280,13 @@ class LLMObsTagger { continue } - const { result, toolId, type } = toolResult + const { result, toolId, name = '', type } = toolResult const toolResultObj = {} const condition1 = this.#tagConditionalString(result, 'Tool result', toolResultObj, 'result') const condition2 = this.#tagConditionalString(toolId, 'Tool ID', toolResultObj, 'tool_id') + // name can be empty string, so always include it + toolResultObj.name = name const condition3 = this.#tagConditionalString(type, 'Tool type', toolResultObj, 'type') if (condition1 && condition2 && condition3) { @@ -358,14 +360,6 @@ class LLMObsTagger { condition = this.#tagConditionalString(role, 'Message role', messageObj, 'role') } - if (toolResults) { - const filteredToolResults = this.#filterToolResults(toolResults) - - if (filteredToolResults.length) { - messageObj.tool_results = filteredToolResults - } - } - if (toolId) { if (role === 'tool') { condition = this.#tagConditionalString(toolId, 'Tool ID', messageObj, 'tool_id') From ba6afbb8489557e8a3578d8c098e9d45176f36ed Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Wed, 15 Oct 2025 17:33:19 -0400 Subject: [PATCH 06/22] remove unnecessary stream chunk processing, refactor metadata tags --- .../src/stream-helpers.js | 71 +------------------ packages/datadog-plugin-openai/src/tracing.js | 19 ----- .../dd-trace/src/llmobs/plugins/openai.js | 37 +++++----- 3 files changed, 19 insertions(+), 108 deletions(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js
b/packages/datadog-plugin-openai/src/stream-helpers.js index 5d264d8e404..38021a2ce5c 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -124,81 +124,16 @@ function constructResponseResponseFromStreamedChunks (chunks, n) { // - response.done/response.incomplete/response.completed: final response with output array and usage // Find the last chunk with a complete response object (status: done, incomplete, or completed) - let finalResponse = null + let finalResponse for (let i = chunks.length - 1; i >= 0; i--) { const chunk = chunks[i] if (chunk.response && ['done', 'incomplete', 'completed'].includes(chunk.response.status)) { finalResponse = chunk.response - break + return finalResponse } } - // If we found a final response, we may need to add accumulated text - if (finalResponse) { - // For simple text responses, if output is empty or an empty array, accumulate from deltas - const outputIsEmpty = !finalResponse.output || - finalResponse.output === '' || - (Array.isArray(finalResponse.output) && finalResponse.output.length === 0) - - if (outputIsEmpty) { - const outputText = chunks - .filter(chunk => chunk.type === 'response.output_text.delta') - .map(chunk => chunk.delta) - .join('') - - if (outputText) { - return { - ...finalResponse, - output: outputText - } - } - } - return finalResponse - } - - // If no final response found, fall back to accumulating from deltas and items - const baseResponse = chunks[0]?.response || {} - - // Accumulate text from delta chunks - const outputText = chunks - .filter(chunk => chunk.type === 'response.output_text.delta') - .map(chunk => chunk.delta) - .join('') - - // Check for tool call chunks (output_item.added with function_call type) - const toolCallChunks = chunks.filter(chunk => - chunk.type === 'response.output_item.added' && - chunk.item?.type === 'function_call' - ) - - const result = { - ...baseResponse, - output: outputText || '', - usage: chunks[chunks.length - 1]?.response?.usage || null - } - - // If there are tool calls, structure them in the output array format - if (toolCallChunks.length > 0) { - result.output = [] - for (const toolCallChunk of toolCallChunks) { - const item = toolCallChunk.item - result.output.push({ - type: 'message', - role: 'assistant', - content: [], - tool_calls: [{ - id: item.call_id, - type: 'function', - function: { - name: item.name, - arguments: item.arguments - } - }] - }) - } - } - - return result + return finalResponse } module.exports = { diff --git a/packages/datadog-plugin-openai/src/tracing.js b/packages/datadog-plugin-openai/src/tracing.js index 72aa9d3ee03..289ec956652 100644 --- a/packages/datadog-plugin-openai/src/tracing.js +++ b/packages/datadog-plugin-openai/src/tracing.js @@ -393,25 +393,6 @@ function createResponseRequestExtraction (tags, payload, openaiStore) { tags['openai.request.model'] = payload.model } - // Extract input information - if (payload.input) { - openaiStore.input = payload.input - tags['openai.request.input_length'] = payload.input.length - } - - // Extract reasoning configuration - if (payload.reasoning) { - if (payload.reasoning.effort) { - tags['openai.request.reasoning.effort'] = payload.reasoning.effort - } - openaiStore.reasoning = payload.reasoning - } - - // Extract background flag - if (payload.background !== undefined) { - tags['openai.request.background'] = payload.background - } - // Store the full payload for response extraction openaiStore.responseData = payload } diff --git 
a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 63b1e3c259d..7c7dd4b4e07 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -54,7 +54,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } else if (operation === 'embedding') { this._tagEmbedding(span, inputs, response, error) } else if (operation === 'response') { - this._tagResponse(span, inputs, response, error) + this.#tagResponse(span, inputs, response, error) } if (!error) { @@ -205,8 +205,8 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { this._tagger.tagMetadata(span, metadata) } - _tagResponse (span, inputs, response, error) { - const { input, model, reasoning, background, ...parameters } = inputs + #tagResponse (span, inputs, response, error) { + const { input, model, ...parameters } = inputs // Create input messages const inputMessages = [] @@ -236,7 +236,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { toolId: item.call_id, name: item.name, arguments: parsedArgs, - type: 'function_call' + type: item.type }] }) } else if (item.type === 'function_call_output') { @@ -247,7 +247,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { toolId: item.call_id, result: item.output, name: item.name || '', - type: 'function_call_output' + type: item.type }] }) } else if (item.role && item.content) { @@ -307,7 +307,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { toolId: item.call_id, name: item.name, arguments: args, - type: 'function_call' + type: item.type }] }) } else { @@ -360,35 +360,30 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { this._tagger.tagLLMIO(span, inputMessages, outputMessages) - // Tag metadata + // Tag metadata - use allowlist approach for request parameters + const allowedParamKeys = [ + 'max_output_tokens', + 'temperature', + 'stream', + 'reasoning' + ] + const metadata = Object.entries(parameters).reduce((obj, [key, value]) => { - if (!['tools', 'functions', 'instructions'].includes(key)) { + if (allowedParamKeys.includes(key)) { obj[key] = value } return obj }, {}) - // Add fields from response + // Add fields from response object (convert numbers to floats) if (response.temperature !== undefined) metadata.temperature = parseFloat(response.temperature) if (response.top_p !== undefined) metadata.top_p = parseFloat(response.top_p) - // Only include tools if it's not an empty array - if (response.tools !== undefined && !(Array.isArray(response.tools) && response.tools.length === 0)) { - metadata.tools = Array.isArray(response.tools) ? 
[...response.tools] : response.tools } if (response.tool_choice !== undefined) metadata.tool_choice = response.tool_choice if (response.truncation !== undefined) metadata.truncation = response.truncation if (response.text !== undefined) metadata.text = response.text if (response.usage?.output_tokens_details?.reasoning_tokens !== undefined) { metadata.reasoning_tokens = response.usage.output_tokens_details.reasoning_tokens } - - // Add reasoning metadata from input parameters - if (reasoning) { - metadata.reasoning = reasoning - } - if (background !== undefined) { - metadata.background = background - } this._tagger.tagMetadata(span, metadata) } From 2781b78f73241721f5b591cc2dab2352801b6aa1 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 20 Oct 2025 10:36:24 -0400 Subject: [PATCH 07/22] address comments, remove unnecessary empty content tag handling --- .../src/stream-helpers.js | 7 +---- .../dd-trace/src/llmobs/plugins/openai.js | 28 +++++++++---------- packages/dd-trace/src/llmobs/tagger.js | 13 ++++----- 3 files changed, 21 insertions(+), 27 deletions(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js b/packages/datadog-plugin-openai/src/stream-helpers.js index 38021a2ce5c..9c3c65e9d34 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -115,8 +115,6 @@ function constructChatCompletionResponseFromStreamedChunks (chunks, n) { * @returns {Record<string, any>} */ function constructResponseResponseFromStreamedChunks (chunks, n) { - if (chunks.length === 0) return {} - // The responses API streams events with different types: // - response.output_text.delta: incremental text deltas // - response.output_text.done: complete text for a content part @@ -124,16 +122,13 @@ function constructResponseResponseFromStreamedChunks (chunks, n) { // - response.done/response.incomplete/response.completed: final response with output array and usage // Find the last chunk with a complete response object (status: done, incomplete, or completed) for (let i = chunks.length - 1; i >= 0; i--) { const chunk = chunks[i] if (chunk.response && ['done', 'incomplete', 'completed'].includes(chunk.response.status)) { return chunk.response } } - } module.exports = { diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 7c7dd4b4e07..a6e94731916 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -98,7 +98,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } else if (tokenUsage.prompt_tokens_details) { // Chat/Completions API - only include if > 0 const cacheReadTokens = tokenUsage.prompt_tokens_details.cached_tokens - if (cacheReadTokens !== undefined && cacheReadTokens > 0) { + if (cacheReadTokens) { metrics.cacheReadTokens = cacheReadTokens } } @@ -206,6 +206,14 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } #tagResponse (span, inputs, response, error) { + // Tag metadata - use allowlist approach for request parameters + const allowedParamKeys = [ + 'max_output_tokens', + 'temperature', + 'stream', + 'reasoning' + ] + const { input, model, ...parameters } = inputs // Create input messages const inputMessages = [] @@ -226,7 +234,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { if (typeof parsedArgs === 'string') { try { parsedArgs = JSON.parse(parsedArgs) - } catch (e) { + } catch { parsedArgs = {} } } @@ -280,7 +288,7 @@ class
OpenAiLLMObsPlugin extends LLMObsPlugin { if (item.type === 'reasoning') { // Extract reasoning text from summary let reasoningText = '' - if (item.summary && Array.isArray(item.summary) && item.summary.length > 0) { + if (Array.isArray(item.summary) && item.summary.length > 0) { const summaryItem = item.summary[0] if (summaryItem.type === 'summary_text' && summaryItem.text) { reasoningText = summaryItem.text @@ -297,7 +305,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { if (typeof args === 'string') { try { args = JSON.parse(args) - } catch (e) { + } catch { args = {} } } @@ -319,7 +327,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { // Content is array of content parts // For responses API, text content has type 'output_text', not 'text' const textParts = item.content - .filter(c => c.type === 'text' || c.type === 'output_text') + .filter(c => c.type === 'output_text') .map(c => c.text) outputMsg.content = textParts.join('') } else if (typeof item.content === 'string') { @@ -327,7 +335,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } // Extract tool calls if present in message.tool_calls - if (item.tool_calls && Array.isArray(item.tool_calls)) { + if (Array.isArray(item.tool_calls)) { outputMsg.toolCalls = item.tool_calls.map(tc => { let args = tc.function?.arguments || tc.arguments // Parse arguments if it's a JSON string @@ -360,14 +368,6 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { this._tagger.tagLLMIO(span, inputMessages, outputMessages) - // Tag metadata - use allowlist approach for request parameters - const allowedParamKeys = [ - 'max_output_tokens', - 'temperature', - 'stream', - 'reasoning' - ] - const metadata = Object.entries(parameters).reduce((obj, [key, value]) => { if (allowedParamKeys.includes(key)) { obj[key] = value diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js index ff229c46850..167ad7cf584 100644 --- a/packages/dd-trace/src/llmobs/tagger.js +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -286,7 +286,11 @@ class LLMObsTagger { const condition1 = this.#tagConditionalString(result, 'Tool result', toolResultObj, 'result') const condition2 = this.#tagConditionalString(toolId, 'Tool ID', toolResultObj, 'tool_id') // name can be empty string, so always include it - toolResultObj.name = name + if (typeof name === 'string') { + toolResultObj.name = name + } else { + this.#handleFailure(`[LLMObs] Expected tool result name to be a string, instead got "${typeof name}"`) + } const condition3 = this.#tagConditionalString(type, 'Tool type', toolResultObj, 'type') if (condition1 && condition2 && condition3) { @@ -320,7 +324,7 @@ class LLMObsTagger { const toolCalls = message.toolCalls const toolResults = message.toolResults const toolId = message.toolId - const messageObj = {} + const messageObj = {content} const valid = typeof content === 'string' if (!valid) { @@ -343,11 +347,6 @@ class LLMObsTagger { } } - // Include content if not empty, no tool calls/results, or explicitly provided - if (content !== '' || (!messageObj.tool_calls && !messageObj.tool_results) || ('content' in message)) { - messageObj.content = content - } - // For role, always include it when there are tool calls or tool results // Otherwise use conditional tagging which skips empty values let condition From e78cacde11548bb7f597287eafac4ca98b1250aa Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 20 Oct 2025 11:03:21 -0400 Subject: [PATCH 08/22] run linter --- .../src/stream-helpers.js | 3 +- packages/datadog-plugin-openai/src/tracing.js | 
8 ++--- .../dd-trace/src/llmobs/plugins/openai.js | 32 +++++++++---------- packages/dd-trace/src/llmobs/tagger.js | 4 +-- 4 files changed, 23 insertions(+), 24 deletions(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js b/packages/datadog-plugin-openai/src/stream-helpers.js index 9c3c65e9d34..4e0dc5533f1 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -120,7 +120,7 @@ function constructResponseResponseFromStreamedChunks (chunks, n) { // - response.output_text.done: complete text for a content part // - response.output_item.done: complete output item with role // - response.done/response.incomplete/response.completed: final response with output array and usage - + // Find the last chunk with a complete response object (status: done, incomplete, or completed) for (let i = chunks.length - 1; i >= 0; i--) { const chunk = chunks[i] @@ -128,7 +128,6 @@ function constructResponseResponseFromStreamedChunks (chunks, n) { return chunk.response } } - } module.exports = { diff --git a/packages/datadog-plugin-openai/src/tracing.js b/packages/datadog-plugin-openai/src/tracing.js index 289ec956652..f2ebb119478 100644 --- a/packages/datadog-plugin-openai/src/tracing.js +++ b/packages/datadog-plugin-openai/src/tracing.js @@ -392,7 +392,7 @@ function createResponseRequestExtraction (tags, payload, openaiStore) { if (payload.model) { tags['openai.request.model'] = payload.model } - + // Store the full payload for response extraction openaiStore.responseData = payload } @@ -543,17 +543,17 @@ function createResponseResponseExtraction (tags, body, openaiStore) { if (body.id) { tags['openai.response.id'] = body.id } - + // Extract status if available if (body.status) { tags['openai.response.status'] = body.status } - + // Extract model from response if available if (body.model) { tags['openai.response.model'] = body.model } - + // Store the full response for potential future use openaiStore.response = body } diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index a6e94731916..e917569fa70 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -207,23 +207,23 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { #tagResponse (span, inputs, response, error) { // Tag metadata - use allowlist approach for request parameters - const allowedParamKeys = [ + const allowedParamKeys = new Set([ 'max_output_tokens', 'temperature', 'stream', 'reasoning' - ] - + ]) + const { input, model, ...parameters } = inputs // Create input messages const inputMessages = [] - + // Add system message if instructions exist if (inputs.instructions) { inputMessages.push({ role: 'system', content: inputs.instructions }) } - + // Handle input - can be string or array of mixed messages if (Array.isArray(input)) { for (const item of input) { @@ -267,7 +267,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { // Simple string input inputMessages.push({ role: 'user', content: input }) } - + if (error) { this._tagger.tagLLMIO(span, inputMessages, [{ content: '' }]) return @@ -275,7 +275,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { // Create output messages const outputMessages = [] - + // Handle output - can be string (streaming) or array of message objects (non-streaming) if (typeof response.output === 'string') { // Simple text output (streaming) @@ -321,7 +321,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } else { // 
Handle regular message objects const outputMsg = { role: item.role || 'assistant', content: '' } - + // Extract content from message if (item.content && Array.isArray(item.content)) { // Content is array of content parts @@ -333,7 +333,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } else if (typeof item.content === 'string') { outputMsg.content = item.content } - + // Extract tool calls if present in message.tool_calls if (Array.isArray(item.tool_calls)) { outputMsg.toolCalls = item.tool_calls.map(tc => { @@ -342,7 +342,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { if (typeof args === 'string') { try { args = JSON.parse(args) - } catch (e) { + } catch { args = {} } } @@ -354,7 +354,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } }) } - + outputMessages.push(outputMsg) } } @@ -367,17 +367,17 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { } this._tagger.tagLLMIO(span, inputMessages, outputMessages) - + const metadata = Object.entries(parameters).reduce((obj, [key, value]) => { - if (allowedParamKeys.includes(key)) { + if (allowedParamKeys.has(key)) { obj[key] = value } return obj }, {}) - + // Add fields from response object (convert numbers to floats) - if (response.temperature !== undefined) metadata.temperature = parseFloat(response.temperature) - if (response.top_p !== undefined) metadata.top_p = parseFloat(response.top_p) + if (response.temperature !== undefined) metadata.temperature = Number.parseFloat(response.temperature) + if (response.top_p !== undefined) metadata.top_p = Number.parseFloat(response.top_p) if (response.tool_choice !== undefined) metadata.tool_choice = response.tool_choice if (response.truncation !== undefined) metadata.truncation = response.truncation if (response.text !== undefined) metadata.text = response.text diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js index 167ad7cf584..9cc68fd27b8 100644 --- a/packages/dd-trace/src/llmobs/tagger.js +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -324,7 +324,7 @@ class LLMObsTagger { const toolCalls = message.toolCalls const toolResults = message.toolResults const toolId = message.toolId - const messageObj = {content} + const messageObj = { content } const valid = typeof content === 'string' if (!valid) { @@ -350,7 +350,7 @@ class LLMObsTagger { // For role, always include it when there are tool calls or tool results // Otherwise use conditional tagging which skips empty values let condition - if ((messageObj.tool_calls && messageObj.tool_calls.length > 0) || + if ((messageObj.tool_calls && messageObj.tool_calls.length > 0) || (messageObj.tool_results && messageObj.tool_results.length > 0)) { // For tool call/result messages, always include role messageObj.role = role || '' From 5477b429e65485e9ff287f65c70638ae4abde8b0 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 20 Oct 2025 11:13:49 -0400 Subject: [PATCH 09/22] move const allowedParams to top of file --- packages/dd-trace/src/llmobs/plugins/openai.js | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index e917569fa70..191641d1a35 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -2,6 +2,13 @@ const LLMObsPlugin = require('./base') +const allowedParamKeys = new Set([ + 'max_output_tokens', + 'temperature', + 'stream', + 'reasoning' +]) + function isIterable (obj) { if (obj == null) { return false @@ -207,12 
+214,6 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { #tagResponse (span, inputs, response, error) { // Tag metadata - use allowlist approach for request parameters - const allowedParamKeys = new Set([ - 'max_output_tokens', - 'temperature', - 'stream', - 'reasoning' - ]) const { input, model, ...parameters } = inputs From 650b06cd7849b856f13024ce3f67818b91aa16cf Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Tue, 21 Oct 2025 10:42:15 -0400 Subject: [PATCH 10/22] clean up --- .../datadog-plugin-openai/src/stream-helpers.js | 3 +-- packages/datadog-plugin-openai/src/tracing.js | 2 +- packages/dd-trace/src/llmobs/plugins/openai.js | 2 +- packages/dd-trace/src/llmobs/tagger.js | 14 ++------------ 4 files changed, 5 insertions(+), 16 deletions(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js b/packages/datadog-plugin-openai/src/stream-helpers.js index 4e0dc5533f1..f254402f604 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -111,10 +111,9 @@ function constructChatCompletionResponseFromStreamedChunks (chunks, n) { * Constructs the entire response from a stream of OpenAI responses chunks. * The responses API uses event-based streaming with delta chunks. * @param {Array<Record<string, any>>} chunks - * @param {number} n (not used for responses API, but kept for consistency) * @returns {Record<string, any>} */ -function constructResponseResponseFromStreamedChunks (chunks, n) { +function constructResponseResponseFromStreamedChunks (chunks) { // The responses API streams events with different types: // - response.output_text.delta: incremental text deltas diff --git a/packages/datadog-plugin-openai/src/tracing.js b/packages/datadog-plugin-openai/src/tracing.js index f2ebb119478..b5f3ab4a7a2 100644 --- a/packages/datadog-plugin-openai/src/tracing.js +++ b/packages/datadog-plugin-openai/src/tracing.js @@ -61,7 +61,7 @@ class OpenAiTracingPlugin extends TracingPlugin { } else if (methodName === 'createChatCompletion') { response = constructChatCompletionResponseFromStreamedChunks(chunks, n) } else if (methodName === 'createResponse') { - response = constructResponseResponseFromStreamedChunks(chunks, n) + response = constructResponseResponseFromStreamedChunks(chunks) } ctx.result = { data: response } diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 191641d1a35..166258379ae 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -324,7 +324,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { const outputMsg = { role: item.role || 'assistant', content: '' } // Extract content from message - if (item.content && Array.isArray(item.content)) { + if (Array.isArray(item.content)) { // Content is array of content parts // For responses API, text content has type 'output_text', not 'text' const textParts = item.content .filter(c => c.type === 'output_text') .map(c => c.text) outputMsg.content = textParts.join('') } else if (typeof item.content === 'string') { outputMsg.content = item.content } diff --git a/packages/dd-trace/src/llmobs/tagger.js b/packages/dd-trace/src/llmobs/tagger.js index 9cc68fd27b8..d95ba3f8197 100644 --- a/packages/dd-trace/src/llmobs/tagger.js +++ b/packages/dd-trace/src/llmobs/tagger.js @@ -326,6 +326,8 @@ class LLMObsTagger { const toolCalls = message.toolCalls const toolResults = message.toolResults const toolId = message.toolId const messageObj = { content } + let condition = this.#tagConditionalString(role, 'Message role', messageObj, 'role') + const valid = typeof content === 'string' if (!valid) { this.#handleFailure('Message content must be a string.',
'invalid_io_messages') @@ -347,18 +349,6 @@ class LLMObsTagger { } } - // For role, always include it when there are tool calls or tool results - // Otherwise use conditional tagging which skips empty values - let condition - if ((messageObj.tool_calls && messageObj.tool_calls.length > 0) || - (messageObj.tool_results && messageObj.tool_results.length > 0)) { - // For tool call/result messages, always include role - messageObj.role = role || '' - condition = true - } else { - condition = this.#tagConditionalString(role, 'Message role', messageObj, 'role') - } - if (toolId) { if (role === 'tool') { condition = this.#tagConditionalString(toolId, 'Tool ID', messageObj, 'tool_id') From 2762fdfae150227fcef86fb137262debe60f3cfb Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Thu, 23 Oct 2025 12:43:34 -0400 Subject: [PATCH 11/22] add test for openai response span --- .../llmobs/plugins/openai/openaiv4.spec.js | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js index 3d8911bee82..c8c3d4af68b 100644 --- a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js @@ -708,6 +708,44 @@ describe('integrations', () => { expect(events.llmobsSpans[0]).to.deepEqualWithMockValues(expectedSecondLlmSpanEvent) }) + + it('submits a response span', async function () { + if (semifies(realVersion, '<4.87.0')) { + this.skip() + } + + await openai.responses.create({ + model: 'gpt-4o', + input: 'What is the capital of France?', + max_output_tokens: 100, + temperature: 0.5, + stream: false + }) + + const { apmSpans, llmobsSpans } = await getEvents() + const expected = expectedLLMObsLLMSpanEvent({ + span: apmSpans[0], + spanKind: 'llm', + name: 'OpenAI.createResponse', + inputMessages: [ + { role: 'user', content: 'What is the capital of France?' 
} + ], + outputMessages: [ + { role: 'assistant', content: MOCK_STRING } + ], + tokenMetrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER }, + modelName: 'gpt-4o-mini', + modelProvider: 'openai', + metadata: { + max_output_tokens: 100, + temperature: 0.5, + stream: false + }, + tags: { ml_app: 'test', language: 'javascript', integration: 'openai' } + }) + + expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) + }) }) }) }) From 6323a81cfcd9f66c45144dd24abe288110fe5e1a Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 27 Oct 2025 10:15:25 -0400 Subject: [PATCH 12/22] add streamed test, fix non-streamed response test --- .../llmobs/plugins/openai/openaiv4.spec.js | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js index c8c3d4af68b..ebcb074a303 100644 --- a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js @@ -715,7 +715,7 @@ describe('integrations', () => { } await openai.responses.create({ - model: 'gpt-4o', + model: 'gpt-4o-mini', input: 'What is the capital of France?', max_output_tokens: 100, temperature: 0.5, @@ -733,12 +733,17 @@ describe('integrations', () => { outputMessages: [ { role: 'assistant', content: MOCK_STRING } ], - tokenMetrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER }, + tokenMetrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER, cache_read_input_tokens: 0 }, modelName: 'gpt-4o-mini', modelProvider: 'openai', metadata: { max_output_tokens: 100, temperature: 0.5, + top_p: 1, + tool_choice: 'auto', + truncation: 'disabled', + text: { format: { type: 'text' }, verbosity: 'medium' }, + reasoning_tokens: 0, stream: false }, tags: { ml_app: 'test', language: 'javascript', integration: 'openai' } @@ -746,6 +751,53 @@ describe('integrations', () => { expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) }) + + it('submits a streamed response span', async function () { + if (semifies(realVersion, '<4.87.0')) { + this.skip() + } + + const stream = await openai.responses.create({ + model: 'gpt-4o-mini', + input: 'Stream this please', + max_output_tokens: 50, + temperature: 0, + stream: true + }) + + for await (const part of stream) { + expect(part).to.have.property('type') + } + + const { apmSpans, llmobsSpans } = await getEvents() + const expected = expectedLLMObsLLMSpanEvent({ + span: apmSpans[0], + spanKind: 'llm', + name: 'OpenAI.createResponse', + inputMessages: [ + { role: 'user', content: 'Stream this please' } + ], + outputMessages: [ + { role: 'assistant', content: MOCK_STRING } + ], + tokenMetrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER, cache_read_input_tokens: 0 }, + modelName: 'gpt-4o-mini', + modelProvider: 'openai', + metadata: { + max_output_tokens: 50, + temperature: 0, + top_p: 1, + tool_choice: 'auto', + truncation: 'disabled', + text: { format: { type: 'text' }, verbosity: 'medium' }, + reasoning_tokens: 0, + stream: true + }, + tags: { ml_app: 'test', language: 'javascript', integration: 'openai' } + }) + + expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) + }) }) }) }) From f124bc9b4bbcf77175e2492681c277cb40ae9b34 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 27 Oct 2025 10:51:25 -0400 Subject: [PATCH 13/22] add cassettes --- 
.../openai_responses_post_13b63907.yaml | 130 ++++++++ .../openai_responses_post_13c05471.yaml | 314 ++++++++++++++++++ 2 files changed, 444 insertions(+) create mode 100644 packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13b63907.yaml create mode 100644 packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13c05471.yaml diff --git a/packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13b63907.yaml b/packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13b63907.yaml new file mode 100644 index 00000000000..4274e0cc588 --- /dev/null +++ b/packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13b63907.yaml @@ -0,0 +1,130 @@ +interactions: +- request: + body: '{"model":"gpt-4o-mini","input":"What is the capital of France?","max_output_tokens":100,"temperature":0.5,"stream":false}' + headers: + ? !!python/object/apply:multidict._multidict.istr + - Accept + : - application/json + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - gzip, deflate + ? !!python/object/apply:multidict._multidict.istr + - Accept-Language + : - '*' + ? !!python/object/apply:multidict._multidict.istr + - Connection + : - keep-alive + Content-Length: + - '121' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + ? !!python/object/apply:multidict._multidict.istr + - User-Agent + : - OpenAI/JS 6.4.0 + ? !!python/object/apply:multidict._multidict.istr + - sec-fetch-mode + : - cors + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-arch + : - arm64 + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-lang + : - js + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-os + : - MacOS + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-package-version + : - 6.4.0 + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-retry-count + : - '0' + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-runtime + : - node + ? 
!!python/object/apply:multidict._multidict.istr + - x-stainless-runtime-version + : - v20.15.1 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: "{\n \"id\": \"resp_0ba32533a7d6e1ea0168ff866675a881a381fd2d7a17f838c2\",\n + \ \"object\": \"response\",\n \"created_at\": 1761576550,\n \"status\": + \"completed\",\n \"background\": false,\n \"billing\": {\n \"payer\": + \"developer\"\n },\n \"error\": null,\n \"incomplete_details\": null,\n + \ \"instructions\": null,\n \"max_output_tokens\": 100,\n \"max_tool_calls\": + null,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \"output\": [\n {\n + \ \"id\": \"msg_0ba32533a7d6e1ea0168ff8666ed2881a3a85ff3bd38183da5\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"The capital of France + is Paris.\"\n }\n ],\n \"role\": \"assistant\"\n }\n ],\n + \ \"parallel_tool_calls\": true,\n \"previous_response_id\": null,\n \"prompt_cache_key\": + null,\n \"reasoning\": {\n \"effort\": null,\n \"summary\": null\n + \ },\n \"safety_identifier\": null,\n \"service_tier\": \"default\",\n \"store\": + false,\n \"temperature\": 0.5,\n \"text\": {\n \"format\": {\n \"type\": + \"text\"\n },\n \"verbosity\": \"medium\"\n },\n \"tool_choice\": + \"auto\",\n \"tools\": [],\n \"top_logprobs\": 0,\n \"top_p\": 1.0,\n \"truncation\": + \"disabled\",\n \"usage\": {\n \"input_tokens\": 14,\n \"input_tokens_details\": + {\n \"cached_tokens\": 0\n },\n \"output_tokens\": 8,\n \"output_tokens_details\": + {\n \"reasoning_tokens\": 0\n },\n \"total_tokens\": 22\n },\n + \ \"user\": null,\n \"metadata\": {}\n}" + headers: + CF-RAY: + - 9952ff9df9ad05d3-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Mon, 27 Oct 2025 14:49:11 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=C0GyMvCZLE0zxKsBJruXuZiyfxOQWJTfAv08AljQ5Eo-1761576551-1.0.1.1-YVdZesFuQf.lGxvzkWDiaqkpm849gyJgCxSrYvBoVUkWDLJAAcJ07ylwTPYrxqcvCPg5dmg1YHYDc7_Nt.tlNL6tJWBp0D5Ro7PmxLluSN0; + path=/; expires=Mon, 27-Oct-25 15:19:11 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=54X0Rg1n8bwX_SxsLelH2MgkI9mloEFys84bjmQwVvc-1761576551175-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '719' + openai-project: + - proj_6cMiry5CHgK3zKotG0LtMb9H + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '723' + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999967' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7114efca340644ca834401f95882e54a + status: + code: 200 + message: OK +version: 1 diff --git a/packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13c05471.yaml b/packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13c05471.yaml new file mode 100644 index 00000000000..b170d780cac --- /dev/null +++ b/packages/dd-trace/test/llmobs/cassettes/openai/openai_responses_post_13c05471.yaml @@ -0,0 +1,314 @@ +interactions: +- request: + body: 
'{"model":"gpt-4o-mini","input":"Stream this please","max_output_tokens":50,"temperature":0,"stream":true}' + headers: + ? !!python/object/apply:multidict._multidict.istr + - Accept + : - application/json + ? !!python/object/apply:multidict._multidict.istr + - Accept-Encoding + : - gzip, deflate + ? !!python/object/apply:multidict._multidict.istr + - Accept-Language + : - '*' + ? !!python/object/apply:multidict._multidict.istr + - Connection + : - keep-alive + Content-Length: + - '105' + ? !!python/object/apply:multidict._multidict.istr + - Content-Type + : - application/json + ? !!python/object/apply:multidict._multidict.istr + - User-Agent + : - OpenAI/JS 6.4.0 + ? !!python/object/apply:multidict._multidict.istr + - sec-fetch-mode + : - cors + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-arch + : - arm64 + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-lang + : - js + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-os + : - MacOS + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-package-version + : - 6.4.0 + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-retry-count + : - '0' + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-runtime + : - node + ? !!python/object/apply:multidict._multidict.istr + - x-stainless-runtime-version + : - v20.15.1 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: 'event: response.created + + data: {"type":"response.created","sequence_number":0,"response":{"id":"resp_02a325206886e7330168ff8667b35c8195af5d01e2d807b363","object":"response","created_at":1761576551,"status":"in_progress","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":50,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":0.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}} + + + event: response.in_progress + + data: {"type":"response.in_progress","sequence_number":1,"response":{"id":"resp_02a325206886e7330168ff8667b35c8195af5d01e2d807b363","object":"response","created_at":1761576551,"status":"in_progress","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":50,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":false,"temperature":0.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}}} + + + event: response.output_item.added + + data: {"type":"response.output_item.added","sequence_number":2,"output_index":0,"item":{"id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","type":"message","status":"in_progress","content":[],"role":"assistant"}} + + + event: response.content_part.added + + data: 
{"type":"response.content_part.added","sequence_number":3,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":4,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":"I","logprobs":[],"obfuscation":"sXApFWoqYTnY8RB"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":5,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + can''t","logprobs":[],"obfuscation":"6rrkfTjqgA"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":6,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + stream","logprobs":[],"obfuscation":"zTQcvzXkr"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":7,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + content","logprobs":[],"obfuscation":"NXub1Hqf"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":8,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + directly","logprobs":[],"obfuscation":"Ik3o4B6"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":9,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":",","logprobs":[],"obfuscation":"DNmI50dmeVuCU0E"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":10,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + but","logprobs":[],"obfuscation":"Ye7rU3zOPHMS"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":11,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + I","logprobs":[],"obfuscation":"mQw45n3DQI38f6"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":12,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + can","logprobs":[],"obfuscation":"HuV1gXeTgSg1"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":13,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + help","logprobs":[],"obfuscation":"v4zsC16sDw8"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":14,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + you","logprobs":[],"obfuscation":"onfoNCWDukoD"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":15,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + find","logprobs":[],"obfuscation":"y2g9muALvN2"} + + + event: response.output_text.delta + + data: 
{"type":"response.output_text.delta","sequence_number":16,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + where","logprobs":[],"obfuscation":"riOCUTQ2Hw"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":17,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + to","logprobs":[],"obfuscation":"s9h6vUfCuyVtg"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":18,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + watch","logprobs":[],"obfuscation":"hh9jyVBUpH"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":19,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + it","logprobs":[],"obfuscation":"SVBCj6G7oA9ws"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":20,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + or","logprobs":[],"obfuscation":"rmKxechHQh1bZ"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":21,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + provide","logprobs":[],"obfuscation":"wqlI2Ews"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":22,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + information","logprobs":[],"obfuscation":"dn0n"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":23,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + about","logprobs":[],"obfuscation":"AEFNaXRNSg"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":24,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + it","logprobs":[],"obfuscation":"O4xsymJ6spvog"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":25,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":".","logprobs":[],"obfuscation":"ZPdlXeQyZppxxJw"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":26,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + What","logprobs":[],"obfuscation":"gFkxPUXcj7D"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":27,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + are","logprobs":[],"obfuscation":"7GXuowbYqZ5V"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":28,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + you","logprobs":[],"obfuscation":"qYd9Zjp5Zjke"} + + + event: response.output_text.delta + + data: 
{"type":"response.output_text.delta","sequence_number":29,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + looking","logprobs":[],"obfuscation":"Mtmhp2jN"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":30,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + to","logprobs":[],"obfuscation":"ujMnmAiKvwXhL"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":31,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":" + stream","logprobs":[],"obfuscation":"q1oFE1YBW"} + + + event: response.output_text.delta + + data: {"type":"response.output_text.delta","sequence_number":32,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"delta":"?","logprobs":[],"obfuscation":"bkucj2SIB5lk5jM"} + + + event: response.output_text.done + + data: {"type":"response.output_text.done","sequence_number":33,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"text":"I + can''t stream content directly, but I can help you find where to watch it + or provide information about it. What are you looking to stream?","logprobs":[]} + + + event: response.content_part.done + + data: {"type":"response.content_part.done","sequence_number":34,"item_id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"I + can''t stream content directly, but I can help you find where to watch it + or provide information about it. What are you looking to stream?"}} + + + event: response.output_item.done + + data: {"type":"response.output_item.done","sequence_number":35,"output_index":0,"item":{"id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"I + can''t stream content directly, but I can help you find where to watch it + or provide information about it. What are you looking to stream?"}],"role":"assistant"}} + + + event: response.completed + + data: {"type":"response.completed","sequence_number":36,"response":{"id":"resp_02a325206886e7330168ff8667b35c8195af5d01e2d807b363","object":"response","created_at":1761576551,"status":"completed","background":false,"error":null,"incomplete_details":null,"instructions":null,"max_output_tokens":50,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"msg_02a325206886e7330168ff8668dc008195baa78f73253ead04","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"I + can''t stream content directly, but I can help you find where to watch it + or provide information about it. 
What are you looking to stream?"}],"role":"assistant"}],"parallel_tool_calls":true,"previous_response_id":null,"prompt_cache_key":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":false,"temperature":0.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":10,"input_tokens_details":{"cached_tokens":0},"output_tokens":30,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":40},"user":null,"metadata":{}}} + + + ' + headers: + CF-RAY: + - 9952ffa58b4a9c76-IAD + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 27 Oct 2025 14:49:12 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=PGV1cl2FvwKo9QxfXkcqGRFt9iOkOFgfkg5v3fKPuME-1761576552-1.0.1.1-ZnxCwcVs4j4wCSpzFrzALk7uMcoreMW6n3tVVQnazXybB4tDqazzKl_eX6yqZFoPnMvWCNl8V_zkSafoRcxymxjQ7uzOx_QDkGqsDKqP5Eo; + path=/; expires=Mon, 27-Oct-25 15:19:12 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=xGY.h88DrIS80WhDwsCXgBxCbYCF0BEWKuOpeAKUVxE-1761576552018-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - datadog-4 + openai-processing-ms: + - '306' + openai-project: + - proj_6cMiry5CHgK3zKotG0LtMb9H + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '316' + x-request-id: + - req_49176fb3cd33440d8e2f3d7332a738f4 + status: + code: 200 + message: OK +version: 1 From 50c557ff018c8799feeba05861422e1fabab0c03 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 27 Oct 2025 13:09:17 -0400 Subject: [PATCH 14/22] remove default to empty string for non-existent name case --- packages/dd-trace/src/llmobs/plugins/openai.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 166258379ae..c257343120b 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -255,7 +255,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { toolResults: [{ toolId: item.call_id, result: item.output, - name: item.name || '', + name: item.name, type: item.type }] }) From 14311ccc32f089953e01ff0a606c57ccea1d0318 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 27 Oct 2025 13:11:57 -0400 Subject: [PATCH 15/22] lint --- .../test/llmobs/plugins/openai/openaiv4.spec.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js index ebcb074a303..d58cd672147 100644 --- a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js @@ -733,7 +733,12 @@ describe('integrations', () => { outputMessages: [ { role: 'assistant', content: MOCK_STRING } ], - tokenMetrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER, cache_read_input_tokens: 0 }, + tokenMetrics: { + input_tokens: MOCK_NUMBER, + output_tokens: MOCK_NUMBER, + total_tokens: MOCK_NUMBER, + cache_read_input_tokens: 0 + }, modelName: 'gpt-4o-mini', modelProvider: 'openai', 
metadata: { @@ -780,7 +785,12 @@ describe('integrations', () => { outputMessages: [ { role: 'assistant', content: MOCK_STRING } ], - tokenMetrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER, cache_read_input_tokens: 0 }, + tokenMetrics: { + input_tokens: MOCK_NUMBER, + output_tokens: MOCK_NUMBER, + total_tokens: MOCK_NUMBER, + cache_read_input_tokens: 0 + }, modelName: 'gpt-4o-mini', modelProvider: 'openai', metadata: { From 8f0ff985417e98ba342cbe8bf47adf63dcc6276e Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 27 Oct 2025 14:48:17 -0400 Subject: [PATCH 16/22] fix test --- packages/dd-trace/src/llmobs/plugins/openai.js | 2 +- packages/dd-trace/test/llmobs/tagger.spec.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index c257343120b..166258379ae 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -255,7 +255,7 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { toolResults: [{ toolId: item.call_id, result: item.output, - name: item.name, + name: item.name || '', type: item.type }] }) diff --git a/packages/dd-trace/test/llmobs/tagger.spec.js b/packages/dd-trace/test/llmobs/tagger.spec.js index dc6be04d4be..2ec800e3d9a 100644 --- a/packages/dd-trace/test/llmobs/tagger.spec.js +++ b/packages/dd-trace/test/llmobs/tagger.spec.js @@ -405,14 +405,14 @@ describe('tagger', () => { describe('tagging tool results appropriately', () => { it('tags a span with tool results', () => { const inputData = [ - { content: 'hello', toolResults: [{ result: 'foo', toolId: '123', type: 'tool_result' }] } + { content: 'hello', toolResults: [{ name: '', result: 'foo', toolId: '123', type: 'tool_result'}] } ] tagger._register(span) tagger.tagLLMIO(span, inputData) expect(Tagger.tagMap.get(span)).to.deep.equal({ '_ml_obs.meta.input.messages': [ - { content: 'hello', tool_results: [{ result: 'foo', tool_id: '123', type: 'tool_result' }] } + { content: 'hello', tool_results: [{ result: 'foo', tool_id: '123', name: '', type: 'tool_result' }] } ] }) }) From 149fe0abdc84cab758dbd22034d6719d04fd5d88 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Mon, 27 Oct 2025 14:59:16 -0400 Subject: [PATCH 17/22] lint --- packages/dd-trace/test/llmobs/tagger.spec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/dd-trace/test/llmobs/tagger.spec.js b/packages/dd-trace/test/llmobs/tagger.spec.js index 2ec800e3d9a..58ff1fb79b0 100644 --- a/packages/dd-trace/test/llmobs/tagger.spec.js +++ b/packages/dd-trace/test/llmobs/tagger.spec.js @@ -405,7 +405,7 @@ describe('tagger', () => { describe('tagging tool results appropriately', () => { it('tags a span with tool results', () => { const inputData = [ - { content: 'hello', toolResults: [{ name: '', result: 'foo', toolId: '123', type: 'tool_result'}] } + { content: 'hello', toolResults: [{ name: '', result: 'foo', toolId: '123', type: 'tool_result' }] } ] tagger._register(span) From f05fee88c5934eca5f03c927689b02d80dd27b73 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 31 Oct 2025 15:16:01 -0400 Subject: [PATCH 18/22] Update packages/datadog-plugin-openai/src/stream-helpers.js Co-authored-by: Ruben Bridgewater --- packages/datadog-plugin-openai/src/stream-helpers.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js 
b/packages/datadog-plugin-openai/src/stream-helpers.js index f254402f604..11947497e22 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -123,7 +123,7 @@ function constructResponseResponseFromStreamedChunks (chunks) { // Find the last chunk with a complete response object (status: done, incomplete, or completed) for (let i = chunks.length - 1; i >= 0; i--) { const chunk = chunks[i] - if (chunk.response && ['done', 'incomplete', 'completed'].includes(chunk.response.status)) { + if (chunk.response && reponseStatusSet.has(chunk.response.status)) { return chunk.response } } From bc3bffa08e4cada78a4dac7b25bb9d9ad2d439f5 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 31 Oct 2025 15:16:13 -0400 Subject: [PATCH 19/22] Update packages/dd-trace/src/llmobs/plugins/openai.js Co-authored-by: Ruben Bridgewater --- packages/dd-trace/src/llmobs/plugins/openai.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/dd-trace/src/llmobs/plugins/openai.js b/packages/dd-trace/src/llmobs/plugins/openai.js index 166258379ae..6ad5c180a42 100644 --- a/packages/dd-trace/src/llmobs/plugins/openai.js +++ b/packages/dd-trace/src/llmobs/plugins/openai.js @@ -377,8 +377,8 @@ class OpenAiLLMObsPlugin extends LLMObsPlugin { }, {}) // Add fields from response object (convert numbers to floats) - if (response.temperature !== undefined) metadata.temperature = Number.parseFloat(response.temperature) - if (response.top_p !== undefined) metadata.top_p = Number.parseFloat(response.top_p) + if (response.temperature !== undefined) metadata.temperature = Number(response.temperature) + if (response.top_p !== undefined) metadata.top_p = Number(response.top_p) if (response.tool_choice !== undefined) metadata.tool_choice = response.tool_choice if (response.truncation !== undefined) metadata.truncation = response.truncation if (response.text !== undefined) metadata.text = response.text From 172a57d6430643f3a730bd93dd24e85faa7c3d69 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 31 Oct 2025 15:51:57 -0400 Subject: [PATCH 20/22] clean up response chunk extracting --- packages/datadog-plugin-openai/src/stream-helpers.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js b/packages/datadog-plugin-openai/src/stream-helpers.js index 11947497e22..48ad62c16a6 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -121,6 +121,9 @@ function constructResponseResponseFromStreamedChunks (chunks) { // - response.done/response.incomplete/response.completed: final response with output array and usage // Find the last chunk with a complete response object (status: done, incomplete, or completed) + responseStatusSet = new Set(['done', 'incomplete', 'completed']) + + const response = chunks.find(chunk => chunk.response && responseStatusSet.has(chunk.response.status)) for (let i = chunks.length - 1; i >= 0; i--) { const chunk = chunks[i] if (chunk.response && reponseStatusSet.has(chunk.response.status)) { From f5bf2b5db594f4b554cddbe9a0c16a25d1214a61 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 31 Oct 2025 16:19:44 -0400 Subject: [PATCH 21/22] fix tests --- packages/datadog-plugin-openai/src/stream-helpers.js | 7 +++---- .../test/llmobs/plugins/openai/openaiv4.spec.js | 12 ++++++------ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/packages/datadog-plugin-openai/src/stream-helpers.js 
b/packages/datadog-plugin-openai/src/stream-helpers.js index 48ad62c16a6..b20e4387921 100644 --- a/packages/datadog-plugin-openai/src/stream-helpers.js +++ b/packages/datadog-plugin-openai/src/stream-helpers.js @@ -121,12 +121,11 @@ function constructResponseResponseFromStreamedChunks (chunks) { // - response.done/response.incomplete/response.completed: final response with output array and usage // Find the last chunk with a complete response object (status: done, incomplete, or completed) - responseStatusSet = new Set(['done', 'incomplete', 'completed']) - - const response = chunks.find(chunk => chunk.response && responseStatusSet.has(chunk.response.status)) + const responseStatusSet = new Set(['done', 'incomplete', 'completed']) + for (let i = chunks.length - 1; i >= 0; i--) { const chunk = chunks[i] - if (chunk.response && reponseStatusSet.has(chunk.response.status)) { + if (chunk.response && responseStatusSet.has(chunk.response.status)) { return chunk.response } } diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js index d58cd672147..9daa4bc51bb 100644 --- a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js @@ -723,7 +723,7 @@ describe('integrations', () => { }) const { apmSpans, llmobsSpans } = await getEvents() - const expected = expectedLLMObsLLMSpanEvent({ + assertLlmObsSpanEvent(llmobsSpans[0], { span: apmSpans[0], spanKind: 'llm', name: 'OpenAI.createResponse', @@ -733,7 +733,7 @@ describe('integrations', () => { outputMessages: [ { role: 'assistant', content: MOCK_STRING } ], - tokenMetrics: { + metrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER, @@ -751,7 +751,7 @@ describe('integrations', () => { reasoning_tokens: 0, stream: false }, - tags: { ml_app: 'test', language: 'javascript', integration: 'openai' } + tags: { ml_app: 'test', integration: 'openai' } }) expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) @@ -775,7 +775,7 @@ describe('integrations', () => { } const { apmSpans, llmobsSpans } = await getEvents() - const expected = expectedLLMObsLLMSpanEvent({ + assertLlmObsSpanEvent(llmobsSpans[0], { span: apmSpans[0], spanKind: 'llm', name: 'OpenAI.createResponse', @@ -785,7 +785,7 @@ describe('integrations', () => { outputMessages: [ { role: 'assistant', content: MOCK_STRING } ], - tokenMetrics: { + metrics: { input_tokens: MOCK_NUMBER, output_tokens: MOCK_NUMBER, total_tokens: MOCK_NUMBER, @@ -803,7 +803,7 @@ describe('integrations', () => { reasoning_tokens: 0, stream: true }, - tags: { ml_app: 'test', language: 'javascript', integration: 'openai' } + tags: { ml_app: 'test', integration: 'openai' } }) expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) From 3be49380f7fd78e310365d701641ecd0ef1927d6 Mon Sep 17 00:00:00 2001 From: Jordan Wong Date: Fri, 31 Oct 2025 16:31:47 -0400 Subject: [PATCH 22/22] lint remove expect statements --- .../dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js index bc272831dd1..5f8e2f3c92e 100644 --- a/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js +++ b/packages/dd-trace/test/llmobs/plugins/openai/openaiv4.spec.js @@ -684,8 +684,6 @@ describe('integrations', () => { }, tags: { ml_app: 'test', integration: 
'openai' } }) - - expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) }) it('submits a streamed response span', async function () { @@ -702,7 +700,7 @@ describe('integrations', () => { }) for await (const part of stream) { - expect(part).to.have.property('type') + assert.ok(Object.hasOwn(part, 'type')) } const { apmSpans, llmobsSpans } = await getEvents() @@ -736,8 +734,6 @@ describe('integrations', () => { }, tags: { ml_app: 'test', integration: 'openai' } }) - - expect(llmobsSpans[0]).to.deepEqualWithMockValues(expected) }) }) })
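For reference, here is a minimal standalone sketch of the terminal-chunk scan that patches 18 through 21 converge on in stream-helpers.js. Only the status values and the backwards loop come from the patches themselves; the helper name lastCompleteResponse and the sample chunk objects are illustrative stand-ins modeled on the streamed cassette above, not part of the tracer.

'use strict'

// Terminal statuses a responses-API stream can end on.
const responseStatusSet = new Set(['done', 'incomplete', 'completed'])

// Scan from the end of the stream: the final response.completed (or
// done/incomplete) event carries the complete output array and usage
// totals, so all earlier delta events can be ignored once it is found.
function lastCompleteResponse (chunks) {
  for (let i = chunks.length - 1; i >= 0; i--) {
    const chunk = chunks[i]
    if (chunk.response && responseStatusSet.has(chunk.response.status)) {
      return chunk.response
    }
  }
}

// Illustrative chunk shapes, loosely following the streamed cassette:
// delta events carry text fragments, the completed event carries usage.
const chunks = [
  { type: 'response.created', response: { status: 'in_progress' } },
  { type: 'response.output_text.delta', delta: 'I' },
  { type: 'response.completed', response: { status: 'completed', usage: { total_tokens: 40 } } }
]

console.log(lastCompleteResponse(chunks).usage.total_tokens) // prints 40

Scanning backwards keeps the common case cheap: the SDK normally yields the completed event last, so the loop exits on its first iteration.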