diff --git a/eng/spellchecking_exclusions.dic b/eng/spellchecking_exclusions.dic index 72596816516..2abdfbd64a2 100644 Binary files a/eng/spellchecking_exclusions.dic and b/eng/spellchecking_exclusions.dic differ diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseExtensions.cs index 61e4a494d33..01cdb8dc322 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseExtensions.cs @@ -180,6 +180,55 @@ static async Task ToChatResponseAsync( } } + /// Coalesces sequential content elements. + internal static void CoalesceTextContent(List contents) + { + StringBuilder? coalescedText = null; + + // Iterate through all of the items in the list looking for contiguous items that can be coalesced. + int start = 0; + while (start < contents.Count - 1) + { + // We need at least two TextContents in a row to be able to coalesce. + if (contents[start] is not TextContent firstText) + { + start++; + continue; + } + + if (contents[start + 1] is not TextContent secondText) + { + start += 2; + continue; + } + + // Append the text from those nodes and continue appending subsequent TextContents until we run out. + // We null out nodes as their text is appended so that we can later remove them all in one O(N) operation. + coalescedText ??= new(); + _ = coalescedText.Clear().Append(firstText.Text).Append(secondText.Text); + contents[start + 1] = null!; + int i = start + 2; + for (; i < contents.Count && contents[i] is TextContent next; i++) + { + _ = coalescedText.Append(next.Text); + contents[i] = null!; + } + + // Store the replacement node. + contents[start] = new TextContent(coalescedText.ToString()) + { + // We inherit the properties of the first text node. 
We don't currently propagate additional + // properties from the subsequent nodes. If we ever need to, we can add that here. + AdditionalProperties = firstText.AdditionalProperties?.Clone(), + }; + + start = i; + } + + // Remove all of the null slots left over from the coalescing process. + _ = contents.RemoveAll(u => u is null); + } + /// Finalizes the object. private static void FinalizeResponse(ChatResponse response) { @@ -296,53 +345,4 @@ private static void ProcessUpdate(ChatResponseUpdate update, ChatResponse respon } } } - - /// Coalesces sequential content elements. - private static void CoalesceTextContent(List contents) - { - StringBuilder? coalescedText = null; - - // Iterate through all of the items in the list looking for contiguous items that can be coalesced. - int start = 0; - while (start < contents.Count - 1) - { - // We need at least two TextContents in a row to be able to coalesce. - if (contents[start] is not TextContent firstText) - { - start++; - continue; - } - - if (contents[start + 1] is not TextContent secondText) - { - start += 2; - continue; - } - - // Append the text from those nodes and continue appending subsequent TextContents until we run out. - // We null out nodes as their text is appended so that we can later remove them all in one O(N) operation. - coalescedText ??= new(); - _ = coalescedText.Clear().Append(firstText.Text).Append(secondText.Text); - contents[start + 1] = null!; - int i = start + 2; - for (; i < contents.Count && contents[i] is TextContent next; i++) - { - _ = coalescedText.Append(next.Text); - contents[i] = null!; - } - - // Store the replacement node. - contents[start] = new TextContent(coalescedText.ToString()) - { - // We inherit the properties of the first text node. We don't currently propagate additional - // properties from the subsequent nodes. If we ever need to, we can add that here. 
- AdditionalProperties = firstText.AdditionalProperties?.Clone(), - }; - - start = i; - } - - // Remove all of the null slots left over from the coalescing process. - _ = contents.RemoveAll(u => u is null); - } } diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/AIContent.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/AIContent.cs index 6562b7bcc42..068bd1ce447 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/AIContent.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/AIContent.cs @@ -8,6 +8,7 @@ namespace Microsoft.Extensions.AI; /// Provides a base class for all content used with AI services. [JsonPolymorphic(TypeDiscriminatorPropertyName = "$type")] [JsonDerivedType(typeof(DataContent), typeDiscriminator: "data")] +[JsonDerivedType(typeof(ErrorContent), typeDiscriminator: "error")] [JsonDerivedType(typeof(FunctionCallContent), typeDiscriminator: "functionCall")] [JsonDerivedType(typeof(FunctionResultContent), typeDiscriminator: "functionResult")] [JsonDerivedType(typeof(TextContent), typeDiscriminator: "text")] diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/ErrorContent.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/ErrorContent.cs new file mode 100644 index 00000000000..ceca3002f88 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Contents/ErrorContent.cs @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Text.Json.Serialization; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Represents an error. +/// +/// Typically, is used for non-fatal errors, where something went wrong +/// as part of the operation but the operation was still able to continue. 
+/// +[DebuggerDisplay("{DebuggerDisplay,nq}")] +public class ErrorContent : AIContent +{ + /// The error message. + private string _message; + + /// Initializes a new instance of the class with the specified message. + /// The message to store in this content. + [JsonConstructor] + public ErrorContent(string message) + { + _message = Throw.IfNull(message); + } + + /// Gets or sets the error message. + public string Message + { + get => _message; + set => _message = Throw.IfNull(value); + } + + /// Gets or sets the error code. + public string? ErrorCode { get; set; } + + /// Gets or sets the error details. + public string? Details { get; set; } + + /// Gets a string representing this instance to display in the debugger. + [DebuggerBrowsable(DebuggerBrowsableState.Never)] + private string DebuggerDisplay => + $"Error = {Message}" + + (ErrorCode is not null ? $" ({ErrorCode})" : string.Empty) + + (Details is not null ? $" - {Details}" : string.Empty); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Microsoft.Extensions.AI.Abstractions.csproj b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Microsoft.Extensions.AI.Abstractions.csproj index da24217861e..27a2c5d0513 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Microsoft.Extensions.AI.Abstractions.csproj +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Microsoft.Extensions.AI.Abstractions.csproj @@ -16,16 +16,18 @@ $(TargetFrameworks);netstandard2.0 $(NoWarn);CA2227;CA1034;SA1316;S3253 + $(NoWarn);MEAI001 true true + true true + true true true true - true diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/DelegatingSpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/DelegatingSpeechToTextClient.cs new file mode 100644 index 00000000000..6cbe2392e4c --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/DelegatingSpeechToTextClient.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET 
Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Provides an optional base class for an that passes through calls to another instance. +/// +/// +/// This is recommended as a base type when building clients that can be chained in any order around an underlying . +/// The default implementation simply passes each call to the inner client instance. +/// +[Experimental("MEAI001")] +public class DelegatingSpeechToTextClient : ISpeechToTextClient +{ + /// + /// Initializes a new instance of the class. + /// + /// The wrapped client instance. + protected DelegatingSpeechToTextClient(ISpeechToTextClient innerClient) + { + InnerClient = Throw.IfNull(innerClient); + } + + /// + public void Dispose() + { + Dispose(disposing: true); + GC.SuppressFinalize(this); + } + + /// Gets the inner . + protected ISpeechToTextClient InnerClient { get; } + + /// + public virtual Task GetTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) + { + return InnerClient.GetTextAsync(audioSpeechStream, options, cancellationToken); + } + + /// + public virtual IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) + { + return InnerClient.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken); + } + + /// + public virtual object? GetService(Type serviceType, object? serviceKey = null) + { + _ = Throw.IfNull(serviceType); + + // If the key is non-null, we don't know what it means so pass through to the inner service. + return + serviceKey is null && serviceType.IsInstanceOfType(this) ? 
this : + InnerClient.GetService(serviceType, serviceKey); + } + + /// Provides a mechanism for releasing unmanaged resources. + /// if being called from ; otherwise, . + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + InnerClient.Dispose(); + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/ISpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/ISpeechToTextClient.cs new file mode 100644 index 00000000000..65458d6602c --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/ISpeechToTextClient.cs @@ -0,0 +1,61 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.AI; + +/// Represents a speech to text client. +/// +/// +/// Unless otherwise specified, all members of are thread-safe for concurrent use. +/// It is expected that all implementations of support being used by multiple requests concurrently. +/// +/// +/// However, implementations of might mutate the arguments supplied to and +/// , such as by configuring the options instance. Thus, consumers of the interface either should avoid +/// using shared instances of these arguments for concurrent invocations or should otherwise ensure by construction that no +/// instances are used which might employ such mutation. For example, the ConfigureOptions method be +/// provided with a callback that could mutate the supplied options argument, and that should be avoided if using a singleton options instance. +/// The audio speech stream passed to these methods will not be closed or disposed by the implementation. 
+/// +/// +[Experimental("MEAI001")] +public interface ISpeechToTextClient : IDisposable +{ + /// Sends audio speech content to the model and returns the generated text. + /// The audio speech stream to send. + /// The speech to text options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// The text generated. + Task GetTextAsync( + Stream audioSpeechStream, + SpeechToTextOptions? options = null, + CancellationToken cancellationToken = default); + + /// Sends audio speech content to the model and streams back the generated text. + /// The audio speech stream to send. + /// The speech to text options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// The text updates representing the streamed output. + IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, + SpeechToTextOptions? options = null, + CancellationToken cancellationToken = default); + + /// Asks the for an object of the specified type . + /// The type of object being requested. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise . + /// is . + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that might be provided by the , + /// including itself or any services it might be wrapping. + /// + object? GetService(Type serviceType, object? serviceKey = null); +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientExtensions.cs new file mode 100644 index 00000000000..d8ca62f34ea --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientExtensions.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Runtime.InteropServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Extensions for . +[Experimental("MEAI001")] +public static class SpeechToTextClientExtensions +{ + /// Asks the for an object of type . + /// The type of the object to be retrieved. + /// The client. + /// An optional key that can be used to help identify the target service. + /// The found object, otherwise . + /// + /// The purpose of this method is to allow for the retrieval of strongly typed services that may be provided by the , + /// including itself or any services it might be wrapping. + /// + public static TService? GetService(this ISpeechToTextClient client, object? serviceKey = null) + { + _ = Throw.IfNull(client); + + return (TService?)client.GetService(typeof(TService), serviceKey); + } + + /// Generates text from speech providing a single audio speech . + /// The client. + /// The single audio speech content. + /// The speech to text options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// The text generated by the client. + public static Task GetTextAsync( + this ISpeechToTextClient client, + DataContent audioSpeechContent, + SpeechToTextOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(client); + _ = Throw.IfNull(audioSpeechContent); + + var audioSpeechStream = MemoryMarshal.TryGetArray(audioSpeechContent.Data, out var array) ? + new MemoryStream(array.Array!, array.Offset, array.Count) : + new MemoryStream(audioSpeechContent.Data.ToArray()); + + return client.GetTextAsync(audioSpeechStream, options, cancellationToken); + } + + /// Generates text from speech providing a single audio speech . + /// The client. + /// The single audio speech content. 
+ /// The speech to text options to configure the request. + /// The to monitor for cancellation requests. The default is . + /// The text generated by the client. + public static IAsyncEnumerable GetStreamingTextAsync( + this ISpeechToTextClient client, + DataContent audioSpeechContent, + SpeechToTextOptions? options = null, + CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(client); + _ = Throw.IfNull(audioSpeechContent); + + var audioSpeechStream = MemoryMarshal.TryGetArray(audioSpeechContent.Data, out var array) ? + new MemoryStream(array.Array!, array.Offset, array.Count) : + new MemoryStream(audioSpeechContent.Data.ToArray()); + + return client.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs new file mode 100644 index 00000000000..df39fb7facc --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextClientMetadata.cs @@ -0,0 +1,43 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.Extensions.AI; + +/// Provides metadata about an . +[Experimental("MEAI001")] +public class SpeechToTextClientMetadata +{ + /// Initializes a new instance of the class. + /// + /// The name of the speech to text provider, if applicable. Where possible, this should map to the + /// appropriate name defined in the OpenTelemetry Semantic Conventions for Generative AI systems. + /// + /// The URL for accessing the speech to text provider, if applicable. + /// The ID of the speech to text used by default, if applicable. + public SpeechToTextClientMetadata(string? providerName = null, Uri? providerUri = null, string? 
defaultModelId = null) + { + DefaultModelId = defaultModelId; + ProviderName = providerName; + ProviderUri = providerUri; + } + + /// Gets the name of the speech to text provider. + /// + /// Where possible, this maps to the appropriate name defined in the + /// OpenTelemetry Semantic Conventions for Generative AI systems. + /// + public string? ProviderName { get; } + + /// Gets the URL for accessing the speech to text provider. + public Uri? ProviderUri { get; } + + /// Gets the ID of the default model used by this speech to text client. + /// + /// This value can be null if either the name is unknown or there are multiple possible models associated with this instance. + /// An individual request may override this value via . + /// + public string? DefaultModelId { get; } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs new file mode 100644 index 00000000000..cb196a4c91c --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextOptions.cs @@ -0,0 +1,42 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; + +namespace Microsoft.Extensions.AI; + +/// Represents the options for an speech to text request. +[Experimental("MEAI001")] +public class SpeechToTextOptions +{ + /// Gets or sets the model ID for the speech to text. + public string? ModelId { get; set; } + + /// Gets or sets the language of source speech. + public string? SpeechLanguage { get; set; } + + /// Gets or sets the language for the target generated text. + public string? TextLanguage { get; set; } + + /// Gets or sets the sample rate of the speech input audio. + public int? SpeechSampleRate { get; set; } + + /// Gets or sets any additional properties associated with the options. 
+ public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// Produces a clone of the current instance. + /// A clone of the current instance. + public virtual SpeechToTextOptions Clone() + { + SpeechToTextOptions options = new() + { + ModelId = ModelId, + SpeechLanguage = SpeechLanguage, + TextLanguage = TextLanguage, + SpeechSampleRate = SpeechSampleRate, + AdditionalProperties = AdditionalProperties?.Clone(), + }; + + return options; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponse.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponse.cs new file mode 100644 index 00000000000..24fa20a11ed --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponse.cs @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using Microsoft.Shared.Diagnostics; + +#pragma warning disable EA0011 // Consider removing unnecessary conditional access operators + +namespace Microsoft.Extensions.AI; + +/// Represents the result of an speech to text request. +[Experimental("MEAI001")] +public class SpeechToTextResponse +{ + /// The content items in the generated text response. + private IList? _contents; + + /// Initializes a new instance of the class. + [JsonConstructor] + public SpeechToTextResponse() + { + } + + /// Initializes a new instance of the class. + /// The contents for this response. + public SpeechToTextResponse(IList contents) + { + _contents = Throw.IfNull(contents); + } + + /// Initializes a new instance of the class. + /// Content of the response. + public SpeechToTextResponse(string? content) + : this(content is null ? 
[] : [new TextContent(content)]) + { + } + + /// Gets or sets the start time of the text segment in relation to the full audio speech length. + public TimeSpan? StartTime { get; set; } + + /// Gets or sets the end time of the text segment in relation to the full audio speech length. + public TimeSpan? EndTime { get; set; } + + /// Gets or sets the ID of the speech to text response. + public string? ResponseId { get; set; } + + /// Gets or sets the model ID used in the creation of the speech to text completion. + public string? ModelId { get; set; } + + /// Gets or sets the raw representation of the speech to text completion from an underlying implementation. + /// + /// If a is created to represent some underlying object from another object + /// model, this property can be used to store that original object. This can be useful for debugging or + /// for enabling a consumer to access the underlying object model if needed. + /// + [JsonIgnore] + public object? RawRepresentation { get; set; } + + /// Gets or sets any additional properties associated with the speech to text completion. + public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// Gets the text of this speech to text response. + /// + /// This property concatenates the text of all objects in . + /// + [JsonIgnore] + public string Text => _contents?.ConcatText() ?? string.Empty; + + /// + public override string ToString() => Text; + + /// Creates an array of instances that represent this . + /// An array of instances that may be used to represent this . 
+ public SpeechToTextResponseUpdate[] ToSpeechToTextResponseUpdates() + { + SpeechToTextResponseUpdate update = new SpeechToTextResponseUpdate + { + Contents = Contents, + AdditionalProperties = AdditionalProperties, + RawRepresentation = RawRepresentation, + StartTime = StartTime, + EndTime = EndTime, + Kind = SpeechToTextResponseUpdateKind.TextUpdated, + ResponseId = ResponseId, + ModelId = ModelId, + }; + + return [update]; + } + + /// Gets or sets the generated content items. + [AllowNull] + public IList Contents + { + get => _contents ??= []; + set => _contents = value; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdate.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdate.cs new file mode 100644 index 00000000000..24b7f079302 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdate.cs @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json.Serialization; +using Microsoft.Shared.Diagnostics; + +#pragma warning disable EA0011 // Consider removing unnecessary conditional access operators + +namespace Microsoft.Extensions.AI; + +/// +/// Represents a single streaming response chunk from an . +/// +/// +/// is so named because it represents streaming updates +/// to an speech to text generation. As such, it is considered erroneous for multiple updates that are part +/// of the same audio speech to contain competing values. For example, some updates that are part of +/// the same audio speech may have a value, and others may have a non- value, +/// but all of those with a non- value must have the same value (e.g. ). 
+/// +/// +/// The relationship between and is +/// codified in the and +/// , which enable bidirectional conversions +/// between the two. Note, however, that the conversion may be slightly lossy, for example if multiple updates +/// all have different objects whereas there's +/// only one slot for such an object available in . +/// +/// +[Experimental("MEAI001")] +public class SpeechToTextResponseUpdate +{ + private IList? _contents; + + /// Initializes a new instance of the class. + [JsonConstructor] + public SpeechToTextResponseUpdate() + { + } + + /// Initializes a new instance of the class. + /// The contents for this message. + public SpeechToTextResponseUpdate(IList contents) + { + _contents = Throw.IfNull(contents); + } + + /// Initializes a new instance of the class. + /// Content of the message. + public SpeechToTextResponseUpdate(string? content) + : this(content is null ? [] : [new TextContent(content)]) + { + } + + /// Gets or sets the kind of the generated text update. + public SpeechToTextResponseUpdateKind Kind { get; set; } = SpeechToTextResponseUpdateKind.TextUpdating; + + /// Gets or sets the ID of the generated text response of which this update is a part. + public string? ResponseId { get; set; } + + /// Gets or sets the start time of the text segment associated with this update in relation to the full audio speech length. + public TimeSpan? StartTime { get; set; } + + /// Gets or sets the end time of the text segment associated with this update in relation to the full audio speech length. + public TimeSpan? EndTime { get; set; } + + /// Gets or sets the model ID using in the creation of the speech to text of which this update is a part. + public string? ModelId { get; set; } + + /// Gets or sets the raw representation of the generated text update from an underlying implementation. + /// + /// If a is created to represent some underlying object from another object + /// model, this property can be used to store that original object. 
This can be useful for debugging or + /// for enabling a consumer to access the underlying object model if needed. + /// + [JsonIgnore] + public object? RawRepresentation { get; set; } + + /// Gets or sets additional properties for the update. + public AdditionalPropertiesDictionary? AdditionalProperties { get; set; } + + /// Gets the text of this speech to text response. + /// + /// This property concatenates the text of all objects in . + /// + [JsonIgnore] + public string Text => _contents?.ConcatText() ?? string.Empty; + + /// Gets or sets the generated content items. + [AllowNull] + public IList Contents + { + get => _contents ??= []; + set => _contents = value; + } + + /// + public override string ToString() => Text; +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdateExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdateExtensions.cs new file mode 100644 index 00000000000..230ec838ba3 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdateExtensions.cs @@ -0,0 +1,145 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Provides extension methods for working with instances. +/// +[Experimental("MEAI001")] +public static class SpeechToTextResponseUpdateExtensions +{ + /// Combines instances into a single . + /// The updates to be combined. + /// The combined . + public static SpeechToTextResponse ToSpeechToTextResponse( + this IEnumerable updates) + { + _ = Throw.IfNull(updates); + + SpeechToTextResponse response = new(); + List contents = []; + string? responseId = null; + string? 
modelId = null; + AdditionalPropertiesDictionary? additionalProperties = null; + + TimeSpan? endTime = null; + foreach (var update in updates) + { + // Track the first start time provided by the updates + response.StartTime ??= update.StartTime; + + // Track the last end time provided by the updates + if (update.EndTime is not null) + { + endTime = update.EndTime; + } + + ProcessUpdate(update, contents, ref responseId, ref modelId, ref additionalProperties); + } + + ChatResponseExtensions.CoalesceTextContent(contents); + response.EndTime = endTime; + response.Contents = contents; + response.ResponseId = responseId; + response.ModelId = modelId; + response.AdditionalProperties = additionalProperties; + + return response; + } + + /// Combines instances into a single . + /// The updates to be combined. + /// The to monitor for cancellation requests. The default is . + /// The combined . + public static Task ToSpeechToTextResponseAsync( + this IAsyncEnumerable updates, CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(updates); + + return ToResponseAsync(updates, cancellationToken); + + static async Task ToResponseAsync( + IAsyncEnumerable updates, CancellationToken cancellationToken) + { + SpeechToTextResponse response = new(); + List contents = []; + string? responseId = null; + string? modelId = null; + AdditionalPropertiesDictionary? additionalProperties = null; + + TimeSpan? 
endTime = null; + await foreach (var update in updates.WithCancellation(cancellationToken).ConfigureAwait(false)) + { + // Track the first start time provided by the updates + response.StartTime ??= update.StartTime; + + // Track the last end time provided by the updates + if (update.EndTime is not null) + { + endTime = update.EndTime; + } + + ProcessUpdate(update, contents, ref responseId, ref modelId, ref additionalProperties); + } + + ChatResponseExtensions.CoalesceTextContent(contents); + + response.EndTime = endTime; + response.Contents = contents; + response.ResponseId = responseId; + response.ModelId = modelId; + response.AdditionalProperties = additionalProperties; + + return response; + } + } + + /// Processes the , incorporating its contents and properties. + /// The update to process. + /// The list of content items being accumulated. + /// The response ID to update if the update has one. + /// The model ID to update if the update has one. + /// The additional properties to update if the update has any. + private static void ProcessUpdate( + SpeechToTextResponseUpdate update, + List contents, + ref string? responseId, + ref string? modelId, + ref AdditionalPropertiesDictionary? 
additionalProperties) + { + if (update.ResponseId is not null) + { + responseId = update.ResponseId; + } + + if (update.ModelId is not null) + { + modelId = update.ModelId; + } + + contents.AddRange(update.Contents); + + if (update.AdditionalProperties is not null) + { + if (additionalProperties is null) + { + additionalProperties = new(update.AdditionalProperties); + } + else + { + foreach (var entry in update.AdditionalProperties) + { + additionalProperties[entry.Key] = entry.Value; + } + } + } + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdateKind.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdateKind.cs new file mode 100644 index 00000000000..1a3d7b0a474 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/SpeechToText/SpeechToTextResponseUpdateKind.cs @@ -0,0 +1,104 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ComponentModel; +using System.Diagnostics.CodeAnalysis; +using System.Text.Json; +using System.Text.Json.Serialization; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// +/// Describes the intended purpose of a specific update during streaming of speech to text updates. +/// +[Experimental("MEAI001")] +[JsonConverter(typeof(Converter))] +public readonly struct SpeechToTextResponseUpdateKind : IEquatable +{ + /// Gets when the generated text session is opened. + public static SpeechToTextResponseUpdateKind SessionOpen { get; } = new("sessionopen"); + + /// Gets when a non-blocking error occurs during speech to text updates. + public static SpeechToTextResponseUpdateKind Error { get; } = new("error"); + + /// Gets when the text update is in progress, without waiting for silence. 
+ public static SpeechToTextResponseUpdateKind TextUpdating { get; } = new("textupdating"); + + /// Gets when the text was generated after small period of silence. + public static SpeechToTextResponseUpdateKind TextUpdated { get; } = new("textupdated"); + + /// Gets when the generated text session is closed. + public static SpeechToTextResponseUpdateKind SessionClose { get; } = new("sessionclose"); + + /// + /// Gets the value associated with this . + /// + /// + /// The value will be serialized into the "kind" message field of the speech to text update format. + /// + public string Value { get; } + + /// + /// Initializes a new instance of the struct with the provided value. + /// + /// The value to associate with this . + [JsonConstructor] + public SpeechToTextResponseUpdateKind(string value) + { + Value = Throw.IfNullOrWhitespace(value); + } + + /// + /// Returns a value indicating whether two instances are equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right are both null or have equivalent values; otherwise, . + public static bool operator ==(SpeechToTextResponseUpdateKind left, SpeechToTextResponseUpdateKind right) + { + return left.Equals(right); + } + + /// + /// Returns a value indicating whether two instances are not equivalent, as determined by a + /// case-insensitive comparison of their values. + /// + /// The first instance to compare. + /// The second instance to compare. + /// if left and right have different values; if they have equivalent values or are both null. + public static bool operator !=(SpeechToTextResponseUpdateKind left, SpeechToTextResponseUpdateKind right) + { + return !(left == right); + } + + /// + public override bool Equals([NotNullWhen(true)] object? 
obj) + => obj is SpeechToTextResponseUpdateKind otherRole && Equals(otherRole); + + /// + public bool Equals(SpeechToTextResponseUpdateKind other) + => string.Equals(Value, other.Value, StringComparison.OrdinalIgnoreCase); + + /// + public override int GetHashCode() + => StringComparer.OrdinalIgnoreCase.GetHashCode(Value); + + /// + public override string ToString() => Value; + + /// Provides a for serializing instances. + [EditorBrowsable(EditorBrowsableState.Never)] + public sealed class Converter : JsonConverter + { + /// + public override SpeechToTextResponseUpdateKind Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) => + new(reader.GetString()!); + + /// + public override void Write(Utf8JsonWriter writer, SpeechToTextResponseUpdateKind value, JsonSerializerOptions options) + => Throw.IfNull(writer).WriteStringValue(value.Value); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs index c85d7791cb6..67ddfcbc8d7 100644 --- a/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs +++ b/src/Libraries/Microsoft.Extensions.AI.Abstractions/Utilities/AIJsonUtilities.Defaults.cs @@ -77,6 +77,11 @@ private static JsonSerializerOptions CreateDefaultOptions() UseStringEnumConverter = true, DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull, WriteIndented = true)] + [JsonSerializable(typeof(SpeechToTextOptions))] + [JsonSerializable(typeof(SpeechToTextClientMetadata))] + [JsonSerializable(typeof(SpeechToTextResponse))] + [JsonSerializable(typeof(SpeechToTextResponseUpdate))] + [JsonSerializable(typeof(IReadOnlyList))] [JsonSerializable(typeof(IList))] [JsonSerializable(typeof(ChatOptions))] [JsonSerializable(typeof(EmbeddingGenerationOptions))] diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj 
b/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj index 18bfe009184..552d45f0fc6 100644 --- a/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/Microsoft.Extensions.AI.OpenAI.csproj @@ -16,17 +16,19 @@ $(TargetFrameworks);netstandard2.0 $(NoWarn);CA1063;CA1508;CA2227;SA1316;S1121;S3358;EA0002;OPENAI002 + $(NoWarn);MEAI001 true true true + true + true + true true - true true - true - true + true diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs index 6b330e4da00..c2753379974 100644 --- a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAIClientExtensions.cs @@ -3,8 +3,10 @@ using System; using System.ComponentModel; +using System.Diagnostics.CodeAnalysis; using Microsoft.Shared.Diagnostics; using OpenAI; +using OpenAI.Audio; using OpenAI.Chat; using OpenAI.Embeddings; using OpenAI.Responses; @@ -35,6 +37,13 @@ public static IChatClient AsIChatClient(this ChatClient chatClient) => public static IChatClient AsIChatClient(this OpenAIResponseClient responseClient) => new OpenAIResponseChatClient(responseClient); + /// Gets an for use with this . + /// The client. + /// An that can be used to transcribe audio via the . + [Experimental("MEAI001")] + public static ISpeechToTextClient AsISpeechToTextClient(this AudioClient audioClient) => + new OpenAISpeechToTextClient(audioClient); + /// Gets an for use with this . /// The client. /// The model to use. 
diff --git a/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAISpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAISpeechToTextClient.cs new file mode 100644 index 00000000000..78fe00a8377 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI.OpenAI/OpenAISpeechToTextClient.cs @@ -0,0 +1,278 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.Diagnostics; +using OpenAI; +using OpenAI.Audio; + +#pragma warning disable S1067 // Expressions should not be too complex +#pragma warning disable S3011 // Reflection should not be used to increase accessibility of classes, methods, or fields + +namespace Microsoft.Extensions.AI; + +/// Represents an for an OpenAI or . +[Experimental("MEAI001")] +internal sealed class OpenAISpeechToTextClient : ISpeechToTextClient +{ + /// Default OpenAI endpoint. + private static readonly Uri _defaultOpenAIEndpoint = new("https://api.openai.com/v1"); + + /// Metadata about the client. + private readonly SpeechToTextClientMetadata _metadata; + + /// The underlying . + private readonly AudioClient _audioClient; + + /// Initializes a new instance of the class for the specified . + /// The underlying client. + public OpenAISpeechToTextClient(AudioClient audioClient) + { + _ = Throw.IfNull(audioClient); + + _audioClient = audioClient; + + // https://github.com/openai/openai-dotnet/issues/215 + // The endpoint and model aren't currently exposed, so use reflection to get at them, temporarily. 
Once packages + // implement the abstractions directly rather than providing adapters on top of the public APIs, + // the package can provide such implementations separate from what's exposed in the public API. + Uri providerUrl = typeof(AudioClient).GetField("_endpoint", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance) + ?.GetValue(audioClient) as Uri ?? _defaultOpenAIEndpoint; + string? model = typeof(AudioClient).GetField("_model", BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance) + ?.GetValue(audioClient) as string; + + _metadata = new("openai", providerUrl, model); + } + + /// + public object? GetService(Type serviceType, object? serviceKey = null) + { + _ = Throw.IfNull(serviceType); + + return + serviceKey is not null ? null : + serviceType == typeof(SpeechToTextClientMetadata) ? _metadata : + serviceType == typeof(AudioClient) ? _audioClient : + serviceType.IsInstanceOfType(this) ? this : + null; + } + + /// + public async IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(audioSpeechStream); + + var speechResponse = await GetTextAsync(audioSpeechStream, options, cancellationToken).ConfigureAwait(false); + + foreach (var update in speechResponse.ToSpeechToTextResponseUpdates()) + { + yield return update; + } + } + + /// + public async Task GetTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) + { + _ = Throw.IfNull(audioSpeechStream); + + SpeechToTextResponse response = new(); + + // A translation is triggered when the target text language is specified and the source language is not provided or different. + static bool IsTranslationRequest(SpeechToTextOptions? 
options) + => options is not null && options.TextLanguage is not null + && (options.SpeechLanguage is null || options.SpeechLanguage != options.TextLanguage); + + if (IsTranslationRequest(options)) + { + _ = Throw.IfNull(options); + + var openAIOptions = ToOpenAITranslationOptions(options); + AudioTranslation translationResult; + +#if NET + await using (audioSpeechStream.ConfigureAwait(false)) +#else + using (audioSpeechStream) +#endif + { + translationResult = (await _audioClient.TranslateAudioAsync( + audioSpeechStream, + "file.wav", // this information internally is required but is only being used to create a header name in the multipart request. + openAIOptions, cancellationToken).ConfigureAwait(false)).Value; + } + + UpdateResponseFromOpenAIAudioTranslation(response, translationResult); + } + else + { + var openAIOptions = ToOpenAITranscriptionOptions(options); + + // Transcription request + AudioTranscription transcriptionResult; + +#if NET + await using (audioSpeechStream.ConfigureAwait(false)) +#else + using (audioSpeechStream) +#endif + { + transcriptionResult = (await _audioClient.TranscribeAudioAsync( + audioSpeechStream, + "file.wav", // this information internally is required but is only being used to create a header name in the multipart request. + openAIOptions, cancellationToken).ConfigureAwait(false)).Value; + } + + UpdateResponseFromOpenAIAudioTranscription(response, transcriptionResult); + } + + return response; + } + + /// + void IDisposable.Dispose() + { + // Nothing to dispose. Implementation required for the IAudioTranscriptionClient interface. + } + + /// Updates a from an OpenAI . + /// The response to update. + /// The OpenAI audio transcription. 
+ private static void UpdateResponseFromOpenAIAudioTranscription(SpeechToTextResponse response, AudioTranscription audioTranscription) + { + _ = Throw.IfNull(audioTranscription); + + var segmentCount = audioTranscription.Segments.Count; + var wordCount = audioTranscription.Words.Count; + + TimeSpan? endTime = null; + TimeSpan? startTime = null; + if (segmentCount > 0) + { + endTime = audioTranscription.Segments[segmentCount - 1].EndTime; + startTime = audioTranscription.Segments[0].StartTime; + } + else if (wordCount > 0) + { + endTime = audioTranscription.Words[wordCount - 1].EndTime; + startTime = audioTranscription.Words[0].StartTime; + } + + // Update the response + response.RawRepresentation = audioTranscription; + response.Contents = [new TextContent(audioTranscription.Text)]; + response.StartTime = startTime; + response.EndTime = endTime; + response.AdditionalProperties = new AdditionalPropertiesDictionary + { + [nameof(audioTranscription.Language)] = audioTranscription.Language, + [nameof(audioTranscription.Duration)] = audioTranscription.Duration + }; + } + + /// Converts an extensions options instance to an OpenAI options instance. + private static AudioTranscriptionOptions ToOpenAITranscriptionOptions(SpeechToTextOptions? options) + { + AudioTranscriptionOptions result = new(); + + if (options is not null) + { + if (options.SpeechLanguage is not null) + { + result.Language = options.SpeechLanguage; + } + + if (options.AdditionalProperties is { Count: > 0 } additionalProperties) + { + if (additionalProperties.TryGetValue(nameof(result.Temperature), out float? temperature)) + { + result.Temperature = temperature; + } + + if (additionalProperties.TryGetValue(nameof(result.TimestampGranularities), out object? timestampGranularities)) + { + result.TimestampGranularities = timestampGranularities is AudioTimestampGranularities granularities ? 
granularities : default; + } + + if (additionalProperties.TryGetValue(nameof(result.ResponseFormat), out AudioTranscriptionFormat? responseFormat)) + { + result.ResponseFormat = responseFormat; + } + + if (additionalProperties.TryGetValue(nameof(result.Prompt), out string? prompt)) + { + result.Prompt = prompt; + } + } + } + + return result; + } + + /// Updates a from an OpenAI . + /// The response to update. + /// The OpenAI audio translation. + private static void UpdateResponseFromOpenAIAudioTranslation(SpeechToTextResponse response, AudioTranslation audioTranslation) + { + _ = Throw.IfNull(audioTranslation); + + var segmentCount = audioTranslation.Segments.Count; + + TimeSpan? endTime = null; + TimeSpan? startTime = null; + if (segmentCount > 0) + { + endTime = audioTranslation.Segments[segmentCount - 1].EndTime; + startTime = audioTranslation.Segments[0].StartTime; + } + + // Update the response + response.RawRepresentation = audioTranslation; + response.Contents = [new TextContent(audioTranslation.Text)]; + response.StartTime = startTime; + response.EndTime = endTime; + response.AdditionalProperties = new AdditionalPropertiesDictionary + { + [nameof(audioTranslation.Language)] = audioTranslation.Language, + [nameof(audioTranslation.Duration)] = audioTranslation.Duration + }; + } + + /// Converts an extensions options instance to an OpenAI options instance. + private static AudioTranslationOptions ToOpenAITranslationOptions(SpeechToTextOptions? options) + { + AudioTranslationOptions result = new(); + + if (options is not null) + { + if (options.AdditionalProperties is { Count: > 0 } additionalProperties) + { + if (additionalProperties.TryGetValue(nameof(result.Temperature), out float? temperature)) + { + result.Temperature = temperature; + } + + if (additionalProperties.TryGetValue(nameof(result.ResponseFormat), out AudioTranslationFormat? 
responseFormat)) + { + result.ResponseFormat = responseFormat; + } + + if (additionalProperties.TryGetValue(nameof(result.Prompt), out string? prompt)) + { + result.Prompt = prompt; + } + } + } + + return result; + } +} + diff --git a/src/Libraries/Microsoft.Extensions.AI/Microsoft.Extensions.AI.csproj b/src/Libraries/Microsoft.Extensions.AI/Microsoft.Extensions.AI.csproj index 10f590639ec..378c3e49dfd 100644 --- a/src/Libraries/Microsoft.Extensions.AI/Microsoft.Extensions.AI.csproj +++ b/src/Libraries/Microsoft.Extensions.AI/Microsoft.Extensions.AI.csproj @@ -23,6 +23,7 @@ + true true true false diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/ConfigureOptionsSpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/ConfigureOptionsSpeechToTextClient.cs new file mode 100644 index 00000000000..85833a3c171 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/ConfigureOptionsSpeechToTextClient.cs @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Represents a delegating chat client that configures a instance used by the remainder of the pipeline. +[Experimental("MEAI001")] +public sealed class ConfigureOptionsSpeechToTextClient : DelegatingSpeechToTextClient +{ + /// The callback delegate used to configure options. + private readonly Action _configureOptions; + + /// Initializes a new instance of the class with the specified callback. + /// The inner client. + /// + /// The delegate to invoke to configure the instance. 
It is passed a clone of the caller-supplied instance + /// (or a newly constructed instance if the caller-supplied instance is ). + /// + /// + /// The delegate is passed either a new instance of if + /// the caller didn't supply a instance, or a clone (via of the caller-supplied + /// instance if one was supplied. + /// + public ConfigureOptionsSpeechToTextClient(ISpeechToTextClient innerClient, Action configure) + : base(innerClient) + { + _configureOptions = Throw.IfNull(configure); + } + + /// + public override async Task GetTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, CancellationToken cancellationToken = default) + { + return await base.GetTextAsync(audioSpeechStream, Configure(options), cancellationToken).ConfigureAwait(false); + } + + /// + public override async IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + await foreach (var update in base.GetStreamingTextAsync(audioSpeechStream, Configure(options), cancellationToken).ConfigureAwait(false)) + { + yield return update; + } + } + + /// Creates and configures the to pass along to the inner client. + private SpeechToTextOptions Configure(SpeechToTextOptions? options) + { + options = options?.Clone() ?? new(); + + _configureOptions(options); + + return options; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/ConfigureOptionsSpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/ConfigureOptionsSpeechToTextClientBuilderExtensions.cs new file mode 100644 index 00000000000..037d25a14d5 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/ConfigureOptionsSpeechToTextClientBuilderExtensions.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.Diagnostics; + +#pragma warning disable SA1629 // Documentation text should end with a period + +namespace Microsoft.Extensions.AI; + +/// Provides extensions for configuring instances. +[Experimental("MEAI001")] +public static class ConfigureOptionsSpeechToTextClientBuilderExtensions +{ + /// + /// Adds a callback that configures a to be passed to the next client in the pipeline. + /// + /// The . + /// + /// The delegate to invoke to configure the instance. + /// It is passed a clone of the caller-supplied instance (or a newly constructed instance if the caller-supplied instance is ). + /// + /// + /// This method can be used to set default options. The delegate is passed either a new instance of + /// if the caller didn't supply a instance, or a clone (via ) + /// of the caller-supplied instance if one was supplied. + /// + /// The . + public static SpeechToTextClientBuilder ConfigureOptions( + this SpeechToTextClientBuilder builder, Action configure) + { + _ = Throw.IfNull(builder); + _ = Throw.IfNull(configure); + + return builder.Use(innerClient => new ConfigureOptionsSpeechToTextClient(innerClient, configure)); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClient.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClient.cs new file mode 100644 index 00000000000..4494d319dc0 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/LoggingSpeechToTextClient.cs @@ -0,0 +1,199 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Runtime.CompilerServices; +using System.Text.Json; +using System.Threading; +using System.Threading.Tasks; +using Microsoft.Extensions.Logging; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// A delegating speech to text client that logs speech to text operations to an . +/// +/// The provided implementation of is thread-safe for concurrent use so long as the +/// employed is also thread-safe for concurrent use. +/// +[Experimental("MEAI001")] +public partial class LoggingSpeechToTextClient : DelegatingSpeechToTextClient +{ + /// An instance used for all logging. + private readonly ILogger _logger; + + /// The to use for serialization of state written to the logger. + private JsonSerializerOptions _jsonSerializerOptions; + + /// Initializes a new instance of the class. + /// The underlying . + /// An instance that will be used for all logging. + public LoggingSpeechToTextClient(ISpeechToTextClient innerClient, ILogger logger) + : base(innerClient) + { + _logger = Throw.IfNull(logger); + _jsonSerializerOptions = AIJsonUtilities.DefaultOptions; + } + + /// Gets or sets JSON serialization options to use when serializing logging data. + public JsonSerializerOptions JsonSerializerOptions + { + get => _jsonSerializerOptions; + set => _jsonSerializerOptions = Throw.IfNull(value); + } + + /// + public override async Task GetTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? 
options = null, CancellationToken cancellationToken = default) + { + if (_logger.IsEnabled(LogLevel.Debug)) + { + if (_logger.IsEnabled(LogLevel.Trace)) + { + LogInvokedSensitive(nameof(GetTextAsync), AsJson(options), AsJson(this.GetService())); + } + else + { + LogInvoked(nameof(GetTextAsync)); + } + } + + try + { + var response = await base.GetTextAsync(audioSpeechStream, options, cancellationToken).ConfigureAwait(false); + + if (_logger.IsEnabled(LogLevel.Debug)) + { + if (_logger.IsEnabled(LogLevel.Trace)) + { + LogCompletedSensitive(nameof(GetTextAsync), AsJson(response)); + } + else + { + LogCompleted(nameof(GetTextAsync)); + } + } + + return response; + } + catch (OperationCanceledException) + { + LogInvocationCanceled(nameof(GetTextAsync)); + throw; + } + catch (Exception ex) + { + LogInvocationFailed(nameof(GetTextAsync), ex); + throw; + } + } + + /// + public override async IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, SpeechToTextOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default) + { + if (_logger.IsEnabled(LogLevel.Debug)) + { + if (_logger.IsEnabled(LogLevel.Trace)) + { + LogInvokedSensitive(nameof(GetStreamingTextAsync), AsJson(options), AsJson(this.GetService())); + } + else + { + LogInvoked(nameof(GetStreamingTextAsync)); + } + } + + IAsyncEnumerator e; + try + { + e = base.GetStreamingTextAsync(audioSpeechStream, options, cancellationToken).GetAsyncEnumerator(cancellationToken); + } + catch (OperationCanceledException) + { + LogInvocationCanceled(nameof(GetStreamingTextAsync)); + throw; + } + catch (Exception ex) + { + LogInvocationFailed(nameof(GetStreamingTextAsync), ex); + throw; + } + + try + { + SpeechToTextResponseUpdate? 
update = null; + while (true) + { + try + { + if (!await e.MoveNextAsync().ConfigureAwait(false)) + { + break; + } + + update = e.Current; + } + catch (OperationCanceledException) + { + LogInvocationCanceled(nameof(GetStreamingTextAsync)); + throw; + } + catch (Exception ex) + { + LogInvocationFailed(nameof(GetStreamingTextAsync), ex); + throw; + } + + if (_logger.IsEnabled(LogLevel.Debug)) + { + if (_logger.IsEnabled(LogLevel.Trace)) + { + LogStreamingUpdateSensitive(AsJson(update)); + } + else + { + LogStreamingUpdate(); + } + } + + yield return update; + } + + LogCompleted(nameof(GetStreamingTextAsync)); + } + finally + { + await e.DisposeAsync().ConfigureAwait(false); + } + } + + private string AsJson(T value) => LoggingHelpers.AsJson(value, _jsonSerializerOptions); + + [LoggerMessage(LogLevel.Debug, "{MethodName} invoked.")] + private partial void LogInvoked(string methodName); + + [LoggerMessage(LogLevel.Trace, "{MethodName} invoked: Options: {SpeechToTextOptions}. Metadata: {SpeechToTextClientMetadata}.")] + private partial void LogInvokedSensitive(string methodName, string speechToTextOptions, string speechToTextClientMetadata); + + [LoggerMessage(LogLevel.Debug, "{MethodName} completed.")] + private partial void LogCompleted(string methodName); + + [LoggerMessage(LogLevel.Trace, "{MethodName} completed: {SpeechToTextResponse}.")] + private partial void LogCompletedSensitive(string methodName, string speechToTextResponse); + + [LoggerMessage(LogLevel.Debug, "GetStreamingTextAsync received update.")] + private partial void LogStreamingUpdate(); + + [LoggerMessage(LogLevel.Trace, "GetStreamingTextAsync received update: {SpeechToTextResponseUpdate}")] + private partial void LogStreamingUpdateSensitive(string speechToTextResponseUpdate); + + [LoggerMessage(LogLevel.Debug, "{MethodName} canceled.")] + private partial void LogInvocationCanceled(string methodName); + + [LoggerMessage(LogLevel.Error, "{MethodName} failed.")] + private partial void 
LogInvocationFailed(string methodName, Exception error); +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs new file mode 100644 index 00000000000..dae4224a94d --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilder.cs @@ -0,0 +1,81 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// A builder for creating pipelines of . +[Experimental("MEAI001")] +public sealed class SpeechToTextClientBuilder +{ + private readonly Func _innerClientFactory; + + /// The registered client factory instances. + private List>? _clientFactories; + + /// Initializes a new instance of the class. + /// The inner that represents the underlying backend. + public SpeechToTextClientBuilder(ISpeechToTextClient innerClient) + { + _ = Throw.IfNull(innerClient); + _innerClientFactory = _ => innerClient; + } + + /// Initializes a new instance of the class. + /// A callback that produces the inner that represents the underlying backend. + public SpeechToTextClientBuilder(Func innerClientFactory) + { + _innerClientFactory = Throw.IfNull(innerClientFactory); + } + + /// Builds an that represents the entire pipeline. Calls to this instance will pass through each of the pipeline stages in turn. + /// + /// The that should provide services to the instances. + /// If null, an empty will be used. + /// + /// An instance of that represents the entire pipeline. + public ISpeechToTextClient Build(IServiceProvider? 
services = null) + { + services ??= EmptyServiceProvider.Instance; + var audioClient = _innerClientFactory(services); + + // To match intuitive expectations, apply the factories in reverse order, so that the first factory added is the outermost. + if (_clientFactories is not null) + { + for (var i = _clientFactories.Count - 1; i >= 0; i--) + { + audioClient = _clientFactories[i](audioClient, services) ?? + throw new InvalidOperationException( + $"The {nameof(SpeechToTextClientBuilder)} entry at index {i} returned null. " + + $"Ensure that the callbacks passed to {nameof(Use)} return non-null {nameof(ISpeechToTextClient)} instances."); + } + } + + return audioClient; + } + + /// Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline. + /// The client factory function. + /// The updated instance. + public SpeechToTextClientBuilder Use(Func clientFactory) + { + _ = Throw.IfNull(clientFactory); + + return Use((innerClient, _) => clientFactory(innerClient)); + } + + /// Adds a factory for an intermediate audio transcription client to the audio transcription client pipeline. + /// The client factory function. + /// The updated instance. + public SpeechToTextClientBuilder Use(Func clientFactory) + { + _ = Throw.IfNull(clientFactory); + + (_clientFactories ??= []).Add(clientFactory); + return this; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderExtensions.cs new file mode 100644 index 00000000000..7ce2b19ac37 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderExtensions.cs @@ -0,0 +1,48 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extensions for configuring instances. +[Experimental("MEAI001")] +public static class SpeechToTextClientBuilderExtensions +{ + /// Adds logging to the audio transcription client pipeline. + /// The . + /// + /// An optional used to create a logger with which logging should be performed. + /// If not supplied, a required instance will be resolved from the service provider. + /// + /// An optional callback that can be used to configure the instance. + /// The . + public static SpeechToTextClientBuilder UseLogging( + this SpeechToTextClientBuilder builder, + ILoggerFactory? loggerFactory = null, + Action? configure = null) + { + _ = Throw.IfNull(builder); + + return builder.Use((innerClient, services) => + { + loggerFactory ??= services.GetRequiredService(); + + // If the factory we resolve is for the null logger, the LoggingAudioTranscriptionClient will end up + // being an expensive nop, so skip adding it and just return the inner client. 
+ if (loggerFactory == NullLoggerFactory.Instance) + { + return innerClient; + } + + var audioClient = new LoggingSpeechToTextClient(innerClient, loggerFactory.CreateLogger(typeof(LoggingSpeechToTextClient))); + configure?.Invoke(audioClient); + return audioClient; + }); + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderServiceCollectionExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderServiceCollectionExtensions.cs new file mode 100644 index 00000000000..5ef54e8db26 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderServiceCollectionExtensions.cs @@ -0,0 +1,81 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.DependencyInjection; + +/// Provides extension methods for registering with a . +[Experimental("MEAI001")] +public static class SpeechToTextClientBuilderServiceCollectionExtensions +{ + /// Registers a singleton in the . + /// The to which the client should be added. + /// The inner that represents the underlying backend. + /// The service lifetime for the client. Defaults to . + /// A that can be used to build a pipeline around the inner client. + /// The client is registered as a singleton service. + public static SpeechToTextClientBuilder AddSpeechToTextClient( + this IServiceCollection serviceCollection, + ISpeechToTextClient innerClient, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + => AddSpeechToTextClient(serviceCollection, _ => innerClient, lifetime); + + /// Registers a singleton in the . + /// The to which the client should be added. + /// A callback that produces the inner that represents the underlying backend. + /// The service lifetime for the client. 
Defaults to . + /// A that can be used to build a pipeline around the inner client. + /// The client is registered as a singleton service. + public static SpeechToTextClientBuilder AddSpeechToTextClient( + this IServiceCollection serviceCollection, + Func innerClientFactory, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + { + _ = Throw.IfNull(serviceCollection); + _ = Throw.IfNull(innerClientFactory); + + var builder = new SpeechToTextClientBuilder(innerClientFactory); + serviceCollection.Add(new ServiceDescriptor(typeof(ISpeechToTextClient), builder.Build, lifetime)); + return builder; + } + + /// Registers a keyed singleton in the . + /// The to which the client should be added. + /// The key with which to associate the client. + /// The inner that represents the underlying backend. + /// The service lifetime for the client. Defaults to . + /// A that can be used to build a pipeline around the inner client. + /// The client is registered as a scoped service. + public static SpeechToTextClientBuilder AddKeyedSpeechToTextClient( + this IServiceCollection serviceCollection, + object serviceKey, + ISpeechToTextClient innerClient, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + => AddKeyedSpeechToTextClient(serviceCollection, serviceKey, _ => innerClient, lifetime); + + /// Registers a keyed singleton in the . + /// The to which the client should be added. + /// The key with which to associate the client. + /// A callback that produces the inner that represents the underlying backend. + /// The service lifetime for the client. Defaults to . + /// A that can be used to build a pipeline around the inner client. + /// The client is registered as a scoped service. 
+ public static SpeechToTextClientBuilder AddKeyedSpeechToTextClient( + this IServiceCollection serviceCollection, + object serviceKey, + Func innerClientFactory, + ServiceLifetime lifetime = ServiceLifetime.Singleton) + { + _ = Throw.IfNull(serviceCollection); + _ = Throw.IfNull(serviceKey); + _ = Throw.IfNull(innerClientFactory); + + var builder = new SpeechToTextClientBuilder(innerClientFactory); + serviceCollection.Add(new ServiceDescriptor(typeof(ISpeechToTextClient), serviceKey, factory: (services, serviceKey) => builder.Build(services), lifetime)); + return builder; + } +} diff --git a/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderSpeechToTextClientExtensions.cs b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderSpeechToTextClientExtensions.cs new file mode 100644 index 00000000000..29569c55207 --- /dev/null +++ b/src/Libraries/Microsoft.Extensions.AI/SpeechToText/SpeechToTextClientBuilderSpeechToTextClientExtensions.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using Microsoft.Extensions.AI; +using Microsoft.Shared.Diagnostics; + +namespace Microsoft.Extensions.AI; + +/// Provides extension methods for working with in the context of . +[Experimental("MEAI001")] +public static class SpeechToTextClientBuilderSpeechToTextClientExtensions +{ + /// Creates a new using as its inner client. + /// The client to use as the inner client. + /// The new instance. + /// + /// This method is equivalent to using the constructor directly, + /// specifying as the inner client. 
+ /// + public static SpeechToTextClientBuilder AsBuilder(this ISpeechToTextClient innerClient) + { + _ = Throw.IfNull(innerClient); + + return new SpeechToTextClientBuilder(innerClient); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/ErrorContentTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/ErrorContentTests.cs new file mode 100644 index 00000000000..2564f6bc2c9 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Contents/ErrorContentTests.cs @@ -0,0 +1,53 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Text.Json; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class ErrorContentTests +{ + [Fact] + public void Constructor_ShouldInitializeProperties() + { + // Arrange + string errorMessage = "Error occurred"; + string errorCode = "ERR001"; + string errorDetails = "Something went wrong"; + + // Act + var errorContent = new ErrorContent(errorMessage) + { + ErrorCode = errorCode, + Details = errorDetails + }; + + // Assert + Assert.Equal(errorMessage, errorContent.Message); + Assert.Equal(errorCode, errorContent.ErrorCode); + Assert.Equal(errorDetails, errorContent.Details); + } + + [Fact] + public void JsonSerialization_ShouldSerializeAndDeserializeCorrectly() + { + // Arrange + var errorContent = new ErrorContent("Error occurred") + { + ErrorCode = "ERR001", + Details = "Something went wrong" + }; + var options = new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }; + + // Act + var json = JsonSerializer.Serialize(errorContent, options); + var deserializedErrorContent = JsonSerializer.Deserialize(json, options); + + // Assert + Assert.NotNull(deserializedErrorContent); + Assert.Equal(errorContent.Message, deserializedErrorContent!.Message); + Assert.Equal(errorContent.ErrorCode, deserializedErrorContent.ErrorCode); + 
Assert.Equal(errorContent.Details, deserializedErrorContent.Details); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Microsoft.Extensions.AI.Abstractions.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Microsoft.Extensions.AI.Abstractions.Tests.csproj index b22bdc9fdde..f7b3a0154e5 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Microsoft.Extensions.AI.Abstractions.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/Microsoft.Extensions.AI.Abstractions.Tests.csproj @@ -6,6 +6,7 @@ $(NoWarn);CA1063;CA1861;CA2201;VSTHRD003;S104 + $(NoWarn);MEAI001 true diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/DelegatingSpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/DelegatingSpeechToTextClientTests.cs new file mode 100644 index 00000000000..ef4da7f94bd --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/DelegatingSpeechToTextClientTests.cs @@ -0,0 +1,166 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class DelegatingSpeechToTextClientTests +{ + [Fact] + public void RequiresInnerSpeechToTextClient() + { + Assert.Throws("innerClient", () => new NoOpDelegatingSpeechToTextClient(null!)); + } + + [Fact] + public async Task GetTextAsyncDefaultsToInnerClientAsync() + { + // Arrange + using var expectedAudioSpeechStream = new MemoryStream(); + var expectedOptions = new SpeechToTextOptions(); + var expectedCancellationToken = CancellationToken.None; + var expectedResult = new TaskCompletionSource(); + var expectedResponse = new SpeechToTextResponse([]); + using var inner = new TestSpeechToTextClient + { + GetTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + Assert.Same(expectedAudioSpeechStream, audioSpeechStream); + Assert.Same(expectedOptions, options); + Assert.Equal(expectedCancellationToken, cancellationToken); + return expectedResult.Task; + } + }; + + using var delegating = new NoOpDelegatingSpeechToTextClient(inner); + + // Act + var resultTask = delegating.GetTextAsync(expectedAudioSpeechStream, expectedOptions, expectedCancellationToken); + + // Assert + Assert.False(resultTask.IsCompleted); + expectedResult.SetResult(expectedResponse); + Assert.True(resultTask.IsCompleted); + Assert.Same(expectedResponse, await resultTask); + } + + [Fact] + public async Task GetStreamingTextAsyncDefaultsToInnerClientAsync() + { + // Arrange + using var expectedAudioSpeechStream = new MemoryStream(); + var expectedOptions = new SpeechToTextOptions(); + var expectedCancellationToken = CancellationToken.None; + SpeechToTextResponseUpdate[] expectedResults = + [ + new("Text update 1"), + new("Text update 2") + ]; + + using var inner = new TestSpeechToTextClient + { + GetStreamingTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + 
Assert.Same(expectedAudioSpeechStream, audioSpeechStream); + Assert.Same(expectedOptions, options); + Assert.Equal(expectedCancellationToken, cancellationToken); + return YieldAsync(expectedResults); + } + }; + + using var delegating = new NoOpDelegatingSpeechToTextClient(inner); + + // Act + var resultAsyncEnumerable = delegating.GetStreamingTextAsync(expectedAudioSpeechStream, expectedOptions, expectedCancellationToken); + + // Assert + var enumerator = resultAsyncEnumerable.GetAsyncEnumerator(); + Assert.True(await enumerator.MoveNextAsync()); + Assert.Same(expectedResults[0], enumerator.Current); + Assert.True(await enumerator.MoveNextAsync()); + Assert.Same(expectedResults[1], enumerator.Current); + Assert.False(await enumerator.MoveNextAsync()); + } + + [Fact] + public void GetServiceThrowsForNullType() + { + using var inner = new TestSpeechToTextClient(); + using var delegating = new NoOpDelegatingSpeechToTextClient(inner); + Assert.Throws("serviceType", () => delegating.GetService(null!)); + } + + [Fact] + public void GetServiceReturnsSelfIfCompatibleWithRequestAndKeyIsNull() + { + // Arrange + using var inner = new TestSpeechToTextClient(); + using var delegating = new NoOpDelegatingSpeechToTextClient(inner); + + // Act + var client = delegating.GetService(); + + // Assert + Assert.Same(delegating, client); + } + + [Fact] + public void GetServiceDelegatesToInnerIfKeyIsNotNull() + { + // Arrange + var expectedParam = new object(); + var expectedKey = new object(); + using var expectedResult = new TestSpeechToTextClient(); + using var inner = new TestSpeechToTextClient + { + GetServiceCallback = (_, _) => expectedResult + }; + using var delegating = new NoOpDelegatingSpeechToTextClient(inner); + + // Act + var client = delegating.GetService(expectedKey); + + // Assert + Assert.Same(expectedResult, client); + } + + [Fact] + public void GetServiceDelegatesToInnerIfNotCompatibleWithRequest() + { + // Arrange + var expectedParam = new object(); + var 
expectedResult = TimeZoneInfo.Local; + var expectedKey = new object(); + using var inner = new TestSpeechToTextClient + { + GetServiceCallback = (type, key) => type == expectedResult.GetType() && key == expectedKey + ? expectedResult + : throw new InvalidOperationException("Unexpected call") + }; + using var delegating = new NoOpDelegatingSpeechToTextClient(inner); + + // Act + var tzi = delegating.GetService(expectedKey); + + // Assert + Assert.Same(expectedResult, tzi); + } + + private static async IAsyncEnumerable YieldAsync(IEnumerable input) + { + await Task.Yield(); + foreach (var item in input) + { + yield return item; + } + } + + private sealed class NoOpDelegatingSpeechToTextClient(ISpeechToTextClient innerClient) + : DelegatingSpeechToTextClient(innerClient); +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientExtensionsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientExtensionsTests.cs new file mode 100644 index 00000000000..d39c73fc0c6 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientExtensionsTests.cs @@ -0,0 +1,93 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextClientExtensionsTests +{ + [Fact] + public void GetService_InvalidArgs_Throws() + { + Assert.Throws("client", () => + { + _ = SpeechToTextClientExtensions.GetService(null!); + }); + } + + [Fact] + public async Task GetTextAsync_InvalidArgs_Throws() + { + // Note: the extension method now requires a DataContent (not a string). + ISpeechToTextClient? 
client = null; + var content = new DataContent("data:audio/wav;base64,AQIDBA=="); + var ex1 = await Assert.ThrowsAsync(() => SpeechToTextClientExtensions.GetTextAsync(client!, content)); + Assert.Equal("client", ex1.ParamName); + + using var testClient = new TestSpeechToTextClient(); + DataContent? nullContent = null; + var ex2 = await Assert.ThrowsAsync(() => SpeechToTextClientExtensions.GetTextAsync(testClient, nullContent!)); + Assert.Equal("audioSpeechContent", ex2.ParamName); + } + + [Fact] + public async Task GetStreamingTextAsync_InvalidArgs_Throws() + { + ISpeechToTextClient? client = null; + var content = new DataContent("data:audio/wav;base64,AQIDBA=="); + var ex1 = await Assert.ThrowsAsync(() => SpeechToTextClientExtensions.GetStreamingTextAsync(client!, content).GetAsyncEnumerator().MoveNextAsync().AsTask()); + Assert.Equal("client", ex1.ParamName); + + using var testClient = new TestSpeechToTextClient(); + DataContent? nullContent = null; + var ex2 = await Assert.ThrowsAsync(() => SpeechToTextClientExtensions.GetStreamingTextAsync(testClient, nullContent!).GetAsyncEnumerator().MoveNextAsync().AsTask()); + Assert.Equal("audioSpeechContent", ex2.ParamName); + } + + [Fact] + public async Task GetStreamingTextAsync_CreatesTextMessageAsync() + { + // Arrange + var expectedOptions = new SpeechToTextOptions(); + using var cts = new CancellationTokenSource(); + + using TestSpeechToTextClient client = new() + { + GetStreamingTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + // For testing, return an async enumerable yielding one streaming update with text "world". 
+ var update = new SpeechToTextResponseUpdate(); + update.Contents.Add(new TextContent("world")); + return YieldAsync(update); + }, + }; + + int count = 0; + await foreach (var update in SpeechToTextClientExtensions.GetStreamingTextAsync( + client, + new DataContent("data:audio/wav;base64,AQIDBA=="), + expectedOptions, + cts.Token)) + { + Assert.Equal(0, count); + Assert.Equal("world", update.Text); + count++; + } + + Assert.Equal(1, count); + } + + private static async IAsyncEnumerable YieldAsync(params SpeechToTextResponseUpdate[] updates) + { + await Task.Yield(); + foreach (var update in updates) + { + yield return update; + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientMetadataTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientMetadataTests.cs new file mode 100644 index 00000000000..c9081d0adb6 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientMetadataTests.cs @@ -0,0 +1,29 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextClientMetadataTests +{ + [Fact] + public void Constructor_NullValues_AllowedAndRoundtrip() + { + SpeechToTextClientMetadata metadata = new(null, null, null); + Assert.Null(metadata.ProviderName); + Assert.Null(metadata.ProviderUri); + Assert.Null(metadata.DefaultModelId); + } + + [Fact] + public void Constructor_Value_Roundtrips() + { + var uri = new Uri("https://example.com"); + SpeechToTextClientMetadata metadata = new("providerName", uri, "theModel"); + Assert.Equal("providerName", metadata.ProviderName); + Assert.Same(uri, metadata.ProviderUri); + Assert.Equal("theModel", metadata.DefaultModelId); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientTests.cs new file mode 100644 index 00000000000..092ad57b2c2 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextClientTests.cs @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextClientTests +{ + [Fact] + public async Task GetTextAsync_CreatesTextMessageAsync() + { + // Arrange + var expectedResponse = new SpeechToTextResponse("hello"); + var expectedOptions = new SpeechToTextOptions(); + using var cts = new CancellationTokenSource(); + + using TestSpeechToTextClient client = new() + { + GetTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + // For the purpose of the test, we assume that the underlying implementation converts the audio speech stream into a transcription choice. 
+ // (In a real implementation, the audio speech data would be processed.) + return Task.FromResult(new SpeechToTextResponse("hello")); + }, + }; + + // Act – call the extension method with a valid DataContent. + SpeechToTextResponse response = await SpeechToTextClientExtensions.GetTextAsync( + client, + new DataContent("data:audio/wav;base64,AQIDBA=="), + expectedOptions, + cts.Token); + + // Assert + Assert.Equal(expectedResponse.Text, response.Text); + } + + [Fact] + public async Task GetStreamingTextAsync_CreatesStreamingUpdatesAsync() + { + // Arrange + var expectedOptions = new SpeechToTextOptions(); + using var cts = new CancellationTokenSource(); + + using TestSpeechToTextClient client = new() + { + GetStreamingTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + // For the purpose of the test, we simulate a streaming response with multiple updates + return GetStreamingUpdatesAsync(); + }, + }; + + // Act – call the extension method with a valid DataContent + List updates = []; + await foreach (var update in SpeechToTextClientExtensions.GetStreamingTextAsync( + client, + new DataContent("data:audio/wav;base64,AQIDBA=="), + expectedOptions, + cts.Token)) + { + updates.Add(update); + } + + // Assert + Assert.Equal(3, updates.Count); + Assert.Equal("hello ", updates[0].Text); + Assert.Equal("world ", updates[1].Text); + Assert.Equal("!", updates[2].Text); + } + + // Helper method to simulate streaming updates +#pragma warning disable CS1998 // Async method lacks 'await' operators and will run synchronously + private static async IAsyncEnumerable GetStreamingUpdatesAsync() + { + yield return new("hello "); + yield return new("world "); + yield return new("!"); + } +#pragma warning restore CS1998 // Async method lacks 'await' operators and will run synchronously +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextOptionsTests.cs 
b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextOptionsTests.cs new file mode 100644 index 00000000000..20936fd4517 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextOptionsTests.cs @@ -0,0 +1,84 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Text.Json; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextOptionsTests +{ + [Fact] + public void Constructor_Parameterless_PropsDefaulted() + { + SpeechToTextOptions options = new(); + Assert.Null(options.ModelId); + Assert.Null(options.SpeechLanguage); + Assert.Null(options.SpeechSampleRate); + Assert.Null(options.AdditionalProperties); + + SpeechToTextOptions clone = options.Clone(); + Assert.Null(clone.ModelId); + Assert.Null(clone.SpeechLanguage); + Assert.Null(clone.SpeechSampleRate); + Assert.Null(clone.AdditionalProperties); + } + + [Fact] + public void Properties_Roundtrip() + { + SpeechToTextOptions options = new(); + + AdditionalPropertiesDictionary additionalProps = new() + { + ["key"] = "value", + }; + + options.ModelId = "modelId"; + options.SpeechLanguage = "en-US"; + options.SpeechSampleRate = 44100; + options.AdditionalProperties = additionalProps; + + Assert.Equal("modelId", options.ModelId); + Assert.Equal("en-US", options.SpeechLanguage); + Assert.Equal(44100, options.SpeechSampleRate); + Assert.Same(additionalProps, options.AdditionalProperties); + + SpeechToTextOptions clone = options.Clone(); + Assert.Equal("modelId", clone.ModelId); + Assert.Equal("en-US", clone.SpeechLanguage); + Assert.Equal(44100, clone.SpeechSampleRate); + Assert.Equal(additionalProps, clone.AdditionalProperties); + } + + [Fact] + public void JsonSerialization_Roundtrips() + { + SpeechToTextOptions options = new(); + + AdditionalPropertiesDictionary additionalProps = new() + { + ["key"] = "value", + }; + 
+ options.ModelId = "modelId"; + options.SpeechLanguage = "en-US"; + options.SpeechSampleRate = 44100; + options.AdditionalProperties = additionalProps; + + string json = JsonSerializer.Serialize(options, TestJsonSerializerContext.Default.SpeechToTextOptions); + + SpeechToTextOptions? deserialized = JsonSerializer.Deserialize(json, TestJsonSerializerContext.Default.SpeechToTextOptions); + Assert.NotNull(deserialized); + + Assert.Equal("modelId", deserialized.ModelId); + Assert.Equal("en-US", deserialized.SpeechLanguage); + Assert.Equal(44100, deserialized.SpeechSampleRate); + + Assert.NotNull(deserialized.AdditionalProperties); + Assert.Single(deserialized.AdditionalProperties); + Assert.True(deserialized.AdditionalProperties.TryGetValue("key", out object? value)); + Assert.IsType(value); + Assert.Equal("value", ((JsonElement)value!).GetString()); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseTests.cs new file mode 100644 index 00000000000..33b27b01291 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseTests.cs @@ -0,0 +1,229 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text.Json; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextResponseTests +{ + [Fact] + public void Constructor_InvalidArgs_Throws() + { + Assert.Throws("contents", () => new SpeechToTextResponse((IList)null!)); + } + + [Fact] + public void Constructor_Parameterless_PropsDefaulted() + { + SpeechToTextResponse response = new(); + Assert.Empty(response.Contents); + Assert.Empty(response.Text); + Assert.NotNull(response.Contents); + Assert.Same(response.Contents, response.Contents); + Assert.Empty(response.Contents); + Assert.Null(response.RawRepresentation); + Assert.Null(response.AdditionalProperties); + Assert.Null(response.StartTime); + Assert.Null(response.EndTime); + Assert.Equal(string.Empty, response.ToString()); + } + + [Theory] + [InlineData(null)] + [InlineData("text")] + public void Constructor_String_PropsRoundtrip(string? text) + { + SpeechToTextResponse response = new(text); + + Assert.Same(response.Contents, response.Contents); + if (text is null) + { + Assert.Empty(response.Contents); + } + else + { + Assert.Single(response.Contents); + TextContent tc = Assert.IsType(response.Contents[0]); + Assert.Equal(text, tc.Text); + } + + Assert.Null(response.RawRepresentation); + Assert.Null(response.AdditionalProperties); + Assert.Equal(text ?? 
string.Empty, response.ToString()); + } + + [Fact] + public void Constructor_List_InvalidArgs_Throws() + { + Assert.Throws("contents", () => new SpeechToTextResponse((IList)null!)); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(2)] + public void Constructor_List_PropsRoundtrip(int contentCount) + { + List content = []; + for (int i = 0; i < contentCount; i++) + { + content.Add(new TextContent($"text-{i}")); + } + + SpeechToTextResponse response = new(content); + + Assert.Same(response.Contents, response.Contents); + if (contentCount == 0) + { + Assert.Empty(response.Contents); + Assert.Empty(response.Text); + } + else + { + Assert.Equal(contentCount, response.Contents.Count); + for (int i = 0; i < contentCount; i++) + { + TextContent tc = Assert.IsType(response.Contents[i]); + Assert.Equal($"text-{i}", tc.Text); + } + + Assert.Equal(string.Concat(Enumerable.Range(0, contentCount).Select(i => $"text-{i}")), response.Text); + Assert.Equal(string.Concat(Enumerable.Range(0, contentCount).Select(i => $"text-{i}")), response.ToString()); + } + } + + [Fact] + public void Properties_Roundtrip() + { + SpeechToTextResponse response = new(); + Assert.Null(response.ResponseId); + response.ResponseId = "id"; + Assert.Equal("id", response.ResponseId); + + Assert.Null(response.ModelId); + response.ModelId = "modelId"; + Assert.Equal("modelId", response.ModelId); + + Assert.Null(response.RawRepresentation); + object raw = new(); + response.RawRepresentation = raw; + Assert.Same(raw, response.RawRepresentation); + + Assert.Null(response.AdditionalProperties); + AdditionalPropertiesDictionary additionalProps = []; + response.AdditionalProperties = additionalProps; + Assert.Same(additionalProps, response.AdditionalProperties); + + Assert.Null(response.StartTime); + TimeSpan startTime = TimeSpan.FromSeconds(1); + response.StartTime = startTime; + Assert.Equal(startTime, response.StartTime); + + Assert.Null(response.EndTime); + TimeSpan endTime = 
TimeSpan.FromSeconds(2); + response.EndTime = endTime; + Assert.Equal(endTime, response.EndTime); + + List newContents = [new TextContent("text1"), new TextContent("text2")]; + response.Contents = newContents; + Assert.Same(newContents, response.Contents); + } + + [Fact] + public void JsonSerialization_Roundtrips() + { + SpeechToTextResponse original = new() + { + Contents = + [ + new TextContent("Text1"), + new TextContent("Text2"), + new TextContent("Text3"), + new TextContent("Text4"), + ], + ResponseId = "id", + ModelId = "modelId", + StartTime = TimeSpan.FromSeconds(1), + EndTime = TimeSpan.FromSeconds(2), + RawRepresentation = new(), + AdditionalProperties = new() { ["key"] = "value" }, + }; + + string json = JsonSerializer.Serialize(original, TestJsonSerializerContext.Default.SpeechToTextResponse); + + SpeechToTextResponse? result = JsonSerializer.Deserialize(json, TestJsonSerializerContext.Default.SpeechToTextResponse); + + Assert.NotNull(result); + Assert.Equal(4, result.Contents.Count); + + for (int i = 0; i < original.Contents.Count; i++) + { + Assert.Equal($"Text{i + 1}", ((TextContent)result.Contents[i]).Text); + } + + Assert.Equal("id", result.ResponseId); + Assert.Equal("modelId", result.ModelId); + Assert.Equal(TimeSpan.FromSeconds(1), result.StartTime); + Assert.Equal(TimeSpan.FromSeconds(2), result.EndTime); + + Assert.NotNull(result.AdditionalProperties); + Assert.Single(result.AdditionalProperties); + Assert.True(result.AdditionalProperties.TryGetValue("key", out object? value)); + Assert.IsType(value); + Assert.Equal("value", ((JsonElement)value!).GetString()); + } + + [Fact] + public void ToString_OutputsText() + { + SpeechToTextResponse response = new("This is a test." + Environment.NewLine + "It's multiple lines."); + Assert.Equal("This is a test." 
+ Environment.NewLine + "It's multiple lines.", response.ToString()); + } + + [Fact] + public void ToSpeechToTextResponseUpdates_ReturnsExpectedUpdate() + { + // Arrange: create a response with contents + SpeechToTextResponse response = new() + { + Contents = + [ + new TextContent("Hello, "), + new DataContent("data:image/png;base64,AQIDBA==", mediaType: "image/png"), + new TextContent("world!") + ], + StartTime = TimeSpan.FromSeconds(1), + EndTime = TimeSpan.FromSeconds(2), + ResponseId = "12345", + ModelId = "someModel", + AdditionalProperties = new() { ["key1"] = "value1", ["key2"] = 42 }, + }; + + // Act: convert to streaming updates + SpeechToTextResponseUpdate[] updates = response.ToSpeechToTextResponseUpdates(); + + // Assert: should be a single update with all properties + Assert.Single(updates); + + SpeechToTextResponseUpdate update = updates[0]; + Assert.Equal("12345", update.ResponseId); + Assert.Equal("someModel", update.ModelId); + Assert.Equal(SpeechToTextResponseUpdateKind.TextUpdated, update.Kind); + Assert.Equal(TimeSpan.FromSeconds(1), update.StartTime); + Assert.Equal(TimeSpan.FromSeconds(2), update.EndTime); + + Assert.Equal(3, update.Contents.Count); + Assert.Equal("Hello, ", Assert.IsType(update.Contents[0]).Text); + Assert.Equal("image/png", Assert.IsType(update.Contents[1]).MediaType); + Assert.Equal("world!", Assert.IsType(update.Contents[2]).Text); + + Assert.NotNull(update.AdditionalProperties); + Assert.Equal("value1", update.AdditionalProperties["key1"]); + Assert.Equal(42, update.AdditionalProperties["key2"]); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateExtensionsTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateExtensionsTests.cs new file mode 100644 index 00000000000..f0a2f08ab13 --- /dev/null +++ 
b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateExtensionsTests.cs @@ -0,0 +1,140 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextResponseUpdateExtensionsTests +{ + public static IEnumerable ToSpeechToTextResponse_Coalescing_VariousSequenceAndGapLengths_MemberData() + { + foreach (bool useAsync in new[] { false, true }) + { + for (int numSequences = 1; numSequences <= 3; numSequences++) + { + for (int sequenceLength = 1; sequenceLength <= 3; sequenceLength++) + { + for (int gapLength = 1; gapLength <= 3; gapLength++) + { + foreach (bool gapBeginningEnd in new[] { false, true }) + { + yield return new object[] { useAsync, numSequences, sequenceLength, gapLength, false }; + } + } + } + } + } + } + + [Fact] + public void InvalidArgs_Throws() + { + Assert.Throws("updates", () => ((List)null!).ToSpeechToTextResponse()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task ToSpeechToTextResponse_SuccessfullyCreatesResponse(bool useAsync) + { + SpeechToTextResponseUpdate[] updates = + [ + new("Hello ") { ModelId = "model123", StartTime = null, AdditionalProperties = new() { ["a"] = "b" } }, + new("human, ") { ModelId = "model123", StartTime = TimeSpan.FromSeconds(10), EndTime = TimeSpan.FromSeconds(20) }, + new("How ") { ModelId = "model123", StartTime = TimeSpan.FromSeconds(22), EndTime = TimeSpan.FromSeconds(23) }, + new("are ") { ModelId = "model123", StartTime = TimeSpan.FromSeconds(23), EndTime = TimeSpan.FromSeconds(24) }, + new([new TextContent("You?")]) { ModelId = "model123", StartTime = TimeSpan.FromSeconds(24), EndTime = TimeSpan.FromSeconds(25), AdditionalProperties = new() { ["c"] = "d" } }, 
+ new() { ResponseId = "someResponse", ModelId = "model123", StartTime = TimeSpan.FromSeconds(25), EndTime = TimeSpan.FromSeconds(35) }, + ]; + + SpeechToTextResponse response = useAsync ? + updates.ToSpeechToTextResponse() : + await YieldAsync(updates).ToSpeechToTextResponseAsync(); + + Assert.NotNull(response); + + Assert.Equal("someResponse", response.ResponseId); + Assert.Equal(TimeSpan.FromSeconds(10), response.StartTime); + Assert.Equal(TimeSpan.FromSeconds(35), response.EndTime); + Assert.Equal("model123", response.ModelId); + + Assert.NotNull(response.AdditionalProperties); + Assert.Equal(2, response.AdditionalProperties.Count); + Assert.Equal("b", response.AdditionalProperties["a"]); + Assert.Equal("d", response.AdditionalProperties["c"]); + + Assert.Equal("Hello human, How are You?", response.Text); + } + + [Theory] + [MemberData(nameof(ToSpeechToTextResponse_Coalescing_VariousSequenceAndGapLengths_MemberData))] + public async Task ToSpeechToTextResponse_Coalescing_VariousSequenceAndGapLengths(bool useAsync, int numSequences, int sequenceLength, int gapLength, bool gapBeginningEnd) + { + List updates = []; + + List expected = []; + + if (gapBeginningEnd) + { + AddGap(); + } + + for (int sequenceNum = 0; sequenceNum < numSequences; sequenceNum++) + { + StringBuilder sb = new(); + for (int i = 0; i < sequenceLength; i++) + { + string text = $"{(char)('A' + sequenceNum)}{i}"; + updates.Add(new(text)); + sb.Append(text); + } + + expected.Add(sb.ToString()); + + if (sequenceNum < numSequences - 1) + { + AddGap(); + } + } + + if (gapBeginningEnd) + { + AddGap(); + } + + void AddGap() + { + for (int i = 0; i < gapLength; i++) + { + updates.Add(new() { Contents = [new DataContent("data:image/png;base64,aGVsbG8=")] }); + } + } + + SpeechToTextResponse response = useAsync ? 
await YieldAsync(updates).ToSpeechToTextResponseAsync() : updates.ToSpeechToTextResponse(); + Assert.NotNull(response); + + Assert.Equal(expected.Count + (gapLength * ((numSequences - 1) + (gapBeginningEnd ? 2 : 0))), response.Contents.Count); + + TextContent[] contents = response.Contents.OfType().ToArray(); + Assert.Equal(expected.Count, contents.Length); + for (int i = 0; i < expected.Count; i++) + { + Assert.Equal(expected[i], contents[i].Text); + } + } + + private static async IAsyncEnumerable YieldAsync(IEnumerable updates) + { + foreach (SpeechToTextResponseUpdate update in updates) + { + await Task.Yield(); + yield return update; + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateKindTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateKindTests.cs new file mode 100644 index 00000000000..ddc72d076db --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateKindTests.cs @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Text.Json; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextResponseUpdateKindTests +{ + [Fact] + public void Constructor_Value_Roundtrips() + { + Assert.Equal("abc", new SpeechToTextResponseUpdateKind("abc").Value); + } + + [Fact] + public void Constructor_NullOrWhiteSpace_Throws() + { + Assert.Throws("value", () => new SpeechToTextResponseUpdateKind(null!)); + Assert.Throws("value", () => new SpeechToTextResponseUpdateKind(" ")); + } + + [Fact] + public void Equality_UsesOrdinalIgnoreCaseComparison() + { + var kind1 = new SpeechToTextResponseUpdateKind("abc"); + var kind2 = new SpeechToTextResponseUpdateKind("ABC"); + Assert.True(kind1.Equals(kind2)); + Assert.True(kind1.Equals((object)kind2)); + Assert.True(kind1 == kind2); + Assert.False(kind1 != kind2); + + var kind3 = new SpeechToTextResponseUpdateKind("def"); + Assert.False(kind1.Equals(kind3)); + Assert.False(kind1.Equals((object)kind3)); + Assert.False(kind1 == kind3); + Assert.True(kind1 != kind3); + + Assert.Equal(kind1.GetHashCode(), new SpeechToTextResponseUpdateKind("abc").GetHashCode()); + Assert.Equal(kind1.GetHashCode(), new SpeechToTextResponseUpdateKind("ABC").GetHashCode()); + } + + [Fact] + public void Singletons_UseKnownValues() + { + Assert.Equal(SpeechToTextResponseUpdateKind.SessionOpen.ToString(), SpeechToTextResponseUpdateKind.SessionOpen.Value); + Assert.Equal(SpeechToTextResponseUpdateKind.Error.ToString(), SpeechToTextResponseUpdateKind.Error.Value); + Assert.Equal(SpeechToTextResponseUpdateKind.TextUpdating.ToString(), SpeechToTextResponseUpdateKind.TextUpdating.Value); + Assert.Equal(SpeechToTextResponseUpdateKind.TextUpdated.ToString(), SpeechToTextResponseUpdateKind.TextUpdated.Value); + Assert.Equal(SpeechToTextResponseUpdateKind.SessionClose.ToString(), SpeechToTextResponseUpdateKind.SessionClose.Value); + } + + [Fact] + public void JsonSerialization_Roundtrips() + { + var kind = new 
SpeechToTextResponseUpdateKind("abc"); + string json = JsonSerializer.Serialize(kind, TestJsonSerializerContext.Default.SpeechToTextResponseUpdateKind); + Assert.Equal("\"abc\"", json); + + var result = JsonSerializer.Deserialize(json, TestJsonSerializerContext.Default.SpeechToTextResponseUpdateKind); + Assert.Equal(kind, result); + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateTests.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateTests.cs new file mode 100644 index 00000000000..0eae376070e --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/SpeechToText/SpeechToTextResponseUpdateTests.cs @@ -0,0 +1,114 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Text.Json; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextResponseUpdateTests +{ + [Fact] + public void Constructor_PropsDefaulted() + { + SpeechToTextResponseUpdate update = new(); + + Assert.Equal(SpeechToTextResponseUpdateKind.TextUpdating, update.Kind); + Assert.Empty(update.Text); + Assert.Empty(update.Contents); + Assert.Null(update.ResponseId); + Assert.Null(update.StartTime); + Assert.Null(update.EndTime); + Assert.Equal(string.Empty, update.ToString()); + } + + [Fact] + public void Properties_Roundtrip() + { + SpeechToTextResponseUpdate update = new() + { + Kind = new SpeechToTextResponseUpdateKind("custom"), + }; + + Assert.Equal("custom", update.Kind.Value); + + // Test the computed Text property + Assert.Empty(update.Text); + + // Contents: assigning a new list then resetting to null should yield an empty list. 
+ List newList = new(); + newList.Add(new TextContent("content1")); + update.Contents = newList; + Assert.Same(newList, update.Contents); + update.Contents = null; + Assert.NotNull(update.Contents); + Assert.Empty(update.Contents); + + update.ResponseId = "comp123"; + Assert.Equal("comp123", update.ResponseId); + + update.StartTime = TimeSpan.FromSeconds(10); + update.EndTime = TimeSpan.FromSeconds(20); + Assert.Equal(TimeSpan.FromSeconds(10), update.StartTime); + Assert.Equal(TimeSpan.FromSeconds(20), update.EndTime); + } + + [Fact] + public void Text_Get_UsesFirstTextContent() + { + SpeechToTextResponseUpdate update = new( + [ + new DataContent("data:audio/wav;base64,AQIDBA==", "application/octet-stream"), + new DataContent("data:image/wav;base64,AQIDBA==", "application/octet-stream"), + new FunctionCallContent("callId1", "fc1"), + new TextContent("text-1"), + new TextContent("text-2"), + new FunctionResultContent("callId1", "result"), + ]); + + // The Text getter concatenates the text of all TextContent items; the first one is at index 3. + TextContent textContent = Assert.IsType(update.Contents[3]); + Assert.Equal("text-1", textContent.Text); + Assert.Equal("text-1text-2", update.Text); + + // ToString likewise concatenates the text of all TextContent items. + Assert.Equal("text-1text-2", update.ToString()); + + // Reading Text does not replace or mutate the underlying content items. + Assert.Same(textContent, update.Contents[3]); + } + + [Fact] + public void JsonSerialization_Roundtrips() + { + SpeechToTextResponseUpdate original = new() + { + Kind = new SpeechToTextResponseUpdateKind("transcribed"), + ResponseId = "id123", + StartTime = TimeSpan.FromSeconds(5), + EndTime = TimeSpan.FromSeconds(10), + Contents = new List + { + new TextContent("text-1"), + new DataContent("data:audio/wav;base64,AQIDBA==", "application/octet-stream") + } + }; + + string json = JsonSerializer.Serialize(original, TestJsonSerializerContext.Default.SpeechToTextResponseUpdate); + SpeechToTextResponseUpdate?
result = JsonSerializer.Deserialize(json, TestJsonSerializerContext.Default.SpeechToTextResponseUpdate); + Assert.NotNull(result); + + Assert.Equal(original.Kind, result.Kind); + Assert.Equal(original.ResponseId, result.ResponseId); + Assert.Equal(original.StartTime, result.StartTime); + Assert.Equal(original.EndTime, result.EndTime); + Assert.Equal(original.Contents.Count, result.Contents.Count); + for (int i = 0; i < original.Contents.Count; i++) + { + // Compare via string conversion. + Assert.Equal(original.Contents[i].ToString(), result.Contents[i].ToString()); + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestJsonSerializerContext.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestJsonSerializerContext.cs index 0362be74821..d15f0a19fa9 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestJsonSerializerContext.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestJsonSerializerContext.cs @@ -16,6 +16,10 @@ namespace Microsoft.Extensions.AI; UseStringEnumConverter = true)] [JsonSerializable(typeof(ChatResponse))] [JsonSerializable(typeof(ChatResponseUpdate))] +[JsonSerializable(typeof(SpeechToTextResponse))] +[JsonSerializable(typeof(SpeechToTextResponseUpdate))] +[JsonSerializable(typeof(SpeechToTextResponseUpdateKind))] +[JsonSerializable(typeof(SpeechToTextOptions))] [JsonSerializable(typeof(ChatOptions))] [JsonSerializable(typeof(EmbeddingGenerationOptions))] [JsonSerializable(typeof(Dictionary))] diff --git a/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestSpeechToTextClient.cs b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestSpeechToTextClient.cs new file mode 100644 index 00000000000..44e1d739533 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Abstractions.Tests/TestSpeechToTextClient.cs @@ -0,0 +1,60 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; + +namespace Microsoft.Extensions.AI; + +public sealed class TestSpeechToTextClient : ISpeechToTextClient +{ + public TestSpeechToTextClient() + { + GetServiceCallback = DefaultGetServiceCallback; + } + + public IServiceProvider? Services { get; set; } + + // Callbacks for asynchronous operations. + public Func>? + GetTextAsyncCallback + { get; set; } + + public Func>? + GetStreamingTextAsyncCallback + { get; set; } + + public Func GetServiceCallback { get; set; } + + private object? DefaultGetServiceCallback(Type serviceType, object? serviceKey) + => serviceType is not null && serviceKey is null && serviceType.IsInstanceOfType(this) ? this : null; + + public Task GetTextAsync( + Stream audioSpeechStream, + SpeechToTextOptions? options = null, + CancellationToken cancellationToken = default) + => GetTextAsyncCallback!.Invoke(audioSpeechStream, options, cancellationToken); + + public IAsyncEnumerable GetStreamingTextAsync( + Stream audioSpeechStream, + SpeechToTextOptions? options = null, + CancellationToken cancellationToken = default) + => GetStreamingTextAsyncCallback!.Invoke(audioSpeechStream, options, cancellationToken); + + public object? GetService(Type serviceType, object? serviceKey = null) + => GetServiceCallback!.Invoke(serviceType, serviceKey); + + public void Dispose() + { + // Dispose of resources if any. 
+ } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs index 6b06b3c0ed2..5f00c9b9c44 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs +++ b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/ChatClientIntegrationTests.cs @@ -967,7 +967,9 @@ private static Uri GetImageDataUri() [MemberNotNull(nameof(_chatClient))] protected void SkipIfNotEnabled() { - if (_chatClient is null) + string? skipIntegration = TestRunnerConfiguration.Instance["SkipIntegrationTests"]; + + if (skipIntegration is not null || _chatClient is null) { throw new SkipTestException("Client is not enabled."); } diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj index cf9f4d9703d..ec925a15309 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Microsoft.Extensions.AI.Integration.Tests.csproj @@ -7,6 +7,7 @@ $(NoWarn);CA1063;CA1861;SA1130;VSTHRD003 + $(NoWarn);MEAI001 true @@ -17,11 +18,19 @@ - + + + + + + Never + + + diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/audio001.mp3 b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/audio001.mp3 new file mode 100644 index 00000000000..9fbfb2bca17 Binary files /dev/null and b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/audio001.mp3 differ diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/dotnet.png b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/dotnet.png similarity index 100% rename from 
test/Libraries/Microsoft.Extensions.AI.Integration.Tests/dotnet.png rename to test/Libraries/Microsoft.Extensions.AI.Integration.Tests/Resources/dotnet.png diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/SpeechToTextClientIntegrationTests.cs b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/SpeechToTextClientIntegrationTests.cs new file mode 100644 index 00000000000..f0ea6c1790e --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/SpeechToTextClientIntegrationTests.cs @@ -0,0 +1,83 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using Microsoft.TestUtilities; +using Xunit; + +#pragma warning disable CA2214 // Do not call overridable methods in constructors + +namespace Microsoft.Extensions.AI; + +public abstract class SpeechToTextClientIntegrationTests : IDisposable +{ + private readonly ISpeechToTextClient? _client; + + protected SpeechToTextClientIntegrationTests() + { + _client = CreateClient(); + } + + public void Dispose() + { + _client?.Dispose(); + GC.SuppressFinalize(this); + } + + protected abstract ISpeechToTextClient? 
CreateClient(); + + [ConditionalFact] + public virtual async Task GetTextAsync_SingleAudioRequestMessage() + { + SkipIfNotEnabled(); + + using var audioSpeechStream = GetAudioStream("audio001.mp3"); + var response = await _client.GetTextAsync(audioSpeechStream); + + Assert.Contains("gym", response.Text, StringComparison.OrdinalIgnoreCase); + } + + [ConditionalFact] + public virtual async Task GetStreamingTextAsync_SingleStreamingResponseChoice() + { + SkipIfNotEnabled(); + + using var audioSpeechStream = GetAudioStream("audio001.mp3"); + + StringBuilder sb = new(); + await foreach (var chunk in _client.GetStreamingTextAsync(audioSpeechStream)) + { + sb.Append(chunk.Text); + } + + string responseText = sb.ToString(); + Assert.Contains("finally", responseText, StringComparison.OrdinalIgnoreCase); + Assert.Contains("gym", responseText, StringComparison.OrdinalIgnoreCase); + } + + private static Stream GetAudioStream(string fileName) + { + using Stream? s = typeof(SpeechToTextClientIntegrationTests).Assembly.GetManifestResourceStream($"Microsoft.Extensions.AI.Resources.{fileName}"); + Assert.NotNull(s); + MemoryStream ms = new(); + s.CopyTo(ms); + + ms.Position = 0; + return ms; + } + + [MemberNotNull(nameof(_client))] + protected void SkipIfNotEnabled() + { + string? skipIntegration = TestRunnerConfiguration.Instance["SkipIntegrationTests"]; + + if (skipIntegration is not null || _client is null) + { + throw new SkipTestException("Client is not enabled."); + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/VerbatimMultiPartHttpHandler.cs b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/VerbatimMultiPartHttpHandler.cs new file mode 100644 index 00000000000..6b0374d70cd --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Integration.Tests/VerbatimMultiPartHttpHandler.cs @@ -0,0 +1,215 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net.Http; +using System.Text; +using System.Text.Json; +using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +#pragma warning disable S3996 // URI properties should not be strings + +/// +/// An that checks the multi-part request body as a root +/// JSON structure of properties and sends back an expected JSON response. +/// +/// +/// The order of the properties does not affect the comparison. +/// +/// An expected input of { "name": "something" } will Assert for a multipart body that has +/// a name field with a value of something. +/// +/// +/// An expected input of { "multiple[]": ["one","two"] } will Assert for a multipart body that has +/// two multiple[] fields each having "one" and "two" value respectively. +/// +/// +/// +/// A JSON string representing the expected structure and values of the multipart request body to be verified. +/// For example, { "name": "something" } or { "multiple[]": ["one","two"] }. +/// +/// +/// A JSON string that will be returned as the response body when the request matches the expected input. +/// +public class VerbatimMultiPartHttpHandler(string expectedInput, string sentJsonOutput) : HttpClientHandler +{ + public string? ExpectedRequestUriContains { get; init; } + + protected override async Task SendAsync( + HttpRequestMessage request, + CancellationToken cancellationToken) + { + Assert.NotNull(request.Content); + Assert.NotNull(request.Content.Headers.ContentType); + Assert.Equal("multipart/form-data", request.Content.Headers.ContentType.MediaType); + + Assert.NotNull(request.RequestUri); + if (!string.IsNullOrEmpty(ExpectedRequestUriContains)) + { + Assert.Contains(ExpectedRequestUriContains!, request.RequestUri!.ToString()); + } + + Dictionary parameters = []; + + // Extract the boundary + string? 
boundary = request.Content.Headers.ContentType.Parameters + .FirstOrDefault(p => p.Name == "boundary")?.Value; + + if (string.IsNullOrEmpty(boundary)) + { + throw new InvalidOperationException("Boundary not found."); + } + + string fullBoundary = $"--{boundary!.Trim('"')}"; + + // Read the entire body into memory (for simplicity; stream in production for large data) +#if NET + byte[] bodyBytes = await request.Content.ReadAsByteArrayAsync(cancellationToken); +#else + byte[] bodyBytes = await request.Content.ReadAsByteArrayAsync(); +#endif + using var stream = new MemoryStream(bodyBytes); + using var reader = new StreamReader(stream, Encoding.UTF8); +#if NET + + string bodyText = await reader.ReadToEndAsync(cancellationToken); +#else + string bodyText = await reader.ReadToEndAsync(); +#endif + + // Make it legible for debugging and splitting + bodyText = RemoveSpecialCharacters(bodyText); + + string[] parts = bodyText.Split(new string[] { fullBoundary }, StringSplitOptions.None); + + foreach (string part in parts) + { + if (part.Trim() == "--") + { + continue; // End boundary + } + + // Parse headers and body + int headerEnd = part.IndexOf("\r\n\r\n"); + if (headerEnd < 0) + { + continue; + } + + string headers = part.Substring(0, headerEnd).Trim(); + string rawValue = part.Substring(headerEnd + 4).TrimEnd('\r', '\n'); + + // Get the parameter name and value + if (headers.Contains("name=")) + { + // Text field + string name = ExtractNameFromHeaders(headers); + + // Skip file fields + if (!name.StartsWith("file")) + { + if (parameters.ContainsKey(name)) + { + ((List)parameters[name]).Add(ParseContentToJsonElement(rawValue)); + } + else + { + parameters.Add(name, new List { ParseContentToJsonElement(rawValue) }); + } + } + } + } + + // Transform one value lists into single values + foreach (var key in parameters.Keys.ToList()) + { + if (parameters[key] is List list && list.Count == 1) + { + parameters[key] = list[0]; + } + } + + var jsonParameters = 
JsonSerializer.Serialize(parameters); + Assert.NotNull(jsonParameters); + + AssertJsonEquals(expectedInput, jsonParameters); + + return new() { Content = new StringContent(sentJsonOutput, Encoding.UTF8, "application/json") }; + } + + private static string RemoveSpecialCharacters(string input) + { + return Regex.Replace(input, @"[^a-zA-Z0-9_ .,!?\r\n""=;\//\[\]-]", ""); + } + + private static JsonElement ParseContentToJsonElement(string content) + { + // Try parsing as a number + if (int.TryParse(content, out int intValue)) + { + return JsonSerializer.SerializeToElement(intValue); + } + + if (double.TryParse(content, out double doubleValue)) + { + return JsonSerializer.SerializeToElement(doubleValue); + } + + // Try parsing as a boolean + if (bool.TryParse(content, out bool boolValue)) + { + return JsonSerializer.SerializeToElement(boolValue); + } + + // Default to string + return JsonSerializer.SerializeToElement(content); + } + + private static string ExtractNameFromHeaders(string headers) + { + const string NamePrefix = "name="; + int start = headers.IndexOf(NamePrefix) + NamePrefix.Length; + int end = headers.IndexOf(";", start); + + if (end == -1) + { + end = headers.Length; + } + + return headers.Substring(start, end - start).Trim('"'); + } + + public static string? RemoveWhiteSpace(string? text) => + text is null ? null : + Regex.Replace(text, @"\s*", string.Empty); + + private static Dictionary? 
GetCharacterFrequencies(string text) + => RemoveWhiteSpace(text)?.GroupBy(c => c) + .ToDictionary(g => g.Key, g => g.Count()); + + private static void AssertJsonEquals(string expected, string actual) + { + var expectedFrequencies = GetCharacterFrequencies(expected); + var actualFrequencies = GetCharacterFrequencies(actual); + + Assert.NotNull(expectedFrequencies); + Assert.NotNull(actualFrequencies); + + foreach (var kvp in expectedFrequencies) + { + if (!actualFrequencies.ContainsKey(kvp.Key) || kvp.Value != actualFrequencies[kvp.Key]) + { + Assert.Fail($"Expected: {expected}, Actual: {actual}"); + } + + // Ensure the frequencies are equal during the test + Assert.Equal(kvp.Value, actualFrequencies[kvp.Key]); + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs index 4b7252965f0..2a20b121ab0 100644 --- a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/IntegrationTestHelpers.cs @@ -16,6 +16,7 @@ internal static class IntegrationTestHelpers public static OpenAIClient? GetOpenAIClient() { var configuration = TestRunnerConfiguration.Instance; + string? 
apiKey = configuration["OpenAI:Key"]; if (apiKey is not null) diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj index 66412bfeace..5626f4f207e 100644 --- a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/Microsoft.Extensions.AI.OpenAI.Tests.csproj @@ -2,7 +2,7 @@ Microsoft.Extensions.AI Unit tests for Microsoft.Extensions.AI.OpenAI - $(NoWarn);OPENAI002 + $(NoWarn);OPENAI002;MEAI001 diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAISpeechToTextClientIntegrationTests.cs b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAISpeechToTextClientIntegrationTests.cs new file mode 100644 index 00000000000..c80b37c865e --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAISpeechToTextClientIntegrationTests.cs @@ -0,0 +1,12 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace Microsoft.Extensions.AI; + +public class OpenAISpeechToTextClientIntegrationTests : SpeechToTextClientIntegrationTests +{ + protected override ISpeechToTextClient? CreateClient() + => IntegrationTestHelpers.GetOpenAIClient()? + .GetAudioClient(TestRunnerConfiguration.Instance["OpenAI:AudioTranscriptionModel"] ?? "whisper-1") + .AsISpeechToTextClient(); +} diff --git a/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAISpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAISpeechToTextClientTests.cs new file mode 100644 index 00000000000..4587c3a5524 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.OpenAI.Tests/OpenAISpeechToTextClientTests.cs @@ -0,0 +1,284 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.ClientModel; +using System.ClientModel.Primitives; +using System.IO; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Azure.AI.OpenAI; +using Microsoft.Extensions.Logging; +using OpenAI; +using OpenAI.Audio; +using Xunit; + +#pragma warning disable S103 // Lines should not be too long + +namespace Microsoft.Extensions.AI; + +public class OpenAISpeechToTextClientTests +{ + [Fact] + public void AsISpeechToTextClient_InvalidArgs_Throws() + { + Assert.Throws("audioClient", () => ((AudioClient)null!).AsISpeechToTextClient()); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public void AsISpeechToTextClient_AudioClient_ProducesExpectedMetadata(bool useAzureOpenAI) + { + Uri endpoint = new("http://localhost/some/endpoint"); + string model = "amazingModel"; + + var client = useAzureOpenAI ? + new AzureOpenAIClient(endpoint, new ApiKeyCredential("key")) : + new OpenAIClient(new ApiKeyCredential("key"), new OpenAIClientOptions { Endpoint = endpoint }); + + ISpeechToTextClient speechToTextClient = client.GetAudioClient(model).AsISpeechToTextClient(); + var metadata = speechToTextClient.GetService(); + Assert.Equal("openai", metadata?.ProviderName); + Assert.Equal(endpoint, metadata?.ProviderUri); + Assert.Equal(model, metadata?.DefaultModelId); + } + + [Fact] + public void GetService_AudioClient_SuccessfullyReturnsUnderlyingClient() + { + AudioClient audioClient = new OpenAIClient(new ApiKeyCredential("key")).GetAudioClient("model"); + ISpeechToTextClient speechToTextClient = audioClient.AsISpeechToTextClient(); + Assert.Same(speechToTextClient, speechToTextClient.GetService()); + Assert.Same(audioClient, speechToTextClient.GetService()); + using var factory = LoggerFactory.Create(b => b.AddFakeLogging()); + using ISpeechToTextClient pipeline = speechToTextClient + .AsBuilder() + .UseLogging(factory) + .Build(); + + 
Assert.NotNull(pipeline.GetService()); + + Assert.Same(audioClient, pipeline.GetService()); + Assert.IsType(pipeline.GetService()); + } + + [Theory] + [InlineData("pt", null)] + [InlineData("en", null)] + [InlineData("en", "en")] + [InlineData("pt", "pt")] + public async Task GetTextAsync_BasicRequestResponse(string? speechLanguage, string? textLanguage) + { + string input = $$""" + { + "model": "whisper-1", + "language": "{{speechLanguage}}" + } + """; + + const string Output = """ + { + "text":"I finally got back to the gym the other day." + } + """; + + using VerbatimMultiPartHttpHandler handler = new(input, Output) { ExpectedRequestUriContains = "audio/transcriptions" }; + using HttpClient httpClient = new(handler); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var audioSpeechStream = GetAudioStream(); + var response = await client.GetTextAsync(audioSpeechStream, new SpeechToTextOptions + { + SpeechLanguage = speechLanguage, + TextLanguage = textLanguage + }); + + Assert.NotNull(response); + + Assert.Contains("I finally got back to the gym the other day", response.Text); + + Assert.NotNull(response.RawRepresentation); + Assert.IsType(response.RawRepresentation); + } + + [Fact] + public async Task GetTextAsync_Cancelled_Throws() + { + using HttpClient httpClient = new(); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var fileStream = GetAudioStream(); + using var cancellationTokenSource = new CancellationTokenSource(); + cancellationTokenSource.Cancel(); + + await Assert.ThrowsAsync(() + => client.GetTextAsync(fileStream, cancellationToken: cancellationTokenSource.Token)); + } + + [Fact] + public async Task GetStreamingTextAsync_Cancelled_Throws() + { + using HttpClient httpClient = new(); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var fileStream = GetAudioStream(); + using var cancellationTokenSource = new 
CancellationTokenSource(); + cancellationTokenSource.Cancel(); + + await Assert.ThrowsAsync(() + => client + .GetStreamingTextAsync(fileStream, cancellationToken: cancellationTokenSource.Token) + .GetAsyncEnumerator() + .MoveNextAsync() + .AsTask()); + } + + [Theory] + [InlineData("pt", null)] + [InlineData("en", null)] + [InlineData("en", "en")] + [InlineData("pt", "pt")] + public async Task GetStreamingTextAsync_BasicRequestResponse(string? speechLanguage, string? textLanguage) + { + // There's no support for streaming audio in the OpenAI API, + // so we're just testing the client's ability to handle streaming responses. + + string input = $$""" + { + "model": "whisper-1", + "language": "{{speechLanguage}}" + } + """; + + const string Output = """ + { + "text":"I finally got back to the gym the other day." + } + """; + + using VerbatimMultiPartHttpHandler handler = new(input, Output) { ExpectedRequestUriContains = "audio/transcriptions" }; + using HttpClient httpClient = new(handler); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var audioSpeechStream = GetAudioStream(); + await foreach (var update in client.GetStreamingTextAsync(audioSpeechStream, new SpeechToTextOptions + { + SpeechLanguage = speechLanguage, + TextLanguage = textLanguage + })) + { + Assert.Contains("I finally got back to the gym the other day", update.Text); + Assert.NotNull(update.RawRepresentation); + Assert.IsType(update.RawRepresentation); + } + } + + [Fact] + public async Task GetStreamingTextAsync_BasicTranslateRequestResponse() + { + string textLanguage = "en"; + + // There's no support for non english translations, so no language is passed to the API. + const string Input = $$""" + { + "model": "whisper-1" + } + """; + + const string Output = """ + { + "text":"I finally got back to the gym the other day." 
+ } + """; + + using VerbatimMultiPartHttpHandler handler = new(Input, Output) { ExpectedRequestUriContains = "audio/translations" }; + using HttpClient httpClient = new(handler); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var audioSpeechStream = GetAudioStream(); + await foreach (var update in client.GetStreamingTextAsync(audioSpeechStream, new SpeechToTextOptions + { + SpeechLanguage = "pt", + TextLanguage = textLanguage + })) + { + Assert.Contains("I finally got back to the gym the other day", update.Text); + Assert.NotNull(update.RawRepresentation); + Assert.IsType(update.RawRepresentation); + } + } + + [Fact] + public async Task GetTextAsync_NonStronglyTypedOptions_AllSent() + { + const string Input = """ + { + "model": "whisper-1", + "prompt":"Hide any bad words with ", + "temperature": 0.5, + "response_format": "vtt", + "timestamp_granularities[]": ["word","segment"] + } + """; + + const string Output = """ + { + "text":"I finally got back to the gym the other day." + } + """; + + using VerbatimMultiPartHttpHandler handler = new(Input, Output); + using HttpClient httpClient = new(handler); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var audioSpeechStream = GetAudioStream(); + Assert.NotNull(await client.GetTextAsync(audioSpeechStream, new() + { + AdditionalProperties = new() + { + ["Prompt"] = "Hide any bad words with ", + ["SpeechLanguage"] = "pt", + ["Temperature"] = 0.5f, + ["TimestampGranularities"] = AudioTimestampGranularities.Segment | AudioTimestampGranularities.Word, + ["ResponseFormat"] = AudioTranscriptionFormat.Vtt, + }, + })); + } + + [Fact] + public async Task GetTextAsync_StronglyTypedOptions_AllSent() + { + const string Input = """ + { + "model": "whisper-1", + "language": "pt" + } + """; + + const string Output = """ + { + "text":"I finally got back to the gym the other day." 
+ } + """; + + using VerbatimMultiPartHttpHandler handler = new(Input, Output); + using HttpClient httpClient = new(handler); + using ISpeechToTextClient client = CreateSpeechToTextClient(httpClient, "whisper-1"); + + using var audioSpeechStream = GetAudioStream(); + Assert.NotNull(await client.GetTextAsync(audioSpeechStream, new() + { + SpeechLanguage = "pt", + })); + } + + private static Stream GetAudioStream() + => new MemoryStream([0x01, 0x02]); + + private static ISpeechToTextClient CreateSpeechToTextClient(HttpClient httpClient, string modelId) => + new OpenAIClient(new ApiKeyCredential("apikey"), new OpenAIClientOptions { Transport = new HttpClientPipelineTransport(httpClient) }) + .GetAudioClient(modelId) + .AsISpeechToTextClient(); +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/Microsoft.Extensions.AI.Tests.csproj b/test/Libraries/Microsoft.Extensions.AI.Tests/Microsoft.Extensions.AI.Tests.csproj index 32589c430e0..9b8967a37ce 100644 --- a/test/Libraries/Microsoft.Extensions.AI.Tests/Microsoft.Extensions.AI.Tests.csproj +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/Microsoft.Extensions.AI.Tests.csproj @@ -6,6 +6,7 @@ $(NoWarn);CA1063;CA1861;SA1130;VSTHRD003 + $(NoWarn);MEAI001 true @@ -16,6 +17,7 @@ + diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/ConfigureOptionsSpeechToTextClientTests.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/ConfigureOptionsSpeechToTextClientTests.cs new file mode 100644 index 00000000000..6140b7ed354 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/ConfigureOptionsSpeechToTextClientTests.cs @@ -0,0 +1,101 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Threading; +using System.Threading.Tasks; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class ConfigureOptionsSpeechToTextClientTests +{ + [Fact] + public void ConfigureOptionsSpeechToTextClient_InvalidArgs_Throws() + { + Assert.Throws("innerClient", () => new ConfigureOptionsSpeechToTextClient(null!, _ => { })); + Assert.Throws("configure", () => new ConfigureOptionsSpeechToTextClient(new TestSpeechToTextClient(), null!)); + } + + [Fact] + public void ConfigureOptions_InvalidArgs_Throws() + { + using var innerClient = new TestSpeechToTextClient(); + var builder = innerClient.AsBuilder(); + Assert.Throws("configure", () => builder.ConfigureOptions(null!)); + } + + [Theory] + [InlineData(false)] + [InlineData(true)] + public async Task ConfigureOptions_ReturnedInstancePassedToNextClient(bool nullProvidedOptions) + { + SpeechToTextOptions? providedOptions = nullProvidedOptions ? null : new() { ModelId = "test" }; + SpeechToTextOptions? 
returnedOptions = null; + SpeechToTextResponse expectedResponse = new([]); + var expectedUpdates = Enumerable.Range(0, 3).Select(i => new SpeechToTextResponseUpdate()).ToArray(); + using CancellationTokenSource cts = new(); + + using ISpeechToTextClient innerClient = new TestSpeechToTextClient + { + GetTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + Assert.Same(returnedOptions, options); + Assert.Equal(cts.Token, cancellationToken); + return Task.FromResult(expectedResponse); + }, + + GetStreamingTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + Assert.Same(returnedOptions, options); + Assert.Equal(cts.Token, cancellationToken); + return YieldUpdates(expectedUpdates); + }, + }; + + using var client = innerClient + .AsBuilder() + .ConfigureOptions(options => + { + Assert.NotSame(providedOptions, options); + if (nullProvidedOptions) + { + Assert.Null(options.ModelId); + } + else + { + Assert.Equal(providedOptions!.ModelId, options.ModelId); + } + + returnedOptions = options; + }) + .Build(); + + using var audioSpeechStream = new MemoryStream(new byte[] { 1, 2, 3, 4 }); + var response = await client.GetTextAsync(audioSpeechStream, providedOptions, cts.Token); + Assert.Same(expectedResponse, response); + + int i = 0; + using var audioSpeechStream2 = new MemoryStream(new byte[] { 1, 2, 3, 4 }); + await using var e = client.GetStreamingTextAsync(audioSpeechStream2, providedOptions, cts.Token).GetAsyncEnumerator(); + while (i < expectedUpdates.Length) + { + Assert.True(await e.MoveNextAsync()); + Assert.Same(expectedUpdates[i++], e.Current); + } + + Assert.False(await e.MoveNextAsync()); + + static async IAsyncEnumerable YieldUpdates(SpeechToTextResponseUpdate[] updates) + { + foreach (var update in updates) + { + await Task.Yield(); + yield return update; + } + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/LoggingSpeechToTextClientTests.cs 
b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/LoggingSpeechToTextClientTests.cs new file mode 100644 index 00000000000..79c09dd5c6f --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/LoggingSpeechToTextClientTests.cs @@ -0,0 +1,150 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading.Tasks; +using Microsoft.Extensions.AI; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Logging.Abstractions; +using Microsoft.Extensions.Logging.Testing; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class LoggingSpeechToTextClientTests +{ + [Fact] + public void LoggingSpeechToTextClient_InvalidArgs_Throws() + { + Assert.Throws("innerClient", () => new LoggingSpeechToTextClient(null!, NullLogger.Instance)); + Assert.Throws("logger", () => new LoggingSpeechToTextClient(new TestSpeechToTextClient(), null!)); + } + + [Fact] + public void UseLogging_AvoidsInjectingNopClient() + { + using var innerClient = new TestSpeechToTextClient(); + + Assert.Null(innerClient.AsBuilder().UseLogging(NullLoggerFactory.Instance).Build().GetService(typeof(LoggingSpeechToTextClient))); + Assert.Same(innerClient, innerClient.AsBuilder().UseLogging(NullLoggerFactory.Instance).Build().GetService(typeof(ISpeechToTextClient))); + + using var factory = LoggerFactory.Create(b => b.AddFakeLogging()); + Assert.NotNull(innerClient.AsBuilder().UseLogging(factory).Build().GetService(typeof(LoggingSpeechToTextClient))); + + ServiceCollection c = new(); + c.AddFakeLogging(); + var services = c.BuildServiceProvider(); + Assert.NotNull(innerClient.AsBuilder().UseLogging().Build(services).GetService(typeof(LoggingSpeechToTextClient))); + 
Assert.NotNull(innerClient.AsBuilder().UseLogging(null).Build(services).GetService(typeof(LoggingSpeechToTextClient))); + Assert.Null(innerClient.AsBuilder().UseLogging(NullLoggerFactory.Instance).Build(services).GetService(typeof(LoggingSpeechToTextClient))); + } + + [Theory] + [InlineData(LogLevel.Trace)] + [InlineData(LogLevel.Debug)] + [InlineData(LogLevel.Information)] + public async Task GetTextAsync_LogsResponseInvocationAndCompletion(LogLevel level) + { + var collector = new FakeLogCollector(); + + ServiceCollection c = new(); + c.AddLogging(b => b.AddProvider(new FakeLoggerProvider(collector)).SetMinimumLevel(level)); + var services = c.BuildServiceProvider(); + + using ISpeechToTextClient innerClient = new TestSpeechToTextClient + { + GetTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => + { + return Task.FromResult(new SpeechToTextResponse("blue whale")); + }, + }; + + using ISpeechToTextClient client = innerClient + .AsBuilder() + .UseLogging() + .Build(services); + + using var audioSpeechStream = new MemoryStream(new byte[] { 1, 2, 3, 4 }); + await client.GetTextAsync( + audioSpeechStream, + new SpeechToTextOptions { SpeechLanguage = "pt" }); + + var logs = collector.GetSnapshot(); + if (level is LogLevel.Trace) + { + Assert.Collection(logs, + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetTextAsync)} invoked:") && entry.Message.Contains("\"speechLanguage\": \"pt\"")), + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetTextAsync)} completed:") && entry.Message.Contains("blue whale"))); + } + else if (level is LogLevel.Debug) + { + Assert.Collection(logs, + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetTextAsync)} invoked.") && !entry.Message.Contains("\"speechLanguage\": \"pt\"")), + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetTextAsync)} completed.") && !entry.Message.Contains("blue whale"))); + } + else + { + 
Assert.Empty(logs); + } + } + + [Theory] + [InlineData(LogLevel.Trace)] + [InlineData(LogLevel.Debug)] + [InlineData(LogLevel.Information)] + public async Task GetStreamingTextAsync_LogsUpdateReceived(LogLevel level) + { + var collector = new FakeLogCollector(); + using ILoggerFactory loggerFactory = LoggerFactory.Create(b => b.AddProvider(new FakeLoggerProvider(collector)).SetMinimumLevel(level)); + + using ISpeechToTextClient innerClient = new TestSpeechToTextClient + { + GetStreamingTextAsyncCallback = (audioSpeechStream, options, cancellationToken) => GetUpdatesAsync() + }; + + static async IAsyncEnumerable GetUpdatesAsync() + { + await Task.Yield(); + yield return new SpeechToTextResponseUpdate("blue "); + yield return new SpeechToTextResponseUpdate("whale"); + } + + using ISpeechToTextClient client = innerClient + .AsBuilder() + .UseLogging(loggerFactory) + .Build(); + + using var audioSpeechStream = new MemoryStream(new byte[] { 1, 2, 3, 4 }); + await foreach (var update in client.GetStreamingTextAsync( + audioSpeechStream, + new SpeechToTextOptions { SpeechLanguage = "pt" })) + { + // nop + } + + var logs = collector.GetSnapshot(); + if (level is LogLevel.Trace) + { + Assert.Collection(logs, + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} invoked:") && entry.Message.Contains("\"speechLanguage\": \"pt\"")), + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} received update:") && entry.Message.Contains("blue")), + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} received update:") && entry.Message.Contains("whale")), + entry => Assert.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} completed.", entry.Message)); + } + else if (level is LogLevel.Debug) + { + Assert.Collection(logs, + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} invoked.") && 
!entry.Message.Contains("speechLanguage")), + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} received update.") && !entry.Message.Contains("blue")), + entry => Assert.True(entry.Message.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} received update.") && !entry.Message.Contains("whale")), + entry => Assert.Contains($"{nameof(ISpeechToTextClient.GetStreamingTextAsync)} completed.", entry.Message)); + } + else + { + Assert.Empty(logs); + } + } +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/SingletonSpeechToTextClientExtensions.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/SingletonSpeechToTextClientExtensions.cs new file mode 100644 index 00000000000..5fc038f8147 --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/SingletonSpeechToTextClientExtensions.cs @@ -0,0 +1,11 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +namespace Microsoft.Extensions.AI; + +public static class SingletonSpeechToTextClientExtensions +{ + public static SpeechToTextClientBuilder UseSingletonMiddleware(this SpeechToTextClientBuilder builder) + => builder.Use((inner, services) + => new SpeechToTextClientDependencyInjectionPatterns.SingletonMiddleware(inner, services)); +} diff --git a/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/SpeechToTextClientDependencyInjectionPatterns.cs b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/SpeechToTextClientDependencyInjectionPatterns.cs new file mode 100644 index 00000000000..07596a1bb6f --- /dev/null +++ b/test/Libraries/Microsoft.Extensions.AI.Tests/SpeechToText/SpeechToTextClientDependencyInjectionPatterns.cs @@ -0,0 +1,162 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using Microsoft.Extensions.DependencyInjection; +using Xunit; + +namespace Microsoft.Extensions.AI; + +public class SpeechToTextClientDependencyInjectionPatterns +{ + private IServiceCollection ServiceCollection { get; } = new ServiceCollection(); + + [Fact] + public void CanRegisterSingletonUsingFactory() + { + // Arrange/Act + ServiceCollection.AddSpeechToTextClient(services => new TestSpeechToTextClient { Services = services }) + .UseSingletonMiddleware(); + + // Assert + var services = ServiceCollection.BuildServiceProvider(); + using var scope1 = services.CreateScope(); + using var scope2 = services.CreateScope(); + + var instance1 = scope1.ServiceProvider.GetRequiredService(); + var instance1Copy = scope1.ServiceProvider.GetRequiredService(); + var instance2 = scope2.ServiceProvider.GetRequiredService(); + + // Each scope gets the same instance, because it's singleton + var instance = Assert.IsType(instance1); + Assert.Same(instance, instance1Copy); + Assert.Same(instance, instance2); + Assert.IsType(instance.InnerClient); + } + + [Fact] + public void CanRegisterSingletonUsingSharedInstance() + { + // Arrange/Act + using var singleton = new TestSpeechToTextClient(); + ServiceCollection.AddSpeechToTextClient(singleton) + .UseSingletonMiddleware(); + + // Assert + var services = ServiceCollection.BuildServiceProvider(); + using var scope1 = services.CreateScope(); + using var scope2 = services.CreateScope(); + + var instance1 = scope1.ServiceProvider.GetRequiredService(); + var instance1Copy = scope1.ServiceProvider.GetRequiredService(); + var instance2 = scope2.ServiceProvider.GetRequiredService(); + + // Each scope gets the same instance, because it's singleton + var instance = Assert.IsType(instance1); + Assert.Same(instance, instance1Copy); + Assert.Same(instance, instance2); + Assert.IsType(instance.InnerClient); + } + + [Fact] + public void CanRegisterKeyedSingletonUsingFactory() + { + // Arrange/Act + 
ServiceCollection.AddKeyedSpeechToTextClient("mykey", services => new TestSpeechToTextClient { Services = services }) + .UseSingletonMiddleware(); + + // Assert + var services = ServiceCollection.BuildServiceProvider(); + using var scope1 = services.CreateScope(); + using var scope2 = services.CreateScope(); + + Assert.Null(services.GetService()); + + var instance1 = scope1.ServiceProvider.GetRequiredKeyedService("mykey"); + var instance1Copy = scope1.ServiceProvider.GetRequiredKeyedService("mykey"); + var instance2 = scope2.ServiceProvider.GetRequiredKeyedService("mykey"); + + // Each scope gets the same instance, because it's singleton + var instance = Assert.IsType(instance1); + Assert.Same(instance, instance1Copy); + Assert.Same(instance, instance2); + Assert.IsType(instance.InnerClient); + } + + [Fact] + public void CanRegisterKeyedSingletonUsingSharedInstance() + { + // Arrange/Act + using var singleton = new TestSpeechToTextClient(); + ServiceCollection.AddKeyedSpeechToTextClient("mykey", singleton) + .UseSingletonMiddleware(); + + // Assert + var services = ServiceCollection.BuildServiceProvider(); + using var scope1 = services.CreateScope(); + using var scope2 = services.CreateScope(); + + Assert.Null(services.GetService()); + + var instance1 = scope1.ServiceProvider.GetRequiredKeyedService("mykey"); + var instance1Copy = scope1.ServiceProvider.GetRequiredKeyedService("mykey"); + var instance2 = scope2.ServiceProvider.GetRequiredKeyedService("mykey"); + + // Each scope gets the same instance, because it's singleton + var instance = Assert.IsType(instance1); + Assert.Same(instance, instance1Copy); + Assert.Same(instance, instance2); + Assert.IsType(instance.InnerClient); + } + + [Theory] + [InlineData(null)] + [InlineData(ServiceLifetime.Singleton)] + [InlineData(ServiceLifetime.Scoped)] + [InlineData(ServiceLifetime.Transient)] + public void AddSpeechToTextClient_RegistersExpectedLifetime(ServiceLifetime? 
lifetime) + { + ServiceCollection sc = new(); + ServiceLifetime expectedLifetime = lifetime ?? ServiceLifetime.Singleton; + SpeechToTextClientBuilder builder = lifetime.HasValue + ? sc.AddSpeechToTextClient(services => new TestSpeechToTextClient(), lifetime.Value) + : sc.AddSpeechToTextClient(services => new TestSpeechToTextClient()); + + ServiceDescriptor sd = Assert.Single(sc); + Assert.Equal(typeof(ISpeechToTextClient), sd.ServiceType); + Assert.False(sd.IsKeyedService); + Assert.Null(sd.ImplementationInstance); + Assert.NotNull(sd.ImplementationFactory); + Assert.IsType(sd.ImplementationFactory(null!)); + Assert.Equal(expectedLifetime, sd.Lifetime); + } + + [Theory] + [InlineData(null)] + [InlineData(ServiceLifetime.Singleton)] + [InlineData(ServiceLifetime.Scoped)] + [InlineData(ServiceLifetime.Transient)] + public void AddKeyedSpeechToTextClient_RegistersExpectedLifetime(ServiceLifetime? lifetime) + { + ServiceCollection sc = new(); + ServiceLifetime expectedLifetime = lifetime ?? ServiceLifetime.Singleton; + SpeechToTextClientBuilder builder = lifetime.HasValue + ? sc.AddKeyedSpeechToTextClient("key", services => new TestSpeechToTextClient(), lifetime.Value) + : sc.AddKeyedSpeechToTextClient("key", services => new TestSpeechToTextClient()); + + ServiceDescriptor sd = Assert.Single(sc); + Assert.Equal(typeof(ISpeechToTextClient), sd.ServiceType); + Assert.True(sd.IsKeyedService); + Assert.Equal("key", sd.ServiceKey); + Assert.Null(sd.KeyedImplementationInstance); + Assert.NotNull(sd.KeyedImplementationFactory); + Assert.IsType(sd.KeyedImplementationFactory(null!, null!)); + Assert.Equal(expectedLifetime, sd.Lifetime); + } + + public class SingletonMiddleware(ISpeechToTextClient inner, IServiceProvider services) : DelegatingSpeechToTextClient(inner) + { + public new ISpeechToTextClient InnerClient => base.InnerClient; + public IServiceProvider Services => services; + } +}