diff --git a/dotnet/Directory.Packages.props b/dotnet/Directory.Packages.props index 481e23b8083f..97f22b43616d 100644 --- a/dotnet/Directory.Packages.props +++ b/dotnet/Directory.Packages.props @@ -5,6 +5,7 @@ true + diff --git a/dotnet/SK-dotnet.sln b/dotnet/SK-dotnet.sln index 7887f2f92605..b52ad142288e 100644 --- a/dotnet/SK-dotnet.sln +++ b/dotnet/SK-dotnet.sln @@ -311,6 +311,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.Memory.SqlServer EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CodeInterpreterPlugin", "samples\Demos\CodeInterpreterPlugin\CodeInterpreterPlugin.csproj", "{3ED53702-0E53-473A-A0F4-645DB33541C2}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.AssemblyAI", "src\Connectors\Connectors.AssemblyAI\Connectors.AssemblyAI.csproj", "{3560310D-8E51-42EA-BC8F-D73F1EF52318}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Connectors.AssemblyAI.UnitTests", "src\Connectors\Connectors.AssemblyAI.UnitTests\Connectors.AssemblyAI.UnitTests.csproj", "{CF31162C-DAA8-497A-9088-0FCECE46439B}" +EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "QualityCheckWithFilters", "samples\Demos\QualityCheck\QualityCheckWithFilters\QualityCheckWithFilters.csproj", "{1D3EEB5B-0E06-4700-80D5-164956E43D0A}" EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TimePlugin", "samples\Demos\TimePlugin\TimePlugin.csproj", "{F312FCE1-12D7-4DEF-BC29-2FF6618509F3}" @@ -695,6 +699,18 @@ Global {1F96837A-61EC-4C8F-904A-07BEBD05FDEE}.Publish|Any CPU.Build.0 = Debug|Any CPU {1F96837A-61EC-4C8F-904A-07BEBD05FDEE}.Release|Any CPU.ActiveCfg = Release|Any CPU {1F96837A-61EC-4C8F-904A-07BEBD05FDEE}.Release|Any CPU.Build.0 = Release|Any CPU + {3560310D-8E51-42EA-BC8F-D73F1EF52318}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3560310D-8E51-42EA-BC8F-D73F1EF52318}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3560310D-8E51-42EA-BC8F-D73F1EF52318}.Publish|Any CPU.ActiveCfg = Publish|Any CPU + 
{3560310D-8E51-42EA-BC8F-D73F1EF52318}.Publish|Any CPU.Build.0 = Publish|Any CPU + {3560310D-8E51-42EA-BC8F-D73F1EF52318}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3560310D-8E51-42EA-BC8F-D73F1EF52318}.Release|Any CPU.Build.0 = Release|Any CPU + {CF31162C-DAA8-497A-9088-0FCECE46439B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {CF31162C-DAA8-497A-9088-0FCECE46439B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {CF31162C-DAA8-497A-9088-0FCECE46439B}.Publish|Any CPU.ActiveCfg = Debug|Any CPU + {CF31162C-DAA8-497A-9088-0FCECE46439B}.Publish|Any CPU.Build.0 = Debug|Any CPU + {CF31162C-DAA8-497A-9088-0FCECE46439B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {CF31162C-DAA8-497A-9088-0FCECE46439B}.Release|Any CPU.Build.0 = Release|Any CPU {14461919-E88D-49A9-BE8C-DF704CB79122}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {14461919-E88D-49A9-BE8C-DF704CB79122}.Debug|Any CPU.Build.0 = Debug|Any CPU {14461919-E88D-49A9-BE8C-DF704CB79122}.Publish|Any CPU.ActiveCfg = Publish|Any CPU @@ -1206,6 +1222,8 @@ Global {607DD6FA-FA0D-45E6-80BA-22A373609E89} = {5C246969-D794-4EC3-8E8F-F90D4D166420} {BCDD5B96-CCC3-46B9-8217-89CD5885F6A2} = {5A7028A7-4DDF-4E4F-84A9-37CE8F8D7E89} {1F96837A-61EC-4C8F-904A-07BEBD05FDEE} = {1B4CBDE0-10C2-4E7D-9CD0-FE7586C96ED1} + {3560310D-8E51-42EA-BC8F-D73F1EF52318} = {1B4CBDE0-10C2-4E7D-9CD0-FE7586C96ED1} + {CF31162C-DAA8-497A-9088-0FCECE46439B} = {1B4CBDE0-10C2-4E7D-9CD0-FE7586C96ED1} {14461919-E88D-49A9-BE8C-DF704CB79122} = {1B4CBDE0-10C2-4E7D-9CD0-FE7586C96ED1} {47DB70C3-A659-49EE-BD0F-BF5F0E0ECE05} = {1B4CBDE0-10C2-4E7D-9CD0-FE7586C96ED1} {6578D31B-2CF3-4FF4-A845-7A0412FEB42E} = {1B4CBDE0-10C2-4E7D-9CD0-FE7586C96ED1} diff --git a/dotnet/SK-dotnet.sln.DotSettings b/dotnet/SK-dotnet.sln.DotSettings index d8964e230315..d7a9c6622b01 100644 --- a/dotnet/SK-dotnet.sln.DotSettings +++ b/dotnet/SK-dotnet.sln.DotSettings @@ -162,7 +162,7 @@ False TRACE 8201 - Automatic + True True False diff --git a/dotnet/docs/EXPERIMENTS.md b/dotnet/docs/EXPERIMENTS.md index 
f03f831e5847..900c7b9e2052 100644 --- a/dotnet/docs/EXPERIMENTS.md +++ b/dotnet/docs/EXPERIMENTS.md @@ -12,7 +12,7 @@ You can use the following diagnostic IDs to ignore warnings or errors for a part ## Experimental Feature Codes -| SKEXP​ | Experimental Features Category​​ | +| SKEXP | Experimental Features Category | |-------|--------------------------------| | SKEXP0001 | Semantic Kernel core features | | SKEXP0010 | OpenAI and Azure OpenAI services | @@ -28,21 +28,21 @@ You can use the following diagnostic IDs to ignore warnings or errors for a part ## Experimental Features Tracking -| SKEXP​ | Features​​ | -|-------|----------| -| SKEXP0001 | Embedding services | -| SKEXP0001 | Image services | -| SKEXP0001 | Memory connectors | -| SKEXP0001 | Kernel filters | -| SKEXP0001 | Audio services | -| | | | | | | | +| SKEXP | Features | +|-------|-------------------------------------| +| SKEXP0001 | Embedding services | +| SKEXP0001 | Image services | +| SKEXP0001 | Memory connectors | +| SKEXP0001 | Kernel filters | +| SKEXP0001 | Audio services | +| | | | SKEXP0010 | Azure OpenAI with your data service | | SKEXP0010 | OpenAI embedding service | | SKEXP0010 | OpenAI image service | | SKEXP0010 | OpenAI parameters | | SKEXP0010 | OpenAI chat history extension | | SKEXP0010 | OpenAI file service | -| | | | | | | | +| | | | SKEXP0020 | Azure AI Search memory connector | | SKEXP0020 | Chroma memory connector | | SKEXP0020 | DuckDB memory connector | @@ -55,35 +55,36 @@ You can use the following diagnostic IDs to ignore warnings or errors for a part | SKEXP0020 | MongoDB memory connector | | SKEXP0020 | Pinecone memory connector | | SKEXP0020 | Postgres memory connector | -| | | | | | | | +| | | | SKEXP0040 | GRPC functions | | SKEXP0040 | Markdown functions | | SKEXP0040 | OpenAPI functions | | SKEXP0040 | OpenAPI function extensions | | SKEXP0040 | Prompty Format support | -| | | | | | | | +| | | | SKEXP0050 | Core plugins | | SKEXP0050 | Document plugins | | SKEXP0050 
| Memory plugins | | SKEXP0050 | Microsoft 365 plugins | | SKEXP0050 | Web plugins | | SKEXP0050 | Text chunker plugin | -| | | | | | | | +| | | | SKEXP0060 | Handlebars planner | | SKEXP0060 | OpenAI Stepwise planner | -| | | | | | | | -| SKEXP0070 | Ollama AI connector | | | | | | -| SKEXP0070 | Gemini AI connector | | | | | | -| SKEXP0070 | Mistral AI connector | | | | | | -| SKEXP0070 | ONNX AI connector | | | | | | -| SKEXP0070 | Hugging Face AI connector | | | | | | -| SKEXP0070 | Amazon AI connector | | | | | | -| | | | | | | | +| | | +| SKEXP0070 | Ollama AI connector | +| SKEXP0070 | Gemini AI connector | +| SKEXP0070 | Mistral AI connector | +| SKEXP0070 | ONNX AI connector | +| SKEXP0070 | Hugging Face AI connector | +| SKEXP0070 | Assembly AI connector | +| SKEXP0070 | Amazon AI connector | +| | | | SKEXP0080 | Process Framework | -| | | | | | | | +| | | | SKEXP0101 | Experiment with Assistants | | SKEXP0101 | Experiment with Flow Orchestration | -| | | | | | | | +| | | | SKEXP0110 | Agent Framework | -| | | | | | | | -| SKEXP0120 | Native-AOT | \ No newline at end of file +| | | +| SKEXP0120 | Native-AOT | diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/AssemblyAIServiceCollectionExtensionsTests.cs b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/AssemblyAIServiceCollectionExtensionsTests.cs new file mode 100644 index 000000000000..bc6e3f7562c5 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/AssemblyAIServiceCollectionExtensionsTests.cs @@ -0,0 +1,63 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Net.Http; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.AudioToText; +using Microsoft.SemanticKernel.Connectors.AssemblyAI; +using Xunit; + +namespace SemanticKernel.Connectors.AssemblyAI.UnitTests; + +/// +/// Unit tests for class. 
+/// +public sealed class AssemblyAIServiceCollectionExtensionsTests +{ + private const string ApiKey = "Test123"; + private const string Endpoint = "http://localhost:1234/"; + private const string ServiceId = "AssemblyAI"; + + [Fact] + public void AddServiceToKernelBuilder() + { + // Arrange & Act + using var httpClient = new HttpClient(); + var kernel = Kernel.CreateBuilder() + .AddAssemblyAIAudioToText( + apiKey: ApiKey, + endpoint: new Uri(Endpoint), + serviceId: ServiceId, + httpClient: httpClient + ) + .Build(); + + // Assert + var service = kernel.GetRequiredService(); + Assert.NotNull(service); + Assert.IsType(service); + + service = kernel.GetRequiredService(ServiceId); + Assert.NotNull(service); + Assert.IsType(service); + } + + [Fact] + public void AddServiceToServiceCollection() + { + // Arrange & Act + var services = new ServiceCollection(); + services.AddAssemblyAIAudioToText( + apiKey: ApiKey, + endpoint: new Uri(Endpoint), + serviceId: ServiceId + ); + using var provider = services.BuildServiceProvider(); + + // Assert + var service = provider.GetRequiredKeyedService(ServiceId); + Assert.NotNull(service); + Assert.IsType(service); + } +} diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/Connectors.AssemblyAI.UnitTests.csproj b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/Connectors.AssemblyAI.UnitTests.csproj new file mode 100644 index 000000000000..24efde4ba2ae --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/Connectors.AssemblyAI.UnitTests.csproj @@ -0,0 +1,47 @@ + + + + SemanticKernel.Connectors.AssemblyAI.UnitTests + SemanticKernel.Connectors.AssemblyAI.UnitTests + net8.0 + 12 + LatestMajor + true + enable + disable + false + SKEXP0001;SKEXP0070;CS1591 + + + + + + + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + runtime; build; native; contentfiles; analyzers; buildtransitive + all + + + + + + + + + + + + + + Always + + + Always + + + diff --git 
a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/MultipleHttpMessageHandlerStub.cs b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/MultipleHttpMessageHandlerStub.cs new file mode 100644 index 000000000000..a73ce9290854 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/MultipleHttpMessageHandlerStub.cs @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Net.Http.Headers; +using System.Threading; +using System.Threading.Tasks; + +namespace SemanticKernel.Connectors.AssemblyAI.UnitTests; + +internal sealed class MultipleHttpMessageHandlerStub : DelegatingHandler +{ + private int _callIteration = 0; + + public List RequestHeaders { get; private set; } + + public List ContentHeaders { get; private set; } + + public List RequestContents { get; private set; } + + public List RequestUris { get; private set; } + + public List Methods { get; private set; } + + public List ResponsesToReturn { get; set; } + + public MultipleHttpMessageHandlerStub() + { + this.RequestHeaders = []; + this.ContentHeaders = []; + this.RequestContents = []; + this.RequestUris = []; + this.Methods = []; + this.ResponsesToReturn = []; + } + + protected override async Task SendAsync(HttpRequestMessage request, CancellationToken cancellationToken) + { + this._callIteration++; + + this.Methods.Add(request.Method); + this.RequestUris.Add(request.RequestUri); + this.RequestHeaders.Add(request.Headers); + this.ContentHeaders.Add(request.Content?.Headers); + + var content = request.Content == null ? 
null : await request.Content.ReadAsByteArrayAsync(cancellationToken).ConfigureAwait(false); + + this.RequestContents.Add(content); + + return await Task.FromResult(this.ResponsesToReturn[this._callIteration - 1]).ConfigureAwait(false); + } +} diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/Services/AssemblyAIAudioToTextServiceTests.cs b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/Services/AssemblyAIAudioToTextServiceTests.cs new file mode 100644 index 000000000000..114ae59295b7 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/Services/AssemblyAIAudioToTextServiceTests.cs @@ -0,0 +1,158 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.IO; +using System.Net.Http; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; +using Microsoft.SemanticKernel; +using Microsoft.SemanticKernel.Connectors.AssemblyAI; +using Xunit; + +namespace SemanticKernel.Connectors.AssemblyAI.UnitTests; + +/// +/// Unit tests for class. 
+/// +public sealed class AssemblyAIAudioToTextServiceTests : IDisposable +{ + private const string ExpectedTranscriptText = "Test audio-to-text response"; + private static string UploadFileResponseContent { get; set; } + private static string TranscriptGuid { get; set; } + private static string CreateTranscriptResponseContent { get; set; } + private static string TranscriptCompletedResponseContent { get; set; } + + private readonly MultipleHttpMessageHandlerStub _messageHandlerStub; + private readonly HttpClient _httpClient; + + static AssemblyAIAudioToTextServiceTests() + { + UploadFileResponseContent = File.ReadAllText("./TestData/upload_file_response.json"); + CreateTranscriptResponseContent = File.ReadAllText("./TestData/create_transcript_response.json"); + TranscriptCompletedResponseContent = File.ReadAllText("./TestData/transcript_completed_response.json"); + var json = JsonSerializer.Deserialize(CreateTranscriptResponseContent); + TranscriptGuid = json.GetProperty("id").GetString()!; + } + + public AssemblyAIAudioToTextServiceTests() + { + this._messageHandlerStub = new MultipleHttpMessageHandlerStub(); + this._httpClient = new HttpClient(this._messageHandlerStub, false); + } + + [Fact] + public void ConstructorWithHttpClientWorksCorrectly() + { + // Arrange & Act + var service = new AssemblyAIAudioToTextService("api-key", httpClient: this._httpClient); + + // Assert + Assert.NotNull(service); + } + + [Fact] + public async Task GetTextContentByDefaultWorksCorrectlyAsync() + { + // Arrange + var service = new AssemblyAIAudioToTextService("api-key", httpClient: this._httpClient); + using var uploadFileResponse = new HttpResponseMessage(System.Net.HttpStatusCode.OK); + uploadFileResponse.Content = new StringContent(UploadFileResponseContent); + using var transcribeResponse = new HttpResponseMessage(System.Net.HttpStatusCode.OK); + transcribeResponse.Content = new StringContent(CreateTranscriptResponseContent); + using var transcribedResponse = new 
HttpResponseMessage(System.Net.HttpStatusCode.OK); + transcribedResponse.Content = new StringContent(TranscriptCompletedResponseContent); + this._messageHandlerStub.ResponsesToReturn = + [ + uploadFileResponse, + transcribeResponse, + transcribedResponse + ]; + + // Act + var result = await service.GetTextContentsAsync( + new AudioContent(new BinaryData("data").ToMemory(), null) + ).ConfigureAwait(true); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + Assert.Equal(ExpectedTranscriptText, result[0].Text); + } + + [Fact] + public async Task GetTextContentByUrlWorksCorrectlyAsync() + { + // Arrange + var service = new AssemblyAIAudioToTextService("api-key", httpClient: this._httpClient); + using var transcribeResponse = new HttpResponseMessage(System.Net.HttpStatusCode.OK); + transcribeResponse.Content = new StringContent(CreateTranscriptResponseContent); + using var transcribedResponse = new HttpResponseMessage(System.Net.HttpStatusCode.OK); + transcribedResponse.Content = new StringContent(TranscriptCompletedResponseContent); + this._messageHandlerStub.ResponsesToReturn = [transcribeResponse, transcribedResponse]; + + // Act + var result = await service.GetTextContentsAsync( + new AudioContent(new Uri("https://storage.googleapis.com/aai-docs-samples/nbc.mp3")) + ).ConfigureAwait(true); + + // Assert + Assert.NotNull(result); + Assert.Single(result); + Assert.Equal(ExpectedTranscriptText, result[0].Text); + } + + [Fact] + public async Task HttpErrorShouldThrowWithErrorMessageAsync() + { + // Arrange + var service = new AssemblyAIAudioToTextService("api-key", httpClient: this._httpClient); + using var uploadFileResponse = new HttpResponseMessage(System.Net.HttpStatusCode.InternalServerError); + this._messageHandlerStub.ResponsesToReturn = + [ + uploadFileResponse + ]; + + // Act & Assert + await Assert.ThrowsAsync( + async () => await service.GetTextContentsAsync( + new AudioContent(new BinaryData("data").ToMemory(), null) + ).ConfigureAwait(true) + 
).ConfigureAwait(true); + } + + [Fact] + public async Task JsonErrorShouldThrowWithErrorMessageAsync() + { + // Arrange + var service = new AssemblyAIAudioToTextService("api-key", httpClient: this._httpClient); + using var uploadFileResponse = new HttpResponseMessage(System.Net.HttpStatusCode.Unauthorized); + const string ErrorMessage = "Bad API key"; + uploadFileResponse.Content = new StringContent( + $$""" + { + "error": "{{ErrorMessage}}" + } + """, + Encoding.UTF8, + "application/json" + ); + this._messageHandlerStub.ResponsesToReturn = + [ + uploadFileResponse + ]; + + // Act & Assert + await Assert.ThrowsAsync( + async () => await service.GetTextContentsAsync( + new AudioContent(new BinaryData("data").ToMemory(), null) + ).ConfigureAwait(true) + ).ConfigureAwait(true); + } + + public void Dispose() + { + this._httpClient.Dispose(); + this._messageHandlerStub.Dispose(); + } +} diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/create_transcript_response.json b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/create_transcript_response.json new file mode 100644 index 000000000000..41a7d83fa26b --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/create_transcript_response.json @@ -0,0 +1,63 @@ +{ + "id": "ce522f10-d204-42e8-a838-6b95098145cc", + "language_model": "assemblyai_default", + "acoustic_model": "assemblyai_default", + "language_code": "en_us", + "status": "queued", + "audio_url": "http://localhost/path/to/file.mp3", + "text": null, + "words": null, + "utterances": null, + "confidence": null, + "audio_duration": null, + "punctuate": true, + "format_text": true, + "dual_channel": null, + "webhook_url": null, + "webhook_status_code": null, + "webhook_auth": false, + "webhook_auth_header_name": null, + "speed_boost": false, + "auto_highlights_result": null, + "auto_highlights": false, + "audio_start_from": null, + "audio_end_at": null, + "word_boost": [], + "boost_param": null, + 
"filter_profanity": false, + "redact_pii": false, + "redact_pii_audio": false, + "redact_pii_audio_quality": null, + "redact_pii_policies": null, + "redact_pii_sub": null, + "speaker_labels": false, + "content_safety": false, + "iab_categories": false, + "content_safety_labels": {}, + "iab_categories_result": {}, + "language_detection": false, + "language_confidence_threshold": null, + "language_confidence": null, + "custom_spelling": null, + "throttled": false, + "auto_chapters": false, + "summarization": false, + "summary_type": null, + "summary_model": null, + "custom_topics": false, + "topics": [], + "speech_threshold": null, + "speech_model": null, + "chapters": null, + "disfluencies": false, + "entity_detection": false, + "sentiment_analysis": false, + "sentiment_analysis_results": null, + "entities": null, + "speakers_expected": null, + "summary": null, + "custom_topics_results": null, + "is_deleted": null, + "multichannel": false, + "audio_channels": null +} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/transcript_completed_response.json b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/transcript_completed_response.json new file mode 100644 index 000000000000..f91de188a1d3 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/transcript_completed_response.json @@ -0,0 +1,86 @@ +{ + "id": "ce522f10-d204-42e8-a838-6b95098145cc", + "language_model": "assemblyai_default", + "acoustic_model": "assemblyai_default", + "language_code": "en_us", + "status": "completed", + "audio_url": "http://localhost/path/to/file.mp3", + "text": "Test audio-to-text response", + "words": [ + { + "start": 120, + "end": 232, + "text": "The", + "confidence": 0.99, + "speaker": null + }, + { + "start": 232, + "end": 416, + "text": "sun", + "confidence": 0.99973, + "speaker": null + } + ], + "utterances": null, + "confidence": 0.993280869565217, + "audio_duration": 6, + "punctuate": 
true, + "format_text": true, + "dual_channel": null, + "webhook_url": null, + "webhook_status_code": null, + "webhook_auth": false, + "webhook_auth_header_name": null, + "speed_boost": false, + "auto_highlights_result": null, + "auto_highlights": false, + "audio_start_from": null, + "audio_end_at": null, + "word_boost": [], + "boost_param": null, + "filter_profanity": false, + "redact_pii": false, + "redact_pii_audio": false, + "redact_pii_audio_quality": null, + "redact_pii_policies": null, + "redact_pii_sub": null, + "speaker_labels": false, + "content_safety": false, + "iab_categories": false, + "content_safety_labels": { + "status": "unavailable", + "results": [], + "summary": {} + }, + "iab_categories_result": { + "status": "unavailable", + "results": [], + "summary": {} + }, + "language_detection": false, + "language_confidence_threshold": null, + "language_confidence": null, + "custom_spelling": null, + "throttled": false, + "auto_chapters": false, + "summarization": false, + "summary_type": null, + "summary_model": null, + "custom_topics": false, + "topics": [], + "speech_threshold": null, + "speech_model": null, + "chapters": null, + "disfluencies": false, + "entity_detection": false, + "sentiment_analysis": false, + "sentiment_analysis_results": null, + "entities": null, + "speakers_expected": null, + "summary": null, + "custom_topics_results": null, + "is_deleted": null, + "multichannel": false, + "audio_channels": 1 +} \ No newline at end of file diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/upload_file_response.json b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/upload_file_response.json new file mode 100644 index 000000000000..ad7b31685100 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI.UnitTests/TestData/upload_file_response.json @@ -0,0 +1,3 @@ +{ + "upload_url": "http://localhost/path/to/file.mp3" +} \ No newline at end of file diff --git 
a/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIAudioToTextExecutionSettings.cs b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIAudioToTextExecutionSettings.cs new file mode 100644 index 000000000000..39265c0ad2f5 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIAudioToTextExecutionSettings.cs @@ -0,0 +1,78 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Collections.Generic; +using System.Text.Json.Serialization; +using AssemblyAI.Transcripts; + +namespace Microsoft.SemanticKernel.Connectors.AssemblyAI; + +/// +/// Execution settings for AssemblyAI speech-to-text execution. +/// +public sealed class AssemblyAIAudioToTextExecutionSettings : PromptExecutionSettings +{ + /// + /// Optional transcript parameters passed to AssemblyAI when the transcript is submitted. + /// + [JsonPropertyName("transcript_params")] + public TranscriptOptionalParams? TranscriptParams + { + get => this._transcriptParams; + set + { + this.ThrowIfFrozen(); + this._transcriptParams = value; + } + } + + /// + /// The time between each poll for the transcript status, until the status is completed. Defaults to 3s. + /// + [JsonPropertyName("polling_interval")] + public TimeSpan? PollingInterval + { + get => this._pollingInterval; + set + { + this.ThrowIfFrozen(); + this._pollingInterval = value; + } + } + + /// + /// How long to wait before a timeout exception is thrown. Defaults to infinite. + /// + [JsonPropertyName("polling_timeout")] + public TimeSpan? PollingTimeout + { + get => this._pollingTimeout; + set + { + this.ThrowIfFrozen(); + this._pollingTimeout = value; + } + } + + /// + public override PromptExecutionSettings Clone() + { + return new AssemblyAIAudioToTextExecutionSettings + { + ModelId = this.ModelId, + ExtensionData = this.ExtensionData is not null ? 
new Dictionary(this.ExtensionData) : null, + PollingInterval = this.PollingInterval, + PollingTimeout = this.PollingTimeout, + ServiceId = this.ServiceId, + TranscriptParams = this.TranscriptParams?.Clone() + }; + } + + #region private ================================================================================ + + private TimeSpan? _pollingInterval; + private TimeSpan? _pollingTimeout; + private TranscriptOptionalParams? _transcriptParams; + + #endregion +} diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIKernelBuilderExtensions.cs b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIKernelBuilderExtensions.cs new file mode 100644 index 000000000000..ee55e5e54d5d --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIKernelBuilderExtensions.cs @@ -0,0 +1,43 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using System.Net.Http; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.AudioToText; +using Microsoft.SemanticKernel.Connectors.AssemblyAI; + +namespace Microsoft.SemanticKernel; + +/// +/// Provides extension methods for and related classes to configure AssemblyAI connectors. +/// +public static class AssemblyAIKernelBuilderExtensions +{ + /// + /// Adds the AssemblyAI audio-to-text service to the kernel. + /// + /// The instance to augment. + /// AssemblyAI API key, get your API key from the dashboard. + /// The endpoint URL to the AssemblyAI API. + /// A local identifier for the given AI service. + /// The HttpClient to use with this service. + /// The same instance as . + public static IKernelBuilder AddAssemblyAIAudioToText( + this IKernelBuilder builder, + string apiKey, + Uri? endpoint = null, + string? serviceId = null, + HttpClient? 
httpClient = null + ) + { + Verify.NotNull(builder); + + builder.Services.AddKeyedSingleton(serviceId, (_, _) + => new AssemblyAIAudioToTextService( + apiKey, + endpoint, + httpClient)); + + return builder; + } +} diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIServiceCollectionExtensions.cs b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIServiceCollectionExtensions.cs new file mode 100644 index 000000000000..fea3c1e6fc30 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyAIServiceCollectionExtensions.cs @@ -0,0 +1,42 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.AudioToText; +using Microsoft.SemanticKernel.Connectors.AssemblyAI; +using Microsoft.SemanticKernel.Http; + +namespace Microsoft.SemanticKernel; + +/// +/// Provides extension methods for and related classes to configure AssemblyAI connectors. +/// +public static class AssemblyAIServiceCollectionExtensions +{ + /// + /// Adds the AssemblyAI audio-to-text service to the list. + /// + /// The instance to augment. + /// AssemblyAI API key, get your API key from the dashboard. + /// The endpoint URL to the AssemblyAI API. + /// A local identifier for the given AI service. + /// The same instance as . + public static IServiceCollection AddAssemblyAIAudioToText( + this IServiceCollection services, + string apiKey, + Uri? endpoint = null, + string? 
serviceId = null + ) + { + Verify.NotNull(services); + services.AddKeyedSingleton(serviceId, (serviceProvider, _) + => new AssemblyAIAudioToTextService( + apiKey, + endpoint, + HttpClientProvider.GetHttpClient(serviceProvider) + ) + ); + + return services; + } +} diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyInfo.cs b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyInfo.cs new file mode 100644 index 000000000000..fe66371dbc58 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI/AssemblyInfo.cs @@ -0,0 +1,6 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System.Diagnostics.CodeAnalysis; + +// This assembly is currently experimental. +[assembly: Experimental("SKEXP0070")] diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI/Connectors.AssemblyAI.csproj b/dotnet/src/Connectors/Connectors.AssemblyAI/Connectors.AssemblyAI.csproj new file mode 100644 index 000000000000..6d31aed68f8c --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI/Connectors.AssemblyAI.csproj @@ -0,0 +1,30 @@ + + + + + Microsoft.SemanticKernel.Connectors.AssemblyAI + $(AssemblyName) + netstandard2.0 + true + false + + + + + + + + + Semantic Kernel - AssemblyAI connectors + Semantic Kernel connectors for AssemblyAI's speech AI models. + + + + + + + + + + + diff --git a/dotnet/src/Connectors/Connectors.AssemblyAI/Services/AssemblyAIAudioToTextService.cs b/dotnet/src/Connectors/Connectors.AssemblyAI/Services/AssemblyAIAudioToTextService.cs new file mode 100644 index 000000000000..ee2dc4623aa7 --- /dev/null +++ b/dotnet/src/Connectors/Connectors.AssemblyAI/Services/AssemblyAIAudioToTextService.cs @@ -0,0 +1,191 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +using System; +using System.Collections.Generic; +using System.Net.Http; +using System.Text; +using System.Threading; +using System.Threading.Tasks; +using AssemblyAI; +using AssemblyAI.Transcripts; +using Microsoft.Extensions.Logging; +using Microsoft.SemanticKernel.AudioToText; +using Microsoft.SemanticKernel.Http; + +namespace Microsoft.SemanticKernel.Connectors.AssemblyAI; + +/// +/// AssemblyAI speech-to-text service. +/// +public sealed class AssemblyAIAudioToTextService : IAudioToTextService +{ + private readonly AssemblyAIClient _client; + + /// + /// Attributes are not used by AssemblyAIAudioToTextService; this is always an empty dictionary. + /// + public IReadOnlyDictionary Attributes { get; } = new Dictionary(); + + /// + /// Creates an instance of the with an AssemblyAI API key. + /// + /// AssemblyAI API key; must not be null or whitespace. + /// Optional endpoint URI, including the port, where the AssemblyAI server is hosted. Falls back to the SDK default environment when null. + /// Optional HTTP client to be used for communication with the AssemblyAI API. + /// Optional logger factory. Currently not used by this service. + public AssemblyAIAudioToTextService( + string apiKey, + Uri? endpoint = null, + HttpClient? httpClient = null, + ILoggerFactory? loggerFactory = null + ) + { + Verify.NotNullOrWhiteSpace(apiKey); + this._client = new AssemblyAIClient(new ClientOptions + { + ApiKey = apiKey, + BaseUrl = endpoint?.ToString() ?? AssemblyAIClientEnvironment.Default, + HttpClient = HttpClientProvider.GetHttpClient(httpClient), + UserAgent = new UserAgent + { + ["integration"] = new( + HttpHeaderConstant.Values.UserAgent, + HttpHeaderConstant.Values.GetAssemblyVersion(typeof(AssemblyAIAudioToTextService)) + ) + } + }); + } + + /// + public async Task> GetTextContentsAsync( + AudioContent content, + PromptExecutionSettings? executionSettings = null, + Kernel? 
kernel = null, + CancellationToken cancellationToken = default + ) + { + try + { + Verify.NotNull(content); + + if (executionSettings?.ExtensionData is not null && executionSettings.ExtensionData.Count > 0) + { + throw new ArgumentException("ExtensionData is not supported by AssemblyAI, use AssemblyAIAudioToTextExecutionSettings.TranscriptParams.", nameof(executionSettings)); + } + + string uploadUrl; + if (content.Data is { IsEmpty: false }) + { + try + { + var response = await this._client.Files.UploadAsync( + content.Data.Value, + null, + cancellationToken + ).ConfigureAwait(false); + uploadUrl = response.UploadUrl; + } + catch (ApiException apiException) + { + throw new HttpOperationException( + apiException.StatusCode, + apiException.ResponseContent, + "An API exception occurred while uploading the audio file.", + apiException + ); + } + } + else if (content.Uri is not null) + { + // Reject file URIs to prevent unintentional local file uploads via an injection attack. + if (content.Uri.IsFile) + { + throw new ArgumentException("File URI is not supported.", nameof(content)); + } + + uploadUrl = content.Uri.ToString(); + } + else + { + throw new ArgumentException("AudioContent doesn't have any content.", nameof(content)); + } + + TimeSpan? pollingInterval = null; + TimeSpan? pollingTimeout = null; + TranscriptOptionalParams? transcriptParams = null; + if (executionSettings is AssemblyAIAudioToTextExecutionSettings aaiExecSettings) + { + pollingInterval = aaiExecSettings.PollingInterval; + pollingTimeout = aaiExecSettings.PollingTimeout; + transcriptParams = aaiExecSettings.TranscriptParams; + } + + Transcript transcript; + try + { + transcript = await this._client.Transcripts.SubmitAsync( + new Uri(uploadUrl), + transcriptParams ?? 
new TranscriptOptionalParams(), + null, + cancellationToken + ) + .ConfigureAwait(false); + } + catch (ApiException apiException) + { + throw new HttpOperationException( + apiException.StatusCode, + apiException.ResponseContent, + "An API exception occurred while submitting transcript.", + apiException + ); + } + + try + { + transcript = await this._client.Transcripts.WaitUntilReadyAsync( + transcript.Id, + pollingInterval: pollingInterval, + pollingTimeout: pollingTimeout, + cancellationToken: cancellationToken + ).ConfigureAwait(false); + } + catch (ApiException apiException) + { + throw new HttpOperationException( + apiException.StatusCode, + apiException.ResponseContent, + "An API exception occurred while polling transcript until it is ready.", + apiException + ); + } + + try + { + transcript.EnsureStatusCompleted(); + } + catch (TranscriptNotCompletedStatusException exception) + { + throw new KernelException( + "The transcript status is not completed. See inner exception for details.", + exception + ); + } + + return + [ + new TextContent( + text: transcript.Text, + modelId: null, + innerContent: transcript, + encoding: Encoding.UTF8, + metadata: null + ) + ]; + } + catch (HttpRequestException ex) + { + throw new HttpOperationException(message: ex.Message, innerException: ex); + } + } +} diff --git a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIAudioToTextServiceTests.cs b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIAudioToTextServiceTests.cs index ec415ac8e9f7..c3344ed6b403 100644 --- a/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIAudioToTextServiceTests.cs +++ b/dotnet/src/Connectors/Connectors.OpenAI.UnitTests/Services/OpenAIAudioToTextServiceTests.cs @@ -36,9 +36,7 @@ public OpenAIAudioToTextServiceTests() public void ConstructorWithApiKeyWorksCorrectly(bool includeLoggerFactory) { // Arrange & Act - var service = includeLoggerFactory ? 
// ---- Patch text that preceded this file in the original dump (kept verbatim; it continues from the previous line of the dump) ----
// - new OpenAIAudioToTextService("model-id", "api-key", "organization", loggerFactory: this._mockLoggerFactory.Object) :
// - new OpenAIAudioToTextService("model-id", "api-key", "organization");
// + var service = includeLoggerFactory ? new OpenAIAudioToTextService("model-id", "api-key", "organization", loggerFactory: this._mockLoggerFactory.Object) : new OpenAIAudioToTextService("model-id", "api-key", "organization");
//  // Assert
//  Assert.NotNull(service);
// diff --git a/dotnet/src/IntegrationTests/Connectors/AssemblyAI/AssemblyAIAudioToTextTests.cs b/dotnet/src/IntegrationTests/Connectors/AssemblyAI/AssemblyAIAudioToTextTests.cs
// new file mode 100644
// index 000000000000..6db734b54351
// --- /dev/null
// +++ b/dotnet/src/IntegrationTests/Connectors/AssemblyAI/AssemblyAIAudioToTextTests.cs
// @@ -0,0 +1,190 @@

// Copyright (c) Microsoft. All rights reserved.

using System;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;
using AssemblyAI.Transcripts;
using Microsoft.Extensions.Configuration;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.AssemblyAI;
using Xunit;
using Xunit.Abstractions;

namespace SemanticKernel.IntegrationTests.Connectors.AssemblyAI;

/// <summary>
/// Integration tests for <see cref="AssemblyAIAudioToTextService"/>.
/// </summary>
/// <remarks>
/// Every test reads the 'AssemblyAI:ApiKey' configuration value, and most of them call the live service.
/// Use <c>[Fact(Skip = "This test is for manual verification.")]</c> to exclude a test from automated runs.
/// </remarks>
public sealed class AssemblyAIAudioToTextTests : IDisposable
{
    /// <summary>Local sample recording and the sentence it is expected to contain.</summary>
    private const string LocalAudioPath = "./TestData/test_audio.wav";
    private const string LocalAudioSentence = "The sun rises in the east and sets in the west.";

    /// <summary>Remote sample recording and an excerpt it is expected to contain.</summary>
    private const string RemoteAudioUrl = "https://storage.googleapis.com/aai-docs-samples/nbc.mp3";
    private const string RemoteAudioExcerpt =
        "There's the traditional red blue divide you're very familiar with. But there's a lot more below the surface going on in both parties. Let's set the table.";

    private readonly RedirectOutput _testOutputHelper;
    private readonly IConfigurationRoot _configuration;

    public AssemblyAIAudioToTextTests(ITestOutputHelper output)
    {
        this._testOutputHelper = new RedirectOutput(output);
        Console.SetOut(this._testOutputHelper);

        // Load configuration
        this._configuration = new ConfigurationBuilder()
            .AddJsonFile(path: "testsettings.json", optional: false, reloadOnChange: true)
            .AddJsonFile(path: "testsettings.development.json", optional: true, reloadOnChange: true)
            .AddEnvironmentVariables()
            .AddUserSecrets<AssemblyAIAudioToTextTests>()
            .Build();
    }

    [Fact]
    public async Task AssemblyAIAudioToTextTestAsync()
    {
        // Arrange
        using var httpClient = new HttpClient();
        var service = new AssemblyAIAudioToTextService(this.GetAssemblyAIApiKey(), httpClient: httpClient);
        var audioContent = await LoadLocalAudioAsync();

        // Act
        var result = await service.GetTextContentsAsync(audioContent);

        // Assert
        Console.WriteLine(result[0].Text);
        Assert.Contains(LocalAudioSentence, result[0].Text, StringComparison.OrdinalIgnoreCase);
    }

    [Fact]
    public async Task AssemblyAIAudioToTextWithPollingIntervalTestAsync()
    {
        // Arrange
        using var httpClient = new HttpClient();
        var service = new AssemblyAIAudioToTextService(this.GetAssemblyAIApiKey(), httpClient: httpClient);
        var audioContent = await LoadLocalAudioAsync();
        var executionSettings = new AssemblyAIAudioToTextExecutionSettings
        {
            PollingInterval = TimeSpan.FromMilliseconds(750)
        };

        // Act
        var result = await service.GetTextContentsAsync(audioContent, executionSettings);

        // Assert
        Console.WriteLine(result[0].Text);
        Assert.Contains(LocalAudioSentence, result[0].Text, StringComparison.OrdinalIgnoreCase);
    }

    [Fact]
    public async Task AssemblyAIAudioToTextWithUriTestAsync()
    {
        // Arrange
        using var httpClient = new HttpClient();
        var service = new AssemblyAIAudioToTextService(this.GetAssemblyAIApiKey(), httpClient: httpClient);

        // Act
        var result = await service.GetTextContentsAsync(
            new AudioContent(new Uri(RemoteAudioUrl))
        );

        // Assert
        // Print first so the transcript is visible in the output when the assertion fails.
        Console.WriteLine(result[0].Text);
        Assert.Contains(RemoteAudioExcerpt, result[0].Text, StringComparison.Ordinal);
    }

    [Fact]
    public async Task AssemblyAIAudioToTextWithFileUriShouldThrowTestAsync()
    {
        // Arrange
        using var httpClient = new HttpClient();
        var service = new AssemblyAIAudioToTextService(this.GetAssemblyAIApiKey(), httpClient: httpClient);

        // Act & Assert
        await Assert.ThrowsAsync<ArgumentException>(
            async () => await service.GetTextContentsAsync(new AudioContent(new Uri("file://C:/file.mp3")))
        );
    }

    [Fact]
    public async Task AssemblyAIAudioToTextWithLanguageParamTestAsync()
    {
        // Arrange
        using var httpClient = new HttpClient();
        var sttService = new AssemblyAIAudioToTextService(this.GetAssemblyAIApiKey(), httpClient: httpClient);
        var textExecutionSettings = new AssemblyAIAudioToTextExecutionSettings
        {
            TranscriptParams = new TranscriptOptionalParams
            {
                LanguageCode = TranscriptLanguageCode.En
            }
        };

        // Act
        var result = await sttService.GetTextContentsAsync(
            new AudioContent(new Uri(RemoteAudioUrl)),
            textExecutionSettings
        );

        // Assert
        // The remote recording is transcribed here, so the expected text is the remote excerpt,
        // not the sentence spoken in the local sample file.
        Console.WriteLine(result[0].Text);
        Assert.Contains(RemoteAudioExcerpt, result[0].Text, StringComparison.Ordinal);
    }

    [Fact]
    public async Task AssemblyAIAudioToTextWithLocalhostBaseAddressShouldThrowAsync()
    {
        // Arrange
        var apiKey = this.GetAssemblyAIApiKey();
        var sttService = new AssemblyAIAudioToTextService(apiKey, new Uri("https://localhost:9999"));

        // Act & Assert
        var exception = await Assert.ThrowsAsync<HttpOperationException>(
            async () => await sttService.GetTextContentsAsync(new AudioContent(new Uri("http://localhost")))
        );

        // The socket error text differs between operating systems, so check the wrapped
        // exception type and the endpoint instead of the exact message.
        Assert.IsAssignableFrom<HttpRequestException>(exception.InnerException);
        Assert.Contains("localhost:9999", exception.Message, StringComparison.Ordinal);
    }

    public void Dispose()
    {
        this._testOutputHelper.Dispose();
    }

    /// <summary>
    /// Reads the AssemblyAI API key from configuration and fails fast when it is missing.
    /// </summary>
    private string GetAssemblyAIApiKey()
    {
        var apiKey = this._configuration["AssemblyAI:ApiKey"];
        if (string.IsNullOrEmpty(apiKey))
        {
            throw new ArgumentException("'AssemblyAI:ApiKey' configuration is required.");
        }

        return apiKey;
    }

    /// <summary>
    /// Loads the local sample recording into an in-memory <see cref="AudioContent"/>.
    /// </summary>
    private static async Task<AudioContent> LoadLocalAudioAsync()
    {
        await using Stream audio = File.OpenRead(LocalAudioPath);
        var audioData = await BinaryData.FromStreamAsync(audio);
        return new AudioContent(audioData.ToMemory(), null);
    }
}

// ---- Patch text that followed this file in the original dump (kept verbatim) ----
// diff --git a/dotnet/src/IntegrationTests/IntegrationTests.csproj b/dotnet/src/IntegrationTests/IntegrationTests.csproj
// index cf38ef4d5c41..84b50f1753b1 100644
// --- a/dotnet/src/IntegrationTests/IntegrationTests.csproj
// +++ b/dotnet/src/IntegrationTests/IntegrationTests.csproj
// @@ -66,6 +66,7 @@
// +