From c2bb26c7e7b04cbae89bea4bdb119795826bd512 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:24:15 +0000 Subject: [PATCH 1/4] Adding Oga resource, changing demo --- dotnet/samples/Demos/OnnxSimpleRAG/Program.cs | 3 ++- .../Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs index 9727e600145f..91991cc9756f 100644 --- a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs +++ b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs @@ -12,6 +12,7 @@ using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.InMemory; +using Microsoft.SemanticKernel.Connectors.Onnx; using Microsoft.SemanticKernel.Data; using Microsoft.SemanticKernel.Embeddings; using Microsoft.SemanticKernel.PromptTemplates.Handlebars; @@ -38,7 +39,7 @@ var kernel = builder.Build(); // Get the instances of the services -var chatService = kernel.GetRequiredService(); +using var chatService = kernel.GetRequiredService() as OnnxRuntimeGenAIChatCompletionService; var embeddingService = kernel.GetRequiredService(); // Create a vector store and a collection to store information diff --git a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs index 8a6210253729..5fc2114f5afa 100644 --- a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs +++ b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs @@ -25,6 +25,7 @@ public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionServi private readonly JsonSerializerOptions? _jsonSerializerOptions; private Model? _model; private Tokenizer? 
_tokenizer; + private readonly OgaHandle _ogaHandle = new(); private Dictionary AttributesInternal { get; } = new(); @@ -212,5 +213,6 @@ public void Dispose() { this._tokenizer?.Dispose(); this._model?.Dispose(); + this._ogaHandle.Dispose(); } } From 7dc62b414fc74d1c424aea6d8f6d9e7c4d03ead3 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 11 Nov 2024 17:37:01 +0000 Subject: [PATCH 2/4] Update Concepts examples --- .../Concepts/ChatCompletion/Onnx_ChatCompletion.cs | 4 +++- .../ChatCompletion/Onnx_ChatCompletionStreaming.cs | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs index 563ed3475b5e..a1f5d17306c2 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs @@ -34,7 +34,7 @@ public async Task ServicePromptAsync() Console.WriteLine("======== Onnx - Chat Completion ========"); - var chatService = new OnnxRuntimeGenAIChatCompletionService( + using var chatService = new OnnxRuntimeGenAIChatCompletionService( modelId: TestConfiguration.Onnx.ModelId, modelPath: TestConfiguration.Onnx.ModelPath); @@ -105,5 +105,7 @@ public async Task ChatPromptAsync() reply = await kernel.InvokePromptAsync(chatPrompt.ToString()); Console.WriteLine(reply); + + (kernel.GetRequiredService() as IDisposable)?.Dispose(); } } diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs index d6ad1f05e7f2..d282d02160d1 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs @@ -86,6 +86,8 @@ public async Task StreamChatPromptAsync() reply = await StreamMessageOutputFromKernelAsync(kernel, 
chatPrompt.ToString()); Console.WriteLine(reply); + + (kernel.GetRequiredService() as IDisposable)?.Dispose(); } /// @@ -135,7 +137,7 @@ public async Task StreamTextFromChatAsync() } } - private async Task StartStreamingChatAsync(IChatCompletionService chatCompletionService) + private async Task StartStreamingChatAsync(OnnxRuntimeGenAIChatCompletionService chatCompletionService) { Console.WriteLine("Chat content:"); Console.WriteLine("------------------------"); @@ -156,9 +158,11 @@ private async Task StartStreamingChatAsync(IChatCompletionService chatCompletion // Second assistant message await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); + + chatCompletionService.Dispose(); } - private async Task StreamMessageOutputAsync(IChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) + private async Task StreamMessageOutputAsync(OnnxRuntimeGenAIChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) { bool roleWritten = false; string fullMessage = string.Empty; From 415b883a0036341892da5650a70224e7acb9404a Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 11 Nov 2024 19:13:27 +0000 Subject: [PATCH 3/4] Resolve the Disposing problem with Onnx Service --- .../ChatCompletion/Onnx_ChatCompletion.cs | 4 +-- .../Onnx_ChatCompletionStreaming.cs | 2 -- dotnet/samples/Demos/OnnxSimpleRAG/Program.cs | 3 +- .../OnnxRuntimeGenAIChatCompletionService.cs | 32 ++++++------------- ...OnnxTextEmbeddingGenerationServiceTests.cs | 2 -- ...xRuntimeGenAIChatCompletionServiceTests.cs | 4 +-- 6 files changed, 14 insertions(+), 33 deletions(-) diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs index a1f5d17306c2..563ed3475b5e 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs +++ 
b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletion.cs @@ -34,7 +34,7 @@ public async Task ServicePromptAsync() Console.WriteLine("======== Onnx - Chat Completion ========"); - using var chatService = new OnnxRuntimeGenAIChatCompletionService( + var chatService = new OnnxRuntimeGenAIChatCompletionService( modelId: TestConfiguration.Onnx.ModelId, modelPath: TestConfiguration.Onnx.ModelPath); @@ -105,7 +105,5 @@ public async Task ChatPromptAsync() reply = await kernel.InvokePromptAsync(chatPrompt.ToString()); Console.WriteLine(reply); - - (kernel.GetRequiredService() as IDisposable)?.Dispose(); } } diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs index d282d02160d1..edda6485caee 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs @@ -158,8 +158,6 @@ private async Task StartStreamingChatAsync(OnnxRuntimeGenAIChatCompletionService // Second assistant message await StreamMessageOutputAsync(chatCompletionService, chatHistory, AuthorRole.Assistant); - - chatCompletionService.Dispose(); } private async Task StreamMessageOutputAsync(OnnxRuntimeGenAIChatCompletionService chatCompletionService, ChatHistory chatHistory, AuthorRole authorRole) diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs index 91991cc9756f..9727e600145f 100644 --- a/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs +++ b/dotnet/samples/Demos/OnnxSimpleRAG/Program.cs @@ -12,7 +12,6 @@ using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.InMemory; -using Microsoft.SemanticKernel.Connectors.Onnx; using Microsoft.SemanticKernel.Data; using Microsoft.SemanticKernel.Embeddings; using Microsoft.SemanticKernel.PromptTemplates.Handlebars; @@ -39,7 +38,7 @@ var kernel = 
builder.Build(); // Get the instances of the services -using var chatService = kernel.GetRequiredService() as OnnxRuntimeGenAIChatCompletionService; +var chatService = kernel.GetRequiredService(); var embeddingService = kernel.GetRequiredService(); // Create a vector store and a collection to store information diff --git a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs index 5fc2114f5afa..2e9e16212a0b 100644 --- a/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs +++ b/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs @@ -1,6 +1,5 @@ // Copyright (c) Microsoft. All rights reserved. -using System; using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; @@ -18,14 +17,11 @@ namespace Microsoft.SemanticKernel.Connectors.Onnx; /// /// Represents a chat completion service using OnnxRuntimeGenAI. /// -public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionService, IDisposable +public sealed class OnnxRuntimeGenAIChatCompletionService : IChatCompletionService { private readonly string _modelId; private readonly string _modelPath; private readonly JsonSerializerOptions? _jsonSerializerOptions; - private Model? _model; - private Tokenizer? 
_tokenizer; - private readonly OgaHandle _ogaHandle = new(); private Dictionary AttributesInternal { get; } = new(); @@ -91,13 +87,17 @@ private async IAsyncEnumerable RunInferenceAsync(ChatHistory chatHistory OnnxRuntimeGenAIPromptExecutionSettings onnxPromptExecutionSettings = this.GetOnnxPromptExecutionSettingsSettings(executionSettings); var prompt = this.GetPrompt(chatHistory, onnxPromptExecutionSettings); - var tokens = this.GetTokenizer().Encode(prompt); - using var generatorParams = new GeneratorParams(this.GetModel()); + using var ogaHandle = new OgaHandle(); + using var model = new Model(this._modelPath); + using var tokenizer = new Tokenizer(model); + + var tokens = tokenizer.Encode(prompt); + + using var generatorParams = new GeneratorParams(model); this.UpdateGeneratorParamsFromPromptExecutionSettings(generatorParams, onnxPromptExecutionSettings); generatorParams.SetInputSequences(tokens); - - using var generator = new Generator(this.GetModel(), generatorParams); + using var generator = new Generator(model, generatorParams); bool removeNextTokenStartingWithSpace = true; while (!generator.IsDone()) @@ -111,7 +111,7 @@ private async IAsyncEnumerable RunInferenceAsync(ChatHistory chatHistory var outputTokens = generator.GetSequence(0); var newToken = outputTokens.Slice(outputTokens.Length - 1, 1); - string output = this.GetTokenizer().Decode(newToken); + string output = tokenizer.Decode(newToken); if (removeNextTokenStartingWithSpace && output[0] == ' ') { @@ -124,10 +124,6 @@ private async IAsyncEnumerable RunInferenceAsync(ChatHistory chatHistory } } - private Model GetModel() => this._model ??= new Model(this._modelPath); - - private Tokenizer GetTokenizer() => this._tokenizer ??= new Tokenizer(this.GetModel()); - private string GetPrompt(ChatHistory chatHistory, OnnxRuntimeGenAIPromptExecutionSettings onnxRuntimeGenAIPromptExecutionSettings) { var promptBuilder = new StringBuilder(); @@ -207,12 +203,4 @@ private OnnxRuntimeGenAIPromptExecutionSettings 
GetOnnxPromptExecutionSettingsSe return OnnxRuntimeGenAIPromptExecutionSettings.FromExecutionSettings(executionSettings); } - - /// <inheritdoc/> - public void Dispose() - { - this._tokenizer?.Dispose(); - this._model?.Dispose(); - this._ogaHandle.Dispose(); - } } diff --git a/dotnet/src/IntegrationTests/Connectors/Onnx/BertOnnxTextEmbeddingGenerationServiceTests.cs b/dotnet/src/IntegrationTests/Connectors/Onnx/BertOnnxTextEmbeddingGenerationServiceTests.cs index be32d4cda549..e2f7f006202c 100644 --- a/dotnet/src/IntegrationTests/Connectors/Onnx/BertOnnxTextEmbeddingGenerationServiceTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Onnx/BertOnnxTextEmbeddingGenerationServiceTests.cs @@ -9,9 +7,7 @@ using System.Security.Cryptography; using System.Text; using System.Threading.Tasks; -using Microsoft.Extensions.DependencyInjection; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.Onnx; using Microsoft.SemanticKernel.Embeddings; using Xunit; diff --git a/dotnet/src/IntegrationTests/Connectors/Onnx/OnnxRuntimeGenAIChatCompletionServiceTests.cs b/dotnet/src/IntegrationTests/Connectors/Onnx/OnnxRuntimeGenAIChatCompletionServiceTests.cs index c6359e3b17a5..c042f633d495 100644 --- a/dotnet/src/IntegrationTests/Connectors/Onnx/OnnxRuntimeGenAIChatCompletionServiceTests.cs +++ b/dotnet/src/IntegrationTests/Connectors/Onnx/OnnxRuntimeGenAIChatCompletionServiceTests.cs @@ -57,7 +57,7 @@ public async Task ItCanUseKernelInvokeStreamingAsyncAsync() [Fact(Skip = "For manual verification only")] public async Task ItCanUseServiceGetStreamingChatMessageContentsAsync() { - using var chat = CreateService(); + var chat = CreateService(); ChatHistory history = []; history.AddUserMessage("Where is the most famous fish market in Seattle, Washington, USA?"); @@ -76,7 +76,7 @@ public async Task ItCanUseServiceGetStreamingChatMessageContentsAsync() [Fact(Skip = "For manual verification only")] public async Task
ItCanUseServiceGetChatMessageContentsAsync() { - using var chat = CreateService(); + var chat = CreateService(); ChatHistory history = []; history.AddUserMessage("Where is the most famous fish market in Seattle, Washington, USA?"); From 90f8798c9cf53c458f336750e956ea86975e9d46 Mon Sep 17 00:00:00 2001 From: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com> Date: Mon, 11 Nov 2024 19:33:47 +0000 Subject: [PATCH 4/4] Remove unneeded disposal --- .../Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs index edda6485caee..d07c6e3240d1 100644 --- a/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs +++ b/dotnet/samples/Concepts/ChatCompletion/Onnx_ChatCompletionStreaming.cs @@ -86,8 +86,6 @@ public async Task StreamChatPromptAsync() reply = await StreamMessageOutputFromKernelAsync(kernel, chatPrompt.ToString()); Console.WriteLine(reply); - - (kernel.GetRequiredService() as IDisposable)?.Dispose(); } ///