diff --git a/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs b/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs index cd90de3964b4..78c54df49434 100644 --- a/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs +++ b/dotnet/samples/Concepts/Caching/SemanticCachingWithFilters.cs @@ -1,11 +1,11 @@ // Copyright (c) Microsoft. All rights reserved. using System.Diagnostics; +using Azure.Identity; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.VectorData; using Microsoft.SemanticKernel; -using Microsoft.SemanticKernel.Connectors.AzureCosmosDBMongoDB; -using Microsoft.SemanticKernel.Connectors.Redis; -using Microsoft.SemanticKernel.Memory; +using Microsoft.SemanticKernel.Embeddings; namespace Caching; @@ -18,12 +18,6 @@ namespace Caching; /// public class SemanticCachingWithFilters(ITestOutputHelper output) : BaseTest(output) { - /// - /// Similarity/relevance score, from 0 to 1, where 1 means exact match. - /// It's possible to change this value during testing to see how caching logic will behave. - /// - private const double SimilarityScore = 0.9; - /// /// Executing similar requests two times using in-memory caching store to compare execution time and results. /// Second execution is faster, because the result is returned from cache. @@ -31,7 +25,10 @@ public class SemanticCachingWithFilters(ITestOutputHelper output) : BaseTest(out [Fact] public async Task InMemoryCacheAsync() { - var kernel = GetKernelWithCache(_ => new VolatileMemoryStore()); + var kernel = GetKernelWithCache(services => + { + services.AddInMemoryVectorStore(); + }); var result1 = await ExecuteAsync(kernel, "First run", "What's the tallest building in New York?"); var result2 = await ExecuteAsync(kernel, "Second run", "What is the highest building in New York City?"); @@ -53,12 +50,15 @@ public async Task InMemoryCacheAsync() /// /// Executing similar requests two times using Redis caching store to compare execution time and results. 
/// Second execution is faster, because the result is returned from cache. - /// How to run Redis on Docker locally: https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/docker/ + /// How to run Redis on Docker locally: https://redis.io/docs/latest/operate/oss_and_stack/install/install-stack/docker/. /// [Fact] public async Task RedisCacheAsync() { - var kernel = GetKernelWithCache(_ => new RedisMemoryStore("localhost:6379", vectorSize: 1536)); + var kernel = GetKernelWithCache(services => + { + services.AddRedisVectorStore("localhost:6379"); + }); var result1 = await ExecuteAsync(kernel, "First run", "What's the tallest building in New York?"); var result2 = await ExecuteAsync(kernel, "Second run", "What is the highest building in New York City?"); @@ -84,10 +84,12 @@ public async Task RedisCacheAsync() [Fact] public async Task AzureCosmosDBMongoDBCacheAsync() { - var kernel = GetKernelWithCache(_ => new AzureCosmosDBMongoDBMemoryStore( - TestConfiguration.AzureCosmosDbMongoDb.ConnectionString, - TestConfiguration.AzureCosmosDbMongoDb.DatabaseName, - new(dimensions: 1536))); + var kernel = GetKernelWithCache(services => + { + services.AddAzureCosmosDBMongoDBVectorStore( + TestConfiguration.AzureCosmosDbMongoDb.ConnectionString, + TestConfiguration.AzureCosmosDbMongoDb.DatabaseName); + }); var result1 = await ExecuteAsync(kernel, "First run", "What's the tallest building in New York?"); var result2 = await ExecuteAsync(kernel, "Second run", "What is the highest building in New York City?"); @@ -110,27 +112,41 @@ public async Task AzureCosmosDBMongoDBCacheAsync() /// /// Returns instance with required registered services. 
/// - private Kernel GetKernelWithCache(Func cacheFactory) + private Kernel GetKernelWithCache(Action configureVectorStore) { var builder = Kernel.CreateBuilder(); - // Add Azure OpenAI chat completion service - builder.AddAzureOpenAIChatCompletion( - TestConfiguration.AzureOpenAI.ChatDeploymentName, - TestConfiguration.AzureOpenAI.Endpoint, - TestConfiguration.AzureOpenAI.ApiKey); - - // Add Azure OpenAI text embedding generation service - builder.AddAzureOpenAITextEmbeddingGeneration( - TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, - TestConfiguration.AzureOpenAIEmbeddings.Endpoint, - TestConfiguration.AzureOpenAIEmbeddings.ApiKey); - - // Add memory store for caching purposes (e.g. in-memory, Redis, Azure Cosmos DB) - builder.Services.AddSingleton(cacheFactory); + if (!string.IsNullOrWhiteSpace(TestConfiguration.AzureOpenAI.ApiKey)) + { + // Add Azure OpenAI chat completion service + builder.AddAzureOpenAIChatCompletion( + TestConfiguration.AzureOpenAI.ChatDeploymentName, + TestConfiguration.AzureOpenAI.Endpoint, + TestConfiguration.AzureOpenAI.ApiKey); + + // Add Azure OpenAI text embedding generation service + builder.AddAzureOpenAITextEmbeddingGeneration( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + TestConfiguration.AzureOpenAIEmbeddings.ApiKey); + } + else + { + // Add Azure OpenAI chat completion service + builder.AddAzureOpenAIChatCompletion( + TestConfiguration.AzureOpenAI.ChatDeploymentName, + TestConfiguration.AzureOpenAI.Endpoint, + new AzureCliCredential()); + + // Add Azure OpenAI text embedding generation service + builder.AddAzureOpenAITextEmbeddingGeneration( + TestConfiguration.AzureOpenAIEmbeddings.DeploymentName, + TestConfiguration.AzureOpenAIEmbeddings.Endpoint, + new AzureCliCredential()); + } - // Add text memory service that will be used to generate embeddings and query/store data. - builder.Services.AddSingleton(); + // Add vector store for caching purposes (e.g. 
in-memory, Redis, Azure Cosmos DB) + configureVectorStore(builder.Services); // Add prompt render filter to query cache and check if rendered prompt was already answered. builder.Services.AddSingleton(); @@ -164,7 +180,10 @@ public class CacheBaseFilter /// /// Filter which is executed during prompt rendering operation. /// - public sealed class PromptCacheFilter(ISemanticTextMemory semanticTextMemory) : CacheBaseFilter, IPromptRenderFilter + public sealed class PromptCacheFilter( + ITextEmbeddingGenerationService textEmbeddingGenerationService, + IVectorStore vectorStore) + : CacheBaseFilter, IPromptRenderFilter { public async Task OnPromptRenderAsync(PromptRenderContext context, Func next) { @@ -174,20 +193,22 @@ public async Task OnPromptRenderAsync(PromptRenderContext context, Func(CollectionName); + await collection.CreateCollectionIfNotExistsAsync(); + + // Search for similar prompts in cache. + var searchResults = await collection.VectorizedSearchAsync(promptEmbedding, new() { Top = 1 }, context.CancellationToken); + var searchResult = (await searchResults.Results.FirstOrDefaultAsync())?.Record; // If result exists, return it. if (searchResult is not null) { // Override function result. This will prevent calling LLM and will return result immediately. - context.Result = new FunctionResult(context.Function, searchResult.Metadata.AdditionalMetadata) + context.Result = new FunctionResult(context.Function, searchResult.Result) { - Metadata = new Dictionary { [RecordIdKey] = searchResult.Metadata.Id } + Metadata = new Dictionary { [RecordIdKey] = searchResult.Id } }; } } @@ -196,7 +217,10 @@ public async Task OnPromptRenderAsync(PromptRenderContext context, Func /// Filter which is executed during function invocation. 
/// - public sealed class FunctionCacheFilter(ISemanticTextMemory semanticTextMemory) : CacheBaseFilter, IFunctionInvocationFilter + public sealed class FunctionCacheFilter( + ITextEmbeddingGenerationService textEmbeddingGenerationService, + IVectorStore vectorStore) + : CacheBaseFilter, IFunctionInvocationFilter { public async Task OnFunctionInvocationAsync(FunctionInvocationContext context, Func next) { @@ -212,12 +236,22 @@ public async Task OnFunctionInvocationAsync(FunctionInvocationContext context, F // Get cache record id if result was cached previously or generate new id. var recordId = context.Result.Metadata?.GetValueOrDefault(RecordIdKey, Guid.NewGuid().ToString()) as string; + // Generate prompt embedding. + var promptEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(context.Result.RenderedPrompt); + // Cache rendered prompt and LLM result. - await semanticTextMemory.SaveInformationAsync( - CollectionName, - context.Result.RenderedPrompt, - recordId!, - additionalMetadata: result.ToString()); + var collection = vectorStore.GetCollection(CollectionName); + await collection.CreateCollectionIfNotExistsAsync(); + + var cacheRecord = new CacheRecord + { + Id = recordId!, + Prompt = context.Result.RenderedPrompt, + Result = result.ToString(), + PromptEmbedding = promptEmbedding + }; + + await collection.UpsertAsync(cacheRecord, cancellationToken: context.CancellationToken); } } } @@ -245,4 +279,23 @@ private async Task ExecuteAsync(Kernel kernel, string title, str } #endregion + + #region Vector Store Record + + private sealed class CacheRecord + { + [VectorStoreRecordKey] + public string Id { get; set; } + + [VectorStoreRecordData] + public string Prompt { get; set; } + + [VectorStoreRecordData] + public string Result { get; set; } + + [VectorStoreRecordVector(Dimensions: 1536)] + public ReadOnlyMemory PromptEmbedding { get; set; } + } + + #endregion } diff --git a/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs 
b/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs index 2ac3fce56b23..a5b9917e6ce0 100644 --- a/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs +++ b/dotnet/samples/Concepts/Optimization/FrugalGPTWithFilters.cs @@ -2,10 +2,11 @@ using System.Runtime.CompilerServices; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.VectorData; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; +using Microsoft.SemanticKernel.Connectors.InMemory; using Microsoft.SemanticKernel.Embeddings; -using Microsoft.SemanticKernel.Memory; using Microsoft.SemanticKernel.PromptTemplates.Handlebars; using Microsoft.SemanticKernel.Services; @@ -97,11 +98,11 @@ public async Task ReducePromptSizeAsync() // Add few-shot prompt optimization filter. // The filter uses in-memory store for vector similarity search and text embedding generation service to generate embeddings. - var memoryStore = new VolatileMemoryStore(); + var vectorStore = new InMemoryVectorStore(); var textEmbeddingGenerationService = kernel.GetRequiredService(); // Register optimization filter. - kernel.PromptRenderFilters.Add(new FewShotPromptOptimizationFilter(memoryStore, textEmbeddingGenerationService)); + kernel.PromptRenderFilters.Add(new FewShotPromptOptimizationFilter(vectorStore, textEmbeddingGenerationService)); // Get result again and compare the usage. result = await kernel.InvokeAsync(function, arguments); @@ -167,7 +168,7 @@ public async Task LLMCascadeAsync() /// which are similar to original request. /// private sealed class FewShotPromptOptimizationFilter( - IMemoryStore memoryStore, + IVectorStore vectorStore, ITextEmbeddingGenerationService textEmbeddingGenerationService) : IPromptRenderFilter { /// @@ -176,7 +177,7 @@ private sealed class FewShotPromptOptimizationFilter( private const int TopN = 5; /// - /// Collection name to use in memory store. + /// Collection name to use in vector store. 
/// private const string CollectionName = "examples"; @@ -188,30 +189,38 @@ public async Task OnPromptRenderAsync(PromptRenderContext context, Func 0 } && !string.IsNullOrEmpty(request)) { - var memoryRecords = new List(); + var exampleRecords = new List(); // Generate embedding for each example. var embeddings = await textEmbeddingGenerationService.GenerateEmbeddingsAsync(examples); - // Create memory record instances with example text and embedding. + // Create vector store record instances with example text and embedding. for (var i = 0; i < examples.Count; i++) { - memoryRecords.Add(MemoryRecord.LocalRecord(Guid.NewGuid().ToString(), examples[i], "description", embeddings[i])); + exampleRecords.Add(new ExampleRecord + { + Id = Guid.NewGuid().ToString(), + Example = examples[i], + ExampleEmbedding = embeddings[i] + }); } - // Create collection and upsert all memory records for search. + // Create collection and upsert all vector store records for search. // It's possible to do it only once and re-use the same examples for future requests. - await memoryStore.CreateCollectionAsync(CollectionName); - await memoryStore.UpsertBatchAsync(CollectionName, memoryRecords).ToListAsync(); + var collection = vectorStore.GetCollection(CollectionName); + await collection.CreateCollectionIfNotExistsAsync(context.CancellationToken); + + await collection.UpsertBatchAsync(exampleRecords, cancellationToken: context.CancellationToken).ToListAsync(context.CancellationToken); // Generate embedding for original request. - var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request); + var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request, cancellationToken: context.CancellationToken); // Find top N examples which are similar to original request. 
- var topNExamples = await memoryStore.GetNearestMatchesAsync(CollectionName, requestEmbedding, TopN).ToListAsync(); + var searchResults = await collection.VectorizedSearchAsync(requestEmbedding, new() { Top = TopN }, cancellationToken: context.CancellationToken); + var topNExamples = (await searchResults.Results.ToListAsync(context.CancellationToken)).Select(l => l.Record).ToList(); // Override arguments to use only top N examples, which will be sent to LLM. - context.Arguments["Examples"] = topNExamples.Select(l => l.Item1.Metadata.Text); + context.Arguments["Examples"] = topNExamples.Select(l => l.Example); } // Continue prompt rendering operation. @@ -305,4 +314,16 @@ public async IAsyncEnumerable GetStreamingChatMessa yield return new StreamingChatMessageContent(AuthorRole.Assistant, mockResult); } } + + private sealed class ExampleRecord + { + [VectorStoreRecordKey] + public string Id { get; set; } + + [VectorStoreRecordData] + public string Example { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory ExampleEmbedding { get; set; } + } } diff --git a/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs b/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs index 861034b5d336..695ff675e17f 100644 --- a/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs +++ b/dotnet/samples/Concepts/Optimization/PluginSelectionWithFilters.cs @@ -3,11 +3,11 @@ using System.ComponentModel; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using Microsoft.Extensions.VectorData; using Microsoft.SemanticKernel; using Microsoft.SemanticKernel.ChatCompletion; using Microsoft.SemanticKernel.Connectors.OpenAI; using Microsoft.SemanticKernel.Embeddings; -using Microsoft.SemanticKernel.Memory; namespace Optimization; @@ -40,8 +40,8 @@ public async Task UsingVectorSearchWithKernelAsync() var logger = this.LoggerFactory.CreateLogger(); builder.Services.AddSingleton(logger); - // Add memory store to 
keep functions and search for the most relevant ones for specific request. - builder.Services.AddSingleton(); + // Add vector store to keep functions and search for the most relevant ones for specific request. + builder.Services.AddInMemoryVectorStore(); // Add helper components defined in this example. builder.Services.AddSingleton(); @@ -114,8 +114,8 @@ public async Task UsingVectorSearchWithChatCompletionAsync() var logger = this.LoggerFactory.CreateLogger(); builder.Services.AddSingleton(logger); - // Add memory store to keep functions and search for the most relevant ones for specific request. - builder.Services.AddSingleton(); + // Add vector store to keep functions and search for the most relevant ones for specific request. + builder.Services.AddInMemoryVectorStore(); // Add helper components defined in this example. builder.Services.AddSingleton(); @@ -257,7 +257,8 @@ Task> GetBestFunctionsAsync( string collectionName, string request, KernelPluginCollection plugins, - int numberOfBestFunctions); + int numberOfBestFunctions, + CancellationToken cancellationToken = default); } /// @@ -265,7 +266,7 @@ Task> GetBestFunctionsAsync( /// public interface IPluginStore { - Task SaveAsync(string collectionName, KernelPluginCollection plugins); + Task SaveAsync(string collectionName, KernelPluginCollection plugins, CancellationToken cancellationToken = default); } public class FunctionKeyProvider : IFunctionKeyProvider @@ -280,62 +281,67 @@ public string GetFunctionKey(KernelFunction kernelFunction) public class FunctionProvider( ITextEmbeddingGenerationService textEmbeddingGenerationService, - IMemoryStore memoryStore, + IVectorStore vectorStore, IFunctionKeyProvider functionKeyProvider) : IFunctionProvider { public async Task> GetBestFunctionsAsync( string collectionName, string request, KernelPluginCollection plugins, - int numberOfBestFunctions) + int numberOfBestFunctions, + CancellationToken cancellationToken = default) { // Generate embedding for original 
request. - var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request); + var requestEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(request, cancellationToken: cancellationToken); + + var collection = vectorStore.GetCollection(collectionName); + await collection.CreateCollectionIfNotExistsAsync(cancellationToken); // Find best functions to call for original request. - var memoryRecordKeys = await memoryStore - .GetNearestMatchesAsync(collectionName, requestEmbedding, limit: numberOfBestFunctions) - .Select(l => l.Item1.Key) - .ToListAsync(); + var searchResults = await collection.VectorizedSearchAsync(requestEmbedding, new() { Top = numberOfBestFunctions }, cancellationToken); + var recordKeys = (await searchResults.Results.ToListAsync(cancellationToken)).Select(l => l.Record.Id); return plugins .SelectMany(plugin => plugin) - .Where(function => memoryRecordKeys.Contains(functionKeyProvider.GetFunctionKey(function))) + .Where(function => recordKeys.Contains(functionKeyProvider.GetFunctionKey(function))) .ToList(); } } public class PluginStore( ITextEmbeddingGenerationService textEmbeddingGenerationService, - IMemoryStore memoryStore, + IVectorStore vectorStore, IFunctionKeyProvider functionKeyProvider) : IPluginStore { - public async Task SaveAsync(string collectionName, KernelPluginCollection plugins) + public async Task SaveAsync(string collectionName, KernelPluginCollection plugins, CancellationToken cancellationToken = default) { // Collect data about imported functions in kernel. - var memoryRecords = new List(); + var functionRecords = new List(); var functionsData = GetFunctionsData(plugins); // Generate embedding for each function. 
var embeddings = await textEmbeddingGenerationService - .GenerateEmbeddingsAsync(functionsData.Select(l => l.TextToVectorize).ToArray()); + .GenerateEmbeddingsAsync(functionsData.Select(l => l.TextToVectorize).ToArray(), cancellationToken: cancellationToken); - // Create memory record instances with function information and embedding. + // Create vector store record instances with function information and embedding. for (var i = 0; i < functionsData.Count; i++) { - var (function, textToVectorize) = functionsData[i]; + var (function, functionInfo) = functionsData[i]; - memoryRecords.Add(MemoryRecord.LocalRecord( - id: functionKeyProvider.GetFunctionKey(function), - text: textToVectorize, - description: null, - embedding: embeddings[i])); + functionRecords.Add(new FunctionRecord + { + Id = functionKeyProvider.GetFunctionKey(function), + FunctionInfo = functionInfo, + FunctionInfoEmbedding = embeddings[i] + }); } - // Create collection and upsert all memory records for search. + // Create collection and upsert all vector store records for search. // It's possible to do it only once and re-use the same functions for future requests. 
- await memoryStore.CreateCollectionAsync(collectionName); - await memoryStore.UpsertBatchAsync(collectionName, memoryRecords).ToListAsync(); + var collection = vectorStore.GetCollection(collectionName); + await collection.CreateCollectionIfNotExistsAsync(cancellationToken); + + await collection.UpsertBatchAsync(functionRecords, cancellationToken: cancellationToken).ToListAsync(cancellationToken); } private static List<(KernelFunction Function, string TextToVectorize)> GetFunctionsData(KernelPluginCollection plugins) @@ -405,4 +411,20 @@ private sealed class CalendarPlugin } #endregion + + #region Vector Store Record + + private sealed class FunctionRecord + { + [VectorStoreRecordKey] + public string Id { get; set; } + + [VectorStoreRecordData] + public string FunctionInfo { get; set; } + + [VectorStoreRecordVector] + public ReadOnlyMemory FunctionInfoEmbedding { get; set; } + } + + #endregion } diff --git a/dotnet/samples/Demos/OnnxSimpleRAG/README.md b/dotnet/samples/Demos/OnnxSimpleRAG/README.md index da6a3ad726ff..07882c57d4bc 100644 --- a/dotnet/samples/Demos/OnnxSimpleRAG/README.md +++ b/dotnet/samples/Demos/OnnxSimpleRAG/README.md @@ -13,7 +13,7 @@ In this example we setup two ONNX AI Services: - [Chat Completion Service](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Abstractions/AI/ChatCompletion/IChatCompletionService.cs) - Using the Chat Completion Service from [Onnx Connector](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Onnx/OnnxRuntimeGenAIChatCompletionService.cs) to generate responses from the Local Model. 
- [Text Embeddings Generation Service]() - Using the Text Embeddings Generation Service from [Onnx Connector](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Onnx/BertOnnxTextEmbeddingGenerationService.cs) to generate -- [Memory Store](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Abstractions/Memory/IMemoryStore.cs) Using Memory Store Service with [VolatileMemoryStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Plugins/Plugins.Memory/VolatileMemoryStore.cs) to store and retrieve embeddings in memory for RAG. +- [Vector Store](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/VectorData.Abstractions/VectorStorage/IVectorStore.cs) Using Vector Store Service with [InMemoryVectorStore](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Connectors/Connectors.Memory.InMemory/InMemoryVectorStore.cs) to store and retrieve embeddings in memory for RAG. - [Semantic Text Memory](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel.Core/Memory/SemanticTextMemory.cs) to manage the embeddings in memory for RAG. - [Text Memory Plugin](https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Plugins/Plugins.Memory/TextMemoryPlugin.cs) to enable memory retrieval functions (Recall) to be used with Prompts for RAG.