diff --git a/.github/workflows/memorypipeline-build.yml b/.github/workflows/memorypipeline-build.yml new file mode 100644 index 000000000..dfdaaad8f --- /dev/null +++ b/.github/workflows/memorypipeline-build.yml @@ -0,0 +1,38 @@ +name: Build SemanticMemoryPipelineService + +on: + push: + branches: ["feature-semantic-memory"] + +permissions: + contents: read + +jobs: + memory-pipeline: + runs-on: ubuntu-latest + + environment: feature-semantic-memory + + steps: + - uses: actions/checkout@v3 + with: + clean: true + + - name: Set .Net Core version + uses: actions/setup-dotnet@v1 + with: + dotnet-version: 6.0.x + + - name: Add custom nuget source + run: | + dotnet nuget add source "https://pkgs.dev.azure.com/msctoproj/Lightspeed/_packaging/SemanticMemoryPrivate/nuget/v3/index.json" \ + --name SemanticMemoryPrivate \ + --username az \ + --password ${{ secrets.AZURE_DEVOPS_PAT }} \ + --store-password-in-clear-text + + - name: Build SemanticMemoryPipelineService + run: | + dotnet build memorypipeline/SemanticMemoryPipelineService.csproj \ + -c Release \ + -v normal diff --git a/.vscode/settings.json b/.vscode/settings.json index 26ab3211c..646d85a51 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -48,4 +48,5 @@ "**/.DS_Store": true, "**/Thumbs.db": true }, + "dotnet.defaultSolution": ".\\CopilotChat.sln" } \ No newline at end of file diff --git a/CopilotChat.sln b/CopilotChat.sln index 69ecdfff2..8f87a34e5 100644 --- a/CopilotChat.sln +++ b/CopilotChat.sln @@ -5,6 +5,8 @@ VisualStudioVersion = 17.6.33706.43 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "webapi\CopilotChatWebApi.csproj", "{5252E68F-B653-44CE-9A32-360A75C54E0E}" EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticMemoryPipelineService", "memorypipeline\SemanticMemoryPipelineService.csproj", "{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = 
preSolution Debug|Any CPU = Debug|Any CPU @@ -15,6 +17,10 @@ Global {5252E68F-B653-44CE-9A32-360A75C54E0E}.Debug|Any CPU.Build.0 = Debug|Any CPU {5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.ActiveCfg = Release|Any CPU {5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.Build.0 = Release|Any CPU + {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/memorypipeline/Builder.cs b/memorypipeline/Builder.cs new file mode 100644 index 000000000..049fab505 --- /dev/null +++ b/memorypipeline/Builder.cs @@ -0,0 +1,163 @@ +// Copyright (c) Microsoft. All rights reserved. + +using System; +using Microsoft.AspNetCore.Builder; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.SemanticKernel.AI.Embeddings; +using Microsoft.SemanticMemory.Core.AI.AzureOpenAI; +using Microsoft.SemanticMemory.Core.AI.OpenAI; +using Microsoft.SemanticMemory.Core.AppBuilders; +using Microsoft.SemanticMemory.Core.Configuration; +using Microsoft.SemanticMemory.Core.ContentStorage.AzureBlobs; +using Microsoft.SemanticMemory.Core.ContentStorage.FileSystemStorage; +using Microsoft.SemanticMemory.Core.Handlers; +using Microsoft.SemanticMemory.Core.MemoryStorage.AzureCognitiveSearch; +using Microsoft.SemanticMemory.Core.MemoryStorage; +using Microsoft.SemanticMemory.Core.Pipeline.Queue; +using Microsoft.SemanticMemory.Core.Pipeline.Queue.AzureQueues; +using Microsoft.SemanticMemory.Core.Pipeline.Queue.FileBasedQueues; +using Microsoft.SemanticMemory.Core.Pipeline.Queue.RabbitMq; +using Microsoft.SemanticMemory.Core.Pipeline; +using 
Microsoft.SemanticKernel.Connectors.AI.OpenAI.TextEmbedding; + +namespace SemanticMemory.Service; + +/// <summary> +/// Builds the service's dependency injection container from the "SemanticMemory" section of the app configuration (e.g. appsettings.json). +/// </summary> +public static class Builder +{ + private const string ConfigRoot = "SemanticMemory"; + + public static WebApplicationBuilder CreateBuilder(out SemanticMemoryConfig config) + { + WebApplicationBuilder builder = WebApplication.CreateBuilder(); + config = builder.Configuration.GetSection(ConfigRoot).Get() + ?? throw new ConfigurationException("Configuration is null"); + + builder.Services.AddSingleton(config); + builder.Services.AddSingleton(); + builder.Services.AddSingleton(); + builder.Services.AddSingleton(); + + ConfigureContentStorage(builder, config); + ConfigurePipelineHandlers(builder, config); + ConfigureQueueSystem(builder, config); + ConfigureEmbeddingGenerator(builder, config); + ConfigureEmbeddingStorage(builder, config); + + return builder; + } + + // Content storage (where documents and temporary files are stored), selected by config.ContentStorageType; unknown values throw NotSupportedException + private static void ConfigureContentStorage(WebApplicationBuilder builder, SemanticMemoryConfig config) + { + switch (config.ContentStorageType) + { + case string x when x.Equals("AzureBlobs", StringComparison.OrdinalIgnoreCase): + builder.Services.AddAzureBlobAsContentStorage(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureBlobs") + .Get()!); + break; + + case string x when x.Equals("FileSystemContentStorage", StringComparison.OrdinalIgnoreCase): + builder.Services.AddFileSystemAsContentStorage(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("FileSystemContentStorage") + .Get()!); + break; + + default: + throw new NotSupportedException($"Unknown/unsupported {config.ContentStorageType} content storage"); + } + } + + // Register the pipeline handlers (extract, partition, gen_embeddings, save_embeddings) as hosted services + private static void ConfigurePipelineHandlers(WebApplicationBuilder builder, SemanticMemoryConfig config) + { + 
builder.Services.AddHandlerAsHostedService("extract"); + builder.Services.AddHandlerAsHostedService("partition"); + builder.Services.AddHandlerAsHostedService("gen_embeddings"); + builder.Services.AddHandlerAsHostedService("save_embeddings"); + } + + // Orchestration dependencies, i.e. which queueing system to use (selected by DataIngestion.DistributedOrchestration.QueueType) + private static void ConfigureQueueSystem(WebApplicationBuilder builder, SemanticMemoryConfig config) + { + switch (config.DataIngestion.DistributedOrchestration.QueueType) + { + case string y when y.Equals("AzureQueue", StringComparison.OrdinalIgnoreCase): + builder.Services.AddAzureQueue(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureQueue") + .Get()!); + break; + + case string y when y.Equals("RabbitMQ", StringComparison.OrdinalIgnoreCase): + builder.Services.AddRabbitMq(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("RabbitMq") + .Get()!); + break; + + case string y when y.Equals("FileBasedQueue", StringComparison.OrdinalIgnoreCase): + builder.Services.AddFileBasedQueue(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("FileBasedQueue") + .Get()!); + break; + + default: + throw new NotSupportedException($"Unknown/unsupported {config.DataIngestion.DistributedOrchestration.QueueType} queue type"); + } + } + + // List of embedding generators to use (multiple generators allowed during ingestion) + private static void ConfigureEmbeddingGenerator(WebApplicationBuilder builder, SemanticMemoryConfig config) + { + var embeddingGenerationServices = new TypeCollection(); + builder.Services.AddSingleton(embeddingGenerationServices); + foreach (var type in config.DataIngestion.EmbeddingGeneratorTypes) + { + switch (type) + { + case string x when x.Equals("AzureOpenAI", StringComparison.OrdinalIgnoreCase): + case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase): + embeddingGenerationServices.Add(); + 
builder.Services.AddAzureOpenAIEmbeddingGeneration(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureOpenAIEmbedding") + .Get()!); + break; + + case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase): + embeddingGenerationServices.Add(); + builder.Services.AddOpenAITextEmbeddingGeneration(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("OpenAI") + .Get()!); + break; + + default: + throw new NotSupportedException($"Unknown/unsupported {type} embedding generator"); + } + } + } + + // List of vector DBs where embeddings are stored (multiple DBs allowed during ingestion) + private static void ConfigureEmbeddingStorage(WebApplicationBuilder builder, SemanticMemoryConfig config) + { + var vectorDbServices = new TypeCollection(); + builder.Services.AddSingleton(vectorDbServices); + foreach (var type in config.DataIngestion.VectorDbTypes) + { + switch (type) + { + case string x when x.Equals("AzureCognitiveSearch", StringComparison.OrdinalIgnoreCase): + vectorDbServices.Add(); + builder.Services.AddAzureCognitiveSearchAsVectorDb(builder.Configuration + .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureCognitiveSearch") + .Get()!); + break; + + default: + throw new NotSupportedException($"Unknown/unsupported {type} vector DB"); + } + } + } +} diff --git a/memorypipeline/Program.cs b/memorypipeline/Program.cs new file mode 100644 index 000000000..dd1acd716 --- /dev/null +++ b/memorypipeline/Program.cs @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft. All rights reserved. 
+ +// ******************************************************** +// ************** APP BUILD ******************************* +// ******************************************************** + +using System; +using Microsoft.Extensions.Logging; +using Microsoft.SemanticMemory.Core.Configuration; +using Microsoft.SemanticMemory.Core.Diagnostics; +using SemanticMemory.Service; + +var app = Builder.CreateBuilder(out SemanticMemoryConfig config).Build(); + +// ******************************************************** +// ************** START *********************************** +// ******************************************************** + +app.Logger.LogInformation( + "Starting Semantic Memory pipeline service, .NET Env: {Environment}, Log Level: {LogLevel}", + Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"), + app.Logger.GetLogLevelName()); + +app.Run(); diff --git a/memorypipeline/SemanticMemoryPipelineService.csproj b/memorypipeline/SemanticMemoryPipelineService.csproj new file mode 100644 index 000000000..dbd8b265c --- /dev/null +++ b/memorypipeline/SemanticMemoryPipelineService.csproj @@ -0,0 +1,16 @@ + + + + Exe + net6.0 + disable + enable + SemanticMemory.Service + ef47f200-b235-45d9-9fd6-765fc59d33f5 + + + + + + + diff --git a/memorypipeline/appsettings.json b/memorypipeline/appsettings.json new file mode 100644 index 000000000..69ddb9d3b --- /dev/null +++ b/memorypipeline/appsettings.json @@ -0,0 +1,100 @@ +{ + "SemanticMemory": { + // "AzureBlobs" or "FileSystemContentStorage" + "ContentStorageType": "FileSystemContentStorage", + // Data ingestion pipelines configuration. + "DataIngestion": { + "DistributedOrchestration": { + // "AzureQueue", "RabbitMQ", "FileBasedQueue" + "QueueType": "FileBasedQueue" + }, + // Multiple generators can be used, e.g. for data migration, A/B testing, etc. + "EmbeddingGeneratorTypes": [ + "AzureOpenAIEmbedding" + ], + // Vectors can be written to multiple storages, e.g. for data migration, A/B testing, etc. 
+ "VectorDbTypes": [ + "AzureCognitiveSearch" + ] + }, + "Services": { + "AzureBlobs": { + // "ConnectionString" or "AzureIdentity" + // AzureIdentity: use automatic AAD authentication mechanism. You can test locally + // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. + "Auth": "AzureIdentity", + // Azure Storage account name, required when using AzureIdentity auth + // Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__Account' to set this + "Account": "", + // Container where to create directories and upload files + "Container": "smemory", + // Required when Auth == ConnectionString + // Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__ConnectionString' to set this + "ConnectionString": "", + // Setting used only for country clouds + "EndpointSuffix": "core.windows.net" + }, + "AzureQueue": { + // - AzureIdentity: use automatic AAD authentication mechanism + // - ConnectionString: auth using a connection string + "Auth": "AzureIdentity", + // Azure Storage account name, required when using AzureIdentity auth + // Note: you can use an env var 'SemanticMemory__Services__AzureQueue__Account' to set this + "Account": "", + // Required when Auth == ConnectionString + // Note: you can use an env var 'SemanticMemory__Services__AzureQueue__ConnectionString' to set this + "ConnectionString": "", + // Setting used only for country clouds + "EndpointSuffix": "core.windows.net" + }, + "AzureCognitiveSearch": { + // "ApiKey" or "AzureIdentity" + // AzureIdentity: use automatic AAD authentication mechanism. You can test locally + // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. + "Auth": "ApiKey", + "Endpoint": "https://<...>", + "APIKey": "", + "VectorIndexPrefix": "smemory-", + }, + "AzureOpenAIEmbedding": { + // "ApiKey" or "AzureIdentity" + // AzureIdentity: use automatic AAD authentication mechanism. 
You can test locally + // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. + "Auth": "ApiKey", + "Endpoint": "https://<...>.openai.azure.com/", + "Deployment": "", + "APIKey": "", + }, + "FileSystemContentStorage": { + "Directory": "/tmp/semanticmemory/content" + }, + "Qdrant": { + "Endpoint": "https://<...>", + "APIKey": "", + "VectorIndexPrefix": "smemory-" + }, + "OpenAI": { + "EmbeddingModel": "text-embedding-ada-002", + "APIKey": "", + "OrgId": "", + }, + "RabbitMq": { + "Host": "127.0.0.1", + "Port": "5672", + "Username": "user", + "Password": "" + }, + "FileBasedQueue": { + "Path": "/tmp/semanticmemory/queues", + "CreateIfNotExist": true + }, + }, + }, + "Logging": { + "LogLevel": { + "Default": "Information", + "Microsoft.AspNetCore": "Warning" + } + }, + "AllowedHosts": "*" +} \ No newline at end of file diff --git a/memorypipeline/nuget.config b/memorypipeline/nuget.config new file mode 100644 index 000000000..743e81d8f --- /dev/null +++ b/memorypipeline/nuget.config @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + +