diff --git a/.github/workflows/memorypipeline-build.yml b/.github/workflows/memorypipeline-build.yml
new file mode 100644
index 000000000..dfdaaad8f
--- /dev/null
+++ b/.github/workflows/memorypipeline-build.yml
@@ -0,0 +1,43 @@
+# Builds the SemanticMemoryPipelineService project on pushes to the feature branch.
+name: Build SemanticMemoryPipelineService
+
+on:
+  push:
+    branches: ["feature-semantic-memory"]
+
+permissions:
+  contents: read
+
+jobs:
+  memory-pipeline:
+    runs-on: ubuntu-latest
+
+    environment: feature-semantic-memory
+
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          clean: true
+
+      - name: Set .Net Core version
+        uses: actions/setup-dotnet@v3
+        with:
+          dotnet-version: 6.0.x
+
+      - name: Add custom nuget source
+        # The PAT is handed over through an environment variable and quoted, so it is
+        # never spliced into the script text before the shell parses it.
+        env:
+          AZURE_DEVOPS_PAT: ${{ secrets.AZURE_DEVOPS_PAT }}
+        run: |
+          dotnet nuget add source "https://pkgs.dev.azure.com/msctoproj/Lightspeed/_packaging/SemanticMemoryPrivate/nuget/v3/index.json" \
+            --name SemanticMemoryPrivate \
+            --username az \
+            --password "$AZURE_DEVOPS_PAT" \
+            --store-password-in-clear-text
+
+      - name: Build SemanticMemoryPipelineService
+        run: |
+          dotnet build memorypipeline/SemanticMemoryPipelineService.csproj \
+            -c Release \
+            -v normal
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 26ab3211c..646d85a51 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -48,4 +48,5 @@
"**/.DS_Store": true,
"**/Thumbs.db": true
},
+ "dotnet.defaultSolution": "CopilotChat.sln"
}
\ No newline at end of file
diff --git a/CopilotChat.sln b/CopilotChat.sln
index 69ecdfff2..8f87a34e5 100644
--- a/CopilotChat.sln
+++ b/CopilotChat.sln
@@ -5,6 +5,8 @@ VisualStudioVersion = 17.6.33706.43
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "webapi\CopilotChatWebApi.csproj", "{5252E68F-B653-44CE-9A32-360A75C54E0E}"
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticMemoryPipelineService", "memorypipeline\SemanticMemoryPipelineService.csproj", "{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -15,6 +17,10 @@ Global
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/memorypipeline/Builder.cs b/memorypipeline/Builder.cs
new file mode 100644
index 000000000..049fab505
--- /dev/null
+++ b/memorypipeline/Builder.cs
@@ -0,0 +1,163 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using Microsoft.AspNetCore.Builder;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticMemory.Core.AI.AzureOpenAI;
+using Microsoft.SemanticMemory.Core.AI.OpenAI;
+using Microsoft.SemanticMemory.Core.AppBuilders;
+using Microsoft.SemanticMemory.Core.Configuration;
+using Microsoft.SemanticMemory.Core.ContentStorage.AzureBlobs;
+using Microsoft.SemanticMemory.Core.ContentStorage.FileSystemStorage;
+using Microsoft.SemanticMemory.Core.Handlers;
+using Microsoft.SemanticMemory.Core.MemoryStorage.AzureCognitiveSearch;
+using Microsoft.SemanticMemory.Core.MemoryStorage;
+using Microsoft.SemanticMemory.Core.Pipeline.Queue;
+using Microsoft.SemanticMemory.Core.Pipeline.Queue.AzureQueues;
+using Microsoft.SemanticMemory.Core.Pipeline.Queue.FileBasedQueues;
+using Microsoft.SemanticMemory.Core.Pipeline.Queue.RabbitMq;
+using Microsoft.SemanticMemory.Core.Pipeline;
+using Microsoft.SemanticKernel.Connectors.AI.OpenAI.TextEmbedding;
+
+namespace SemanticMemory.Service;
+
+/// <summary>
+/// Flexible dependency injection using dependencies defined in appsettings.json (read from the "SemanticMemory" configuration section)
+/// </summary>
+public static class Builder
+{
+ private const string ConfigRoot = "SemanticMemory";
+
+ public static WebApplicationBuilder CreateBuilder(out SemanticMemoryConfig config)
+ {
+ WebApplicationBuilder builder = WebApplication.CreateBuilder();
+ config = builder.Configuration.GetSection(ConfigRoot).Get()
+ ?? throw new ConfigurationException("Configuration is null");
+
+ builder.Services.AddSingleton(config);
+ builder.Services.AddSingleton();
+ builder.Services.AddSingleton();
+ builder.Services.AddSingleton();
+
+ ConfigureContentStorage(builder, config);
+ ConfigurePipelineHandlers(builder, config);
+ ConfigureQueueSystem(builder, config);
+ ConfigureEmbeddingGenerator(builder, config);
+ ConfigureEmbeddingStorage(builder, config);
+
+ return builder;
+ }
+
+ // Registers the content storage (where documents and temporary files are stored) chosen by config.ContentStorageType; unknown values throw NotSupportedException
+ private static void ConfigureContentStorage(WebApplicationBuilder builder, SemanticMemoryConfig config)
+ {
+ switch (config.ContentStorageType)
+ {
+ case string x when x.Equals("AzureBlobs", StringComparison.OrdinalIgnoreCase):
+ builder.Services.AddAzureBlobAsContentStorage(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureBlobs")
+ .Get()!);
+ break;
+
+ case string x when x.Equals("FileSystemContentStorage", StringComparison.OrdinalIgnoreCase):
+ builder.Services.AddFileSystemAsContentStorage(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("FileSystemContentStorage")
+ .Get()!);
+ break;
+
+ default:
+ throw new NotSupportedException($"Unknown/unsupported {config.ContentStorageType} content storage");
+ }
+ }
+
+ // Registers the extract, partition, gen_embeddings and save_embeddings pipeline handlers as hosted services (the config parameter is not used here)
+ private static void ConfigurePipelineHandlers(WebApplicationBuilder builder, SemanticMemoryConfig config)
+ {
+ builder.Services.AddHandlerAsHostedService("extract");
+ builder.Services.AddHandlerAsHostedService("partition");
+ builder.Services.AddHandlerAsHostedService("gen_embeddings");
+ builder.Services.AddHandlerAsHostedService("save_embeddings");
+ }
+
+ // Orchestration dependencies, i.e. which queueing system to use (AzureQueue, RabbitMQ or FileBasedQueue); unknown values throw NotSupportedException
+ private static void ConfigureQueueSystem(WebApplicationBuilder builder, SemanticMemoryConfig config)
+ {
+ switch (config.DataIngestion.DistributedOrchestration.QueueType)
+ {
+ case string y when y.Equals("AzureQueue", StringComparison.OrdinalIgnoreCase):
+ builder.Services.AddAzureQueue(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureQueue")
+ .Get()!);
+ break;
+
+ case string y when y.Equals("RabbitMQ", StringComparison.OrdinalIgnoreCase):
+ builder.Services.AddRabbitMq(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("RabbitMq")
+ .Get()!);
+ break;
+
+ case string y when y.Equals("FileBasedQueue", StringComparison.OrdinalIgnoreCase):
+ builder.Services.AddFileBasedQueue(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("FileBasedQueue")
+ .Get()!);
+ break;
+
+ default:
+ throw new NotSupportedException($"Unknown/unsupported {config.DataIngestion.DistributedOrchestration.QueueType} queue type");
+ }
+ }
+
+ // Registers each embedding generator listed in config.DataIngestion.EmbeddingGeneratorTypes (multiple generators allowed during ingestion)
+ private static void ConfigureEmbeddingGenerator(WebApplicationBuilder builder, SemanticMemoryConfig config)
+ {
+ var embeddingGenerationServices = new TypeCollection();
+ builder.Services.AddSingleton(embeddingGenerationServices);
+ foreach (var type in config.DataIngestion.EmbeddingGeneratorTypes)
+ {
+ switch (type)
+ {
+ case string x when x.Equals("AzureOpenAI", StringComparison.OrdinalIgnoreCase):
+ case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase):
+ embeddingGenerationServices.Add();
+ builder.Services.AddAzureOpenAIEmbeddingGeneration(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureOpenAIEmbedding")
+ .Get()!);
+ break;
+
+ case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase):
+ embeddingGenerationServices.Add();
+ builder.Services.AddOpenAITextEmbeddingGeneration(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("OpenAI")
+ .Get()!);
+ break;
+
+ default:
+ throw new NotSupportedException($"Unknown/unsupported {type} text generator");
+ }
+ }
+ }
+
+ // Registers each vector DB listed in config.DataIngestion.VectorDbTypes as a store for embeddings (multiple DBs allowed during ingestion)
+ private static void ConfigureEmbeddingStorage(WebApplicationBuilder builder, SemanticMemoryConfig config)
+ {
+ var vectorDbServices = new TypeCollection();
+ builder.Services.AddSingleton(vectorDbServices);
+ foreach (var type in config.DataIngestion.VectorDbTypes)
+ {
+ switch (type)
+ {
+ case string x when x.Equals("AzureCognitiveSearch", StringComparison.OrdinalIgnoreCase):
+ vectorDbServices.Add();
+ builder.Services.AddAzureCognitiveSearchAsVectorDb(builder.Configuration
+ .GetSection(ConfigRoot).GetSection("Services").GetSection("AzureCognitiveSearch")
+ .Get()!);
+ break;
+
+ default:
+ throw new NotSupportedException($"Unknown/unsupported {type} vector DB");
+ }
+ }
+ }
+}
diff --git a/memorypipeline/Program.cs b/memorypipeline/Program.cs
new file mode 100644
index 000000000..dd1acd716
--- /dev/null
+++ b/memorypipeline/Program.cs
@@ -0,0 +1,24 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+// ********************************************************
+// ************** APP BUILD *******************************
+// ********************************************************
+
+using System;
+using Microsoft.Extensions.Logging;
+using Microsoft.SemanticMemory.Core.Configuration;
+using Microsoft.SemanticMemory.Core.Diagnostics;
+using SemanticMemory.Service;
+
+var app = Builder.CreateBuilder(out _).Build();
+
+// ********************************************************
+// ************** START ***********************************
+// ********************************************************
+// Named placeholders (CA2253) and the host's resolved environment name, so the value is never null.
+app.Logger.LogInformation(
+    "Starting Semantic Memory pipeline service, .NET Env: {Environment}, Log Level: {LogLevel}",
+    app.Environment.EnvironmentName,
+    app.Logger.GetLogLevelName());
+
+app.Run();
diff --git a/memorypipeline/SemanticMemoryPipelineService.csproj b/memorypipeline/SemanticMemoryPipelineService.csproj
new file mode 100644
index 000000000..dbd8b265c
--- /dev/null
+++ b/memorypipeline/SemanticMemoryPipelineService.csproj
@@ -0,0 +1,16 @@
+
+
+
+ Exe
+ net6.0
+ disable
+ enable
+ SemanticMemory.Service
+ ef47f200-b235-45d9-9fd6-765fc59d33f5
+
+
+
+
+
+
+
diff --git a/memorypipeline/appsettings.json b/memorypipeline/appsettings.json
new file mode 100644
index 000000000..69ddb9d3b
--- /dev/null
+++ b/memorypipeline/appsettings.json
@@ -0,0 +1,100 @@
+{
+ "SemanticMemory": {
+ // "AzureBlobs" or "FileSystemContentStorage"
+ "ContentStorageType": "FileSystemContentStorage",
+ // Data ingestion pipelines configuration.
+ "DataIngestion": {
+ "DistributedOrchestration": {
+ // "AzureQueue", "RabbitMQ", "FileBasedQueue"
+ "QueueType": "FileBasedQueue"
+ },
+ // Multiple generators can be used, e.g. for data migration, A/B testing, etc.
+ "EmbeddingGeneratorTypes": [
+ "AzureOpenAIEmbedding"
+ ],
+ // Vectors can be written to multiple storages, e.g. for data migration, A/B testing, etc.
+ "VectorDbTypes": [
+ "AzureCognitiveSearch"
+ ]
+ },
+ "Services": {
+ "AzureBlobs": {
+ // "ConnectionString" or "AzureIdentity"
+ // AzureIdentity: use automatic AAD authentication mechanism. You can test locally
+ // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
+ "Auth": "AzureIdentity",
+ // Azure Storage account name, required when using AzureIdentity auth
+ // Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__Account' to set this
+ "Account": "",
+ // Container where to create directories and upload files
+ "Container": "smemory",
+ // Required when Auth == ConnectionString
+ // Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__ConnectionString' to set this
+ "ConnectionString": "",
+ // Setting used only for country clouds
+ "EndpointSuffix": "core.windows.net"
+ },
+ "AzureQueue": {
+ // - AzureIdentity: use automatic AAD authentication mechanism
+ // - ConnectionString: auth using a connection string
+ "Auth": "AzureIdentity",
+ // Azure Storage account name, required when using AzureIdentity auth
+ // Note: you can use an env var 'SemanticMemory__Services__AzureQueue__Account' to set this
+ "Account": "",
+ // Required when Auth == ConnectionString
+ // Note: you can use an env var 'SemanticMemory__Services__AzureQueue__ConnectionString' to set this
+ "ConnectionString": "",
+ // Setting used only for country clouds
+ "EndpointSuffix": "core.windows.net"
+ },
+ "AzureCognitiveSearch": {
+ // "ApiKey" or "AzureIdentity"
+ // AzureIdentity: use automatic AAD authentication mechanism. You can test locally
+ // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
+ "Auth": "ApiKey",
+ "Endpoint": "https://<...>",
+ "APIKey": "",
+ "VectorIndexPrefix": "smemory-",
+ },
+ "AzureOpenAIEmbedding": {
+ // "ApiKey" or "AzureIdentity"
+ // AzureIdentity: use automatic AAD authentication mechanism. You can test locally
+ // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
+ "Auth": "ApiKey",
+ "Endpoint": "https://<...>.openai.azure.com/",
+ "Deployment": "",
+ "APIKey": "",
+ },
+ "FileSystemContentStorage": {
+ "Directory": "/tmp/semanticmemory/content"
+ },
+ "Qdrant": {
+ "Endpoint": "https://<...>",
+ "APIKey": "",
+ "VectorIndexPrefix": "smemory-"
+ },
+ "OpenAI": {
+ "EmbeddingModel": "text-embedding-ada-002",
+ "APIKey": "",
+ "OrgId": "",
+ },
+ "RabbitMq": {
+ "Host": "127.0.0.1",
+ "Port": "5672",
+ "Username": "user",
+ "Password": ""
+ },
+ "FileBasedQueue": {
+ "Path": "/tmp/semanticmemory/queues",
+ "CreateIfNotExist": true
+ },
+ },
+ },
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ },
+ "AllowedHosts": "*"
+}
\ No newline at end of file
diff --git a/memorypipeline/nuget.config b/memorypipeline/nuget.config
new file mode 100644
index 000000000..743e81d8f
--- /dev/null
+++ b/memorypipeline/nuget.config
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+