Skip to content

Commit 2a633f4

Browse files
committed
Memory pipeline
1 parent 0a9b8d7 commit 2a633f4

File tree

8 files changed

+367
-0
lines changed

8 files changed

+367
-0
lines changed
+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Builds the SemanticMemoryPipelineService project on every push to the
# feature-semantic-memory branch.
name: Build SemanticMemoryPipelineService

on:
  push:
    branches: ["feature-semantic-memory"]

permissions:
  contents: read

jobs:
  memory-pipeline:
    runs-on: ubuntu-latest

    environment: feature-semantic-memory

    steps:
      - uses: actions/checkout@v3
        with:
          clean: true

      - name: Set .Net Core version
        # v1 of this action targets a retired Node runtime; v3 is the maintained line.
        uses: actions/setup-dotnet@v3
        with:
          dotnet-version: 6.0.x

      - name: Add custom nuget source
        # The PAT is handed to the script through the environment and quoted,
        # instead of being expanded into the script text, so that characters in
        # the secret cannot be interpreted by the shell.
        env:
          AZURE_DEVOPS_PAT: ${{ secrets.AZURE_DEVOPS_PAT }}
        run: |
          dotnet nuget add source "https://pkgs.dev.azure.com/msctoproj/Lightspeed/_packaging/SemanticMemoryPrivate/nuget/v3/index.json" \
            --name SemanticMemoryPrivate \
            --username az \
            --password "$AZURE_DEVOPS_PAT" \
            --store-password-in-clear-text

      - name: Build SemanticMemoryPipelineService
        run: |
          dotnet build memorypipeline/SemanticMemoryPipelineService.csproj \
            -c Release \
            -v normal

.vscode/settings.json

+1
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,5 @@
4848
"**/.DS_Store": true,
4949
"**/Thumbs.db": true
5050
},
51+
"dotnet.defaultSolution": ".\\CopilotChat.sln"
5152
}

CopilotChat.sln

+6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ VisualStudioVersion = 17.6.33706.43
55
MinimumVisualStudioVersion = 10.0.40219.1
66
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "webapi\CopilotChatWebApi.csproj", "{5252E68F-B653-44CE-9A32-360A75C54E0E}"
77
EndProject
8+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticMemoryPipelineService", "memorypipeline\SemanticMemoryPipelineService.csproj", "{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}"
9+
EndProject
810
Global
911
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1012
Debug|Any CPU = Debug|Any CPU
@@ -15,6 +17,10 @@ Global
1517
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
1618
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
1719
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.Build.0 = Release|Any CPU
20+
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21+
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.Build.0 = Debug|Any CPU
22+
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.ActiveCfg = Release|Any CPU
23+
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.Build.0 = Release|Any CPU
1824
EndGlobalSection
1925
GlobalSection(SolutionProperties) = preSolution
2026
HideSolutionNode = FALSE

memorypipeline/Builder.cs

+163
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
using System;
4+
using Microsoft.AspNetCore.Builder;
5+
using Microsoft.Extensions.Configuration;
6+
using Microsoft.Extensions.DependencyInjection;
7+
using Microsoft.SemanticKernel.AI.Embeddings;
8+
using Microsoft.SemanticMemory.Core.AI.AzureOpenAI;
9+
using Microsoft.SemanticMemory.Core.AI.OpenAI;
10+
using Microsoft.SemanticMemory.Core.AppBuilders;
11+
using Microsoft.SemanticMemory.Core.Configuration;
12+
using Microsoft.SemanticMemory.Core.ContentStorage.AzureBlobs;
13+
using Microsoft.SemanticMemory.Core.ContentStorage.FileSystemStorage;
14+
using Microsoft.SemanticMemory.Core.Handlers;
15+
using Microsoft.SemanticMemory.Core.MemoryStorage.AzureCognitiveSearch;
16+
using Microsoft.SemanticMemory.Core.MemoryStorage;
17+
using Microsoft.SemanticMemory.Core.Pipeline.Queue;
18+
using Microsoft.SemanticMemory.Core.Pipeline.Queue.AzureQueues;
19+
using Microsoft.SemanticMemory.Core.Pipeline.Queue.FileBasedQueues;
20+
using Microsoft.SemanticMemory.Core.Pipeline.Queue.RabbitMq;
21+
using Microsoft.SemanticMemory.Core.Pipeline;
22+
using Microsoft.SemanticKernel.Connectors.AI.OpenAI.TextEmbedding;
23+
24+
namespace SemanticMemory.Service;
25+
26+
/// <summary>
/// Flexible dependency injection using dependencies defined in appsettings.json.
/// Each component (content storage, queue, embedding generators, vector DBs) is
/// selected by name in the "SemanticMemory" configuration section, and its
/// settings are read from a sub-section of "SemanticMemory:Services".
/// </summary>
public static class Builder
{
    private const string ConfigRoot = "SemanticMemory";

    /// <summary>
    /// Creates the web application builder and registers all pipeline services
    /// according to the "SemanticMemory" configuration section.
    /// </summary>
    /// <param name="config">Receives the bound "SemanticMemory" configuration.</param>
    /// <returns>The builder, ready for <c>Build()</c>.</returns>
    /// <exception cref="ConfigurationException">
    /// The "SemanticMemory" section, or the section of a selected service, is missing.
    /// </exception>
    /// <exception cref="NotSupportedException">
    /// A configured storage, queue, embedding generator or vector DB name is not recognized.
    /// </exception>
    public static WebApplicationBuilder CreateBuilder(out SemanticMemoryConfig config)
    {
        WebApplicationBuilder builder = WebApplication.CreateBuilder();
        config = builder.Configuration.GetSection(ConfigRoot).Get<SemanticMemoryConfig>()
                 ?? throw new ConfigurationException("Configuration is null");

        builder.Services.AddSingleton<SemanticMemoryConfig>(config);
        builder.Services.AddSingleton<IMimeTypeDetection, MimeTypesDetection>();

        // NOTE(review): these are two independent singleton registrations, so resolving
        // the interface and the concrete type presumably yields two different
        // orchestrator instances - confirm this is intended.
        builder.Services.AddSingleton<IPipelineOrchestrator, DistributedPipelineOrchestrator>();
        builder.Services.AddSingleton<DistributedPipelineOrchestrator, DistributedPipelineOrchestrator>();

        ConfigureContentStorage(builder, config);
        ConfigurePipelineHandlers(builder, config);
        ConfigureQueueSystem(builder, config);
        ConfigureEmbeddingGenerator(builder, config);
        ConfigureEmbeddingStorage(builder, config);

        return builder;
    }

    // Reads and binds "SemanticMemory:Services:{serviceName}". Fails with a clear
    // error when the section is absent, instead of passing null to the
    // service registration.
    private static T GetServiceConfig<T>(WebApplicationBuilder builder, string serviceName)
    {
        T? serviceConfig = builder.Configuration
            .GetSection(ConfigRoot).GetSection("Services").GetSection(serviceName)
            .Get<T>();

        if (serviceConfig is null)
        {
            throw new ConfigurationException(
                $"Configuration section '{ConfigRoot}:Services:{serviceName}' is missing or empty");
        }

        return serviceConfig;
    }

    // Service where documents and temporary files are stored
    private static void ConfigureContentStorage(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        switch (config.ContentStorageType)
        {
            case string x when x.Equals("AzureBlobs", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddAzureBlobAsContentStorage(
                    GetServiceConfig<AzureBlobConfig>(builder, "AzureBlobs"));
                break;

            case string x when x.Equals("FileSystemContentStorage", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddFileSystemAsContentStorage(
                    GetServiceConfig<FileSystemConfig>(builder, "FileSystemContentStorage"));
                break;

            default:
                throw new NotSupportedException($"Unknown/unsupported {config.ContentStorageType} content storage");
        }
    }

    // Register pipeline handlers as hosted services
    private static void ConfigurePipelineHandlers(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        builder.Services.AddHandlerAsHostedService<TextExtractionHandler>("extract");
        builder.Services.AddHandlerAsHostedService<TextPartitioningHandler>("partition");
        builder.Services.AddHandlerAsHostedService<GenerateEmbeddingsHandler>("gen_embeddings");
        builder.Services.AddHandlerAsHostedService<SaveEmbeddingsHandler>("save_embeddings");
    }

    // Orchestration dependencies, i.e. which queueing system to use
    private static void ConfigureQueueSystem(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        switch (config.DataIngestion.DistributedOrchestration.QueueType)
        {
            case string y when y.Equals("AzureQueue", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddAzureQueue(
                    GetServiceConfig<AzureQueueConfig>(builder, "AzureQueue"));
                break;

            case string y when y.Equals("RabbitMQ", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddRabbitMq(
                    GetServiceConfig<RabbitMqConfig>(builder, "RabbitMq"));
                break;

            case string y when y.Equals("FileBasedQueue", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddFileBasedQueue(
                    GetServiceConfig<FileBasedQueueConfig>(builder, "FileBasedQueue"));
                break;

            default:
                throw new NotSupportedException($"Unknown/unsupported {config.DataIngestion.DistributedOrchestration.QueueType} queue type");
        }
    }

    // List of embedding generators to use (multiple generators allowed during ingestion)
    private static void ConfigureEmbeddingGenerator(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        var embeddingGenerationServices = new TypeCollection<ITextEmbeddingGeneration>();
        builder.Services.AddSingleton(embeddingGenerationServices);
        foreach (var type in config.DataIngestion.EmbeddingGeneratorTypes)
        {
            switch (type)
            {
                // Both names select Azure OpenAI; settings always come from "AzureOpenAIEmbedding"
                case string x when x.Equals("AzureOpenAI", StringComparison.OrdinalIgnoreCase):
                case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase):
                    embeddingGenerationServices.Add<AzureTextEmbeddingGeneration>();
                    builder.Services.AddAzureOpenAIEmbeddingGeneration(
                        GetServiceConfig<AzureOpenAIConfig>(builder, "AzureOpenAIEmbedding"));
                    break;

                case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase):
                    embeddingGenerationServices.Add<OpenAITextEmbeddingGeneration>();
                    builder.Services.AddOpenAITextEmbeddingGeneration(
                        GetServiceConfig<OpenAIConfig>(builder, "OpenAI"));
                    break;

                default:
                    throw new NotSupportedException($"Unknown/unsupported {type} embedding generator");
            }
        }
    }

    // List of vector DBs where embeddings are stored (multiple DBs allowed during ingestion)
    private static void ConfigureEmbeddingStorage(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        var vectorDbServices = new TypeCollection<ISemanticMemoryVectorDb>();
        builder.Services.AddSingleton(vectorDbServices);
        foreach (var type in config.DataIngestion.VectorDbTypes)
        {
            switch (type)
            {
                case string x when x.Equals("AzureCognitiveSearch", StringComparison.OrdinalIgnoreCase):
                    vectorDbServices.Add<AzureCognitiveSearchMemory>();
                    builder.Services.AddAzureCognitiveSearchAsVectorDb(
                        GetServiceConfig<AzureCognitiveSearchConfig>(builder, "AzureCognitiveSearch"));
                    break;

                default:
                    throw new NotSupportedException($"Unknown/unsupported {type} vector DB");
            }
        }
    }
}

memorypipeline/Program.cs

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright (c) Microsoft. All rights reserved.
2+
3+
// ********************************************************
4+
// ************** APP BUILD *******************************
5+
// ********************************************************
6+
7+
using System;
8+
using Microsoft.Extensions.Logging;
9+
using Microsoft.SemanticMemory.Core.Configuration;
10+
using Microsoft.SemanticMemory.Core.Diagnostics;
11+
using SemanticMemory.Service;
12+
13+
// Register every service from configuration. The bound configuration object
// is not used by the entry point, so it is discarded.
var app = Builder.CreateBuilder(out SemanticMemoryConfig _).Build();

// ********************************************************
// ************** START ***********************************
// ********************************************************

// Named placeholders keep the values addressable by structured log sinks.
// The environment name is taken from the host rather than from the raw
// ASPNETCORE_ENVIRONMENT variable, which may be unset.
app.Logger.LogInformation(
    "Starting Semantic Memory pipeline service, .NET Env: {Environment}, Log Level: {LogLevel}",
    app.Environment.EnvironmentName,
    app.Logger.GetLogLevelName());

app.Run();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<Project Sdk="Microsoft.NET.Sdk.Web">
2+
3+
<PropertyGroup>
4+
<OutputType>Exe</OutputType>
5+
<TargetFramework>net6.0</TargetFramework>
6+
<ImplicitUsings>disable</ImplicitUsings>
7+
<Nullable>enable</Nullable>
8+
<RootNamespace>SemanticMemory.Service</RootNamespace>
9+
<UserSecretsId>ef47f200-b235-45d9-9fd6-765fc59d33f5</UserSecretsId>
10+
</PropertyGroup>
11+
12+
<ItemGroup>
13+
<PackageReference Include="Microsoft.SemanticMemory.Core" Version="0.0.5.1-preview" />
14+
</ItemGroup>
15+
16+
</Project>

memorypipeline/appsettings.json

+100
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
{
2+
"SemanticMemory": {
3+
// "AzureBlobs" or "FileSystemContentStorage"
4+
"ContentStorageType": "FileSystemContentStorage",
5+
// Data ingestion pipelines configuration.
6+
"DataIngestion": {
7+
"DistributedOrchestration": {
8+
// "AzureQueue", "RabbitMQ", "FileBasedQueue"
9+
"QueueType": "FileBasedQueue"
10+
},
11+
// Multiple generators can be used, e.g. for data migration, A/B testing, etc.
12+
"EmbeddingGeneratorTypes": [
13+
"AzureOpenAIEmbedding"
14+
],
15+
// Vectors can be written to multiple storages, e.g. for data migration, A/B testing, etc.
16+
"VectorDbTypes": [
17+
"AzureCognitiveSearch"
18+
]
19+
},
20+
"Services": {
21+
"AzureBlobs": {
22+
// "ConnectionString" or "AzureIdentity"
23+
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
24+
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
25+
"Auth": "AzureIdentity",
26+
// Azure Storage account name, required when using AzureIdentity auth
27+
// Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__Account' to set this
28+
"Account": "",
29+
// Container where to create directories and upload files
30+
"Container": "smemory",
31+
// Required when Auth == ConnectionString
32+
// Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__ConnectionString' to set this
33+
"ConnectionString": "",
34+
// Setting used only for country clouds
35+
"EndpointSuffix": "core.windows.net"
36+
},
37+
"AzureQueue": {
38+
// - AzureIdentity: use automatic AAD authentication mechanism
39+
// - ConnectionString: auth using a connection string
40+
"Auth": "AzureIdentity",
41+
// Azure Storage account name, required when using AzureIdentity auth
42+
// Note: you can use an env var 'SemanticMemory__Services__AzureQueue__Account' to set this
43+
"Account": "",
44+
// Required when Auth == ConnectionString
45+
// Note: you can use an env var 'SemanticMemory__Services__AzureQueue__ConnectionString' to set this
46+
"ConnectionString": "",
47+
// Setting used only for country clouds
48+
"EndpointSuffix": "core.windows.net"
49+
},
50+
"AzureCognitiveSearch": {
51+
// "ApiKey" or "AzureIdentity"
52+
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
53+
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
54+
"Auth": "ApiKey",
55+
"Endpoint": "https://<...>",
56+
"APIKey": "",
57+
"VectorIndexPrefix": "smemory-",
58+
},
59+
"AzureOpenAIEmbedding": {
60+
// "ApiKey" or "AzureIdentity"
61+
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
62+
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
63+
"Auth": "ApiKey",
64+
"Endpoint": "https://<...>.openai.azure.com/",
65+
"Deployment": "",
66+
"APIKey": "",
67+
},
68+
"FileSystemContentStorage": {
69+
"Directory": "/tmp/semanticmemory/content"
70+
},
71+
"Qdrant": {
72+
"Endpoint": "https://<...>",
73+
"APIKey": "",
74+
"VectorIndexPrefix": "smemory-"
75+
},
76+
"OpenAI": {
77+
"EmbeddingModel": "text-embedding-ada-002",
78+
"APIKey": "",
79+
"OrgId": "",
80+
},
81+
"RabbitMq": {
82+
"Host": "127.0.0.1",
83+
"Port": "5672",
84+
"Username": "user",
85+
"Password": ""
86+
},
87+
"FileBasedQueue": {
88+
"Path": "/tmp/semanticmemory/queues",
89+
"CreateIfNotExist": true
90+
},
91+
},
92+
},
93+
"Logging": {
94+
"LogLevel": {
95+
"Default": "Information",
96+
"Microsoft.AspNetCore": "Warning"
97+
}
98+
},
99+
"AllowedHosts": "*"
100+
}

memorypipeline/nuget.config

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<configuration>
3+
4+
<packageSources>
5+
<clear />
6+
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
7+
<add key="SemanticMemoryPrivate" value="https://pkgs.dev.azure.com/msctoproj/Lightspeed/_packaging/SemanticMemoryPrivate/nuget/v3/index.json" />
8+
</packageSources>
9+
10+
<packageSourceMapping>
11+
<packageSource key="nuget.org">
12+
<package pattern="*" />
13+
</packageSource>
14+
<packageSource key="SemanticMemoryPrivate">
15+
<package pattern="Microsoft.SemanticMemory.*" />
16+
</packageSource>
17+
</packageSourceMapping>
18+
19+
</configuration>

0 commit comments

Comments
 (0)