Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Semantic memory pipeline service #140

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .github/workflows/memorypipeline-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Builds the SemanticMemoryPipelineService project on every push to the
# feature-semantic-memory branch. The job only needs read access to the repo.
name: Build SemanticMemoryPipelineService

on:
  push:
    branches: ["feature-semantic-memory"]

permissions:
  contents: read

jobs:
  memory-pipeline:
    runs-on: ubuntu-latest

    # GitHub environment that provides the AZURE_DEVOPS_PAT secret used below.
    environment: feature-semantic-memory

    steps:
      - uses: actions/checkout@v3
        with:
          clean: true

      - name: Set .Net Core version
        uses: actions/setup-dotnet@v3
        with:
          dotnet-version: 6.0.x

      - name: Add custom nuget source
        # The PAT is handed to the script through an environment variable and
        # expanded by the shell inside quotes, instead of being interpolated
        # into the script text by the workflow expression engine. This keeps
        # special characters in the secret from being parsed as shell syntax.
        env:
          AZURE_DEVOPS_PAT: ${{ secrets.AZURE_DEVOPS_PAT }}
        run: |
          dotnet nuget add source "https://pkgs.dev.azure.com/msctoproj/Lightspeed/_packaging/SemanticMemoryPrivate/nuget/v3/index.json" \
            --name SemanticMemoryPrivate \
            --username az \
            --password "$AZURE_DEVOPS_PAT" \
            --store-password-in-clear-text

      - name: Build SemanticMemoryPipelineService
        run: |
          dotnet build memorypipeline/SemanticMemoryPipelineService.csproj \
            -c Release \
            -v normal
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,5 @@
"**/.DS_Store": true,
"**/Thumbs.db": true
},
"dotnet.defaultSolution": ".\\CopilotChat.sln"
}
6 changes: 6 additions & 0 deletions CopilotChat.sln
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ VisualStudioVersion = 17.6.33706.43
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CopilotChatWebApi", "webapi\CopilotChatWebApi.csproj", "{5252E68F-B653-44CE-9A32-360A75C54E0E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SemanticMemoryPipelineService", "memorypipeline\SemanticMemoryPipelineService.csproj", "{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -15,6 +17,10 @@ Global
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{5252E68F-B653-44CE-9A32-360A75C54E0E}.Release|Any CPU.Build.0 = Release|Any CPU
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E85B096A-7C2E-4F48-ACF7-6BB0BA78B9C5}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
163 changes: 163 additions & 0 deletions memorypipeline/Builder.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
// Copyright (c) Microsoft. All rights reserved.

using System;
using Microsoft.AspNetCore.Builder;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel.AI.Embeddings;
using Microsoft.SemanticMemory.Core.AI.AzureOpenAI;
using Microsoft.SemanticMemory.Core.AI.OpenAI;
using Microsoft.SemanticMemory.Core.AppBuilders;
using Microsoft.SemanticMemory.Core.Configuration;
using Microsoft.SemanticMemory.Core.ContentStorage.AzureBlobs;
using Microsoft.SemanticMemory.Core.ContentStorage.FileSystemStorage;
using Microsoft.SemanticMemory.Core.Handlers;
using Microsoft.SemanticMemory.Core.MemoryStorage.AzureCognitiveSearch;
using Microsoft.SemanticMemory.Core.MemoryStorage;
using Microsoft.SemanticMemory.Core.Pipeline.Queue;
using Microsoft.SemanticMemory.Core.Pipeline.Queue.AzureQueues;
using Microsoft.SemanticMemory.Core.Pipeline.Queue.FileBasedQueues;
using Microsoft.SemanticMemory.Core.Pipeline.Queue.RabbitMq;
using Microsoft.SemanticMemory.Core.Pipeline;
using Microsoft.SemanticKernel.Connectors.AI.OpenAI.TextEmbedding;

namespace SemanticMemory.Service;

/// <summary>
/// Flexible dependency injection using dependencies defined in appsettings.json.
/// Each <c>Configure*</c> method reads a type name from <see cref="SemanticMemoryConfig"/>
/// and registers the matching implementation, binding its settings from the
/// <c>SemanticMemory:Services:&lt;name&gt;</c> configuration section.
/// </summary>
public static class Builder
{
    // Root configuration section holding all Semantic Memory settings.
    private const string ConfigRoot = "SemanticMemory";

    // Child of ConfigRoot that holds one sub-section per configurable service.
    private const string ServicesSection = "Services";

    /// <summary>
    /// Creates the web application builder and registers all pipeline dependencies.
    /// </summary>
    /// <param name="config">Receives the settings bound from the "SemanticMemory" section.</param>
    /// <returns>The builder, ready for <c>Build()</c>.</returns>
    /// <exception cref="ConfigurationException">
    /// The "SemanticMemory" section, or the section of a selected service, is missing or empty.
    /// </exception>
    /// <exception cref="NotSupportedException">A configured type name is not recognized.</exception>
    public static WebApplicationBuilder CreateBuilder(out SemanticMemoryConfig config)
    {
        WebApplicationBuilder builder = WebApplication.CreateBuilder();
        config = builder.Configuration.GetSection(ConfigRoot).Get<SemanticMemoryConfig>()
                 ?? throw new ConfigurationException("Configuration is null");

        builder.Services.AddSingleton<SemanticMemoryConfig>(config);
        builder.Services.AddSingleton<IMimeTypeDetection, MimeTypesDetection>();

        // NOTE(review): these are two independent singleton registrations, so resolving
        // IPipelineOrchestrator and DistributedPipelineOrchestrator yields two different
        // instances - confirm this is intended.
        builder.Services.AddSingleton<IPipelineOrchestrator, DistributedPipelineOrchestrator>();
        builder.Services.AddSingleton<DistributedPipelineOrchestrator, DistributedPipelineOrchestrator>();

        ConfigureContentStorage(builder, config);
        ConfigurePipelineHandlers(builder, config);
        ConfigureQueueSystem(builder, config);
        ConfigureEmbeddingGenerator(builder, config);
        ConfigureEmbeddingStorage(builder, config);

        return builder;
    }

    /// <summary>
    /// Binds the <c>SemanticMemory:Services:&lt;serviceName&gt;</c> section to <typeparamref name="T"/>.
    /// </summary>
    /// <exception cref="ConfigurationException">The section is missing or empty.</exception>
    private static T GetServiceConfig<T>(WebApplicationBuilder builder, string serviceName)
        where T : class
    {
        // Fail here with the section name instead of handing null to the
        // registration extension and failing later with an unrelated error.
        return builder.Configuration
                   .GetSection(ConfigRoot).GetSection(ServicesSection).GetSection(serviceName)
                   .Get<T>()
               ?? throw new ConfigurationException(
                   $"Configuration section '{ConfigRoot}:{ServicesSection}:{serviceName}' is missing or empty");
    }

    // Service where documents and temporary files are stored
    private static void ConfigureContentStorage(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        switch (config.ContentStorageType)
        {
            case string x when x.Equals("AzureBlobs", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddAzureBlobAsContentStorage(
                    GetServiceConfig<AzureBlobConfig>(builder, "AzureBlobs"));
                break;

            case string x when x.Equals("FileSystemContentStorage", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddFileSystemAsContentStorage(
                    GetServiceConfig<FileSystemConfig>(builder, "FileSystemContentStorage"));
                break;

            default:
                throw new NotSupportedException($"Unknown/unsupported {config.ContentStorageType} content storage");
        }
    }

    // Register pipeline handlers as hosted services, one per pipeline step name.
    // The config parameter is currently unused; the handler list is fixed.
    private static void ConfigurePipelineHandlers(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        builder.Services.AddHandlerAsHostedService<TextExtractionHandler>("extract");
        builder.Services.AddHandlerAsHostedService<TextPartitioningHandler>("partition");
        builder.Services.AddHandlerAsHostedService<GenerateEmbeddingsHandler>("gen_embeddings");
        builder.Services.AddHandlerAsHostedService<SaveEmbeddingsHandler>("save_embeddings");
    }

    // Orchestration dependencies, ie. which queueing system to use
    private static void ConfigureQueueSystem(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        switch (config.DataIngestion.DistributedOrchestration.QueueType)
        {
            case string y when y.Equals("AzureQueue", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddAzureQueue(
                    GetServiceConfig<AzureQueueConfig>(builder, "AzureQueue"));
                break;

            case string y when y.Equals("RabbitMQ", StringComparison.OrdinalIgnoreCase):
                // The type name is "RabbitMQ" but the settings section is spelled "RabbitMq".
                builder.Services.AddRabbitMq(
                    GetServiceConfig<RabbitMqConfig>(builder, "RabbitMq"));
                break;

            case string y when y.Equals("FileBasedQueue", StringComparison.OrdinalIgnoreCase):
                builder.Services.AddFileBasedQueue(
                    GetServiceConfig<FileBasedQueueConfig>(builder, "FileBasedQueue"));
                break;

            default:
                throw new NotSupportedException($"Unknown/unsupported {config.DataIngestion.DistributedOrchestration.QueueType} queue type");
        }
    }

    // List of embedding generators to use (multiple generators allowed during ingestion)
    private static void ConfigureEmbeddingGenerator(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        // The collection instance is registered first and filled in as each type is recognized.
        var embeddingGenerationServices = new TypeCollection<ITextEmbeddingGeneration>();
        builder.Services.AddSingleton(embeddingGenerationServices);
        foreach (var type in config.DataIngestion.EmbeddingGeneratorTypes)
        {
            switch (type)
            {
                // Both names select Azure OpenAI; settings always come from "AzureOpenAIEmbedding".
                case string x when x.Equals("AzureOpenAI", StringComparison.OrdinalIgnoreCase):
                case string y when y.Equals("AzureOpenAIEmbedding", StringComparison.OrdinalIgnoreCase):
                    embeddingGenerationServices.Add<AzureTextEmbeddingGeneration>();
                    builder.Services.AddAzureOpenAIEmbeddingGeneration(
                        GetServiceConfig<AzureOpenAIConfig>(builder, "AzureOpenAIEmbedding"));
                    break;

                case string x when x.Equals("OpenAI", StringComparison.OrdinalIgnoreCase):
                    embeddingGenerationServices.Add<OpenAITextEmbeddingGeneration>();
                    builder.Services.AddOpenAITextEmbeddingGeneration(
                        GetServiceConfig<OpenAIConfig>(builder, "OpenAI"));
                    break;

                default:
                    throw new NotSupportedException($"Unknown/unsupported {type} embedding generator");
            }
        }
    }

    // List of vector DBs where to store embeddings (multiple DBs allowed during ingestion)
    private static void ConfigureEmbeddingStorage(WebApplicationBuilder builder, SemanticMemoryConfig config)
    {
        // The collection instance is registered first and filled in as each type is recognized.
        var vectorDbServices = new TypeCollection<ISemanticMemoryVectorDb>();
        builder.Services.AddSingleton(vectorDbServices);
        foreach (var type in config.DataIngestion.VectorDbTypes)
        {
            switch (type)
            {
                case string x when x.Equals("AzureCognitiveSearch", StringComparison.OrdinalIgnoreCase):
                    vectorDbServices.Add<AzureCognitiveSearchMemory>();
                    builder.Services.AddAzureCognitiveSearchAsVectorDb(
                        GetServiceConfig<AzureCognitiveSearchConfig>(builder, "AzureCognitiveSearch"));
                    break;

                default:
                    throw new NotSupportedException($"Unknown/unsupported {type} vector DB");
            }
        }
    }
}
24 changes: 24 additions & 0 deletions memorypipeline/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) Microsoft. All rights reserved.

// ********************************************************
// ************** APP BUILD *******************************
// ********************************************************

using System;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticMemory.Core.Configuration;
using Microsoft.SemanticMemory.Core.Diagnostics;
using SemanticMemory.Service;

// Build the application from the service registrations assembled by Builder.
// The bound configuration comes back through the out parameter; it is not used further here.
var app = Builder.CreateBuilder(out SemanticMemoryConfig config).Build();

// ********************************************************
// ************** START ***********************************
// ********************************************************

// Log the environment name the host actually resolved rather than reading
// ASPNETCORE_ENVIRONMENT directly: the raw variable is null when unset and
// does not reflect DOTNET_ENVIRONMENT. Named placeholders keep the values
// addressable as structured log properties.
app.Logger.LogInformation(
    "Starting Semantic Memory pipeline service, .NET Env: {Environment}, Log Level: {LogLevel}",
    app.Environment.EnvironmentName,
    app.Logger.GetLogLevelName());

app.Run();
16 changes: 16 additions & 0 deletions memorypipeline/SemanticMemoryPipelineService.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk.Web">
<!-- Project file for the Semantic Memory pipeline service executable (web SDK, .NET 6). -->

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<!-- Implicit usings are off, so every source file lists its own using directives. -->
<ImplicitUsings>disable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>SemanticMemory.Service</RootNamespace>
<!-- Identifier of the user-secrets store associated with this project. -->
<UserSecretsId>ef47f200-b235-45d9-9fd6-765fc59d33f5</UserSecretsId>
</PropertyGroup>

<ItemGroup>
<!-- Preview package; the accompanying nuget.config maps Microsoft.SemanticMemory.* to the SemanticMemoryPrivate feed. -->
<PackageReference Include="Microsoft.SemanticMemory.Core" Version="0.0.5.1-preview" />
</ItemGroup>

</Project>
100 changes: 100 additions & 0 deletions memorypipeline/appsettings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{
"SemanticMemory": {
// "AzureBlobs" or "FileSystemContentStorage"
"ContentStorageType": "FileSystemContentStorage",
// Data ingestion pipelines configuration.
"DataIngestion": {
"DistributedOrchestration": {
// "AzureQueue", "RabbitMQ", "FileBasedQueue"
"QueueType": "FileBasedQueue"
},
// Multiple generators can be used, e.g. for data migration, A/B testing, etc.
"EmbeddingGeneratorTypes": [
"AzureOpenAIEmbedding"
],
// Vectors can be written to multiple storages, e.g. for data migration, A/B testing, etc.
"VectorDbTypes": [
"AzureCognitiveSearch"
]
},
"Services": {
"AzureBlobs": {
// "ConnectionString" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "AzureIdentity",
// Azure Storage account name, required when using AzureIdentity auth
// Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__Account' to set this
"Account": "",
// Container where to create directories and upload files
"Container": "smemory",
// Required when Auth == ConnectionString
// Note: you can use an env var 'SemanticMemory__Services__AzureBlobs__ConnectionString' to set this
"ConnectionString": "",
// Setting used only for country clouds
"EndpointSuffix": "core.windows.net"
},
"AzureQueue": {
// - AzureIdentity: use automatic AAD authentication mechanism
// - ConnectionString: auth using a connection string
"Auth": "AzureIdentity",
// Azure Storage account name, required when using AzureIdentity auth
// Note: you can use an env var 'SemanticMemory__Services__AzureQueue__Account' to set this
"Account": "",
// Required when Auth == ConnectionString
// Note: you can use an env var 'SemanticMemory__Services__AzureQueue__ConnectionString' to set this
"ConnectionString": "",
// Setting used only for country clouds
"EndpointSuffix": "core.windows.net"
},
"AzureCognitiveSearch": {
// "ApiKey" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "ApiKey",
"Endpoint": "https://<...>",
"APIKey": "",
"VectorIndexPrefix": "smemory-",
},
"AzureOpenAIEmbedding": {
// "ApiKey" or "AzureIdentity"
// AzureIdentity: use automatic AAD authentication mechanism. You can test locally
// using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET.
"Auth": "ApiKey",
"Endpoint": "https://<...>.openai.azure.com/",
"Deployment": "",
"APIKey": "",
},
"FileSystemContentStorage": {
"Directory": "/tmp/semanticmemory/content"
},
"Qdrant": {
"Endpoint": "https://<...>",
"APIKey": "",
"VectorIndexPrefix": "smemory-"
},
"OpenAI": {
"EmbeddingModel": "text-embedding-ada-002",
"APIKey": "",
"OrgId": "",
},
"RabbitMq": {
"Host": "127.0.0.1",
"Port": "5672",
"Username": "user",
"Password": ""
},
"FileBasedQueue": {
"Path": "/tmp/semanticmemory/queues",
"CreateIfNotExist": true
},
},
},
"Logging": {
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning"
}
},
"AllowedHosts": "*"
}
19 changes: 19 additions & 0 deletions memorypipeline/nuget.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>

<!-- Package feeds for this project. The clear element resets the source list
     before the two feeds below are added. -->
<packageSources>
<clear />
<add key="nuget.org" value="https://api.nuget.org/v3/index.json" />
<add key="SemanticMemoryPrivate" value="https://pkgs.dev.azure.com/msctoproj/Lightspeed/_packaging/SemanticMemoryPrivate/nuget/v3/index.json" />
</packageSources>

<!-- Maps package id patterns to feeds: Microsoft.SemanticMemory.* is listed under
     the private feed, and the catch-all * pattern is listed under nuget.org.
     Per the NuGet package source mapping rules the most specific matching
     pattern decides which feed is used for a given package id. -->
<packageSourceMapping>
<packageSource key="nuget.org">
<package pattern="*" />
</packageSource>
<packageSource key="SemanticMemoryPrivate">
<package pattern="Microsoft.SemanticMemory.*" />
</packageSource>
</packageSourceMapping>

</configuration>