Skip to content

Commit

Permalink
feat: Added Elasticsearch database.
Browse files Browse the repository at this point in the history
  • Loading branch information
HavenDV committed Jun 20, 2024
1 parent ca20d77 commit a83c0a8
Show file tree
Hide file tree
Showing 9 changed files with 238 additions and 0 deletions.
10 changes: 10 additions & 0 deletions LangChain.sln
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Groq", "Groq", "{5DEC2707-D
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "LangChain.Providers.Groq.Tests", "src\Providers\Groq\test\LangChain.Providers.Groq.Tests.csproj", "{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Elasticsearch", "Elasticsearch", "{46A24203-9C28-4135-ACC6-484C4BE4A6F2}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LangChain.Databases.Elasticsearch", "src\Databases\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj", "{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -734,6 +738,10 @@ Global
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E}.Release|Any CPU.Build.0 = Release|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Debug|Any CPU.Build.0 = Debug|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Release|Any CPU.ActiveCfg = Release|Any CPU
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -878,6 +886,8 @@ Global
{FD0A56AD-AFB4-4A21-99C1-9BE5D074EB56} = {5DEC2707-DD62-4DCD-AC5D-6670AC2A1B01}
{5DEC2707-DD62-4DCD-AC5D-6670AC2A1B01} = {E2B9833C-0397-4FAF-A3A8-116E58749750}
{CC7F58F4-C824-4BED-8C4A-760C9AB8FC6E} = {5DEC2707-DD62-4DCD-AC5D-6670AC2A1B01}
{46A24203-9C28-4135-ACC6-484C4BE4A6F2} = {A098FF69-D8B5-4B2B-83D5-F777D3817F15}
{12007C28-2F0A-4D51-8DF6-5ED41F1ACF44} = {46A24203-9C28-4135-ACC6-484C4BE4A6F2}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {5C00D0F1-6138-4ED9-846B-97E43D6DFF1C}
Expand Down
102 changes: 102 additions & 0 deletions src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using Elastic.Clients.Elasticsearch;

namespace LangChain.Databases.Elasticsearch;

/// <summary>
/// Elasticsearch vector collection.
/// </summary>
public class ElasticsearchVectorCollection(
ElasticsearchClient client,
string name = VectorCollection.DefaultName,
string? id = null)
: VectorCollection(name, id), IVectorCollection
{
/// <inheritdoc />
public async Task<Vector?> GetAsync(string id, CancellationToken cancellationToken = default)
{
var record = await client.GetAsync(Name, new Id(id), cancellationToken: cancellationToken).ConfigureAwait(false);
if (record == null)
{
return null;
}

return new Vector
{
Text = string.Empty,
Metadata = new Dictionary<string, object>(),
};
}

/// <inheritdoc />
public async Task<bool> DeleteAsync(
IEnumerable<string> ids,
CancellationToken cancellationToken = default)
{
ids = ids ?? throw new ArgumentNullException(nameof(ids));

foreach (var id in ids)
{
await client.DeleteAsync(Name, new Id(id), cancellationToken).ConfigureAwait(false);
}

return true;
}

/// <inheritdoc />
public Task<IReadOnlyCollection<string>> AddAsync(
IReadOnlyCollection<Vector> items,
CancellationToken cancellationToken = default)
{
items = items ?? throw new ArgumentNullException(nameof(items));

return Task.FromResult<IReadOnlyCollection<string>>([]);
}

/// <inheritdoc />
public Task<VectorSearchResponse> SearchAsync(
VectorSearchRequest request,
VectorSearchSettings? settings = default,
CancellationToken cancellationToken = default)
{
request = request ?? throw new ArgumentNullException(nameof(request));
settings ??= new VectorSearchSettings();

var response = await client.SearchAsync<MyDoc>(s => s

Check failure on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

The type or namespace name 'MyDoc' could not be found (are you missing a using directive or an assembly reference?)

Check warning on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Consider calling ConfigureAwait on the awaited task (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2007)

Check warning on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Consider calling ConfigureAwait on the awaited task (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2007)

Check warning on line 64 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

Consider calling ConfigureAwait on the awaited task (https://learn.microsoft.com/dotnet/fundamentals/code-analysis/quality-rules/ca2007)
.Index("my_index")
.From(0)
.Size(10)
.Query(q => q
.Knn()

Check failure on line 69 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

No overload for method 'Knn' takes 0 arguments
.Term(t => t.User, "flobernd")

Check failure on line 70 in src/Databases/Elasticsearch/src/ElasticsearchVectorCollection.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'TermQueryDescriptor<MyDoc>' does not contain a definition for 'User' and no accessible extension method 'User' accepting a first argument of type 'TermQueryDescriptor<MyDoc>' could be found (are you missing a using directive or an assembly reference?)
)
);

if (response.IsValidResponse)
{
var doc = response.Documents.FirstOrDefault();
}

return Task.FromResult(new VectorSearchResponse
{
Items = Array.Empty<string>()
.Select(record =>
{
return new Vector
{
Id = string.Empty,
Text = string.Empty,
Metadata = new Dictionary<string, object>(),
Embedding = [],
Distance = 0.0F,
};
})
.ToArray(),
});
}

/// <inheritdoc />
public Task<bool> IsEmptyAsync(CancellationToken cancellationToken = default)
{
throw new NotImplementedException();
}
}
72 changes: 72 additions & 0 deletions src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using Elastic.Clients.Elasticsearch;

namespace LangChain.Databases.Elasticsearch;

/// <summary>
/// Elasticsearch vector store.
/// </summary>
public class ElasticsearchVectorDatabase(
ElasticsearchClient client)
: IVectorDatabase
{
/// <inheritdoc />
public async Task<IVectorCollection> GetCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
{
try
{
var collection = await client.GetCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false) ?? throw new InvalidOperationException("Collection not found");

Check failure on line 17 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'GetCollectionAsync' and no accessible extension method 'GetCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)

return new ElasticsearchVectorCollection(
client,
name: collection.Name,
id: collection.Id);
}
catch (Exception exception)
{
throw new InvalidOperationException("Collection not found", innerException: exception);
}
}

/// <inheritdoc />
public async Task<bool> IsCollectionExistsAsync(string collectionName, CancellationToken cancellationToken = default)
{
await foreach (var name in client.ListCollectionsAsync(cancellationToken).ConfigureAwait(false))

Check failure on line 33 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'ListCollectionsAsync' and no accessible extension method 'ListCollectionsAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
{
if (name == collectionName)
{
return true;
}
}

return false;
}

/// <inheritdoc />
public async Task CreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
{
await client.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);

Check failure on line 47 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'CreateCollectionAsync' and no accessible extension method 'CreateCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
}

/// <inheritdoc />
public async Task<IReadOnlyList<string>> ListCollectionsAsync(CancellationToken cancellationToken = default)
{
return await client.ListCollectionsAsync(cancellationToken).ToListAsync(cancellationToken: cancellationToken).ConfigureAwait(false);

Check failure on line 53 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'ListCollectionsAsync' and no accessible extension method 'ListCollectionsAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
}

/// <inheritdoc />
public async Task<IVectorCollection> GetOrCreateCollectionAsync(string collectionName, int dimensions, CancellationToken cancellationToken = default)
{
if (!await IsCollectionExistsAsync(collectionName, cancellationToken).ConfigureAwait(false))
{
await client.CreateCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);

Check failure on line 61 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'CreateCollectionAsync' and no accessible extension method 'CreateCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
}

return await GetCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);
}

/// <inheritdoc />
public async Task DeleteCollectionAsync(string collectionName, CancellationToken cancellationToken = default)
{
await client.DeleteCollectionAsync(collectionName, cancellationToken).ConfigureAwait(false);

Check failure on line 70 in src/Databases/Elasticsearch/src/ElasticsearchVectorDatabase.cs

View workflow job for this annotation

GitHub Actions / Build and test / Build, test and publish

'ElasticsearchClient' does not contain a definition for 'DeleteCollectionAsync' and no accessible extension method 'DeleteCollectionAsync' accepting a first argument of type 'ElasticsearchClient' could be found (are you missing a using directive or an assembly reference?)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFrameworks>net4.6.2;netstandard2.0;net6.0;net8.0</TargetFrameworks>
<NoWarn>$(NoWarn)</NoWarn>
</PropertyGroup>

<PropertyGroup Label="NuGet">
<Description>Elasticsearch support for LangChain.</Description>
<PackageTags>$(PackageTags);elasticsearch</PackageTags>
</PropertyGroup>

<ItemGroup Label="Usings">
<Using Include="System.Net.Http" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="Elastic.Clients.Elasticsearch" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Abstractions\src\LangChain.Databases.Abstractions.csproj" />
</ItemGroup>

</Project>
17 changes: 17 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.Configure.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
using DotNet.Testcontainers.Builders;
using Elastic.Clients.Elasticsearch;
using LangChain.Databases.Chroma;
using LangChain.Databases.Elasticsearch;
using LangChain.Databases.InMemory;
using LangChain.Databases.OpenSearch;
using LangChain.Databases.Postgres;
using LangChain.Databases.Sqlite;
using LangChain.Databases.Mongo;
using Testcontainers.Elasticsearch;
using Testcontainers.MongoDb;
using Testcontainers.PostgreSql;

Expand Down Expand Up @@ -121,6 +124,20 @@ private static async Task<DatabaseTestEnvironment> StartEnvironmentForAsync(Supp
Container = container,
};
}

case SupportedDatabase.Elasticsearch:
{
var container = new ElasticsearchBuilder().Build();

await container.StartAsync(cancellationToken);

var client = new ElasticsearchClient(new Uri($"http://localhost:{container.GetMappedPublicPort(9200)}"));
return new DatabaseTestEnvironment
{
VectorDatabase = new ElasticsearchVectorDatabase(client),
Container = container,
};
}
default:
throw new ArgumentOutOfRangeException(nameof(database), database, null);
}
Expand Down
7 changes: 7 additions & 0 deletions src/Databases/IntegrationTests/DatabaseTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public partial class DatabaseTests
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task CreateAndDeleteCollection_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -58,6 +59,7 @@ await vectorDatabase.Invoking(y => y.GetCollectionAsync(environment.CollectionNa
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task AddDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -103,6 +105,7 @@ public async Task AddDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task AddTexts_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -152,6 +155,7 @@ public async Task AddTexts_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Mongo)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task DeleteDocuments_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -189,6 +193,7 @@ public async Task DeleteDocuments_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearch_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -220,6 +225,7 @@ public async Task SimilaritySearch_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down Expand Up @@ -247,6 +253,7 @@ public async Task SimilaritySearchByVector_Ok(SupportedDatabase database)
[TestCase(SupportedDatabase.OpenSearch)]
[TestCase(SupportedDatabase.Postgres)]
[TestCase(SupportedDatabase.SqLite)]
[TestCase(SupportedDatabase.Elasticsearch)]
public async Task SimilaritySearchWithScores_Ok(SupportedDatabase database)
{
await using var environment = await StartEnvironmentForAsync(database);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
<ProjectReference Include="..\AzureSearch\src\LangChain.Databases.AzureSearch.csproj" />
<ProjectReference Include="..\Chroma\src\LangChain.Databases.Chroma.csproj" />
<ProjectReference Include="..\DuckDb\src\LangChain.Databases.DuckDb.csproj" />
<ProjectReference Include="..\Elasticsearch\src\LangChain.Databases.Elasticsearch.csproj" />
<ProjectReference Include="..\InMemory\src\LangChain.Databases.InMemory.csproj" />
<ProjectReference Include="..\Kendra\src\LangChain.Databases.Kendra.csproj" />
<ProjectReference Include="..\Mongo\src\LangChain.Databases.Mongo.csproj" />
Expand All @@ -26,6 +27,7 @@

<ItemGroup>
<PackageReference Include="Testcontainers" />
<PackageReference Include="Testcontainers.Elasticsearch" />
<PackageReference Include="Testcontainers.PostgreSql" />
<PackageReference Include="Testcontainers.MongoDb" />
<PackageReference Include="Testcontainers.Redis" />
Expand Down
1 change: 1 addition & 0 deletions src/Databases/IntegrationTests/SupportedDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ public enum SupportedDatabase
Postgres,
Redis,
Mongo,
Elasticsearch,
}
2 changes: 2 additions & 0 deletions src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
<PackageVersion Include="Docker.DotNet" Version="3.125.15" />
<PackageVersion Include="DocumentFormat.OpenXml" Version="3.0.2" />
<PackageVersion Include="DotNet.ReproducibleBuilds" Version="1.1.1" />
<PackageVersion Include="Elastic.Clients.Elasticsearch" Version="8.14.2" />
<PackageVersion Include="FluentAssertions" Version="6.12.0" />
<PackageVersion Include="GitHubActionsTestLogger" Version="2.4.1" />
<PackageVersion Include="Google_GenerativeAI" Version="1.0.1" />
Expand Down Expand Up @@ -78,6 +79,7 @@
<PackageVersion Include="System.Text.Json" Version="8.0.3" />
<PackageVersion Include="System.ValueTuple" Version="4.5.0" />
<PackageVersion Include="Testcontainers" Version="3.9.0" />
<PackageVersion Include="Testcontainers.Elasticsearch" Version="3.9.0" />
<PackageVersion Include="Testcontainers.MongoDb" Version="3.9.0" />
<PackageVersion Include="Testcontainers.PostgreSql" Version="3.9.0" />
<PackageVersion Include="Testcontainers.Redis" Version="3.9.0" />
Expand Down

0 comments on commit a83c0a8

Please sign in to comment.