Skip to content

Commit f017938

Browse files
committed
feat: Add LangChain vector store adapter for CrateDB
1 parent 8a7d7ef commit f017938

File tree

11 files changed

+1021
-0
lines changed

11 files changed

+1021
-0
lines changed

docs/docs/Components/components-vector-stores.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,30 @@ For more information, see the [Couchbase documentation](https://docs.couchbase.c
253253
|----------------|------------------------|--------------------------------|
254254
| vector_store | CouchbaseVectorStore | A Couchbase vector store instance configured with the specified parameters. |
255255

256+
## CrateDB
257+
258+
This component creates a CrateDB Vector Store with search capabilities.
259+
For more information, see the documentation about the
260+
[CrateDB LangChain adapter](https://cratedb.com/docs/guide/integrate/langchain/).
261+
262+
### Inputs
263+
264+
| Name | Type | Description |
265+
|----------------------------------|---------------|------------------------------------------------------------------|
266+
| collection_name | String | The name of the collection. Default: "langflow". |
267+
| search_query | String | The query to search for in the vector store. |
268+
| ingest_data | Data | The data to ingest into the vector store (list of Data objects). |
269+
| embedding | Embeddings | The embedding function to use for the vector store. |
270+
| server_url | String | SQLAlchemy URL to connect to CrateDB. |
271+
| search_type | String | Type of search to perform: "Similarity" or "MMR". |
272+
| number_of_results | Integer | Number of results to return from the search. Default: 10. |
273+
274+
### Outputs
275+
276+
| Name | Type | Description |
277+
|----------------|--------------------|-------------------------------|
278+
| vector_store | CrateDBVectorStore | CrateDB vector store instance |
279+
| search_results | List[Data] | Results of similarity search |
256280

257281
## Elasticsearch
258282

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ dependencies = [
9696
"langchain-elasticsearch==0.3.0",
9797
"langchain-ollama==0.2.1",
9898
"langchain-community~=0.3.10",
99+
"langchain-cratedb<0.2",
99100
"sqlalchemy[aiosqlite,postgresql_psycopg2binary,postgresql_psycopgbinary]>=2.0.36,<3.0.0",
100101
"atlassian-python-api==3.41.16",
101102
"mem0ai==0.1.34",

src/backend/base/langflow/components/vectorstores/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .chroma import ChromaVectorStoreComponent
66
from .clickhouse import ClickhouseVectorStoreComponent
77
from .couchbase import CouchbaseVectorStoreComponent
8+
from .cratedb import CrateDBVectorStoreComponent
89
from .elasticsearch import ElasticsearchVectorStoreComponent
910
from .faiss import FaissVectorStoreComponent
1011
from .hcd import HCDVectorStoreComponent
@@ -30,6 +31,7 @@
3031
"ChromaVectorStoreComponent",
3132
"ClickhouseVectorStoreComponent",
3233
"CouchbaseVectorStoreComponent",
34+
"CrateDBVectorStoreComponent",
3335
"ElasticsearchVectorStoreComponent",
3436
"FaissVectorStoreComponent",
3537
"HCDVectorStoreComponent",
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import typing as t
2+
3+
from langchain_cratedb import CrateDBVectorStore
4+
5+
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
6+
from langflow.helpers import docs_to_data
7+
from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput
8+
from langflow.schema import Data
9+
10+
11+
class CrateDBVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component wrapping the LangChain ``CrateDBVectorStore``.

    Builds the store from the configured connection URL and table name,
    optionally ingesting data, and runs similarity searches against it.
    """

    display_name = "CrateDBVector"
    description = "CrateDB Vector Store with search capabilities"
    name = "cratedb"
    icon = "cpu"

    inputs = [
        SecretStrInput(name="server_url", display_name="CrateDB SQLAlchemy URL", required=True),
        StrInput(name="collection_name", display_name="Table", required=True),
        *LCVectorStoreComponent.inputs,
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
    ]

    @check_cached_vector_store
    def build_vector_store(self) -> CrateDBVectorStore:
        """Create the vector store, ingesting ``ingest_data`` when any is given.

        Returns:
            CrateDBVectorStore: Built via ``from_documents`` when there is
            something to ingest, otherwise via ``from_existing_index``.
        """
        # Data objects become LangChain documents; anything else is passed through as-is.
        lc_documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in (self.ingest_data or [])
        ]

        # Fall back to the bare "crate://" URL when no server URL is set.
        dsn = self.server_url or "crate://"

        if not lc_documents:
            # Nothing to ingest: open the store without adding documents.
            return CrateDBVectorStore.from_existing_index(
                embedding=self.embedding,
                collection_name=self.collection_name,
                connection=dsn,
            )

        return CrateDBVectorStore.from_documents(
            embedding=self.embedding,
            documents=lc_documents,
            collection_name=self.collection_name,
            connection=dsn,
        )

    def search_documents(self) -> list[Data]:
        """Run a similarity search for ``search_query``.

        Returns:
            list[Data]: Up to ``number_of_results`` matches, or an empty list
            when the query is missing, not a string, or only whitespace.
        """
        # The store is built before the query is inspected, so ingestion
        # still happens even when there is nothing to search for.
        store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        matches = store.similarity_search(query=query, k=self.number_of_results)
        results = docs_to_data(matches)
        self.status = results
        return results
71+
72+
73+
def cratedb_collection_to_data(embedding_documents: list[t.Any]):
    """Convert CrateDB embedding records into a list of ``Data`` objects.

    Each record contributes its ``id`` and its ``document`` (exposed under
    the ``text`` key), merged with the key/value pairs of its ``cmetadata``.
    Because the metadata is merged last, metadata keys named ``id`` or
    ``text`` take precedence over the record's own values.

    Args:
        embedding_documents: A list of records exposing ``id``, ``document``
            and ``cmetadata`` attributes (presumably EmbeddingStore
            instances -- confirm against the caller). ``None`` is treated as
            an empty list.

    Returns:
        list: One ``Data`` per input record, in input order.
    """
    records = []
    for doc in embedding_documents or []:
        fields = {
            "id": doc.id,
            "text": doc.document,
        }
        # A record stored without metadata may carry ``None`` here, and
        # ``dict.update(None)`` raises TypeError -- treat it as "no metadata".
        fields.update(doc.cmetadata or {})
        records.append(Data(**fields))
    return records

src/backend/tests/integration/components/cratedb/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)