Skip to content

Commit b483318

Browse files
committed
feat: Add LangChain vector store adapter for CrateDB
1 parent b773513 commit b483318

File tree

12 files changed

+573
-0
lines changed

12 files changed

+573
-0
lines changed

docs/docs/Components/components-vector-stores.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,31 @@ For more information, see the [Chroma documentation](https://docs.trychroma.com/
418418

419419
</details>
420420

421+
## CrateDB
422+
423+
This component creates a CrateDB Vector Store with search capabilities.
424+
For more information, see the documentation about the
425+
[CrateDB LangChain adapter](https://cratedb.com/docs/guide/integrate/langchain/).
426+
427+
### Inputs
428+
429+
| Name | Type | Description |
430+
|----------------------------------|---------------|------------------------------------------------------------------|
431+
| collection_name | String | The name of the CrateDB table (collection) to use. Required. |
432+
| search_query | String | The query to search for in the vector store. |
433+
| ingest_data | Data | The data to ingest into the vector store (list of Data objects). |
434+
| embedding | Embeddings | The embedding function to use for the vector store. |
435+
| server_url | String | SQLAlchemy URL to connect to CrateDB. |
436+
| search_type | String | Type of search to perform: "Similarity" or "MMR". |
437+
| number_of_results | Integer | Number of results to return from the search. Default: 4. |
438+
439+
### Outputs
440+
441+
| Name | Type | Description |
442+
|----------------|--------------------|-------------------------------|
443+
| vector_store | CrateDBVectorStore | CrateDB vector store instance |
444+
| search_results | List[Data] | Results of similarity search |
445+
421446
## Elasticsearch
422447

423448
This component creates an Elasticsearch Vector Store with search capabilities.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ dependencies = [
9797
"langchain-ollama==0.2.1",
9898
"langchain-sambanova==0.1.0",
9999
"langchain-community~=0.3.20",
100+
"langchain-cratedb<0.2",
100101
"sqlalchemy[aiosqlite]>=2.0.38,<3.0.0",
101102
"atlassian-python-api==3.41.16",
102103
"mem0ai==0.1.34",

src/backend/base/langflow/components/vectorstores/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .chroma import ChromaVectorStoreComponent
66
from .clickhouse import ClickhouseVectorStoreComponent
77
from .couchbase import CouchbaseVectorStoreComponent
8+
from .cratedb import CrateDBVectorStoreComponent
89
from .elasticsearch import ElasticsearchVectorStoreComponent
910
from .faiss import FaissVectorStoreComponent
1011
from .graph_rag import GraphRAGComponent
@@ -31,6 +32,7 @@
3132
"ChromaVectorStoreComponent",
3233
"ClickhouseVectorStoreComponent",
3334
"CouchbaseVectorStoreComponent",
35+
"CrateDBVectorStoreComponent",
3436
"ElasticsearchVectorStoreComponent",
3537
"FaissVectorStoreComponent",
3638
"GraphRAGComponent",
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
import typing as t
2+
3+
from langchain_cratedb import CrateDBVectorStore
4+
5+
from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
6+
from langflow.helpers import docs_to_data
7+
from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput
8+
from langflow.schema import Data
9+
10+
11+
class CrateDBVectorStoreComponent(LCVectorStoreComponent):
    """Langflow component wrapping the LangChain ``CrateDBVectorStore``.

    The component either ingests the supplied documents into a collection or
    attaches to an existing one, and exposes a similarity search over it.
    """

    display_name = "CrateDBVector"
    description = "CrateDB Vector Store with search capabilities"
    name = "CrateDB"
    icon = "CrateDB"

    # Connection settings come first, then the inputs shared by every
    # LCVectorStoreComponent, then the embedding model and search tuning.
    inputs = [
        SecretStrInput(name="server_url", display_name="CrateDB SQLAlchemy URL", required=True),
        StrInput(name="collection_name", display_name="Table", required=True),
        *LCVectorStoreComponent.inputs,
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
    ]

    @check_cached_vector_store
    def build_vector_store(self) -> CrateDBVectorStore:
        """Create the vector store, ingesting ``ingest_data`` when it is non-empty.

        ``Data`` items are converted to LangChain documents; anything else is
        passed through unchanged. With no documents to ingest, the store is
        opened on the existing collection instead.

        Returns:
            CrateDBVectorStore: The store bound to ``collection_name``.
        """
        lc_documents = [
            item.to_lc_document() if isinstance(item, Data) else item
            for item in self.ingest_data or []
        ]

        # Fall back to the bare dialect URL when no server URL is configured.
        store_kwargs = {
            "embedding": self.embedding,
            "collection_name": self.collection_name,
            "connection": self.server_url or "crate://",
        }

        if not lc_documents:
            return CrateDBVectorStore.from_existing_index(**store_kwargs)
        return CrateDBVectorStore.from_documents(documents=lc_documents, **store_kwargs)

    def search_documents(self) -> list[Data]:
        """Run a similarity search for ``search_query``.

        The vector store is always built first, so any pending ingestion
        happens even when no usable query is given.

        Returns:
            list[Data]: Up to ``number_of_results`` matches, or an empty list
            when the query is missing, not a string, or blank.
        """
        vector_store = self.build_vector_store()

        query = self.search_query
        if not (query and isinstance(query, str) and query.strip()):
            return []

        matches = vector_store.similarity_search(query=query, k=self.number_of_results)
        results = docs_to_data(matches)
        self.status = results
        return results
71+
72+
73+
def cratedb_collection_to_data(embedding_documents: list[t.Any]):
    """Convert a collection of CrateDB embedding rows into a list of ``Data``.

    Each row contributes its ``id`` and ``document`` (exposed as ``text``),
    followed by the entries of its ``cmetadata`` mapping. Metadata keys are
    applied last, so a metadata entry named ``id`` or ``text`` overrides the
    row's own value.

    Args:
        embedding_documents (list): Objects exposing ``id``, ``document`` and
            ``cmetadata`` attributes (presumably EmbeddingStore rows; confirm
            against the caller).

    Returns:
        list: One ``Data`` record per input row, in input order. An empty
        input yields an empty list.
    """
    data = []
    for doc in embedding_documents:
        data_dict = {
            "id": doc.id,
            "text": doc.document,
        }
        # A row stored without metadata carries None here; dict.update(None)
        # would raise TypeError, so treat it as "no extra fields".
        data_dict.update(doc.cmetadata or {})
        data.append(Data(**data_dict))
    return data

src/backend/tests/integration/components/cratedb/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)