25 changes: 25 additions & 0 deletions docs/docs/Components/components-vector-stores.md

@@ -418,6 +418,31 @@ For more information, see the [Chroma documentation](https://docs.trychroma.com/

</details>

## CrateDB

This component creates a CrateDB Vector Store with search capabilities.
For more information, see the documentation about the
[CrateDB LangChain adapter](https://cratedb.com/docs/guide/integrate/langchain/).

### Inputs

| Name | Type | Description |
|----------------------------------|---------------|------------------------------------------------------------------|
| collection_name | String | The name of the collection. Default: "langflow". |
| search_query | String | The query to search for in the vector store. |
| ingest_data | Data | The data to ingest into the vector store (list of Data objects). |
| embedding | Embeddings | The embedding function to use for the vector store. |
| server_url | String | SQLAlchemy URL to connect to CrateDB. |
| search_type | String | Type of search to perform: "Similarity" or "MMR". |
| number_of_results                | Integer       | Number of results to return from the search. Default: 4.         |

### Outputs

| Name | Type | Description |
|----------------|--------------------|-------------------------------|
| vector_store | CrateDBVectorStore | CrateDB vector store instance |
| search_results | List[Data] | Results of similarity search |
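
The `server_url` input expects a CrateDB SQLAlchemy connection string. A minimal sketch of its shape (the remote host and credentials below are illustrative, not real defaults):

```python
# CrateDB SQLAlchemy URLs use the "crate://" scheme. A bare "crate://"
# (the component's fallback when server_url is empty) targets a local
# CrateDB instance on the default HTTP port 4200.
local_url = "crate://"

# Hypothetical remote cluster with credentials, for illustration only.
remote_url = "crate://admin:secret@cratedb.example.net:4200"
```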

## Elasticsearch

This component creates an Elasticsearch Vector Store with search capabilities.
1 change: 1 addition & 0 deletions pyproject.toml

@@ -97,6 +97,7 @@ dependencies = [
"langchain-ollama==0.2.1",
"langchain-sambanova==0.1.0",
"langchain-community~=0.3.20",
"langchain-cratedb<0.2",
"sqlalchemy[aiosqlite]>=2.0.38,<3.0.0",
"atlassian-python-api==3.41.16",
"mem0ai==0.1.34",
2 changes: 2 additions & 0 deletions src/backend/base/langflow/components/vectorstores/__init__.py

@@ -5,6 +5,7 @@
from .chroma import ChromaVectorStoreComponent
from .clickhouse import ClickhouseVectorStoreComponent
from .couchbase import CouchbaseVectorStoreComponent
from .cratedb import CrateDBVectorStoreComponent
from .elasticsearch import ElasticsearchVectorStoreComponent
from .faiss import FaissVectorStoreComponent
from .graph_rag import GraphRAGComponent
@@ -31,6 +32,7 @@
"ChromaVectorStoreComponent",
"ClickhouseVectorStoreComponent",
"CouchbaseVectorStoreComponent",
"CrateDBVectorStoreComponent",
"ElasticsearchVectorStoreComponent",
"FaissVectorStoreComponent",
"GraphRAGComponent",
90 changes: 90 additions & 0 deletions src/backend/base/langflow/components/vectorstores/cratedb.py

@@ -0,0 +1,90 @@
import typing as t

from langchain_cratedb import CrateDBVectorStore

from langflow.base.vectorstores.model import LCVectorStoreComponent, check_cached_vector_store
from langflow.helpers import docs_to_data
from langflow.io import HandleInput, IntInput, SecretStrInput, StrInput
from langflow.schema import Data
Comment on lines +5 to +8
⚠️ Potential issue: incorrect helper import path

docs_to_data lives in langflow.helpers.data, not the package root.

```diff
-from langflow.helpers import docs_to_data
+from langflow.helpers.data import docs_to_data
```

This avoids an `ImportError` in environments where `langflow.helpers.__init__` doesn't re-export the symbol.




class CrateDBVectorStoreComponent(LCVectorStoreComponent):
    display_name = "CrateDBVector"
    description = "CrateDB Vector Store with search capabilities"
    name = "CrateDB"
    icon = "CrateDB"

    inputs = [
        SecretStrInput(name="server_url", display_name="CrateDB SQLAlchemy URL", required=True),
        StrInput(name="collection_name", display_name="Table", required=True),
        *LCVectorStoreComponent.inputs,
        HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
        IntInput(
            name="number_of_results",
            display_name="Number of Results",
            info="Number of results to return.",
            value=4,
            advanced=True,
        ),
    ]
Comment on lines +17 to +29

🛠️ Refactor suggestion: missing `search_type` input

`LCVectorStoreComponent`'s base helpers expect `self.search_type`, and tests set it, yet the input isn't declared here.
Add a `StrInput` with choices (`"Similarity"`, `"MMR"`, `"similarity_score_threshold"`) to prevent an `AttributeError` at runtime.
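
In the diff style of the other suggestions, a hedged sketch of what that declaration could look like (kwarg names beyond `name`/`display_name` are assumptions; depending on the langflow version, a `DropdownInput` with `options` may be the more idiomatic widget for a fixed choice set):

```diff
         *LCVectorStoreComponent.inputs,
         HandleInput(name="embedding", display_name="Embedding", input_types=["Embeddings"], required=True),
+        StrInput(
+            name="search_type",
+            display_name="Search Type",
+            info='Type of search to perform: "Similarity", "MMR", or "similarity_score_threshold".',
+            value="Similarity",
+            advanced=True,
+        ),
```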



    @check_cached_vector_store
    def build_vector_store(self) -> CrateDBVectorStore:
        documents = []
        for _input in self.ingest_data or []:
            if isinstance(_input, Data):
                documents.append(_input.to_lc_document())
            else:
                documents.append(_input)

        connection_string = self.server_url or "crate://"

        if documents:
            store = CrateDBVectorStore.from_documents(
                embedding=self.embedding,
                documents=documents,
                collection_name=self.collection_name,
                connection=connection_string,
            )
        else:
            store = CrateDBVectorStore.from_existing_index(
                embedding=self.embedding,
                collection_name=self.collection_name,
                connection=connection_string,
            )

        return store

    def search_documents(self) -> list[Data]:
        vector_store = self.build_vector_store()

        if self.search_query and isinstance(self.search_query, str) and self.search_query.strip():
            docs = vector_store.similarity_search(
                query=self.search_query,
                k=self.number_of_results,
            )

            data = docs_to_data(docs)
            self.status = data
            return data
        return []
Comment on lines +58 to +70
🛠️ Refactor suggestion: duplicated search logic bypasses MMR & score-threshold paths

This override always calls `similarity_search`, ignoring `search_type`, and duplicates logic already present in the base class.
Consider deleting the method entirely and relying on `LCVectorStoreComponent.search_documents`, or delegate:

```diff
-    def search_documents(self) -> list[Data]:
-        vector_store = self.build_vector_store()
-        ...
-            docs = vector_store.similarity_search(
-                query=self.search_query,
-                k=self.number_of_results,
-            )
+    # Remove this override; the base implementation already handles
+    # caching and dispatches to vector_store.search with the chosen
+    # search_type.
```

This instantly enables MMR and score-threshold searches without extra code.
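
The point is easy to see with a stand-in store: the base class dispatches on `search_type`, while the override above pins everything to `similarity_search`. A self-contained sketch of that dispatch (`FakeStore` is a stand-in, not the real `CrateDBVectorStore`, and the real base-class logic may differ in detail; only the two method names are real LangChain vector-store methods):

```python
class FakeStore:
    """Stand-in for a LangChain vector store; only the method names are real."""

    def similarity_search(self, query, k=4):
        return [f"sim:{query}:{k}"]

    def max_marginal_relevance_search(self, query, k=4):
        return [f"mmr:{query}:{k}"]


def dispatch_search(store, query, search_type="Similarity", k=4):
    # Roughly what the inherited search_documents does: pick the store
    # method based on search_type instead of hard-coding one of them.
    if search_type.lower() == "mmr":
        return store.max_marginal_relevance_search(query, k=k)
    return store.similarity_search(query, k=k)
```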




def cratedb_collection_to_data(embedding_documents: list[t.Any]):
    """Converts a collection of CrateDB vectors into a list of data.

    Args:
        embedding_documents (list): A list of EmbeddingStore instances.

    Returns:
        list: A list of data, where each record represents a document in the collection.
    """
    data = []
    for doc in embedding_documents:
        data_dict = {
            "id": doc.id,
            "text": doc.document,
        }
        data_dict.update(doc.cmetadata)
        data.append(Data(**data_dict))
Comment on lines +84 to +89
⚠️ Potential issue: guard against `None` metadata

`doc.cmetadata` can be `None`, and `dict.update(None)` raises a `TypeError`.

```diff
-    data_dict.update(doc.cmetadata)
+    if doc.cmetadata:
+        data_dict.update(doc.cmetadata)
```

    return data
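
The guard is easy to verify in isolation. `FakeDoc` below stands in for the real `EmbeddingStore` row, which exposes `id`, `document`, and a possibly-`None` `cmetadata`:

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class FakeDoc:
    """Stand-in for an EmbeddingStore row; attribute names mirror the source."""
    id: str
    document: str
    cmetadata: Optional[dict]


def flatten(doc: FakeDoc) -> dict:
    # Mirrors cratedb_collection_to_data's per-record logic, with the guard.
    record = {"id": doc.id, "text": doc.document}
    if doc.cmetadata:  # skips None (and empty dicts, harmlessly)
        record.update(doc.cmetadata)
    return record
```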