You can install the Retrievers SDK using pip:
pip install mo-retrievers
or using uv:
uv add mo-retrievers
from retrievers import VectorDatabase, MilvusBackend
from retrievers.indexing import DummyEmbedder
from retrievers.context import Text
# Build a VectorDatabase on a Milvus backend created from the local file "index.db",
# passing DummyEmbedder as the embedder and Text as the payload class.
vdb = VectorDatabase(MilvusBackend.from_local("index.db"), embedder=DummyEmbedder(), payload_class=Text)
# Create the "docs" collection for Text payloads.
# NOTE(review): exists_behavior="replace" presumably overwrites an existing collection — confirm.
vdb.create_collection("docs", Text, exists_behavior="replace")
# Add two Text records to the "docs" collection.
vdb.add_records("docs", [Text(text="Modaic makes sharing agents easy."), Text(text="Tables can be queried with SQL.")])
# Query "docs" with a natural-language question, requesting k=1 result.
hits = vdb.search("docs", "How do I share agents?", k=1)
print(hits[0].text)
from retrievers.context import Text
def simple_splitter(text: str, step: int = 500):
    """Yield consecutive, non-overlapping slices of ``text``.

    Args:
        text: The string to split.
        step: Maximum number of characters per chunk. Defaults to 500,
            so calling with only ``text`` behaves as before.

    Yields:
        Substrings of at most ``step`` characters, in order. The final
        chunk may be shorter; an empty ``text`` yields nothing.

    Raises:
        ValueError: If ``step`` is not positive (raised when iteration
            starts, because this is a generator).
    """
    # A negative step would make range() empty and silently drop all text.
    if step <= 0:
        raise ValueError(f"step must be a positive integer, got {step!r}")
    for start in range(0, len(text), step):
        yield text[start:start + step]
# Load README.md as a Text context, then chunk it using simple_splitter.
doc = Text.from_file("README.md")
doc.chunk_text(simple_splitter)
print(len(doc.chunks))
from pathlib import Path
from modaic.context import TableFile
# Load the spreadsheet as a TableFile; the reference, path and file type are passed explicitly.
table = TableFile.from_file(
file_ref="employees.xlsx",
file=Path("employees.xlsx"),
file_type="xlsx",
)
print(table.schema_info())
# Run a SQL query against the table.
# NOTE(review): `this` presumably names the loaded table inside the query — confirm.
head = table.query("SELECT * FROM this LIMIT 5")
print(head.shape)
Build structured filters with Prop:
from retrievers.context import Prop
q = (Prop("age") >= 21) & (Prop("role") == "engineer")
from typing import List, Tuple
import numpy as np
from modaic import Indexer, PrecompiledConfig
from retrievers.indexing import DummyEmbedder, Embedder # convenient for demos
from retrievers.context import Text
# Configuration class used by DocsIndexer.
class DocsConfig(PrecompiledConfig):
    # Name of the index; defaults to "docs".
    index_name: str = "docs"
class DocsIndexer(Indexer):
    """In-memory indexer that ranks ``Text`` contexts by dot-product score.

    Ingested contexts are embedded and kept in a plain Python list;
    retrieval embeds the query and scores it against every stored vector.
    """

    config: DocsConfig  # ! Important: config must be annotated with the config class

    def __init__(self, config: DocsConfig, embedder: Embedder | None = None):
        """Set up the indexer.

        Args:
            config: Configuration passed on to the ``Indexer`` base class.
            embedder: Callable used to embed text. When ``None``, a
                ``DummyEmbedder(embedding_dim=128)`` is created.
        """
        super().__init__(config)
        # Test for None explicitly so a caller-supplied embedder is never
        # replaced just because it happens to evaluate as falsy.
        self.embedder = embedder if embedder is not None else DummyEmbedder(embedding_dim=128)
        # (vector, context) pairs in ingestion order.
        self._records: list[tuple[np.ndarray, Text]] = []

    def ingest(self, contexts: list[Text]):
        """Embed ``contexts`` and append them to the in-memory records.

        Args:
            contexts: Text contexts whose ``.text`` is embedded in one call.
        """
        # Assumes the embedder returns one vector per input text, in order.
        vectors = self.embedder([c.text for c in contexts])
        self._records.extend((np.asarray(v), c) for v, c in zip(vectors, contexts))

    def retrieve(self, query: str, k: int = 5) -> list[Text]:
        """Return up to ``k`` stored contexts, highest dot-product score first.

        Args:
            query: Query string to embed and compare against stored vectors.
            k: Maximum number of contexts to return.

        Returns:
            The best-scoring contexts, or an empty list when ``k <= 0`` or
            nothing has been ingested.
        """
        # A negative k would otherwise slice as scored[:k] and return all
        # but the lowest-ranked hits instead of nothing.
        if k <= 0:
            return []
        qv = np.asarray(self.embedder(query))
        scored = [(float(np.dot(qv, v)), c) for v, c in self._records]
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [c for _, c in scored[:k]]
# Build the indexer with a default DocsConfig, ingest two Text contexts,
# then retrieve the single best match (k=1) for the question.
indexer = DocsIndexer(DocsConfig())
indexer.ingest([Text(text="Modaic makes sharing agents easy."), Text(text="Tables can be queried with SQL.")])
hits = indexer.retrieve("How do I share agents?", k=1)
print(hits[0].text)
Save and load your retriever:
# Push the indexer to the hub under "yourname/docs-indexer" with a commit message.
indexer.push_to_hub("yourname/docs-indexer", commit_message="initial indexer")
from modaic import AutoRetriever
# Load a retriever back from the same repo path and run a query against it.
loaded_idx = AutoRetriever.from_precompiled("yourname/docs-indexer")
print(loaded_idx.retrieve("share agents", k=1)[0].text)