-
from llama_index.core import VectorStoreIndex, Document
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core.vector_stores import VectorStoreQuery
from llama_index.core.schema import TextNode, NodeWithScore
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import QueryBundle
import psycopg2
from typing import Any, List  # used by VectorDBRetriever below
# Setup embedding model
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")
# Setup LLM
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"
llm = LlamaCPP(
    model_url=model_url,
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)
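# Note (assumption, not from the original post): n_gpu_layers=1 offloads only
# one layer to the GPU; recent llama-cpp-python builds accept n_gpu_layers=-1
# to offload every layer if you have the GPU memory for it.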
# Initialize Postgres
db_name = "vector_db"
host = "localhost"
password = "password"
port = "5432"
user = "jerry"
conn = psycopg2.connect(
    dbname="postgres",
    host=host,
    password=password,
    port=port,
    user=user,
)
conn.autocommit = True
with conn.cursor() as c:
    c.execute(f"DROP DATABASE IF EXISTS {db_name}")
    c.execute(f"CREATE DATABASE {db_name}")
vector_store = PGVectorStore.from_params(
    database=db_name,
    host=host,
    password=password,
    port=port,
    user=user,
    table_name="llama2_paper",
    embed_dim=384,
)
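# Sanity check (optional): embed_dim must match the embedding model's output
# size; BAAI/bge-small-en produces 384-dimensional vectors.
assert len(embed_model.get_text_embedding("test")) == 384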
# Create the pipeline with transformations
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(),
    ]
)
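# The splitter's chunking is tunable; illustrative values, not the original's:
#   SentenceSplitter(chunk_size=512, chunk_overlap=64)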
DOCUMENT = """
Your document text here...
"""
# Build the index
documents = [Document(text=DOCUMENT)]
nodes = pipeline.run(documents=documents, show_progress=True)
# Generate embeddings for each node
for node in nodes:
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )
    node.embedding = node_embedding
# Load nodes into the vector store
vector_store.add(nodes)
# Build retrieval pipeline
class VectorDBRetriever(BaseRetriever):
    """Retriever over the Postgres vector store."""

    def __init__(
        self,
        vector_store: PGVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        # Use the instance attributes rather than the module-level globals,
        # so the retriever works with whatever store/model it was given.
        query_embedding = self._embed_model.get_query_embedding(query_bundle.query_str)
        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        query_result = self._vector_store.query(vector_store_query)
        nodes_with_scores = []
        for index, node in enumerate(query_result.nodes):
            score = query_result.similarities[index] if query_result.similarities is not None else None
            nodes_with_scores.append(NodeWithScore(node=node, score=score))
        return nodes_with_scores
retriever = VectorDBRetriever(vector_store, embed_model, query_mode="default", similarity_top_k=2)
query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)
# Example query
query_str = "How does Llama 2 perform compared to other open-source models?"
response = query_engine.query(query_str)
print(str(response))
print(response.source_nodes[0].get_content())
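For comparison, the same store can also be queried without the custom retriever, via the built-in index wrapper. A minimal sketch, assuming the `vector_store`, `embed_model`, `llm`, and `query_str` defined above:

from llama_index.core import VectorStoreIndex

# Wrap the existing pgvector table in an index and query it directly.
index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
alt_query_engine = index.as_query_engine(llm=llm, similarity_top_k=2)
print(alt_query_engine.query(query_str))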
-
The example provided in the notebook is for the query pipeline. Can you give me sample code that uses the same DSPy setup with a query engine query instead? Thank you.
Notebook: Building optimized RAG with LlamaIndex + DSPy
https://github.com/stanfordnlp/dspy/blob/main/examples/llamaindex/dspy_llamaindex_rag.ipynb
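One possible direction (not from the notebook): wrap the LlamaIndex retriever in a `dspy.Module` so the generation step runs through DSPy and can be tuned by its optimizers. A minimal sketch, assuming the `retriever` and `query_str` from the code above and a DSPy LM already configured via `dspy.settings.configure(lm=...)`; `GenerateAnswer` and `LlamaIndexRAG` are hypothetical names, not part of either library:

import dspy

class GenerateAnswer(dspy.Signature):
    """Answer the question using only the retrieved context."""
    context = dspy.InputField(desc="passages retrieved from the vector store")
    question = dspy.InputField()
    answer = dspy.OutputField()

class LlamaIndexRAG(dspy.Module):
    def __init__(self, retriever):
        super().__init__()
        self.retriever = retriever  # e.g. the VectorDBRetriever defined above
        self.generate = dspy.ChainOfThought(GenerateAnswer)

    def forward(self, question):
        # Retrieve with LlamaIndex, then let DSPy own the generation prompt.
        nodes = self.retriever.retrieve(question)
        context = "\n\n".join(n.get_content() for n in nodes)
        return self.generate(context=context, question=question)

rag = LlamaIndexRAG(retriever)
print(rag(question=query_str).answer)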