Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
- Pluggable architecture for chunkers, embedders, and vector databases.
- Hybrid storage with Qdrant and MongoDB.

## v0.1.1-beta.3 (2025-10-25)

## v0.1.1-beta.2 (2025-10-24)

## v0.1.1-beta.0 (2025-10-21)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "insta_rag"
version = "0.1.1-beta.2"
version = "0.1.1-beta.3"
description = "A RAG (Retrieval-Augmented Generation) library for document processing and retrieval."
authors = [
{ name = "Aukik Aurnab", email = "[email protected]" },
Expand Down
6 changes: 4 additions & 2 deletions src/insta_rag/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class RerankingConfig:
provider: str = "bge" # bge, cohere, cross_encoder
model: str = "BAAI/bge-reranker-v2-m3"
api_key: Optional[str] = None
api_url: Optional[str] = "http://118.67.212.45:8000/rerank" # For BGE reranker
api_url: Optional[str] = (
"https://api.novita.ai/openai/v1/rerank" # For BGE reranker
)
top_k: int = 20
enabled: bool = True
normalize: bool = False # For BGE reranker
Expand Down Expand Up @@ -288,7 +290,7 @@ def from_env(cls, **kwargs) -> "RAGConfig":
model="BAAI/bge-reranker-v2-m3",
api_key=bge_api_key,
api_url=os.getenv(
"BGE_RERANKER_URL", "http://118.67.212.45:8000/rerank"
"BGE_RERANKER_URL", "https://api.novita.ai/openai/v1/rerank"
),
enabled=True,
normalize=False,
Expand Down
43 changes: 24 additions & 19 deletions src/insta_rag/retrieval/reranker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,32 @@


class BGEReranker(BaseReranker):
"""BGE (BAAI) reranker using BAAI/bge-reranker-v2-m3 model.
"""BGE (BAAI) reranker using BAAI/bge-reranker-v2-m3 model via Novita AI API.

This reranker uses a remote API endpoint that hosts the BGE reranker model.
This reranker uses the Novita AI API endpoint that hosts the BGE reranker model.
The model is designed to rerank search results based on semantic relevance.

API Endpoint: http://118.67.212.45:8000/rerank
API Endpoint: https://api.novita.ai/openai/v1/rerank (Novita AI)
Model: BAAI/bge-reranker-v2-m3

Important: BGE reranker produces negative scores where:
Important: Response scores are transformed from Novita's 0-1 range to internal -10 to +10 range:
- Novita returns scores between 0.0 and 1.0
- Transformed to -10.0 to +10.0 range for compatibility with existing system
- Higher (less negative) scores = more relevant (e.g., -0.96 is better than -6.99)
- Typical score range: -10.0 to +10.0
- Most relevant results: -3.0 to +5.0
- Use negative thresholds when filtering (e.g., score_threshold=-5.0)
"""

def __init__(
self,
api_key: str,
api_url: str = "http://118.67.212.45:8000/rerank",
api_url: str = "https://api.novita.ai/openai/v1/rerank",
normalize: bool = False,
timeout: int = 30,
):
"""Initialize BGE reranker.
"""Initialize BGE reranker with Novita AI backend.

Args:
api_key: API key for authentication
api_key: Novita AI API key
api_url: Reranking API endpoint URL
normalize: Whether to normalize scores (default: False)
timeout: Request timeout in seconds (default: 30)
Expand All @@ -47,7 +47,7 @@ def __init__(
def rerank(
self, query: str, chunks: List[Tuple[str, Dict[str, Any]]], top_k: int
) -> List[Tuple[int, float]]:
"""Rerank chunks based on relevance to query using BGE reranker.
"""Rerank chunks based on relevance to query using Novita AI BGE reranker.

Args:
query: Query string
Expand All @@ -56,6 +56,7 @@ def rerank(

Returns:
List of (original_index, relevance_score) tuples, sorted by relevance
Scores are transformed from Novita's 0-1 range to internal -10 to +10 range

Raises:
Exception: If API request fails
Expand All @@ -66,22 +67,21 @@ def rerank(
# Extract just the content from chunks
documents = [chunk[0] for chunk in chunks]

# Prepare API request
# Prepare API request for Novita AI (uses "top_n" parameter)
request_data = {
"model": "baai/bge-reranker-v2-m3",
"query": query,
"documents": documents,
"top_k": min(top_k, len(documents)), # Don't request more than available
"normalize": self.normalize,
"top_n": min(top_k, len(documents)), # Don't request more than available
}

headers = {
"accept": "application/json",
"X-API-Key": self.api_key,
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}

try:
# Make API request
# Make API request to Novita AI
response = requests.post(
self.api_url, json=request_data, headers=headers, timeout=self.timeout
)
Expand All @@ -90,12 +90,17 @@ def rerank(
# Parse response
result = response.json()

# Extract results: list of {document, score, index}
# Extract and transform results from Novita format
# Novita returns scores in 0-1 range, transform to -10 to +10 for compatibility
reranked_results = []
for item in result.get("results", []):
original_index = item["index"]
score = item["score"]
reranked_results.append((original_index, score))
# Novita returns "relevance_score" in 0-1 range
novita_score = item["relevance_score"]
# Transform to internal format: (score * 20) - 10
# This maps 0.0 -> -10.0, 0.5 -> 0.0, 1.0 -> 10.0
transformed_score = (novita_score * 20.0) - 10.0
reranked_results.append((original_index, transformed_score))

return reranked_results

Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.