AI-Buddy-Catalyst-Labs · ikramulkayes · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025 · Oct 25, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,8 @@
     - Pluggable architecture for chunkers, embedders, and vector databases.
     - Hybrid storage with Qdrant and MongoDB.
 
+## v0.1.1-beta.3 (2025-10-25)
+
 ## v0.1.1-beta.2 (2025-10-24)
 
 ## v0.1.1-beta.0 (2025-10-21)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "insta_rag"
-version = "0.1.1-beta.2"
+version = "0.1.1-beta.3"
 description = "A RAG (Retrieval-Augmented Generation) library for document processing and retrieval."
 authors = [
     { name = "Aukik Aurnab", email = "[email protected]" },

diff --git a/src/insta_rag/core/config.py b/src/insta_rag/core/config.py
@@ -62,7 +62,9 @@ class RerankingConfig:
     provider: str = "bge"  # bge, cohere, cross_encoder
     model: str = "BAAI/bge-reranker-v2-m3"
     api_key: Optional[str] = None
-    api_url: Optional[str] = "http://118.67.212.45:8000/rerank"  # For BGE reranker
+    api_url: Optional[str] = (
+        "https://api.novita.ai/openai/v1/rerank"  # For BGE reranker
+    )
     top_k: int = 20
     enabled: bool = True
     normalize: bool = False  # For BGE reranker
@@ -288,7 +290,7 @@ def from_env(cls, **kwargs) -> "RAGConfig":
                 model="BAAI/bge-reranker-v2-m3",
                 api_key=bge_api_key,
                 api_url=os.getenv(
-                    "BGE_RERANKER_URL", "http://118.67.212.45:8000/rerank"
+                    "BGE_RERANKER_URL", "https://api.novita.ai/openai/v1/rerank"
                 ),
                 enabled=True,
                 normalize=False,

diff --git a/src/insta_rag/retrieval/reranker.py b/src/insta_rag/retrieval/reranker.py
@@ -9,32 +9,32 @@
 
 
 class BGEReranker(BaseReranker):
-    """BGE (BAAI) reranker using BAAI/bge-reranker-v2-m3 model.
+    """BGE (BAAI) reranker using BAAI/bge-reranker-v2-m3 model via Novita AI API.
 
-    This reranker uses a remote API endpoint that hosts the BGE reranker model.
+    This reranker uses the Novita AI API endpoint that hosts the BGE reranker model.
     The model is designed to rerank search results based on semantic relevance.
 
-    API Endpoint: http://118.67.212.45:8000/rerank
+    API Endpoint: https://api.novita.ai/openai/v1/rerank (Novita AI)
     Model: BAAI/bge-reranker-v2-m3
 
-    Important: BGE reranker produces negative scores where:
+    Important: Novita's 0-1 relevance scores are linearly rescaled to the internal -10 to +10 range:
+    - Novita returns scores between 0.0 and 1.0
+    - Rescaled as (score * 20) - 10 into -10.0 to +10.0 for compatibility with the existing system
     - Higher (less negative) scores = more relevant (e.g., -0.96 is better than -6.99)
-    - Typical score range: -10.0 to +10.0
     - Most relevant results: -3.0 to +5.0
-    - Use negative thresholds when filtering (e.g., score_threshold=-5.0)
     """
 
     def __init__(
         self,
         api_key: str,
-        api_url: str = "http://118.67.212.45:8000/rerank",
+        api_url: str = "https://api.novita.ai/openai/v1/rerank",
         normalize: bool = False,
         timeout: int = 30,
     ):
-        """Initialize BGE reranker.
+        """Initialize BGE reranker with Novita AI backend.
 
         Args:
-            api_key: API key for authentication
+            api_key: Novita AI API key
             api_url: Reranking API endpoint URL
             normalize: Whether to normalize scores (default: False)
             timeout: Request timeout in seconds (default: 30)
@@ -47,7 +47,7 @@ def __init__(
     def rerank(
         self, query: str, chunks: List[Tuple[str, Dict[str, Any]]], top_k: int
     ) -> List[Tuple[int, float]]:
-        """Rerank chunks based on relevance to query using BGE reranker.
+        """Rerank chunks based on relevance to query using Novita AI BGE reranker.
 
         Args:
             query: Query string
@@ -56,6 +56,7 @@ def rerank(
 
         Returns:
             List of (original_index, relevance_score) tuples, sorted by relevance
+            (scores are linearly rescaled from Novita's 0-1 range to the internal -10 to +10 range)
 
         Raises:
             Exception: If API request fails
@@ -66,22 +67,21 @@ def rerank(
         # Extract just the content from chunks
         documents = [chunk[0] for chunk in chunks]
 
-        # Prepare API request
+        # Prepare API request for Novita AI (uses "top_n" parameter)
         request_data = {
+            "model": "baai/bge-reranker-v2-m3",
             "query": query,
             "documents": documents,
-            "top_k": min(top_k, len(documents)),  # Don't request more than available
-            "normalize": self.normalize,
+            "top_n": min(top_k, len(documents)),  # Don't request more than available
         }
 
         headers = {
-            "accept": "application/json",
-            "X-API-Key": self.api_key,
+            "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json",
         }
 
         try:
-            # Make API request
+            # Make API request to Novita AI
             response = requests.post(
                 self.api_url, json=request_data, headers=headers, timeout=self.timeout
             )
@@ -90,12 +90,17 @@ def rerank(
             # Parse response
             result = response.json()
 
-            # Extract results: list of {document, score, index}
+            # Extract results from Novita's response format.
+            # Novita returns scores in the 0-1 range; rescale them to -10 to +10 for compatibility.
             reranked_results = []
             for item in result.get("results", []):
                 original_index = item["index"]
-                score = item["score"]
-                reranked_results.append((original_index, score))
+                # Novita returns "relevance_score" in 0-1 range
+                novita_score = item["relevance_score"]
+                # Transform to internal format: (score * 20) - 10
+                # This maps 0.0 -> -10.0, 0.5 -> 0.0, 1.0 -> 10.0
+                transformed_score = (novita_score * 20.0) - 10.0
+                reranked_results.append((original_index, transformed_score))
 
             return reranked_results
 

diff --git a/uv.lock b/uv.lock