From a32cec7b680469cf6ed7d4b8a6de21fd45c82546 Mon Sep 17 00:00:00 2001 From: MD Ikramul Kayes Date: Sat, 25 Oct 2025 12:30:35 +0600 Subject: [PATCH 1/3] added novita ai reranking --- src/insta_rag/core/config.py | 4 +-- src/insta_rag/retrieval/reranker.py | 43 ++++++++++++++++------------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/src/insta_rag/core/config.py b/src/insta_rag/core/config.py index 81c19a3..ea2ef24 100644 --- a/src/insta_rag/core/config.py +++ b/src/insta_rag/core/config.py @@ -62,7 +62,7 @@ class RerankingConfig: provider: str = "bge" # bge, cohere, cross_encoder model: str = "BAAI/bge-reranker-v2-m3" api_key: Optional[str] = None - api_url: Optional[str] = "http://118.67.212.45:8000/rerank" # For BGE reranker + api_url: Optional[str] = "https://api.novita.ai/openai/v1/rerank" # For BGE reranker top_k: int = 20 enabled: bool = True normalize: bool = False # For BGE reranker @@ -288,7 +288,7 @@ def from_env(cls, **kwargs) -> "RAGConfig": model="BAAI/bge-reranker-v2-m3", api_key=bge_api_key, api_url=os.getenv( - "BGE_RERANKER_URL", "http://118.67.212.45:8000/rerank" + "BGE_RERANKER_URL", "https://api.novita.ai/openai/v1/rerank" ), enabled=True, normalize=False, diff --git a/src/insta_rag/retrieval/reranker.py b/src/insta_rag/retrieval/reranker.py index 016b798..133918a 100644 --- a/src/insta_rag/retrieval/reranker.py +++ b/src/insta_rag/retrieval/reranker.py @@ -9,32 +9,32 @@ class BGEReranker(BaseReranker): - """BGE (BAAI) reranker using BAAI/bge-reranker-v2-m3 model. + """BGE (BAAI) reranker using BAAI/bge-reranker-v2-m3 model via Novita AI API. - This reranker uses a remote API endpoint that hosts the BGE reranker model. + This reranker uses the Novita AI API endpoint that hosts the BGE reranker model. The model is designed to rerank search results based on semantic relevance. - API Endpoint: http://118.67.212.45:8000/rerank + API Endpoint: https://api.novita.ai/openai/v1/rerank (Novita AI) Model: BAAI/bge-reranker-v2-m3 - Important: BGE reranker produces negative scores where: + Important: Response scores are transformed from Novita's 0-1 range to internal -10 to +10 range: + - Novita returns scores between 0.0 and 1.0 + - Transformed to -10.0 to +10.0 range for compatibility with existing system - Higher (less negative) scores = more relevant (e.g., -0.96 is better than -6.99) - - Typical score range: -10.0 to +10.0 - Most relevant results: -3.0 to +5.0 - - Use negative thresholds when filtering (e.g., score_threshold=-5.0) """ def __init__( self, api_key: str, - api_url: str = "http://118.67.212.45:8000/rerank", + api_url: str = "https://api.novita.ai/openai/v1/rerank", normalize: bool = False, timeout: int = 30, ): - """Initialize BGE reranker. + """Initialize BGE reranker with Novita AI backend. Args: - api_key: API key for authentication + api_key: Novita AI API key api_url: Reranking API endpoint URL normalize: Whether to normalize scores (default: False) timeout: Request timeout in seconds (default: 30) @@ -47,7 +47,7 @@ def __init__( def rerank( self, query: str, chunks: List[Tuple[str, Dict[str, Any]]], top_k: int ) -> List[Tuple[int, float]]: - """Rerank chunks based on relevance to query using BGE reranker. + """Rerank chunks based on relevance to query using Novita AI BGE reranker. Args: query: Query string @@ -56,6 +56,7 @@ def rerank( Returns: List of (original_index, relevance_score) tuples, sorted by relevance + Scores are transformed from Novita's 0-1 range to internal -10 to +10 range Raises: Exception: If API request fails @@ -66,22 +67,21 @@ def rerank( # Extract just the content from chunks documents = [chunk[0] for chunk in chunks] - # Prepare API request + # Prepare API request for Novita AI (uses "top_n" parameter) request_data = { + "model": "baai/bge-reranker-v2-m3", "query": query, "documents": documents, - "top_k": min(top_k, len(documents)), # Don't request more than available - "normalize": self.normalize, + "top_n": min(top_k, len(documents)), # Don't request more than available } headers = { - "accept": "application/json", - "X-API-Key": self.api_key, + "Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json", } try: - # Make API request + # Make API request to Novita AI response = requests.post( self.api_url, json=request_data, headers=headers, timeout=self.timeout ) @@ -90,12 +90,17 @@ def rerank( # Parse response result = response.json() - # Extract results: list of {document, score, index} + # Extract and transform results from Novita format + # Novita returns scores in 0-1 range, transform to -10 to +10 for compatibility reranked_results = [] for item in result.get("results", []): original_index = item["index"] - score = item["score"] - reranked_results.append((original_index, score)) + # Novita returns "relevance_score" in 0-1 range + novita_score = item["relevance_score"] + # Transform to internal format: (score * 20) - 10 + # This maps 0.0 -> -10.0, 0.5 -> 0.0, 1.0 -> 10.0 + transformed_score = (novita_score * 20.0) - 10.0 + reranked_results.append((original_index, transformed_score)) return reranked_results From 13b6e23787bcdb401bc91316bd54297e57cf37cf Mon Sep 17 00:00:00 2001 From: MD Ikramul Kayes Date: Sat, 25 Oct 2025 12:30:49 +0600 Subject: [PATCH 2/3] =?UTF-8?q?bump:=20version=200.1.1-beta.2=20=E2=86=92?= =?UTF-8?q?=200.1.1-beta.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f6de50a..c7c0fa3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ - Pluggable architecture for chunkers, embedders, and vector databases. - Hybrid storage with Qdrant and MongoDB. +## v0.1.1-beta.3 (2025-10-25) + ## v0.1.1-beta.2 (2025-10-24) ## v0.1.1-beta.0 (2025-10-21) diff --git a/pyproject.toml b/pyproject.toml index 0d8a799..54ac19e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "insta_rag" -version = "0.1.1-beta.2" +version = "0.1.1-beta.3" description = "A RAG (Retrieval-Augmented Generation) library for document processing and retrieval." authors = [ { name = "Aukik Aurnab", email = "aukikaurnabx@gmail.com" }, diff --git a/uv.lock b/uv.lock index bcc0da9..d633f07 100644 --- a/uv.lock +++ b/uv.lock @@ -994,7 +994,7 @@ wheels = [ [[package]] name = "insta-rag" -version = "0.1.1b2" +version = "0.1.1b3" source = { editable = "." } dependencies = [ { name = "cohere" }, From 57a9a821e748adc9e8a2fa69a154091c04d1f388 Mon Sep 17 00:00:00 2001 From: ikramulkayes <67923321+ikramulkayes@users.noreply.github.com> Date: Sat, 25 Oct 2025 06:33:28 +0000 Subject: [PATCH 3/3] style: format code with ruff --- src/insta_rag/core/config.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/insta_rag/core/config.py b/src/insta_rag/core/config.py index ea2ef24..b15a3ad 100644 --- a/src/insta_rag/core/config.py +++ b/src/insta_rag/core/config.py @@ -62,7 +62,9 @@ class RerankingConfig: provider: str = "bge" # bge, cohere, cross_encoder model: str = "BAAI/bge-reranker-v2-m3" api_key: Optional[str] = None - api_url: Optional[str] = "https://api.novita.ai/openai/v1/rerank" # For BGE reranker + api_url: Optional[str] = ( + "https://api.novita.ai/openai/v1/rerank" # For BGE reranker + ) top_k: int = 20 enabled: bool = True normalize: bool = False # For BGE reranker