diff --git a/lightrag/api/config.py b/lightrag/api/config.py index 4d8ab1e11e..b5fd4bef73 100644 --- a/lightrag/api/config.py +++ b/lightrag/api/config.py @@ -247,6 +247,7 @@ def parse_args() -> argparse.Namespace: "aws_bedrock", "jina", "gemini", + "voyageai", ], help="Embedding binding type (default: from env or ollama)", ) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 692be45338..a9217da18f 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -319,8 +319,9 @@ def create_app(args): "aws_bedrock", "jina", "gemini", + "voyageai", ]: - raise Exception("embedding binding not supported") + raise Exception(f"embedding binding '{args.embedding_binding}' not supported") # Set default hosts if not provided if args.llm_binding_host is None: @@ -701,7 +702,10 @@ def create_optimized_embedding_function( from lightrag.llm.lollms import lollms_embed provider_func = lollms_embed + elif binding == "voyageai": + from lightrag.llm.voyageai import voyageai_embed + provider_func = voyageai_embed # Extract attributes if provider is an EmbeddingFunc if provider_func and isinstance(provider_func, EmbeddingFunc): provider_max_token_size = provider_func.max_token_size @@ -827,7 +831,6 @@ async def optimized_embedding_function(texts, embedding_dim=None): from lightrag.llm.binding_options import GeminiEmbeddingOptions gemini_options = GeminiEmbeddingOptions.options_dict(args) - # Pass model only if provided, let function use its default (gemini-embedding-001) kwargs = { "texts": texts, @@ -841,6 +844,19 @@ async def optimized_embedding_function(texts, embedding_dim=None): if model: kwargs["model"] = model return await actual_func(**kwargs) + elif binding == "voyageai": + from lightrag.llm.voyageai import voyageai_embed + + actual_func = ( + voyageai_embed.func + if isinstance(voyageai_embed, EmbeddingFunc) + else voyageai_embed + ) + return await actual_func( + texts, + api_key=api_key, + embedding_dim=embedding_dim, + 
) else: # openai and compatible from lightrag.llm.openai import openai_embed diff --git a/lightrag/llm/anthropic.py b/lightrag/llm/anthropic.py index fe18300c9b..a088faea24 100644 --- a/lightrag/llm/anthropic.py +++ b/lightrag/llm/anthropic.py @@ -2,7 +2,6 @@ import sys import os import logging -import numpy as np from typing import Any, Union, AsyncIterator import pipmaster as pm # Pipmaster for dynamic library install @@ -15,11 +14,6 @@ if not pm.is_installed("anthropic"): pm.install("anthropic") -# Add Voyage AI import -if not pm.is_installed("voyageai"): - pm.install("voyageai") -import voyageai - from anthropic import ( AsyncAnthropic, APIConnectionError, @@ -229,105 +223,3 @@ async def claude_3_haiku_complete( enable_cot=enable_cot, **kwargs, ) - - -# Embedding function (placeholder, as Anthropic does not provide embeddings) -@retry( - stop=stop_after_attempt(3), - wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type( - (RateLimitError, APIConnectionError, APITimeoutError) - ), -) -async def anthropic_embed( - texts: list[str], - model: str = "voyage-3", # Default to voyage-3 as a good general-purpose model - base_url: str = None, - api_key: str = None, -) -> np.ndarray: - """ - Generate embeddings using Voyage AI since Anthropic doesn't provide native embedding support. 
- - Args: - texts: List of text strings to embed - model: Voyage AI model name (e.g., "voyage-3", "voyage-3-large", "voyage-code-3") - base_url: Optional custom base URL (not used for Voyage AI) - api_key: API key for Voyage AI (defaults to VOYAGE_API_KEY environment variable) - - Returns: - numpy array of shape (len(texts), embedding_dimension) containing the embeddings - """ - if not api_key: - api_key = os.environ.get("VOYAGE_API_KEY") - if not api_key: - logger.error("VOYAGE_API_KEY environment variable not set") - raise ValueError( - "VOYAGE_API_KEY environment variable is required for embeddings" - ) - - try: - # Initialize Voyage AI client - voyage_client = voyageai.Client(api_key=api_key) - - # Get embeddings - result = voyage_client.embed( - texts, - model=model, - input_type="document", # Assuming document context; could be made configurable - ) - - # Convert list of embeddings to numpy array - embeddings = np.array(result.embeddings, dtype=np.float32) - - logger.debug(f"Generated embeddings for {len(texts)} texts using {model}") - verbose_debug(f"Embedding shape: {embeddings.shape}") - - return embeddings - - except Exception as e: - logger.error(f"Voyage AI embedding failed: {str(e)}") - raise - - -# Optional: a helper function to get available embedding models -def get_available_embedding_models() -> dict[str, dict]: - """ - Returns a dictionary of available Voyage AI embedding models and their properties. 
-    """
-    return {
-        "voyage-3-large": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Best general-purpose and multilingual",
-        },
-        "voyage-3": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "General-purpose and multilingual",
-        },
-        "voyage-3-lite": {
-            "context_length": 32000,
-            "dimension": 512,
-            "description": "Optimized for latency and cost",
-        },
-        "voyage-code-3": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Optimized for code",
-        },
-        "voyage-finance-2": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Optimized for finance",
-        },
-        "voyage-law-2": {
-            "context_length": 16000,
-            "dimension": 1024,
-            "description": "Optimized for legal",
-        },
-        "voyage-multimodal-3": {
-            "context_length": 32000,
-            "dimension": 1024,
-            "description": "Multimodal text and images",
-        },
-    }
diff --git a/lightrag/llm/voyageai.py b/lightrag/llm/voyageai.py
new file mode 100644
index 0000000000..ae15b6e216
--- /dev/null
+++ b/lightrag/llm/voyageai.py
@@ -0,0 +1,175 @@
+import os
+import numpy as np
+import pipmaster as pm  # Pipmaster for dynamic library install
+
+# Add Voyage AI import
+if not pm.is_installed("voyageai"):
+    pm.install("voyageai")
+
+import voyageai
+from voyageai.error import (
+    RateLimitError,
+    APIConnectionError,
+)
+
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+)
+from lightrag.utils import wrap_embedding_func_with_attrs, logger
+
+# Environment variables checked, in order, when no api_key argument is given.
+# VOYAGE_API_KEY is also accepted because it is the name read by the voyageai
+# SDK and by LightRAG's earlier Voyage embedding helper.
+_API_KEY_ENV_VARS = ("VOYAGEAI_API_KEY", "VOYAGE_API_KEY")
+
+
+# Custom exceptions for VoyageAI errors
+class VoyageAIError(Exception):
+    """Generic VoyageAI API error"""
+
+    pass
+
+
+@wrap_embedding_func_with_attrs(embedding_dim=1024, max_token_size=16000)
+@retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=4, max=60),
+    retry=retry_if_exception_type((RateLimitError, APIConnectionError)),
+)
+async def voyageai_embed(
+    texts: list[str],
+    model: str = "voyage-3",
+    api_key: str | None = None,
+    embedding_dim: int | None = None,
+    input_type: str | None = None,
+    truncation: bool | None = None,
+) -> np.ndarray:
+    """Generate embeddings for a list of texts using VoyageAI's API.
+
+    Args:
+        texts: List of texts to embed.
+        model: The VoyageAI embedding model to use. Options include:
+            - "voyage-3": General purpose (1024 dims, 32K context)
+            - "voyage-3-lite": Lightweight (512 dims, 32K context)
+            - "voyage-3-large": Highest accuracy (1024 dims, 32K context)
+            - "voyage-code-3": Code optimized (1024 dims, 32K context)
+            - "voyage-law-2": Legal documents (1024 dims, 16K context)
+            - "voyage-finance-2": Finance (1024 dims, 32K context)
+        api_key: Optional VoyageAI API key. If empty, the VOYAGEAI_API_KEY
+            environment variable is used, then VOYAGE_API_KEY.
+        embedding_dim: Optional output dimension, sent to the API as
+            ``output_dimension``. Omitted from the request when None.
+        input_type: Optional input type hint for the model. Options:
+            - "query": For search queries
+            - "document": For documents to be indexed
+            - None: Let the model decide (default)
+        truncation: Whether to truncate texts that exceed token limit.
+            Omitted from the request when None (default).
+
+    Returns:
+        A float32 numpy array of embeddings, one per input text.
+
+    Raises:
+        ValueError: If no API key is passed or found in the environment.
+        VoyageAIError: If the API returns no embeddings, or a different
+            number of embeddings than input texts.
+    """
+    # Resolve the API key: explicit argument first, then the environment.
+    if not api_key:
+        api_key = next(filter(None, map(os.environ.get, _API_KEY_ENV_VARS)), None)
+    if not api_key:
+        logger.error("VOYAGEAI_API_KEY environment variable not set")
+        raise ValueError(
+            "VOYAGEAI_API_KEY environment variable is required or pass api_key parameter"
+        )
+
+    # Character counts are computed for the debug log only.
+    total_chars = sum(len(t) for t in texts)
+    avg_chars = total_chars / len(texts) if texts else 0
+    logger.debug(
+        f"VoyageAI embedding request: {len(texts)} texts, "
+        f"total_chars={total_chars}, avg_chars={avg_chars:.0f}, model={model}"
+    )
+
+    # Send only the options the caller set, so the voyageai client applies
+    # its own defaults for the rest instead of receiving explicit None values.
+    embed_params = {"texts": texts, "model": model}
+    optional_params = {
+        "output_dimension": embedding_dim,
+        "truncation": truncation,
+        "input_type": input_type,
+    }
+    embed_params.update(
+        {key: value for key, value in optional_params.items() if value is not None}
+    )
+
+    try:
+        client = voyageai.AsyncClient(api_key=api_key)
+        result = await client.embed(**embed_params)
+    except Exception as e:
+        # Log for diagnosis and re-raise unchanged so the retry decorator
+        # still sees RateLimitError / APIConnectionError.
+        logger.error(f"VoyageAI embedding error: {e}")
+        raise
+
+    if not result.embeddings:
+        err_msg = "VoyageAI API returned empty embeddings"
+        logger.error(err_msg)
+        raise VoyageAIError(err_msg)
+
+    if len(result.embeddings) != len(texts):
+        err_msg = f"VoyageAI API returned {len(result.embeddings)} embeddings for {len(texts)} texts"
+        logger.error(err_msg)
+        raise VoyageAIError(err_msg)
+
+    embeddings = np.array(result.embeddings, dtype=np.float32)
+    logger.debug(f"VoyageAI embeddings generated: shape {embeddings.shape}")
+
+    return embeddings
+
+
+# Optional: a helper function to get available embedding models
+def get_available_embedding_models() -> dict[str, dict]:
+    """
+    Returns a dictionary of available Voyage AI embedding models and their properties.
+    """
+    return {
+        "voyage-3-large": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Best general-purpose and multilingual",
+        },
+        "voyage-3": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "General-purpose and multilingual",
+        },
+        "voyage-3-lite": {
+            "context_length": 32000,
+            "dimension": 512,
+            "description": "Optimized for latency and cost",
+        },
+        "voyage-code-3": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Optimized for code",
+        },
+        "voyage-finance-2": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Optimized for finance",
+        },
+        "voyage-law-2": {
+            "context_length": 16000,
+            "dimension": 1024,
+            "description": "Optimized for legal",
+        },
+        "voyage-multimodal-3": {
+            "context_length": 32000,
+            "dimension": 1024,
+            "description": "Multimodal text and images",
+        },
+    }