diff --git a/autobot-backend/api/analytics_llm_patterns.py b/autobot-backend/api/analytics_llm_patterns.py index ea0b8f86c..3e3a9b64f 100644 --- a/autobot-backend/api/analytics_llm_patterns.py +++ b/autobot-backend/api/analytics_llm_patterns.py @@ -31,6 +31,12 @@ from redis.exceptions import RedisError from autobot_shared.redis_client import RedisDatabase, get_redis_client +from constants.model_constants import ( + EXPENSIVE_MODEL_MARKER_GPT4, + EXPENSIVE_MODEL_MARKER_OPUS, + MODEL_COSTS_PER_1M_TOKENS, + OPENAI_GPT4O, +) # Prefix provided by analytics_routers.py registry (#1032) router = APIRouter(tags=["llm-patterns", "analytics"]) @@ -42,7 +48,7 @@ # ============================================================================= # O(1) lookup optimization constant (Issue #326) -EXPENSIVE_MODELS = {"opus", "gpt-4"} +EXPENSIVE_MODELS = {EXPENSIVE_MODEL_MARKER_OPUS, EXPENSIVE_MODEL_MARKER_GPT4} class PromptCategory(str, Enum): @@ -86,26 +92,9 @@ class CostLevel(str, Enum): } -# Model costs per 1M tokens (USD) -MODEL_COSTS = { - # Anthropic - "claude-3-opus": {"input": 15.00, "output": 75.00}, - "claude-3-sonnet": {"input": 3.00, "output": 15.00}, - "claude-3-haiku": {"input": 0.25, "output": 1.25}, - "claude-sonnet-4": {"input": 3.00, "output": 15.00}, - # OpenAI - "gpt-4o": {"input": 2.50, "output": 10.00}, - "gpt-4o-mini": {"input": 0.15, "output": 0.60}, - "gpt-4-turbo": {"input": 10.00, "output": 30.00}, - "gpt-3.5-turbo": {"input": 0.50, "output": 1.50}, - # Google - "gemini-1.5-pro": {"input": 1.25, "output": 5.00}, - "gemini-1.5-flash": {"input": 0.075, "output": 0.30}, - # Local (free) - "llama3": {"input": 0.0, "output": 0.0}, - "mistral": {"input": 0.0, "output": 0.0}, - "codellama": {"input": 0.0, "output": 0.0}, -} +# Model costs per 1M tokens (USD) — single source of truth in +# constants/model_constants.MODEL_COSTS_PER_1M_TOKENS (#3528). 
+MODEL_COSTS = MODEL_COSTS_PER_1M_TOKENS # Pattern detection rules for prompt categorization PROMPT_PATTERNS = { @@ -418,7 +407,7 @@ def _check_model_efficiency( Issue #620: Extracted from analyze_prompt. """ - if "opus" in model.lower() or "gpt-4" in model.lower(): + if EXPENSIVE_MODEL_MARKER_OPUS in model.lower() or EXPENSIVE_MODEL_MARKER_GPT4 in model.lower(): if category in SIMPLE_PROMPT_CATEGORIES: # O(1) lookup (Issue #326) recommendations.append( "Consider using a smaller model (Haiku/GPT-3.5) for this task type" ) @@ -451,7 +440,7 @@ async def analyze_prompt( "category": category.value, "estimated_tokens": int(token_estimate), "estimated_cost": self._calculate_cost( - model or "gpt-4o", int(token_estimate), int(token_estimate * 1.5) + model or OPENAI_GPT4O, int(token_estimate), int(token_estimate * 1.5) ), "issues": issues, "recommendations": recommendations, diff --git a/autobot-backend/api/llm_providers.py b/autobot-backend/api/llm_providers.py index c8a0b0849..8d6c776e8 100644 --- a/autobot-backend/api/llm_providers.py +++ b/autobot-backend/api/llm_providers.py @@ -34,7 +34,8 @@ async def switch_llm_provider( ): """Switch active LLM provider at runtime. - Body: {"provider": "openai", "model": "gpt-4", "validate": true} + Body: {"provider": "openai", "model": "<model-id>", "validate": true} + See ModelConstants.DEFAULT_OPENAI_MODEL for the default OpenAI model. 
""" provider = switch_data.get("provider") if not provider: diff --git a/autobot-backend/chat_history/__init__.py b/autobot-backend/chat_history/__init__.py index 551b1d156..e3e7659d7 100644 --- a/autobot-backend/chat_history/__init__.py +++ b/autobot-backend/chat_history/__init__.py @@ -94,7 +94,7 @@ class ChatHistoryManager( # Get messages with model-aware limits messages = await manager.get_session_messages( session_id=session["id"], - model_name="gpt-4" + model_name="gpt-4" # docstring example — use ModelConstants.DEFAULT_OPENAI_MODEL ) # List all sessions diff --git a/autobot-backend/code_intelligence/llm_pattern_analysis/calculators.py b/autobot-backend/code_intelligence/llm_pattern_analysis/calculators.py index 3a10fe63f..baa1777e3 100644 --- a/autobot-backend/code_intelligence/llm_pattern_analysis/calculators.py +++ b/autobot-backend/code_intelligence/llm_pattern_analysis/calculators.py @@ -20,6 +20,7 @@ UsagePattern, ) from code_intelligence.llm_pattern_analysis.types import UsagePatternType +from constants.model_constants import MODEL_PRICING_PER_1K_TOKENS, OPENAI_GPT35_TURBO # ============================================================================= # Token Tracker @@ -34,17 +35,9 @@ class TokenTracker: to identify optimization opportunities. """ - # Token cost estimates per 1K tokens (based on common pricing) - DEFAULT_COSTS = { - "gpt-4": {"prompt": 0.03, "completion": 0.06}, - "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03}, - "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002}, - "claude-3-opus": {"prompt": 0.015, "completion": 0.075}, - "claude-3-sonnet": {"prompt": 0.003, "completion": 0.015}, - "claude-3-haiku": {"prompt": 0.00025, "completion": 0.00125}, - "ollama": {"prompt": 0.0, "completion": 0.0}, # Local, no API cost - "default": {"prompt": 0.001, "completion": 0.002}, - } + # Token cost estimates per 1K tokens — single source of truth in + # constants/model_constants.MODEL_PRICING_PER_1K_TOKENS (#3528). 
+ DEFAULT_COSTS = MODEL_PRICING_PER_1K_TOKENS def __init__(self): """Initialize the token tracker.""" @@ -155,18 +148,9 @@ class CostCalculator: Provides cost projections and optimization potential analysis. """ - # Model pricing per 1K tokens (USD) - MODEL_PRICING = { - "gpt-4": {"prompt": 0.03, "completion": 0.06}, - "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03}, - "gpt-4o": {"prompt": 0.005, "completion": 0.015}, - "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002}, - "claude-3-opus": {"prompt": 0.015, "completion": 0.075}, - "claude-3-sonnet": {"prompt": 0.003, "completion": 0.015}, - "claude-3-haiku": {"prompt": 0.00025, "completion": 0.00125}, - "claude-sonnet-4": {"prompt": 0.003, "completion": 0.015}, - "ollama": {"prompt": 0.0, "completion": 0.0}, - } + # Model pricing per 1K tokens — single source of truth in + # constants/model_constants.MODEL_PRICING_PER_1K_TOKENS (#3528). + MODEL_PRICING = MODEL_PRICING_PER_1K_TOKENS @classmethod def _estimate_avg_tokens(cls, model_pats: List[UsagePattern]) -> tuple: @@ -264,7 +248,7 @@ def estimate_costs( for model, model_pats in model_patterns.items(): pricing = cls.MODEL_PRICING.get( - model, cls.MODEL_PRICING.get("gpt-3.5-turbo") + model, cls.MODEL_PRICING.get(OPENAI_GPT35_TURBO) ) daily_calls = len(model_pats) * daily_call_multiplier avg_prompt, avg_completion = cls._estimate_avg_tokens(model_pats) diff --git a/autobot-backend/code_intelligence/llm_pattern_analysis/data_models.py b/autobot-backend/code_intelligence/llm_pattern_analysis/data_models.py index 5f82893b6..908e87335 100644 --- a/autobot-backend/code_intelligence/llm_pattern_analysis/data_models.py +++ b/autobot-backend/code_intelligence/llm_pattern_analysis/data_models.py @@ -26,6 +26,11 @@ PromptIssueType, UsagePatternType, ) +from constants.model_constants import ( + ANTHROPIC_CLAUDE3_OPUS, + EXPENSIVE_MODEL_MARKER_GPT4, + EXPENSIVE_MODEL_MARKER_OPUS, +) # ============================================================================= # 
Simple Data Classes @@ -329,7 +334,11 @@ def get_monthly_savings(self) -> float: def is_expensive_model(self) -> bool: """Check if this is an expensive model.""" - expensive_markers = ["gpt-4", "opus", "claude-3-opus"] + expensive_markers = [ + EXPENSIVE_MODEL_MARKER_GPT4, + EXPENSIVE_MODEL_MARKER_OPUS, + ANTHROPIC_CLAUDE3_OPUS, + ] return any(marker in self.model.lower() for marker in expensive_markers) diff --git a/autobot-backend/constants/model_constants.py b/autobot-backend/constants/model_constants.py index 0aca67fc3..a74c13ded 100644 --- a/autobot-backend/constants/model_constants.py +++ b/autobot-backend/constants/model_constants.py @@ -34,7 +34,7 @@ import os from dataclasses import dataclass from functools import lru_cache -from typing import Optional +from typing import Dict, Optional from autobot_shared.ssot_config import CLASSIFICATION_MODEL as SSOT_CLASSIFICATION_MODEL from autobot_shared.ssot_config import ( @@ -56,9 +56,205 @@ # Change models in autobot_shared/ssot_config.py to change the entire system. FALLBACK_MODEL = DEFAULT_LLM_MODEL -FALLBACK_OPENAI_MODEL = "gpt-4" -FALLBACK_ANTHROPIC_MODEL = "claude-3-5-sonnet-20241022" -FALLBACK_GOOGLE_MODEL = "gemini-pro" + +# ============================================================================= +# EXPLICIT MODEL NAME CONSTANTS (#3528) +# ============================================================================= +# Named constants for every model string used anywhere in the codebase. +# Add new entries here rather than hardcoding strings in service files. 
+ +# OpenAI — preview/reasoning aliases without dated suffix +OPENAI_O1_PREVIEW = "o1-preview" + +# OpenAI — GPT-4 family +OPENAI_GPT4 = "gpt-4" +OPENAI_GPT4_TURBO = "gpt-4-turbo" +OPENAI_GPT4O = "gpt-4o" +OPENAI_GPT4O_MINI = "gpt-4o-mini" +OPENAI_GPT4_VISION_PREVIEW = "gpt-4-vision-preview" +OPENAI_GPT4_TURBO_PREVIEW = "gpt-4-turbo-preview" +# OpenAI — GPT-3.5 family +OPENAI_GPT35_TURBO = "gpt-3.5-turbo" +OPENAI_GPT35_TURBO_16K = "gpt-3.5-turbo-16k" +# OpenAI — reasoning models +OPENAI_O1 = "o1" +OPENAI_O1_MINI = "o1-mini" +OPENAI_O3 = "o3" +OPENAI_O3_MINI = "o3-mini" +OPENAI_O4_MINI = "o4-mini" +# OpenAI — GPT-4.1 family (2025) +OPENAI_GPT41 = "gpt-4.1" +OPENAI_GPT41_MINI = "gpt-4.1-mini" +OPENAI_GPT41_NANO = "gpt-4.1-nano" + +# Anthropic — Claude 4.x +ANTHROPIC_CLAUDE_OPUS4 = "claude-opus-4-20250514" +ANTHROPIC_CLAUDE_HAIKU4_5 = "claude-haiku-4-5-20251001" +ANTHROPIC_CLAUDE_SONNET4 = "claude-sonnet-4-20250514" +# Anthropic — Claude 3.x / Sonnet 4 +ANTHROPIC_CLAUDE35_SONNET = "claude-3-5-sonnet-20241022" +ANTHROPIC_CLAUDE35_HAIKU = "claude-3-5-haiku-20241022" +ANTHROPIC_CLAUDE3_OPUS_DATED = "claude-3-opus-20240229" +ANTHROPIC_CLAUDE3_SONNET_DATED = "claude-3-sonnet-20240229" +ANTHROPIC_CLAUDE3_HAIKU_DATED = "claude-3-haiku-20240307" +# Anthropic — short-form names used in analytics/cost matching +ANTHROPIC_CLAUDE3_OPUS = "claude-3-opus" +ANTHROPIC_CLAUDE3_SONNET = "claude-3-sonnet" +ANTHROPIC_CLAUDE3_HAIKU = "claude-3-haiku" +ANTHROPIC_CLAUDE_SONNET4_SHORT = "claude-sonnet-4" +# Anthropic — release aliases without dated suffix (latest stable pointers) +ANTHROPIC_CLAUDE_SONNET4_6 = "claude-sonnet-4-6" +ANTHROPIC_CLAUDE_OPUS4_6 = "claude-opus-4-6" + +# Google — Gemini 2.5 +GOOGLE_GEMINI25_PRO = "gemini-2.5-pro" +GOOGLE_GEMINI25_FLASH = "gemini-2.5-flash" +# Google — Gemini 2.0 / 1.5 +GOOGLE_GEMINI20_FLASH = "gemini-2.0-flash" +GOOGLE_GEMINI15_PRO = "gemini-1.5-pro" +GOOGLE_GEMINI15_FLASH = "gemini-1.5-flash" +# Google — legacy models +GOOGLE_GEMINI_PRO = 
"gemini-pro" # plain base model (distinct from vision) +GOOGLE_GEMINI_PRO_VISION = "gemini-pro-vision" + +# DeepSeek hosted API +DEEPSEEK_V3 = "deepseek-v3" +DEEPSEEK_R1_API = "deepseek-r1-api" + +# Local / Ollama free models +LOCAL_LLAMA3 = "llama3" +LOCAL_LLAMA31 = "llama3.1" +LOCAL_LLAMA32 = "llama3.2" +LOCAL_LLAMA33 = "llama3.3" +LOCAL_MISTRAL = "mistral" +LOCAL_MIXTRAL = "mixtral" +LOCAL_CODELLAMA = "codellama" +LOCAL_QWEN25 = "qwen2.5" +LOCAL_QWEN3 = "qwen3" +LOCAL_DEEPSEEK_CODER = "deepseek-coder" +LOCAL_DEEPSEEK_R1 = "deepseek-r1" +LOCAL_PHI3 = "phi3" +LOCAL_PHI4 = "phi4" +LOCAL_GEMMA2 = "gemma2" +LOCAL_GEMMA3 = "gemma3" + +# Substring markers used by cost/efficiency heuristics (#3528) +# These are substrings matched with ``in model.lower()``, not full model IDs. +EXPENSIVE_MODEL_MARKER_OPUS = "opus" +EXPENSIVE_MODEL_MARKER_GPT4 = "gpt-4" + +# Fallback model aliases — defined after constants to reference them directly +FALLBACK_OPENAI_MODEL = OPENAI_GPT4 +FALLBACK_ANTHROPIC_MODEL = ANTHROPIC_CLAUDE35_SONNET +FALLBACK_GOOGLE_MODEL = GOOGLE_GEMINI_PRO + +# ============================================================================= +# MODEL_PRICING — SINGLE SOURCE OF TRUTH (#3528) +# ============================================================================= +# Two formats are needed by different consumers; both derive from the same data. +# +# MODEL_PRICING_PER_1M_TOKENS — USD per 1 million tokens (llm_cost_tracker) +# keys: "input", "output" +# +# MODEL_PRICING_PER_1K_TOKENS — USD per 1 thousand tokens (calculators.py, +# CostCalculator) keys: "prompt", "completion" +# +# Pricing source: provider published rates as of 2026-03. +# Update PRICING_VERSION in llm_cost_tracker.py when editing these tables. 
+ +MODEL_PRICING_PER_1M_TOKENS: Dict[str, Dict[str, float]] = { + # Anthropic Claude 4.x (2025-2026) + ANTHROPIC_CLAUDE_OPUS4: {"input": 15.00, "output": 75.00}, + ANTHROPIC_CLAUDE_HAIKU4_5: {"input": 0.80, "output": 4.00}, + # Anthropic Claude 3.x / Sonnet 4 + ANTHROPIC_CLAUDE_SONNET4: {"input": 3.00, "output": 15.00}, + ANTHROPIC_CLAUDE35_SONNET: {"input": 3.00, "output": 15.00}, + ANTHROPIC_CLAUDE35_HAIKU: {"input": 0.80, "output": 4.00}, + ANTHROPIC_CLAUDE3_OPUS_DATED: {"input": 15.00, "output": 75.00}, + ANTHROPIC_CLAUDE3_SONNET_DATED: {"input": 3.00, "output": 15.00}, + ANTHROPIC_CLAUDE3_HAIKU_DATED: {"input": 0.25, "output": 1.25}, + # OpenAI GPT-4.1 family (2025) + OPENAI_GPT41: {"input": 2.00, "output": 8.00}, + OPENAI_GPT41_MINI: {"input": 0.40, "output": 1.60}, + OPENAI_GPT41_NANO: {"input": 0.10, "output": 0.40}, + # OpenAI GPT-4o / GPT-4 / GPT-3.5 + OPENAI_GPT4O: {"input": 2.50, "output": 10.00}, + OPENAI_GPT4O_MINI: {"input": 0.15, "output": 0.60}, + OPENAI_GPT4_TURBO: {"input": 10.00, "output": 30.00}, + OPENAI_GPT4: {"input": 30.00, "output": 60.00}, + OPENAI_GPT35_TURBO: {"input": 0.50, "output": 1.50}, + # OpenAI reasoning models + OPENAI_O1: {"input": 15.00, "output": 60.00}, + OPENAI_O1_MINI: {"input": 3.00, "output": 12.00}, + OPENAI_O3: {"input": 2.00, "output": 8.00}, + OPENAI_O3_MINI: {"input": 1.10, "output": 4.40}, + OPENAI_O4_MINI: {"input": 1.10, "output": 4.40}, + # Google Gemini 2.5 (2025-2026) + GOOGLE_GEMINI25_PRO: {"input": 1.25, "output": 10.00}, + GOOGLE_GEMINI25_FLASH: {"input": 0.15, "output": 0.60}, + # Google Gemini 2.0 / 1.5 + GOOGLE_GEMINI20_FLASH: {"input": 0.10, "output": 0.40}, + GOOGLE_GEMINI15_PRO: {"input": 1.25, "output": 5.00}, + GOOGLE_GEMINI15_FLASH: {"input": 0.075, "output": 0.30}, + # DeepSeek hosted API models (2025) + DEEPSEEK_V3: {"input": 0.27, "output": 1.10}, + DEEPSEEK_R1_API: {"input": 0.55, "output": 2.19}, + # Local/Ollama models (free) + LOCAL_LLAMA3: {"input": 0.0, "output": 0.0}, + LOCAL_LLAMA31: 
{"input": 0.0, "output": 0.0}, + LOCAL_LLAMA32: {"input": 0.0, "output": 0.0}, + LOCAL_LLAMA33: {"input": 0.0, "output": 0.0}, + LOCAL_MISTRAL: {"input": 0.0, "output": 0.0}, + LOCAL_MIXTRAL: {"input": 0.0, "output": 0.0}, + LOCAL_CODELLAMA: {"input": 0.0, "output": 0.0}, + LOCAL_QWEN25: {"input": 0.0, "output": 0.0}, + LOCAL_QWEN3: {"input": 0.0, "output": 0.0}, + LOCAL_DEEPSEEK_CODER: {"input": 0.0, "output": 0.0}, + LOCAL_DEEPSEEK_R1: {"input": 0.0, "output": 0.0}, + LOCAL_PHI3: {"input": 0.0, "output": 0.0}, + LOCAL_PHI4: {"input": 0.0, "output": 0.0}, + LOCAL_GEMMA2: {"input": 0.0, "output": 0.0}, + LOCAL_GEMMA3: {"input": 0.0, "output": 0.0}, +} + +# Per-1K token pricing used by TokenTracker / CostCalculator in +# code_intelligence/llm_pattern_analysis/calculators.py (#3528). +# NOTE: legacy per-1K rates kept for backward compatibility; NOT all entries +# equal MODEL_PRICING_PER_1M_TOKENS ÷ 1000 (e.g. gpt-4o, gpt-3.5-turbo differ). +MODEL_PRICING_PER_1K_TOKENS: Dict[str, Dict[str, float]] = { + OPENAI_GPT4: {"prompt": 0.03, "completion": 0.06}, + OPENAI_GPT4_TURBO: {"prompt": 0.01, "completion": 0.03}, + OPENAI_GPT4O: {"prompt": 0.005, "completion": 0.015}, + OPENAI_GPT35_TURBO: {"prompt": 0.0015, "completion": 0.002}, + ANTHROPIC_CLAUDE3_OPUS: {"prompt": 0.015, "completion": 0.075}, + ANTHROPIC_CLAUDE3_SONNET: {"prompt": 0.003, "completion": 0.015}, + ANTHROPIC_CLAUDE3_HAIKU: {"prompt": 0.00025, "completion": 0.00125}, + ANTHROPIC_CLAUDE_SONNET4_SHORT: {"prompt": 0.003, "completion": 0.015}, + "ollama": {"prompt": 0.0, "completion": 0.0}, # Local, no API cost + "default": {"prompt": 0.001, "completion": 0.002}, +} + +# Per-1M token cost table used by analytics_llm_patterns.py (#3528). +# Keys use short-form names to match partial model identifiers submitted by +# clients (e.g. "claude-3-opus" instead of the full dated variant). 
+MODEL_COSTS_PER_1M_TOKENS: Dict[str, Dict[str, float]] = { + # Anthropic (short-form names for analytics matching) + ANTHROPIC_CLAUDE3_OPUS: {"input": 15.00, "output": 75.00}, + ANTHROPIC_CLAUDE3_SONNET: {"input": 3.00, "output": 15.00}, + ANTHROPIC_CLAUDE3_HAIKU: {"input": 0.25, "output": 1.25}, + ANTHROPIC_CLAUDE_SONNET4_SHORT: {"input": 3.00, "output": 15.00}, + # OpenAI + OPENAI_GPT4O: {"input": 2.50, "output": 10.00}, + OPENAI_GPT4O_MINI: {"input": 0.15, "output": 0.60}, + OPENAI_GPT4_TURBO: {"input": 10.00, "output": 30.00}, + OPENAI_GPT35_TURBO: {"input": 0.50, "output": 1.50}, + # Google + GOOGLE_GEMINI15_PRO: {"input": 1.25, "output": 5.00}, + GOOGLE_GEMINI15_FLASH: {"input": 0.075, "output": 0.30}, + # Local (free) + LOCAL_LLAMA3: {"input": 0.0, "output": 0.0}, + LOCAL_MISTRAL: {"input": 0.0, "output": 0.0}, + LOCAL_CODELLAMA: {"input": 0.0, "output": 0.0}, +} class ModelConstants: diff --git a/autobot-backend/llm_interface_pkg/adapters/anthropic_adapter.py b/autobot-backend/llm_interface_pkg/adapters/anthropic_adapter.py index 53ca12367..ccccdfbb2 100644 --- a/autobot-backend/llm_interface_pkg/adapters/anthropic_adapter.py +++ b/autobot-backend/llm_interface_pkg/adapters/anthropic_adapter.py @@ -12,6 +12,7 @@ import time from typing import List, Optional +from constants.model_constants import ANTHROPIC_CLAUDE_OPUS4_6, ANTHROPIC_CLAUDE_SONNET4_6 from ..models import LLMRequest, LLMResponse from .base import ( AdapterBase, @@ -24,8 +25,8 @@ logger = logging.getLogger(__name__) ANTHROPIC_MODELS = [ - "claude-opus-4-6", - "claude-sonnet-4-6", + ANTHROPIC_CLAUDE_OPUS4_6, + ANTHROPIC_CLAUDE_SONNET4_6, "claude-haiku-4-5-20251001", "claude-sonnet-4-20250514", "claude-3-5-haiku-20241022", @@ -79,7 +80,7 @@ async def execute(self, request: LLMRequest) -> LLMResponse: ) kwargs = { - "model": request.model_name or "claude-sonnet-4-6", + "model": request.model_name or ANTHROPIC_CLAUDE_SONNET4_6, "max_tokens": request.max_tokens or 4096, "messages": messages, 
"temperature": request.temperature, @@ -139,7 +140,7 @@ async def test_environment(self) -> EnvironmentTestResult: try: client = self._ensure_client() resp = await client.messages.count_tokens( - model="claude-sonnet-4-6", + model=ANTHROPIC_CLAUDE_SONNET4_6, messages=[{"role": "user", "content": "test"}], ) diagnostics.append( diff --git a/autobot-backend/llm_interface_pkg/providers/openai_provider.py b/autobot-backend/llm_interface_pkg/providers/openai_provider.py index 4c07e1979..4cc7545d1 100644 --- a/autobot-backend/llm_interface_pkg/providers/openai_provider.py +++ b/autobot-backend/llm_interface_pkg/providers/openai_provider.py @@ -16,6 +16,7 @@ from opentelemetry.trace import SpanKind, Status, StatusCode from circuit_breaker import circuit_breaker_async +from constants.model_constants import OPENAI_GPT35_TURBO from ..models import LLMRequest, LLMResponse @@ -147,7 +148,7 @@ async def chat_completion(self, request: LLMRequest) -> LLMResponse: import openai client = openai.AsyncOpenAI(api_key=self.api_key) - model = request.model_name or "gpt-3.5-turbo" + model = request.model_name or OPENAI_GPT35_TURBO span_attrs = self._build_span_attributes(model, request) with _tracer.start_as_current_span( diff --git a/autobot-backend/llm_multi_provider.py b/autobot-backend/llm_multi_provider.py index 63eb1e4a0..e3d5fd9b2 100644 --- a/autobot-backend/llm_multi_provider.py +++ b/autobot-backend/llm_multi_provider.py @@ -40,6 +40,12 @@ from autobot_shared.logging_manager import get_llm_logger from autobot_shared.ssot_config import DEFAULT_LLM_MODEL +from constants.model_constants import ( + OPENAI_GPT35_TURBO, + OPENAI_GPT35_TURBO_16K, + OPENAI_GPT4, + OPENAI_GPT4_TURBO, +) load_dotenv() @@ -356,7 +362,7 @@ async def chat_completion(self, request: LLMRequest) -> LLMResponse: self._total_requests += 1 try: - model = request.model_name or self.config.default_model or "gpt-3.5-turbo" + model = request.model_name or self.config.default_model or OPENAI_GPT35_TURBO params = 
self._build_openai_params(request, model) response = await self.client.chat.completions.create(**params) return self._build_openai_success_response( @@ -387,7 +393,7 @@ async def is_available(self) -> bool: def get_available_models(self) -> List[str]: """Get available OpenAI models.""" - return ["gpt-4", "gpt-4-turbo", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"] + return [OPENAI_GPT4, OPENAI_GPT4_TURBO, OPENAI_GPT35_TURBO, OPENAI_GPT35_TURBO_16K] class MockProvider(LLMProvider): @@ -515,7 +521,7 @@ def _load_default_configs(self) -> Dict[ProviderType, ProviderConfig]: provider_type=ProviderType.OPENAI, enabled=openai_enabled and bool(openai_api_key) and OPENAI_AVAILABLE, api_key=openai_api_key, - default_model="gpt-3.5-turbo", + default_model=OPENAI_GPT35_TURBO, priority=20, ) diff --git a/autobot-backend/llm_providers/anthropic_provider.py b/autobot-backend/llm_providers/anthropic_provider.py index eb9849d3d..aac849f2c 100644 --- a/autobot-backend/llm_providers/anthropic_provider.py +++ b/autobot-backend/llm_providers/anthropic_provider.py @@ -22,6 +22,14 @@ import time from typing import Any, AsyncIterator, Dict, List, Optional +from constants.model_constants import ( + ANTHROPIC_CLAUDE3_OPUS_DATED, + ANTHROPIC_CLAUDE35_HAIKU, + ANTHROPIC_CLAUDE_HAIKU4_5, + ANTHROPIC_CLAUDE_OPUS4_6, + ANTHROPIC_CLAUDE_SONNET4, + ANTHROPIC_CLAUDE_SONNET4_6, +) from llm_interface_pkg.models import LLMRequest, LLMResponse from llm_interface_pkg.types import ProviderType @@ -30,12 +38,12 @@ logger = logging.getLogger(__name__) _ANTHROPIC_MODELS = [ - "claude-opus-4-6", - "claude-sonnet-4-6", - "claude-sonnet-4-20250514", - "claude-haiku-4-5-20251001", - "claude-3-5-haiku-20241022", - "claude-3-opus-20240229", + ANTHROPIC_CLAUDE_OPUS4_6, + ANTHROPIC_CLAUDE_SONNET4_6, + ANTHROPIC_CLAUDE_SONNET4, + ANTHROPIC_CLAUDE_HAIKU4_5, + ANTHROPIC_CLAUDE35_HAIKU, + ANTHROPIC_CLAUDE3_OPUS_DATED, ] @@ -103,7 +111,7 @@ async def chat_completion(self, request: LLMRequest) -> LLMResponse: 
self._total_requests += 1 start = time.time() model = request.model_name or self._get_setting( - "default_model", "claude-sonnet-4-6" + "default_model", ANTHROPIC_CLAUDE_SONNET4_6 ) try: client = self._ensure_client() @@ -148,7 +156,7 @@ async def stream_completion(self, request: LLMRequest) -> AsyncIterator[str]: """Stream a chat completion from Anthropic, yielding text chunks.""" self._total_requests += 1 model = request.model_name or self._get_setting( - "default_model", "claude-sonnet-4-6" + "default_model", ANTHROPIC_CLAUDE_SONNET4_6 ) try: client = self._ensure_client() diff --git a/autobot-backend/llm_providers/openai_provider.py b/autobot-backend/llm_providers/openai_provider.py index 67d6c80b3..a65ac1d10 100644 --- a/autobot-backend/llm_providers/openai_provider.py +++ b/autobot-backend/llm_providers/openai_provider.py @@ -23,6 +23,14 @@ import time from typing import Any, AsyncIterator, Dict, List, Optional +from constants.model_constants import ( + OPENAI_GPT35_TURBO, + OPENAI_GPT4, + OPENAI_GPT4O, + OPENAI_GPT4O_MINI, + OPENAI_GPT4_TURBO, + OPENAI_O1_MINI, OPENAI_O1_PREVIEW, +) from llm_interface_pkg.models import LLMRequest, LLMResponse from llm_interface_pkg.types import ProviderType @@ -31,13 +39,13 @@ logger = logging.getLogger(__name__) _OPENAI_MODELS = [ - "gpt-4o", - "gpt-4o-mini", - "gpt-4-turbo", - "gpt-4", - "gpt-3.5-turbo", - "o1-preview", - "o1-mini", + OPENAI_GPT4O, + OPENAI_GPT4O_MINI, + OPENAI_GPT4_TURBO, + OPENAI_GPT4, + OPENAI_GPT35_TURBO, + OPENAI_O1_PREVIEW, # defined in model_constants (#3528) + OPENAI_O1_MINI, ] @@ -99,7 +107,7 @@ async def chat_completion(self, request: LLMRequest) -> LLMResponse: """Execute a non-streaming chat completion via OpenAI.""" self._total_requests += 1 start = time.time() - model = request.model_name or self._get_setting("default_model", "gpt-4o-mini") + model = request.model_name or self._get_setting("default_model", OPENAI_GPT4O_MINI) try: client = self._ensure_client() params: 
Dict[str, Any] = { @@ -142,7 +150,7 @@ async def chat_completion(self, request: LLMRequest) -> LLMResponse: async def stream_completion(self, request: LLMRequest) -> AsyncIterator[str]: """Stream a chat completion from OpenAI, yielding text chunks.""" self._total_requests += 1 - model = request.model_name or self._get_setting("default_model", "gpt-4o-mini") + model = request.model_name or self._get_setting("default_model", OPENAI_GPT4O_MINI) try: client = self._ensure_client() params: Dict[str, Any] = { diff --git a/autobot-backend/models/settings.py b/autobot-backend/models/settings.py index b17a9e81e..f1c59881a 100644 --- a/autobot-backend/models/settings.py +++ b/autobot-backend/models/settings.py @@ -19,7 +19,7 @@ from pydantic import Field, field_validator from pydantic_settings import BaseSettings, SettingsConfigDict -from constants.model_constants import Models +from constants.model_constants import OPENAI_GPT35_TURBO, Models from constants.network_constants import NetworkConstants, ServiceURLs # Issue #380: Module-level tuples for validation constants @@ -52,7 +52,7 @@ class LLMSettings(BaseSettings): # OpenAI configuration (optional) openai_api_key: Optional[str] = Field(default=None, description="OpenAI API key") - openai_model: str = Field(default="gpt-3.5-turbo", description="OpenAI model") + openai_model: str = Field(default=OPENAI_GPT35_TURBO, description="OpenAI model") # HuggingFace configuration (optional) huggingface_api_key: Optional[str] = Field( diff --git a/autobot-backend/modern_ai_integration.py b/autobot-backend/modern_ai_integration.py index cbf7e3954..39f0c4ee3 100644 --- a/autobot-backend/modern_ai_integration.py +++ b/autobot-backend/modern_ai_integration.py @@ -19,6 +19,13 @@ from enum import Enum from typing import Any, Dict, List, Optional +from constants.model_constants import ( + ANTHROPIC_CLAUDE3_OPUS_DATED, + GOOGLE_GEMINI_PRO, + GOOGLE_GEMINI_PRO_VISION, + OPENAI_GPT4_TURBO_PREVIEW, + OPENAI_GPT4_VISION_PREVIEW, +) from memory 
import EnhancedMemoryManager, TaskPriority from task_execution_tracker import task_tracker from utils.service_registry import get_service_url @@ -252,7 +259,7 @@ def _build_openai_vision_response( return AIResponse( request_id=request.request_id, provider=self.config.provider, - model_name="gpt-4-vision-preview", + model_name=OPENAI_GPT4_VISION_PREVIEW, content=response.choices[0].message.content, usage={ "prompt_tokens": response.usage.prompt_tokens, @@ -289,7 +296,7 @@ async def analyze_image(self, request: AIRequest) -> AIResponse: messages.append({"role": "user", "content": content}) response = await self.client.chat.completions.create( - model="gpt-4-vision-preview", + model=OPENAI_GPT4_VISION_PREVIEW, messages=messages, max_tokens=request.max_tokens or 1000, temperature=request.temperature or self.config.temperature, @@ -399,7 +406,7 @@ def _build_anthropic_vision_response( return AIResponse( request_id=request.request_id, provider=self.config.provider, - model_name="claude-3-opus-20240229", + model_name=ANTHROPIC_CLAUDE3_OPUS_DATED, content=response.content[0].text, usage={ "prompt_tokens": response.usage.input_tokens, @@ -436,7 +443,7 @@ async def analyze_image(self, request: AIRequest) -> AIResponse: messages = [{"role": "user", "content": content}] response = await self.client.messages.create( - model="claude-3-opus-20240229", + model=ANTHROPIC_CLAUDE3_OPUS_DATED, max_tokens=request.max_tokens or 1000, temperature=request.temperature or self.config.temperature, system=request.system_message, @@ -559,7 +566,7 @@ def _build_image_analysis_response( return AIResponse( request_id=request.request_id, provider=self.config.provider, - model_name="gemini-pro-vision", + model_name=GOOGLE_GEMINI_PRO_VISION, content=response.text, usage={"prompt_tokens": 0, "completion_tokens": 0}, finish_reason="stop", @@ -585,7 +592,7 @@ async def analyze_image(self, request: AIRequest) -> AIResponse: try: start_time = time.time() - model = 
self.client.GenerativeModel("gemini-pro-vision") + model = self.client.GenerativeModel(GOOGLE_GEMINI_PRO_VISION) content = self._prepare_image_content(request) response = model.generate_content( @@ -717,7 +724,7 @@ def _create_openai_config(self) -> AIModelConfig: """Create OpenAI GPT-4V model configuration. Issue #620.""" return AIModelConfig( provider=AIProvider.OPENAI_GPT4V, - model_name="gpt-4-turbo-preview", + model_name=OPENAI_GPT4_TURBO_PREVIEW, capabilities=[ ModelCapability.TEXT_GENERATION, ModelCapability.IMAGE_ANALYSIS, @@ -742,7 +749,7 @@ def _create_anthropic_config(self) -> AIModelConfig: """Create Anthropic Claude model configuration. Issue #620.""" return AIModelConfig( provider=AIProvider.ANTHROPIC_CLAUDE, - model_name="claude-3-opus-20240229", + model_name=ANTHROPIC_CLAUDE3_OPUS_DATED, capabilities=[ ModelCapability.TEXT_GENERATION, ModelCapability.IMAGE_ANALYSIS, @@ -768,7 +775,7 @@ def _create_gemini_config(self) -> AIModelConfig: """Create Google Gemini model configuration. 
Issue #620.""" return AIModelConfig( provider=AIProvider.GOOGLE_GEMINI, - model_name="gemini-pro", + model_name=GOOGLE_GEMINI_PRO, capabilities=[ ModelCapability.TEXT_GENERATION, ModelCapability.IMAGE_ANALYSIS, diff --git a/autobot-backend/services/analytics_service.py b/autobot-backend/services/analytics_service.py index 5fa639060..c84d17883 100644 --- a/autobot-backend/services/analytics_service.py +++ b/autobot-backend/services/analytics_service.py @@ -24,6 +24,10 @@ from typing import Any, Dict, List, Optional from autobot_shared.redis_client import RedisDatabase, get_redis_client +from constants.model_constants import ( + EXPENSIVE_MODEL_MARKER_GPT4, + EXPENSIVE_MODEL_MARKER_OPUS, +) from services.agent_analytics import AgentAnalytics, get_agent_analytics from services.llm_cost_tracker import LLMCostTracker, get_cost_tracker from services.user_behavior_analytics import ( @@ -588,7 +592,7 @@ def _check_model_token_optimizations( call_count = data.get("call_count", 0) if cost > 10: model_lower = model.lower() - if "opus" in model_lower or "gpt-4" in model_lower: + if EXPENSIVE_MODEL_MARKER_OPUS in model_lower or EXPENSIVE_MODEL_MARKER_GPT4 in model_lower: opts.append( ResourceOptimization( id=f"model-substitute-{model[:20]}", diff --git a/autobot-backend/services/autoresearch/config.py b/autobot-backend/services/autoresearch/config.py index c22efd916..46763393c 100644 --- a/autobot-backend/services/autoresearch/config.py +++ b/autobot-backend/services/autoresearch/config.py @@ -13,6 +13,7 @@ from dataclasses import dataclass, field from pathlib import Path from typing import Optional +from constants.model_constants import ANTHROPIC_CLAUDE_SONNET4_6 @dataclass @@ -108,7 +109,7 @@ class AutoResearchConfig: ) meta_agent_llm_model: str = field( default_factory=lambda: os.getenv( - "AUTOBOT_META_AGENT_LLM_MODEL", "claude-sonnet-4-6" + "AUTOBOT_META_AGENT_LLM_MODEL", ANTHROPIC_CLAUDE_SONNET4_6 ) ) meta_agent_test_timeout: int = field( diff --git 
a/autobot-backend/services/llm_cost_tracker.py b/autobot-backend/services/llm_cost_tracker.py index fccbc02b5..446c142a4 100644 --- a/autobot-backend/services/llm_cost_tracker.py +++ b/autobot-backend/services/llm_cost_tracker.py @@ -23,6 +23,27 @@ from typing import Any, Dict, List, Optional from autobot_shared.redis_client import RedisDatabase, get_redis_client +from constants.model_constants import ( + ANTHROPIC_CLAUDE35_HAIKU, + ANTHROPIC_CLAUDE_HAIKU4_5, + ANTHROPIC_CLAUDE_OPUS4, + ANTHROPIC_CLAUDE_SONNET4, + DEEPSEEK_R1_API, + DEEPSEEK_V3, + GOOGLE_GEMINI20_FLASH, + GOOGLE_GEMINI25_PRO, + GOOGLE_GEMINI15_PRO, + MODEL_PRICING_PER_1M_TOKENS, + OPENAI_GPT35_TURBO, + OPENAI_GPT41, + OPENAI_GPT4O, + OPENAI_GPT4_TURBO, + OPENAI_O1, + OPENAI_O1_MINI, + OPENAI_O3, + OPENAI_O3_MINI, + OPENAI_O4_MINI, +) logger = logging.getLogger(__name__) @@ -43,63 +64,10 @@ class LLMProvider(str, Enum): LOCAL = "local" -# Model pricing per 1M tokens (USD) - Updated 2026-03 (#1961) -# Format: {"input": price_per_1M_input_tokens, "output": price_per_1M_output_tokens} -# Pricing source: provider published rates as of 2026-03. 
-MODEL_PRICING: Dict[str, Dict[str, float]] = { - # Anthropic Claude 4.x (2025-2026) - "claude-opus-4-20250514": {"input": 15.00, "output": 75.00}, - "claude-haiku-4-5-20251001": {"input": 0.80, "output": 4.00}, - # Anthropic Claude 3.x / Sonnet 4 - "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00}, - "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00}, - "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00}, - "claude-3-opus-20240229": {"input": 15.00, "output": 75.00}, - "claude-3-sonnet-20240229": {"input": 3.00, "output": 15.00}, - "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25}, - # OpenAI GPT-4.1 family (2025) - "gpt-4.1": {"input": 2.00, "output": 8.00}, - "gpt-4.1-mini": {"input": 0.40, "output": 1.60}, - "gpt-4.1-nano": {"input": 0.10, "output": 0.40}, - # OpenAI GPT-4o / GPT-4 / GPT-3.5 - "gpt-4o": {"input": 2.50, "output": 10.00}, - "gpt-4o-mini": {"input": 0.15, "output": 0.60}, - "gpt-4-turbo": {"input": 10.00, "output": 30.00}, - "gpt-4": {"input": 30.00, "output": 60.00}, - "gpt-3.5-turbo": {"input": 0.50, "output": 1.50}, - # OpenAI reasoning models - "o1": {"input": 15.00, "output": 60.00}, - "o1-mini": {"input": 3.00, "output": 12.00}, - "o3": {"input": 2.00, "output": 8.00}, - "o3-mini": {"input": 1.10, "output": 4.40}, - "o4-mini": {"input": 1.10, "output": 4.40}, - # Google Gemini 2.5 (2025-2026) - "gemini-2.5-pro": {"input": 1.25, "output": 10.00}, - "gemini-2.5-flash": {"input": 0.15, "output": 0.60}, - # Google Gemini 2.0 / 1.5 - "gemini-2.0-flash": {"input": 0.10, "output": 0.40}, - "gemini-1.5-pro": {"input": 1.25, "output": 5.00}, - "gemini-1.5-flash": {"input": 0.075, "output": 0.30}, - # DeepSeek hosted API models (2025) - "deepseek-v3": {"input": 0.27, "output": 1.10}, - "deepseek-r1-api": {"input": 0.55, "output": 2.19}, - # Local/Ollama models (free) - "llama3": {"input": 0.0, "output": 0.0}, - "llama3.1": {"input": 0.0, "output": 0.0}, - "llama3.2": {"input": 0.0, "output": 0.0}, - "llama3.3": 
{"input": 0.0, "output": 0.0}, - "mistral": {"input": 0.0, "output": 0.0}, - "mixtral": {"input": 0.0, "output": 0.0}, - "codellama": {"input": 0.0, "output": 0.0}, - "qwen2.5": {"input": 0.0, "output": 0.0}, - "qwen3": {"input": 0.0, "output": 0.0}, - "deepseek-coder": {"input": 0.0, "output": 0.0}, - "deepseek-r1": {"input": 0.0, "output": 0.0}, - "phi3": {"input": 0.0, "output": 0.0}, - "phi4": {"input": 0.0, "output": 0.0}, - "gemma2": {"input": 0.0, "output": 0.0}, - "gemma3": {"input": 0.0, "output": 0.0}, -} +# Model pricing per 1M tokens (USD) - single source of truth in +# constants/model_constants.py (#3528). Update PRICING_VERSION above when +# prices change. +MODEL_PRICING: Dict[str, Dict[str, float]] = MODEL_PRICING_PER_1M_TOKENS def _check_pricing_staleness() -> None: @@ -264,25 +232,25 @@ async def get_redis(self): # Pattern-based pricing fallbacks for unknown models (#1961). # Ordered from most specific to least specific. _FALLBACK_PATTERNS: List[tuple] = [ - ("claude-opus", "claude-opus-4-20250514"), - ("claude-sonnet", "claude-sonnet-4-20250514"), - ("claude-haiku", "claude-haiku-4-5-20251001"), - ("claude", "claude-sonnet-4-20250514"), - ("gpt-4o", "gpt-4o"), - ("gpt-4.1", "gpt-4.1"), - ("gpt-4", "gpt-4-turbo"), - ("gpt-3.5", "gpt-3.5-turbo"), - ("o1-mini", "o1-mini"), - ("o3-mini", "o3-mini"), - ("o4-mini", "o4-mini"), - ("o1", "o1"), - ("o3", "o3"), - ("gemini-2.5", "gemini-2.5-pro"), - ("gemini-2.0", "gemini-2.0-flash"), - ("gemini-1.5", "gemini-1.5-pro"), - ("gemini", "gemini-2.0-flash"), - ("deepseek-v3", "deepseek-v3"), - ("deepseek-r1", "deepseek-r1-api"), + ("claude-opus", ANTHROPIC_CLAUDE_OPUS4), + ("claude-sonnet", ANTHROPIC_CLAUDE_SONNET4), + ("claude-haiku", ANTHROPIC_CLAUDE_HAIKU4_5), + ("claude", ANTHROPIC_CLAUDE_SONNET4), + ("gpt-4o", OPENAI_GPT4O), + ("gpt-4.1", OPENAI_GPT41), + ("gpt-4", OPENAI_GPT4_TURBO), + ("gpt-3.5", OPENAI_GPT35_TURBO), + ("o1-mini", OPENAI_O1_MINI), + ("o3-mini", OPENAI_O3_MINI), + ("o4-mini", 
OPENAI_O4_MINI), + ("o1", OPENAI_O1), + ("o3", OPENAI_O3), + ("gemini-2.5", GOOGLE_GEMINI25_PRO), + ("gemini-2.0", GOOGLE_GEMINI20_FLASH), + ("gemini-1.5", GOOGLE_GEMINI15_PRO), + ("gemini", GOOGLE_GEMINI20_FLASH), + ("deepseek-v3", DEEPSEEK_V3), + ("deepseek-r1", DEEPSEEK_R1_API), ] def _estimate_pricing_by_pattern( diff --git a/autobot-backend/services/provider_health/providers.py b/autobot-backend/services/provider_health/providers.py index 472177dfe..010080a97 100644 --- a/autobot-backend/services/provider_health/providers.py +++ b/autobot-backend/services/provider_health/providers.py @@ -15,6 +15,7 @@ from autobot_shared.ssot_config import get_ollama_url +from constants.model_constants import ANTHROPIC_CLAUDE3_HAIKU_DATED from .base import BaseProviderHealth, ProviderHealthResult, ProviderStatus logger = logging.getLogger(__name__) @@ -276,7 +277,7 @@ async def check_health(self, timeout: float = 5.0) -> ProviderHealthResult: # Minimal validation payload (count_tokens is free) payload = { - "model": "claude-3-haiku-20240307", + "model": ANTHROPIC_CLAUDE3_HAIKU_DATED, "messages": [{"role": "user", "content": "test"}], } diff --git a/autobot-backend/tests/services/test_llm_cost_tracker.py b/autobot-backend/tests/services/test_llm_cost_tracker.py index 97306decd..4af614641 100644 --- a/autobot-backend/tests/services/test_llm_cost_tracker.py +++ b/autobot-backend/tests/services/test_llm_cost_tracker.py @@ -8,6 +8,38 @@ import pytest +from constants.model_constants import ( + ANTHROPIC_CLAUDE35_SONNET, + ANTHROPIC_CLAUDE_HAIKU4_5, + ANTHROPIC_CLAUDE_OPUS4, + ANTHROPIC_CLAUDE_SONNET4, + GOOGLE_GEMINI20_FLASH, + GOOGLE_GEMINI25_FLASH, + GOOGLE_GEMINI25_PRO, + LOCAL_CODELLAMA, + LOCAL_DEEPSEEK_CODER, + LOCAL_DEEPSEEK_R1, + LOCAL_GEMMA2, + LOCAL_GEMMA3, + LOCAL_LLAMA3, + LOCAL_LLAMA31, + LOCAL_LLAMA32, + LOCAL_LLAMA33, + LOCAL_MISTRAL, + LOCAL_MIXTRAL, + LOCAL_PHI3, + LOCAL_PHI4, + LOCAL_QWEN25, + LOCAL_QWEN3, + OPENAI_GPT41, + OPENAI_GPT41_MINI, + OPENAI_GPT41_NANO, + 
OPENAI_GPT4O, + OPENAI_GPT4_TURBO, + OPENAI_O3, + OPENAI_O3_MINI, + OPENAI_O4_MINI, +) from services.llm_cost_tracker import ( MODEL_PRICING, PRICING_STALENESS_DAYS, @@ -22,42 +54,42 @@ class TestModelPricingCompleteness: REQUIRED_MODELS = [ # Anthropic Claude 4.x - "claude-opus-4-20250514", - "claude-sonnet-4-20250514", - "claude-haiku-4-5-20251001", + ANTHROPIC_CLAUDE_OPUS4, + ANTHROPIC_CLAUDE_SONNET4, + ANTHROPIC_CLAUDE_HAIKU4_5, # OpenAI GPT-4.1 family - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", + OPENAI_GPT41, + OPENAI_GPT41_MINI, + OPENAI_GPT41_NANO, # OpenAI reasoning - "o3", - "o3-mini", - "o4-mini", + OPENAI_O3, + OPENAI_O3_MINI, + OPENAI_O4_MINI, # Google Gemini 2.5 - "gemini-2.5-pro", - "gemini-2.5-flash", + GOOGLE_GEMINI25_PRO, + GOOGLE_GEMINI25_FLASH, # Existing baseline models - "gpt-4o", - "claude-3-5-sonnet-20241022", - "gemini-2.0-flash", + OPENAI_GPT4O, + ANTHROPIC_CLAUDE35_SONNET, + GOOGLE_GEMINI20_FLASH, ] LOCAL_MODELS = [ - "llama3", - "llama3.1", - "llama3.2", - "llama3.3", - "mistral", - "mixtral", - "codellama", - "qwen2.5", - "qwen3", - "deepseek-coder", - "deepseek-r1", - "phi3", - "phi4", - "gemma2", - "gemma3", + LOCAL_LLAMA3, + LOCAL_LLAMA31, + LOCAL_LLAMA32, + LOCAL_LLAMA33, + LOCAL_MISTRAL, + LOCAL_MIXTRAL, + LOCAL_CODELLAMA, + LOCAL_QWEN25, + LOCAL_QWEN3, + LOCAL_DEEPSEEK_CODER, + LOCAL_DEEPSEEK_R1, + LOCAL_PHI3, + LOCAL_PHI4, + LOCAL_GEMMA2, + LOCAL_GEMMA3, ] @pytest.mark.parametrize("model", REQUIRED_MODELS) @@ -100,25 +132,27 @@ def test_paid_models_have_positive_output_price(self): def test_claude_opus_4_more_expensive_than_haiku(self): """Opus tier should cost more than Haiku tier.""" - opus = MODEL_PRICING["claude-opus-4-20250514"]["output"] - haiku = MODEL_PRICING["claude-haiku-4-5-20251001"]["output"] + opus = MODEL_PRICING[ANTHROPIC_CLAUDE_OPUS4]["output"] + haiku = MODEL_PRICING[ANTHROPIC_CLAUDE_HAIKU4_5]["output"] assert opus > haiku, "Claude Opus 4 output should cost more than Haiku 4.5" def 
test_gpt41_cheaper_than_gpt4_turbo(self): """GPT-4.1 should be cheaper than GPT-4-turbo.""" - gpt41 = MODEL_PRICING["gpt-4.1"]["input"] - turbo = MODEL_PRICING["gpt-4-turbo"]["input"] + gpt41 = MODEL_PRICING[OPENAI_GPT41]["input"] + turbo = MODEL_PRICING[OPENAI_GPT4_TURBO]["input"] assert gpt41 < turbo, "GPT-4.1 input should cost less than GPT-4-turbo" def test_o3_more_expensive_than_o3_mini(self): """o3 reasoning should cost more than o3-mini.""" - o3 = MODEL_PRICING["o3"]["input"] - o3_mini = MODEL_PRICING["o3-mini"]["input"] + o3 = MODEL_PRICING[OPENAI_O3]["input"] + o3_mini = MODEL_PRICING[OPENAI_O3_MINI]["input"] assert o3 >= o3_mini, "o3 input should cost at least as much as o3-mini" def test_deepseek_api_models_have_positive_price(self): """DeepSeek hosted API models should have a positive price.""" - for model in ("deepseek-v3", "deepseek-r1-api"): + from constants.model_constants import DEEPSEEK_R1_API, DEEPSEEK_V3 + + for model in (DEEPSEEK_V3, DEEPSEEK_R1_API): assert ( MODEL_PRICING[model]["input"] > 0 ), f"{model} should have positive input price"