34 changes: 11 additions & 23 deletions autobot-backend/api/analytics_llm_patterns.py
@@ -31,6 +31,11 @@
from redis.exceptions import RedisError

from autobot_shared.redis_client import RedisDatabase, get_redis_client
from constants.model_constants import (
    EXPENSIVE_MODEL_MARKER_GPT4,
    EXPENSIVE_MODEL_MARKER_OPUS,
    MODEL_COSTS_PER_1M_TOKENS,
    OPENAI_GPT4O,
)

# Prefix provided by analytics_routers.py registry (#1032)
router = APIRouter(tags=["llm-patterns", "analytics"])
@@ -42,7 +47,7 @@
# =============================================================================

# O(1) lookup optimization constant (Issue #326)
EXPENSIVE_MODELS = {"opus", "gpt-4"}
EXPENSIVE_MODELS = {EXPENSIVE_MODEL_MARKER_OPUS, EXPENSIVE_MODEL_MARKER_GPT4}


class PromptCategory(str, Enum):
@@ -86,26 +91,9 @@ class CostLevel(str, Enum):
}


# Model costs per 1M tokens (USD)
MODEL_COSTS = {
# Anthropic
"claude-3-opus": {"input": 15.00, "output": 75.00},
"claude-3-sonnet": {"input": 3.00, "output": 15.00},
"claude-3-haiku": {"input": 0.25, "output": 1.25},
"claude-sonnet-4": {"input": 3.00, "output": 15.00},
# OpenAI
"gpt-4o": {"input": 2.50, "output": 10.00},
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
"gpt-4-turbo": {"input": 10.00, "output": 30.00},
"gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
# Google
"gemini-1.5-pro": {"input": 1.25, "output": 5.00},
"gemini-1.5-flash": {"input": 0.075, "output": 0.30},
# Local (free)
"llama3": {"input": 0.0, "output": 0.0},
"mistral": {"input": 0.0, "output": 0.0},
"codellama": {"input": 0.0, "output": 0.0},
}
# Model costs per 1M tokens (USD) — single source of truth in
# constants/model_constants.MODEL_COSTS_PER_1M_TOKENS (#3528).
MODEL_COSTS = MODEL_COSTS_PER_1M_TOKENS

# Pattern detection rules for prompt categorization
PROMPT_PATTERNS = {
@@ -418,7 +406,7 @@ def _check_model_efficiency(

Issue #620: Extracted from analyze_prompt.
"""
if "opus" in model.lower() or "gpt-4" in model.lower():
if EXPENSIVE_MODEL_MARKER_OPUS in model.lower() or EXPENSIVE_MODEL_MARKER_GPT4 in model.lower():
if category in SIMPLE_PROMPT_CATEGORIES: # O(1) lookup (Issue #326)
recommendations.append(
"Consider using a smaller model (Haiku/GPT-3.5) for this task type"
@@ -451,7 +439,7 @@ async def analyze_prompt(
"category": category.value,
"estimated_tokens": int(token_estimate),
"estimated_cost": self._calculate_cost(
model or "gpt-4o", int(token_estimate), int(token_estimate * 1.5)
model or OPENAI_GPT4O, int(token_estimate), int(token_estimate * 1.5)
),
"issues": issues,
"recommendations": recommendations,
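The hunk above routes `_calculate_cost` through the shared MODEL_COSTS_PER_1M_TOKENS table, but the helper body itself is outside this diff. A minimal sketch of the per-1M arithmetic it presumably performs, assuming a gpt-4o fallback for unknown models (the function name and the fallback choice here are illustrative, not part of the PR):

```python
# Minimal sketch (not the PR's implementation) of a per-1M-token cost estimate.
from constants.model_constants import MODEL_COSTS_PER_1M_TOKENS, OPENAI_GPT4O


def estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate USD cost from the shared per-1M-token pricing table."""
    # Assumption: unknown models fall back to gpt-4o pricing.
    rates = MODEL_COSTS_PER_1M_TOKENS.get(model, MODEL_COSTS_PER_1M_TOKENS[OPENAI_GPT4O])
    return (input_tokens / 1_000_000) * rates["input"] + (
        output_tokens / 1_000_000
    ) * rates["output"]


# Example: 2,000 input + 3,000 output tokens on gpt-4o
# 0.002 * 2.50 + 0.003 * 10.00 = 0.005 + 0.030 = $0.035
```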
3 changes: 2 additions & 1 deletion autobot-backend/api/llm_providers.py
@@ -34,7 +34,8 @@ async def switch_llm_provider(
):
"""Switch active LLM provider at runtime.

Body: {"provider": "openai", "model": "gpt-4", "validate": true}
Body: {"provider": "openai", "model": "<model-name>", "validate": true}
See ModelConstants.DEFAULT_OPENAI_MODEL for the default OpenAI model.
"""
provider = switch_data.get("provider")
if not provider:
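The updated docstring points callers at ModelConstants.DEFAULT_OPENAI_MODEL instead of a hardcoded model name. A sketch of a request body built that way; only the payload is shown, since the route path and HTTP client are outside this diff, and the attribute name follows the docstring above:

```python
# Illustrative payload only; the route path and HTTP call are outside this diff.
from constants.model_constants import ModelConstants

payload = {
    "provider": "openai",
    "model": ModelConstants.DEFAULT_OPENAI_MODEL,  # per the docstring above
    "validate": True,
}
```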
2 changes: 1 addition & 1 deletion autobot-backend/chat_history/__init__.py
@@ -94,7 +94,7 @@ class ChatHistoryManager(
# Get messages with model-aware limits
messages = await manager.get_session_messages(
session_id=session["id"],
model_name="gpt-4"
model_name="gpt-4" # docstring example — use ModelConstants.DEFAULT_OPENAI_MODEL
)

# List all sessions
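Outside of docstring examples, call sites can pass the constant rather than the literal; a minimal sketch, assuming the same manager and session objects as the docstring above:

```python
# Sketch of the same call using the named constant instead of the literal "gpt-4".
# (Inside an async context, with manager/session as in the docstring example.)
from constants.model_constants import ModelConstants

messages = await manager.get_session_messages(
    session_id=session["id"],
    model_name=ModelConstants.DEFAULT_OPENAI_MODEL,
)
```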
autobot-backend/code_intelligence/llm_pattern_analysis/calculators.py
@@ -20,6 +20,7 @@
UsagePattern,
)
from code_intelligence.llm_pattern_analysis.types import UsagePatternType
from constants.model_constants import MODEL_PRICING_PER_1K_TOKENS, OPENAI_GPT35_TURBO

# =============================================================================
# Token Tracker
@@ -34,17 +35,9 @@ class TokenTracker:
to identify optimization opportunities.
"""

# Token cost estimates per 1K tokens (based on common pricing)
DEFAULT_COSTS = {
"gpt-4": {"prompt": 0.03, "completion": 0.06},
"gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
"gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
"claude-3-opus": {"prompt": 0.015, "completion": 0.075},
"claude-3-sonnet": {"prompt": 0.003, "completion": 0.015},
"claude-3-haiku": {"prompt": 0.00025, "completion": 0.00125},
"ollama": {"prompt": 0.0, "completion": 0.0}, # Local, no API cost
"default": {"prompt": 0.001, "completion": 0.002},
}
# Token cost estimates per 1K tokens — single source of truth in
# constants/model_constants.MODEL_PRICING_PER_1K_TOKENS (#3528).
DEFAULT_COSTS = MODEL_PRICING_PER_1K_TOKENS

def __init__(self):
"""Initialize the token tracker."""
@@ -155,18 +148,9 @@ class CostCalculator:
Provides cost projections and optimization potential analysis.
"""

# Model pricing per 1K tokens (USD)
MODEL_PRICING = {
"gpt-4": {"prompt": 0.03, "completion": 0.06},
"gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
"gpt-4o": {"prompt": 0.005, "completion": 0.015},
"gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
"claude-3-opus": {"prompt": 0.015, "completion": 0.075},
"claude-3-sonnet": {"prompt": 0.003, "completion": 0.015},
"claude-3-haiku": {"prompt": 0.00025, "completion": 0.00125},
"claude-sonnet-4": {"prompt": 0.003, "completion": 0.015},
"ollama": {"prompt": 0.0, "completion": 0.0},
}
# Model pricing per 1K tokens — single source of truth in
# constants/model_constants.MODEL_PRICING_PER_1K_TOKENS (#3528).
MODEL_PRICING = MODEL_PRICING_PER_1K_TOKENS

@classmethod
def _estimate_avg_tokens(cls, model_pats: List[UsagePattern]) -> tuple:
@@ -264,7 +248,7 @@ def estimate_costs(

for model, model_pats in model_patterns.items():
pricing = cls.MODEL_PRICING.get(
model, cls.MODEL_PRICING.get("gpt-3.5-turbo")
model, cls.MODEL_PRICING.get(OPENAI_GPT35_TURBO)
)
daily_calls = len(model_pats) * daily_call_multiplier
avg_prompt, avg_completion = cls._estimate_avg_tokens(model_pats)
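With MODEL_PRICING now aliased to MODEL_PRICING_PER_1K_TOKENS, the daily-cost math stays on a per-1K basis, and unknown models fall back to gpt-3.5-turbo pricing via the .get() chain above. A worked sketch under illustrative token counts (the real estimate_costs body continues past this hunk):

```python
# Worked per-1K example; the token counts and call volume are illustrative.
from constants.model_constants import MODEL_PRICING_PER_1K_TOKENS, OPENAI_GPT35_TURBO

model = "some-unlisted-model"
# Unknown models fall back to gpt-3.5-turbo pricing, mirroring the .get() chain above.
pricing = MODEL_PRICING_PER_1K_TOKENS.get(model, MODEL_PRICING_PER_1K_TOKENS[OPENAI_GPT35_TURBO])

avg_prompt_tokens, avg_completion_tokens = 800, 200
daily_calls = 50

daily_cost = daily_calls * (
    (avg_prompt_tokens / 1000) * pricing["prompt"]
    + (avg_completion_tokens / 1000) * pricing["completion"]
)
# 50 * (0.8 * 0.0015 + 0.2 * 0.002) = 50 * 0.0016 = $0.08 per day
```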
@@ -26,6 +26,11 @@
PromptIssueType,
UsagePatternType,
)
from constants.model_constants import (
ANTHROPIC_CLAUDE3_OPUS,
EXPENSIVE_MODEL_MARKER_GPT4,
EXPENSIVE_MODEL_MARKER_OPUS,
)

# =============================================================================
# Simple Data Classes
@@ -329,7 +334,11 @@ def get_monthly_savings(self) -> float:

def is_expensive_model(self) -> bool:
"""Check if this is an expensive model."""
expensive_markers = ["gpt-4", "opus", "claude-3-opus"]
expensive_markers = [
EXPENSIVE_MODEL_MARKER_GPT4,
EXPENSIVE_MODEL_MARKER_OPUS,
ANTHROPIC_CLAUDE3_OPUS,
]
return any(marker in self.model.lower() for marker in expensive_markers)


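Because the markers are substrings checked against model.lower(), dated and short-form identifiers both register as expensive. A quick illustration with sample inputs:

```python
# The markers are substrings, so any variant containing them is flagged.
from constants.model_constants import (
    ANTHROPIC_CLAUDE3_OPUS,
    EXPENSIVE_MODEL_MARKER_GPT4,
    EXPENSIVE_MODEL_MARKER_OPUS,
)

expensive_markers = [
    EXPENSIVE_MODEL_MARKER_GPT4,
    EXPENSIVE_MODEL_MARKER_OPUS,
    ANTHROPIC_CLAUDE3_OPUS,
]

for model in ("gpt-4o-mini", "claude-3-opus-20240229", "claude-3-haiku", "llama3"):
    print(model, any(marker in model.lower() for marker in expensive_markers))
# gpt-4o-mini True, claude-3-opus-20240229 True, claude-3-haiku False, llama3 False
```

Note that the gpt-4 marker also matches gpt-4o and gpt-4o-mini; that may be intended, but it is worth keeping in mind when reading the expensive-model recommendations.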
204 changes: 200 additions & 4 deletions autobot-backend/constants/model_constants.py
@@ -34,7 +34,7 @@
import os
from dataclasses import dataclass
from functools import lru_cache
from typing import Optional
from typing import Dict, Optional

from autobot_shared.ssot_config import CLASSIFICATION_MODEL as SSOT_CLASSIFICATION_MODEL
from autobot_shared.ssot_config import (
@@ -56,9 +56,205 @@
# Change models in autobot_shared/ssot_config.py to change the entire system.

FALLBACK_MODEL = DEFAULT_LLM_MODEL
FALLBACK_OPENAI_MODEL = "gpt-4"
FALLBACK_ANTHROPIC_MODEL = "claude-3-5-sonnet-20241022"
FALLBACK_GOOGLE_MODEL = "gemini-pro"

# =============================================================================
# EXPLICIT MODEL NAME CONSTANTS (#3528)
# =============================================================================
# Named constants for every model string used anywhere in the codebase.
# Add new entries here rather than hardcoding strings in service files.

# OpenAI — preview/reasoning aliases without dated suffix
OPENAI_O1_PREVIEW = "o1-preview"

# OpenAI — GPT-4 family
OPENAI_GPT4 = "gpt-4"
OPENAI_GPT4_TURBO = "gpt-4-turbo"
OPENAI_GPT4O = "gpt-4o"
OPENAI_GPT4O_MINI = "gpt-4o-mini"
OPENAI_GPT4_VISION_PREVIEW = "gpt-4-vision-preview"
OPENAI_GPT4_TURBO_PREVIEW = "gpt-4-turbo-preview"
# OpenAI — GPT-3.5 family
OPENAI_GPT35_TURBO = "gpt-3.5-turbo"
OPENAI_GPT35_TURBO_16K = "gpt-3.5-turbo-16k"
# OpenAI — reasoning models
OPENAI_O1 = "o1"
OPENAI_O1_MINI = "o1-mini"
OPENAI_O3 = "o3"
OPENAI_O3_MINI = "o3-mini"
OPENAI_O4_MINI = "o4-mini"
# OpenAI — GPT-4.1 family (2025)
OPENAI_GPT41 = "gpt-4.1"
OPENAI_GPT41_MINI = "gpt-4.1-mini"
OPENAI_GPT41_NANO = "gpt-4.1-nano"

# Anthropic — Claude 4.x
ANTHROPIC_CLAUDE_OPUS4 = "claude-opus-4-20250514"
ANTHROPIC_CLAUDE_HAIKU4_5 = "claude-haiku-4-5-20251001"
ANTHROPIC_CLAUDE_SONNET4 = "claude-sonnet-4-20250514"
# Anthropic — Claude 3.x
ANTHROPIC_CLAUDE35_SONNET = "claude-3-5-sonnet-20241022"
ANTHROPIC_CLAUDE35_HAIKU = "claude-3-5-haiku-20241022"
ANTHROPIC_CLAUDE3_OPUS_DATED = "claude-3-opus-20240229"
ANTHROPIC_CLAUDE3_SONNET_DATED = "claude-3-sonnet-20240229"
ANTHROPIC_CLAUDE3_HAIKU_DATED = "claude-3-haiku-20240307"
# Anthropic — short-form names used in analytics/cost matching
ANTHROPIC_CLAUDE3_OPUS = "claude-3-opus"
ANTHROPIC_CLAUDE3_SONNET = "claude-3-sonnet"
ANTHROPIC_CLAUDE3_HAIKU = "claude-3-haiku"
ANTHROPIC_CLAUDE_SONNET4_SHORT = "claude-sonnet-4"
# Anthropic — release aliases without dated suffix (latest stable pointers)
ANTHROPIC_CLAUDE_SONNET4_6 = "claude-sonnet-4-6"
ANTHROPIC_CLAUDE_OPUS4_6 = "claude-opus-4-6"

# Google — Gemini 2.5
GOOGLE_GEMINI25_PRO = "gemini-2.5-pro"
GOOGLE_GEMINI25_FLASH = "gemini-2.5-flash"
# Google — Gemini 2.0 / 1.5
GOOGLE_GEMINI20_FLASH = "gemini-2.0-flash"
GOOGLE_GEMINI15_PRO = "gemini-1.5-pro"
GOOGLE_GEMINI15_FLASH = "gemini-1.5-flash"
# Google — legacy models
GOOGLE_GEMINI_PRO = "gemini-pro" # plain base model (distinct from vision)
GOOGLE_GEMINI_PRO_VISION = "gemini-pro-vision"

# DeepSeek hosted API
DEEPSEEK_V3 = "deepseek-v3"
DEEPSEEK_R1_API = "deepseek-r1-api"

# Local / Ollama free models
LOCAL_LLAMA3 = "llama3"
LOCAL_LLAMA31 = "llama3.1"
LOCAL_LLAMA32 = "llama3.2"
LOCAL_LLAMA33 = "llama3.3"
LOCAL_MISTRAL = "mistral"
LOCAL_MIXTRAL = "mixtral"
LOCAL_CODELLAMA = "codellama"
LOCAL_QWEN25 = "qwen2.5"
LOCAL_QWEN3 = "qwen3"
LOCAL_DEEPSEEK_CODER = "deepseek-coder"
LOCAL_DEEPSEEK_R1 = "deepseek-r1"
LOCAL_PHI3 = "phi3"
LOCAL_PHI4 = "phi4"
LOCAL_GEMMA2 = "gemma2"
LOCAL_GEMMA3 = "gemma3"

# Substring markers used by cost/efficiency heuristics (#3528)
# These are substrings matched with ``in model.lower()``, not full model IDs.
EXPENSIVE_MODEL_MARKER_OPUS = "opus"
EXPENSIVE_MODEL_MARKER_GPT4 = "gpt-4"

# Fallback model aliases — defined after constants to reference them directly
FALLBACK_OPENAI_MODEL = OPENAI_GPT4
FALLBACK_ANTHROPIC_MODEL = ANTHROPIC_CLAUDE35_SONNET
FALLBACK_GOOGLE_MODEL = GOOGLE_GEMINI_PRO

# =============================================================================
# MODEL_PRICING — SINGLE SOURCE OF TRUTH (#3528)
# =============================================================================
# Two formats are needed by different consumers; keeping both here means pricing edits happen in one file.
#
# MODEL_PRICING_PER_1M_TOKENS — USD per 1 million tokens (llm_cost_tracker)
# keys: "input", "output"
#
# MODEL_PRICING_PER_1K_TOKENS — USD per 1 thousand tokens (calculators.py,
# CostCalculator) keys: "prompt", "completion"
#
# Pricing source: provider published rates as of 2026-03.
# Update PRICING_VERSION in llm_cost_tracker.py when editing these tables.

MODEL_PRICING_PER_1M_TOKENS: Dict[str, Dict[str, float]] = {
# Anthropic Claude 4.x (2025-2026)
ANTHROPIC_CLAUDE_OPUS4: {"input": 15.00, "output": 75.00},
ANTHROPIC_CLAUDE_HAIKU4_5: {"input": 0.80, "output": 4.00},
# Anthropic Claude 3.x / Sonnet 4
ANTHROPIC_CLAUDE_SONNET4: {"input": 3.00, "output": 15.00},
ANTHROPIC_CLAUDE35_SONNET: {"input": 3.00, "output": 15.00},
ANTHROPIC_CLAUDE35_HAIKU: {"input": 0.80, "output": 4.00},
ANTHROPIC_CLAUDE3_OPUS_DATED: {"input": 15.00, "output": 75.00},
ANTHROPIC_CLAUDE3_SONNET_DATED: {"input": 3.00, "output": 15.00},
ANTHROPIC_CLAUDE3_HAIKU_DATED: {"input": 0.25, "output": 1.25},
# OpenAI GPT-4.1 family (2025)
OPENAI_GPT41: {"input": 2.00, "output": 8.00},
OPENAI_GPT41_MINI: {"input": 0.40, "output": 1.60},
OPENAI_GPT41_NANO: {"input": 0.10, "output": 0.40},
# OpenAI GPT-4o / GPT-4 / GPT-3.5
OPENAI_GPT4O: {"input": 2.50, "output": 10.00},
OPENAI_GPT4O_MINI: {"input": 0.15, "output": 0.60},
OPENAI_GPT4_TURBO: {"input": 10.00, "output": 30.00},
OPENAI_GPT4: {"input": 30.00, "output": 60.00},
OPENAI_GPT35_TURBO: {"input": 0.50, "output": 1.50},
# OpenAI reasoning models
OPENAI_O1: {"input": 15.00, "output": 60.00},
OPENAI_O1_MINI: {"input": 3.00, "output": 12.00},
OPENAI_O3: {"input": 2.00, "output": 8.00},
OPENAI_O3_MINI: {"input": 1.10, "output": 4.40},
OPENAI_O4_MINI: {"input": 1.10, "output": 4.40},
# Google Gemini 2.5 (2025-2026)
GOOGLE_GEMINI25_PRO: {"input": 1.25, "output": 10.00},
GOOGLE_GEMINI25_FLASH: {"input": 0.15, "output": 0.60},
# Google Gemini 2.0 / 1.5
GOOGLE_GEMINI20_FLASH: {"input": 0.10, "output": 0.40},
GOOGLE_GEMINI15_PRO: {"input": 1.25, "output": 5.00},
GOOGLE_GEMINI15_FLASH: {"input": 0.075, "output": 0.30},
# DeepSeek hosted API models (2025)
DEEPSEEK_V3: {"input": 0.27, "output": 1.10},
DEEPSEEK_R1_API: {"input": 0.55, "output": 2.19},
# Local/Ollama models (free)
LOCAL_LLAMA3: {"input": 0.0, "output": 0.0},
LOCAL_LLAMA31: {"input": 0.0, "output": 0.0},
LOCAL_LLAMA32: {"input": 0.0, "output": 0.0},
LOCAL_LLAMA33: {"input": 0.0, "output": 0.0},
LOCAL_MISTRAL: {"input": 0.0, "output": 0.0},
LOCAL_MIXTRAL: {"input": 0.0, "output": 0.0},
LOCAL_CODELLAMA: {"input": 0.0, "output": 0.0},
LOCAL_QWEN25: {"input": 0.0, "output": 0.0},
LOCAL_QWEN3: {"input": 0.0, "output": 0.0},
LOCAL_DEEPSEEK_CODER: {"input": 0.0, "output": 0.0},
LOCAL_DEEPSEEK_R1: {"input": 0.0, "output": 0.0},
LOCAL_PHI3: {"input": 0.0, "output": 0.0},
LOCAL_PHI4: {"input": 0.0, "output": 0.0},
LOCAL_GEMMA2: {"input": 0.0, "output": 0.0},
LOCAL_GEMMA3: {"input": 0.0, "output": 0.0},
}

# Per-1K token pricing used by TokenTracker / CostCalculator in
# code_intelligence/llm_pattern_analysis/calculators.py (#3528).
# Values carry over the legacy DEFAULT_COSTS / MODEL_PRICING tables verbatim,
# so a few entries (e.g. gpt-4o, gpt-3.5-turbo) do not equal the corresponding
# MODEL_PRICING_PER_1M_TOKENS figures divided by 1000.
MODEL_PRICING_PER_1K_TOKENS: Dict[str, Dict[str, float]] = {
OPENAI_GPT4: {"prompt": 0.03, "completion": 0.06},
OPENAI_GPT4_TURBO: {"prompt": 0.01, "completion": 0.03},
OPENAI_GPT4O: {"prompt": 0.005, "completion": 0.015},
OPENAI_GPT35_TURBO: {"prompt": 0.0015, "completion": 0.002},
ANTHROPIC_CLAUDE3_OPUS: {"prompt": 0.015, "completion": 0.075},
ANTHROPIC_CLAUDE3_SONNET: {"prompt": 0.003, "completion": 0.015},
ANTHROPIC_CLAUDE3_HAIKU: {"prompt": 0.00025, "completion": 0.00125},
ANTHROPIC_CLAUDE_SONNET4_SHORT: {"prompt": 0.003, "completion": 0.015},
"ollama": {"prompt": 0.0, "completion": 0.0}, # Local, no API cost
"default": {"prompt": 0.001, "completion": 0.002},
}

# Per-1M token cost table used by analytics_llm_patterns.py (#3528).
# Keys use short-form names to match partial model identifiers submitted by
# clients (e.g. "claude-3-opus" instead of the full dated variant).
MODEL_COSTS_PER_1M_TOKENS: Dict[str, Dict[str, float]] = {
# Anthropic (short-form names for analytics matching)
ANTHROPIC_CLAUDE3_OPUS: {"input": 15.00, "output": 75.00},
ANTHROPIC_CLAUDE3_SONNET: {"input": 3.00, "output": 15.00},
ANTHROPIC_CLAUDE3_HAIKU: {"input": 0.25, "output": 1.25},
ANTHROPIC_CLAUDE_SONNET4_SHORT: {"input": 3.00, "output": 15.00},
# OpenAI
OPENAI_GPT4O: {"input": 2.50, "output": 10.00},
OPENAI_GPT4O_MINI: {"input": 0.15, "output": 0.60},
OPENAI_GPT4_TURBO: {"input": 10.00, "output": 30.00},
OPENAI_GPT35_TURBO: {"input": 0.50, "output": 1.50},
# Google
GOOGLE_GEMINI15_PRO: {"input": 1.25, "output": 5.00},
GOOGLE_GEMINI15_FLASH: {"input": 0.075, "output": 0.30},
# Local (free)
LOCAL_LLAMA3: {"input": 0.0, "output": 0.0},
LOCAL_MISTRAL: {"input": 0.0, "output": 0.0},
LOCAL_CODELLAMA: {"input": 0.0, "output": 0.0},
}


class ModelConstants:
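The comment above says MODEL_COSTS_PER_1M_TOKENS keys are short-form names meant to match partial model identifiers sent by clients, but the matching logic itself is not part of this diff. A hypothetical resolver sketch, assuming an exact lookup first and then a longest-prefix match (the function name and strategy are illustrative only):

```python
# Hypothetical resolver; not part of this PR. Exact key first, then longest-prefix match.
from typing import Dict, Optional

from constants.model_constants import MODEL_COSTS_PER_1M_TOKENS


def resolve_cost_entry(model_id: str) -> Optional[Dict[str, float]]:
    """Map a client-supplied model identifier onto a short-form pricing key."""
    table = MODEL_COSTS_PER_1M_TOKENS
    if model_id in table:
        return table[model_id]
    lowered = model_id.lower()
    # Prefer the most specific (longest) short-form key, e.g. gpt-4o-mini over gpt-4o.
    for key in sorted(table, key=len, reverse=True):
        if lowered.startswith(key):
            return table[key]
    return None


# "claude-3-opus-20240229" resolves to the "claude-3-opus" entry,
# "gpt-4o-2024-08-06" resolves to the "gpt-4o" entry.
```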