diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..0ba7c789a --- /dev/null +++ b/.env.example @@ -0,0 +1,85 @@ +# ============================================================================= +# LLM Council - Environment Configuration +# ============================================================================= +# Copy this file to .env and configure your desired providers +# You can mix providers freely - use cloud, local, or both! + +# ============================================================================= +# Provider Selection +# ============================================================================= + +# Default provider when model identifier has no prefix +# Options: "openrouter", "ollama", "lmstudio" +# Example: If DEFAULT_PROVIDER=openrouter, then "gpt-4" means "openrouter:gpt-4" +DEFAULT_PROVIDER=openrouter + +# ============================================================================= +# OpenRouter Configuration (Cloud Models) +# ============================================================================= + +# Get your API key from: https://openrouter.ai/keys +# Required if using any OpenRouter models (e.g., GPT-4, Claude, Gemini) +OPENROUTER_API_KEY=your_openrouter_api_key_here + +# ============================================================================= +# Ollama Configuration (Local Models) +# ============================================================================= + +# Ollama server URL +# Default: http://localhost:11434 +# Only change if running Ollama on a different host/port +OLLAMA_BASE_URL=http://localhost:11434 + +# Installation: https://ollama.ai/ +# Download models: ollama pull llama2, ollama pull mistral, etc. + +# ============================================================================= +# LMStudio Configuration (Local Models) +# ============================================================================= + +# LMStudio server URL (OpenAI-compatible API) +# Default: http://localhost:1234/v1/chat/completions +# Only change if using a different port +LMSTUDIO_BASE_URL=http://localhost:1234/v1/chat/completions + +# Installation: https://lmstudio.ai/ +# Make sure to start the local server in LMStudio before using + +# ============================================================================= +# Usage Examples +# ============================================================================= + +# Example 1: Cloud-only setup (OpenRouter) +# ---------------------------------------- +# DEFAULT_PROVIDER=openrouter +# OPENROUTER_API_KEY=sk-or-v1-... +# +# In config.py: +# COUNCIL_MODELS = ["openai/gpt-4", "anthropic/claude-3-sonnet", "google/gemini-pro"] + +# Example 2: Local-only setup (Ollama) +# ------------------------------------ +# DEFAULT_PROVIDER=ollama +# OLLAMA_BASE_URL=http://localhost:11434 +# +# In config.py: +# COUNCIL_MODELS = ["ollama:llama2", "ollama:mistral", "ollama:codellama"] + +# Example 3: Mixed setup (Cloud + Local) +# -------------------------------------- +# DEFAULT_PROVIDER=openrouter +# OPENROUTER_API_KEY=sk-or-v1-... 
+# OLLAMA_BASE_URL=http://localhost:11434 +# +# In config.py: +# COUNCIL_MODELS = ["ollama:llama2", "openrouter:gpt-4", "lmstudio:mistral"] +# CHAIRMAN_MODEL = "openrouter:gpt-4" # Use cloud for synthesis + +# Example 4: Privacy-focused (100% Local) +# --------------------------------------- +# DEFAULT_PROVIDER=ollama +# OLLAMA_BASE_URL=http://localhost:11434 +# +# In config.py: +# COUNCIL_MODELS = ["llama2", "mistral", "codellama"] # Uses DEFAULT_PROVIDER +# CHAIRMAN_MODEL = "mistral" diff --git a/.gitignore b/.gitignore index 4c2041a54..439f7645b 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,8 @@ data/ # Frontend frontend/node_modules/ frontend/dist/ -frontend/.vite/ \ No newline at end of file +frontend/.vite/ + +# IDE +.idea/ +.vscode/ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index b803720fa..44d2f0feb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,31 +8,95 @@ LLM Council is a 3-stage deliberation system where multiple LLMs collaboratively ## Architecture +### Provider Abstraction System (NEW!) + +**Multi-Provider Support** +LLM Council now supports three providers out of the box: +- **OpenRouter**: Cloud models (GPT-4, Claude, Gemini, etc.) +- **Ollama**: Local models (Llama2, Mistral, CodeLlama, etc.) +- **LMStudio**: Local models via OpenAI-compatible API + +**Model Identifier Format** +Models use a flexible identifier format: +- **Prefixed**: `provider:model` (e.g., `ollama:llama2`, `openrouter:gpt-4`) +- **Simple**: `model` (uses `DEFAULT_PROVIDER` from config) + +This enables: +- **Cloud-only**: All models via OpenRouter (requires API key, pay-per-use) +- **Local-only**: All models via Ollama/LMStudio (100% private, free) +- **Mixed mode**: Combine providers (e.g., local council + cloud chairman) + +**Provider Architecture (`backend/providers/`)** + +`base.py` - Abstract base class +- Defines `Provider` interface with `query()` and `query_batch()` methods +- All providers inherit from this base class +- Enforces consistent API across providers + +`openrouter.py` - OpenRouterProvider +- Cloud provider using OpenRouter API +- Requires API key from environment +- Supports all OpenRouter models (GPT, Claude, Gemini, etc.) + +`ollama.py` - OllamaProvider +- Local provider using Ollama's native API +- Uses ollama library's `AsyncClient.chat()` method +- Default URL: http://localhost:11434 + +`lmstudio.py` - LMStudioProvider +- Local provider using OpenAI-compatible API +- No authentication required +- Default URL: http://localhost:1234/v1/chat/completions + +`factory.py` - ProviderFactory +- Central routing system for model requests +- Parses model identifiers and routes to appropriate provider +- Handles parallel queries across multiple providers +- Caches provider instances for efficiency +- Validates provider configuration on initialization + +**How It Works** +1. `council.py` creates ProviderFactory singleton with config +2. Model identifiers like `["ollama:llama2", "openrouter:gpt-4"]` are passed to factory +3. Factory parses each identifier: `("ollama", "llama2")`, `("openrouter", "gpt-4")` +4. Factory routes to appropriate provider and executes query +5. Results returned with original model identifier as key + +**Configuration** +See `.env.example` for complete setup instructions. 
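+For example, a mixed cloud/local setup might use a `.env` like this (placeholder values, mirroring `.env.example`):
+```bash
+DEFAULT_PROVIDER=openrouter
+OPENROUTER_API_KEY=your_openrouter_api_key_here
+OLLAMA_BASE_URL=http://localhost:11434
+LMSTUDIO_BASE_URL=http://localhost:1234/v1/chat/completions
+```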
Key variables: +- `DEFAULT_PROVIDER`: Provider for unprefixed model names +- `OPENROUTER_API_KEY`: Required for OpenRouter models +- `OLLAMA_BASE_URL`: Ollama server URL (default: localhost:11434) +- `LMSTUDIO_BASE_URL`: LMStudio server URL (default: localhost:1234) + ### Backend Structure (`backend/`) **`config.py`** -- Contains `COUNCIL_MODELS` (list of OpenRouter model identifiers) +- Contains `COUNCIL_MODELS` (list of model identifiers, any provider) - Contains `CHAIRMAN_MODEL` (model that synthesizes final answer) -- Uses environment variable `OPENROUTER_API_KEY` from `.env` +- Contains `CONVERSATION_TITLE_MODEL` (fast model for title generation) +- Contains provider configuration (URLs, API keys via environment) - Backend runs on **port 8001** (NOT 8000 - user had another app on 8000) - -**`openrouter.py`** -- `query_model()`: Single async model query -- `query_models_parallel()`: Parallel queries using `asyncio.gather()` -- Returns dict with 'content' and optional 'reasoning_details' -- Graceful degradation: returns None on failure, continues with successful responses +- **Model identifier examples**: + - Cloud: `"openai/gpt-4"`, `"anthropic/claude-sonnet-4"` + - Local: `"ollama:llama2"`, `"lmstudio:mistral"` + - Mixed: `["ollama:llama2", "openrouter:gpt-4"]` **`council.py`** - The Core Logic -- `stage1_collect_responses()`: Parallel queries to all council models +- `get_factory()`: Creates/returns ProviderFactory singleton with config +- `stage1_collect_responses()`: Parallel queries to all council models via factory - `stage2_collect_rankings()`: - Anonymizes responses as "Response A, B, C, etc." - Creates `label_to_model` mapping for de-anonymization - Prompts models to evaluate and rank (with strict format requirements) - Returns tuple: (rankings_list, label_to_model_dict) - Each ranking includes both raw text and `parsed_ranking` list + - Uses same council models via factory (supports mixed providers) - `stage3_synthesize_final()`: Chairman synthesizes from all responses + rankings - `parse_ranking_from_text()`: Extracts "FINAL RANKING:" section, handles both numbered lists and plain format - `calculate_aggregate_rankings()`: Computes average rank position across all peer evaluations +- `generate_conversation_title()`: Fast title generation for conversations +- All model queries go through ProviderFactory for automatic routing **`storage.py`** - JSON-based conversation storage in `data/conversations/` @@ -143,24 +207,73 @@ Models are hardcoded in `backend/config.py`. Chairman can be same or different f ## Testing Notes -Use `test_openrouter.py` to verify API connectivity and test different model identifiers before adding to council. The script tests both streaming and non-streaming modes. +**Provider Testing** +Before configuring models in production: + +1. **OpenRouter**: Verify API key and model availability + - Check models at https://openrouter.ai/models + - Test with simple query first + +2. **Ollama**: Ensure server is running and models are pulled + ```bash + ollama serve # Start server + ollama pull llama2 # Download model + ollama list # Verify installed models + curl http://localhost:11434/api/tags # Check API + ``` + +3. **LMStudio**: Start local server and load model + - Open LMStudio → Developer → Start Server + - Verify server at http://localhost:1234 + +**Factory Validation** +The ProviderFactory has built-in validation: +```python +from backend.providers import ProviderFactory +factory = ProviderFactory(...) 
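+# The "..." above stands for the same keyword arguments that backend/council.py
+# passes in get_factory(): default_provider, openrouter_api_key, ollama_base_url,
+# and lmstudio_base_url.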
+print(factory.validate_all()) # Check all providers +print(factory.get_available_providers()) # List working providers +``` + +**Testing Mixed Mode** +Set up a test configuration with mixed providers: +```python +COUNCIL_MODELS = [ + "ollama:llama2", # Local + "openrouter:gpt-4o", # Cloud +] +``` +Verify all providers respond correctly before production use. ## Data Flow Summary ``` User Query ↓ -Stage 1: Parallel queries → [individual responses] +ProviderFactory initialization (singleton) + ├─ OpenRouterProvider (if configured) + ├─ OllamaProvider (if configured) + └─ LMStudioProvider (if configured) + ↓ +Stage 1: Parse model identifiers → Route to providers → Parallel queries + → [individual responses with original model IDs] ↓ -Stage 2: Anonymize → Parallel ranking queries → [evaluations + parsed rankings] +Stage 2: Anonymize → Route to providers → Parallel ranking queries + → [evaluations + parsed rankings] ↓ Aggregate Rankings Calculation → [sorted by avg position] ↓ -Stage 3: Chairman synthesis with full context +Stage 3: Route chairman model to provider → Synthesis with full context + → [final answer] ↓ Return: {stage1, stage2, stage3, metadata} ↓ Frontend: Display with tabs + validation UI ``` -The entire flow is async/parallel where possible to minimize latency. +**Key Points:** +- All model queries go through ProviderFactory +- Factory automatically routes based on model identifier prefix +- Parallel execution happens both within and across providers +- Each stage can use models from different providers (mixed mode) +- The entire flow is async/parallel where possible to minimize latency diff --git a/SETUP.md b/SETUP.md new file mode 100644 index 000000000..437537acc --- /dev/null +++ b/SETUP.md @@ -0,0 +1,371 @@ +# LLM Council - Setup Guide + +Complete guide for setting up LLM Council with OpenRouter, Ollama, or LMStudio. + +## Quick Start + +### 1. Install Dependencies + +```bash +# Using uv (recommended) +uv pip install -r requirements.txt + +# Or using pip +pip install -r requirements.txt +``` + +### 2. Configure Providers + +Copy the example environment file: + +```bash +cp .env.example .env +``` + +Edit `.env` and configure your desired providers (see Configuration Options below). + +### 3. Configure Models + +Edit `backend/config.py` and set your council models: + +```python +# Example: All cloud models (OpenRouter) +COUNCIL_MODELS = [ + "openai/gpt-4o", + "anthropic/claude-sonnet-4", + "google/gemini-2.0-flash-exp", +] +CHAIRMAN_MODEL = "google/gemini-2.0-flash-exp" +``` + +### 4. Start the Backend + +```bash +python -m backend.main +``` + +The API will be available at http://localhost:8001 + +### 5. 
Start the Frontend + +```bash +cd frontend +npm install +npm run dev +``` + +The UI will be available at http://localhost:5173 + +--- + +## Configuration Options + +### Option 1: Cloud Only (OpenRouter) + +**Best for:** Access to cutting-edge models, no local setup required + +**Requirements:** +- OpenRouter API key (get one at https://openrouter.ai/keys) + +**Configuration:** + +`.env`: +```bash +DEFAULT_PROVIDER=openrouter +OPENROUTER_API_KEY=your_api_key_here +``` + +`backend/config.py`: +```python +COUNCIL_MODELS = [ + "openai/gpt-4o", + "anthropic/claude-sonnet-4", + "google/gemini-2.0-flash-exp", + "x-ai/grok-3", +] +CHAIRMAN_MODEL = "google/gemini-2.0-flash-exp" +``` + +**Pros:** +- Latest frontier models +- No local GPU required +- Faster inference + +**Cons:** +- Costs per token +- Requires internet +- Data sent to third parties + +--- + +### Option 2: Local Only (Ollama) + +**Best for:** Privacy, no API costs, offline usage + +**Requirements:** +- Ollama installed (https://ollama.ai/) +- Sufficient RAM/GPU for models + +**Setup:** + +1. Install Ollama: + ```bash + # macOS/Linux + curl https://ollama.ai/install.sh | sh + + # Or download from https://ollama.ai/ + ``` + +2. Pull models: + ```bash + ollama pull llama2:13b-chat + ollama pull mistral:7b-instruct + ollama pull codellama:34b-instruct + ``` + +3. Start Ollama server: + ```bash + ollama serve + ``` + +**Configuration:** + +`.env`: +```bash +DEFAULT_PROVIDER=ollama +OLLAMA_BASE_URL=http://localhost:11434 +``` + +`backend/config.py`: +```python +COUNCIL_MODELS = [ + "ollama:llama2:13b-chat", + "ollama:mistral:7b-instruct", + "ollama:codellama:34b-instruct", +] +CHAIRMAN_MODEL = "ollama:mistral:7b-instruct" +``` + +**Pros:** +- 100% private +- No API costs +- Works offline +- Full control + +**Cons:** +- Requires powerful hardware +- Slower inference than cloud +- Limited to open-source models + +--- + +### Option 3: Local Only (LMStudio) + +**Best for:** GUI-based local model management, privacy + +**Requirements:** +- LMStudio installed (https://lmstudio.ai/) +- Downloaded models + +**Setup:** + +1. Install LMStudio from https://lmstudio.ai/ + +2. Download models in LMStudio UI + +3. 
Start the local server: + - Open LMStudio + - Go to Developer tab + - Click "Start Server" + - Server runs at http://localhost:1234 + +**Configuration:** + +`.env`: +```bash +DEFAULT_PROVIDER=lmstudio +LMSTUDIO_BASE_URL=http://localhost:1234/v1/chat/completions +``` + +`backend/config.py`: +```python +COUNCIL_MODELS = [ + "lmstudio:mistral-7b", + "lmstudio:llama2-13b", +] +CHAIRMAN_MODEL = "lmstudio:mistral-7b" +``` + +--- + +### Option 4: Mixed Mode (Recommended) + +**Best for:** Balance of quality, cost, and privacy + +**Use case:** Use local models for council deliberation, cloud for synthesis + +**Configuration:** + +`.env`: +```bash +DEFAULT_PROVIDER=openrouter +OPENROUTER_API_KEY=your_api_key_here +OLLAMA_BASE_URL=http://localhost:11434 +``` + +`backend/config.py`: +```python +COUNCIL_MODELS = [ + # Local models for cost-effective deliberation + "ollama:llama2:13b-chat", + "ollama:mistral:7b-instruct", + # High-quality cloud model for comparison + "openrouter:gpt-4o", +] +# Use powerful cloud model for final synthesis +CHAIRMAN_MODEL = "openrouter:anthropic/claude-sonnet-4" +``` + +**Benefits:** +- Reduce API costs (local models for Stage 1 & 2) +- Use best cloud model for final answer (Stage 3) +- Compare local vs cloud performance + +--- + +## Model Identifier Format + +LLM Council supports flexible model identifiers: + +### Prefixed Format (Explicit Provider) +```python +"ollama:llama2" # Ollama provider +"openrouter:gpt-4" # OpenRouter provider +"lmstudio:mistral" # LMStudio provider +``` + +### Simple Format (Uses DEFAULT_PROVIDER) +```python +# If DEFAULT_PROVIDER=openrouter: +"gpt-4" # → openrouter:gpt-4 +"anthropic/claude-3" # → openrouter:anthropic/claude-3 + +# If DEFAULT_PROVIDER=ollama: +"llama2" # → ollama:llama2 +``` + +--- + +## Testing Your Setup + +Run the provider test script: + +```bash +python test_providers.py +``` + +This verifies: +- ✓ Model identifier parsing +- ✓ Provider validation +- ✓ Provider routing + +--- + +## Troubleshooting + +### "Provider validation failed" + +**OpenRouter:** +- Check `OPENROUTER_API_KEY` is set in `.env` +- Verify key is valid at https://openrouter.ai/keys + +**Ollama:** +- Check server is running: `curl http://localhost:11434/api/tags` +- Verify models are installed: `ollama list` +- Try: `ollama serve` to start server + +**LMStudio:** +- Ensure local server is started in LMStudio +- Check server URL: http://localhost:1234 +- Verify a model is loaded + +### "All models failed to respond" + +Check configuration: +```python +from backend.providers import ProviderFactory +from backend.config import * + +factory = ProviderFactory( + default_provider=DEFAULT_PROVIDER, + openrouter_api_key=OPENROUTER_API_KEY, + ollama_base_url=OLLAMA_BASE_URL, + lmstudio_base_url=LMSTUDIO_BASE_URL +) + +print(factory.validate_all()) +print(factory.get_available_providers()) +``` + +### "Module not found" errors + +Install all dependencies: +```bash +pip install fastapi uvicorn[standard] python-dotenv httpx pydantic ollama +``` + +--- + +## Performance Tips + +1. **Local Models:** + - Use quantized models (Q4_K_M, Q5_K_M) for better speed + - Smaller models (7B) for council, larger (13B+) for chairman + - Close other GPU-intensive applications + +2. **Cloud Models:** + - Use fast models for title generation (e.g., gemini-flash) + - Consider rate limits for parallel requests + - Monitor costs with OpenRouter dashboard + +3. 
**Mixed Mode:** + - Local: Stage 1 & 2 (3-4 models, many requests) + - Cloud: Stage 3 (1 model, 1 request per query) + - Optimal cost/quality balance + +--- + +## Advanced Configuration + +### Custom Ollama Port +```bash +# In .env +OLLAMA_BASE_URL=http://localhost:11435 +``` + +### Custom LMStudio Port +```bash +# In .env +LMSTUDIO_BASE_URL=http://localhost:8080/v1/chat/completions +``` + +### Multiple Chairman Candidates +You can use different models for different purposes: +```python +CHAIRMAN_MODEL = "openrouter:gpt-4o" +CONVERSATION_TITLE_MODEL = "ollama:mistral" # Fast local model for titles +``` + +--- + +## Next Steps + +1. Configure your preferred setup (cloud/local/mixed) +2. Test with `python test_providers.py` +3. Start backend and frontend +4. Try your first query! + +For more details, see: +- `CLAUDE.md` - Technical architecture +- `.env.example` - Configuration examples +- `backend/config.py` - Model configuration diff --git a/backend/config.py b/backend/config.py index a9cf7c473..b98e2de54 100644 --- a/backend/config.py +++ b/backend/config.py @@ -5,22 +5,97 @@ load_dotenv() -# OpenRouter API key +# ============================================================================ +# Provider Configuration +# ============================================================================ + +# Default provider when model identifier has no prefix (e.g., "gpt-4" instead of "openrouter:gpt-4") +# Options: "openrouter", "ollama", "lmstudio" +DEFAULT_PROVIDER = os.getenv("DEFAULT_PROVIDER", "openrouter") + +# OpenRouter API configuration OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") -# Council members - list of OpenRouter model identifiers +# Ollama server configuration +OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") + +# LMStudio server configuration +LMSTUDIO_BASE_URL = os.getenv("LMSTUDIO_BASE_URL", "http://localhost:1234/v1/chat/completions") + +# ============================================================================ +# Council Configuration +# ============================================================================ + +# COUNCIL_MODELS: List of models that will participate in the council +# +# Model identifier format: +# - Prefixed: "provider:model" (e.g., "ollama:llama2", "openrouter:gpt-4") +# - Simple: "model" (uses DEFAULT_PROVIDER) +# +# Examples: +# - All OpenRouter: ["openai/gpt-4", "anthropic/claude-3-sonnet"] +# - All Ollama: ["ollama:llama2", "ollama:mistral"] +# - Mixed: ["ollama:llama2", "openrouter:gpt-4", "lmstudio:mistral"] +# +# You can mix providers freely for maximum flexibility! 
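+#
+# Note: ProviderFactory splits identifiers on the first ":", so Ollama tags that
+# themselves contain a colon (e.g. "llama2:13b-chat") should keep the explicit
+# "ollama:" prefix; an unprefixed "llama2:13b-chat" would be parsed as an unknown
+# provider named "llama2".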
+ COUNCIL_MODELS = [ - "openai/gpt-5.1", - "google/gemini-3-pro-preview", - "anthropic/claude-sonnet-4.5", - "x-ai/grok-4", + # Example: All cloud models (requires OPENROUTER_API_KEY) + "openai/gpt-4o", + "google/gemini-2.0-flash-exp", + "anthropic/claude-sonnet-4", + "x-ai/grok-3", + + # Example: All local models (requires Ollama running) + # "ollama:llama2:13b-chat", + # "ollama:mistral:7b-instruct", + # "ollama:codellama:34b-instruct", + + # Example: Mixed mode + # "ollama:llama2:13b-chat", + # "openrouter:gpt-4o", + # "lmstudio:mistral", ] -# Chairman model - synthesizes final response -CHAIRMAN_MODEL = "google/gemini-3-pro-preview" +# CHAIRMAN_MODEL: Model that synthesizes the final response +# Can be from any provider, independent of council models +CHAIRMAN_MODEL = "google/gemini-2.0-flash-exp" -# OpenRouter API endpoint +# CONVERSATION_TITLE_MODEL: Fast model for generating conversation titles +# Recommend using a fast/cheap model for this task +CONVERSATION_TITLE_MODEL = "google/gemini-2.0-flash-exp" + +# ============================================================================ +# Legacy Configuration (for backward compatibility) +# ============================================================================ +# These are kept for reference but not used in the new provider-based system + +# Legacy cloud models +COUNCIL_CLOUD_MODELS = [ + "openai/gpt-4o", + "google/gemini-2.0-flash-exp", + "anthropic/claude-sonnet-4", + "x-ai/grok-3", +] +CHAIRMAN_CLOUD_MODEL = "google/gemini-2.0-flash-exp" +CONVERSATION_CLOUD_MODEL = "google/gemini-2.0-flash-exp" + +# Legacy local models +COUNCIL_LOCAL_MODELS = [ + "llama2:13b-chat", + "mistral:7b-instruct", + "codellama:34b-instruct", +] +CHAIRMAN_LOCAL_MODEL = "mistral:7b-instruct" +CONVERSATION_LOCAL_MODEL = "mistral" + +# Legacy API URLs OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions" +OLLAMA_API_URL = "http://localhost:11434/api/generate" + +# ============================================================================ +# Storage Configuration +# ============================================================================ # Data directory for conversation storage DATA_DIR = "data/conversations" diff --git a/backend/council.py b/backend/council.py index 5069abec9..f1a5dde1e 100644 --- a/backend/council.py +++ b/backend/council.py @@ -1,8 +1,32 @@ """3-stage LLM Council orchestration.""" from typing import List, Dict, Any, Tuple -from .openrouter import query_models_parallel, query_model -from .config import COUNCIL_MODELS, CHAIRMAN_MODEL +from .providers import ProviderFactory +from .config import ( + COUNCIL_MODELS, + CHAIRMAN_MODEL, + CONVERSATION_TITLE_MODEL, + DEFAULT_PROVIDER, + OPENROUTER_API_KEY, + OLLAMA_BASE_URL, + LMSTUDIO_BASE_URL +) + +# Initialize the provider factory as a module-level singleton +_factory = None + + +def get_factory() -> ProviderFactory: + """Get or create the provider factory singleton.""" + global _factory + if _factory is None: + _factory = ProviderFactory( + default_provider=DEFAULT_PROVIDER, + openrouter_api_key=OPENROUTER_API_KEY, + ollama_base_url=OLLAMA_BASE_URL, + lmstudio_base_url=LMSTUDIO_BASE_URL + ) + return _factory async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]: @@ -16,16 +40,17 @@ async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]: List of dicts with 'model' and 'response' keys """ messages = [{"role": "user", "content": user_query}] + factory = get_factory() - # Query all models in parallel - responses = await 
query_models_parallel(COUNCIL_MODELS, messages) + # Query all models in parallel (can be mixed providers!) + responses = await factory.query_models_parallel(COUNCIL_MODELS, messages) # Format results stage1_results = [] - for model, response in responses.items(): + for model_id, response in responses.items(): if response is not None: # Only include successful responses stage1_results.append({ - "model": model, + "model": model_id, "response": response.get('content', '') }) @@ -93,18 +118,19 @@ async def stage2_collect_rankings( Now provide your evaluation and ranking:""" messages = [{"role": "user", "content": ranking_prompt}] + factory = get_factory() # Get rankings from all council models in parallel - responses = await query_models_parallel(COUNCIL_MODELS, messages) + responses = await factory.query_models_parallel(COUNCIL_MODELS, messages) # Format results stage2_results = [] - for model, response in responses.items(): + for model_id, response in responses.items(): if response is not None: full_text = response.get('content', '') parsed = parse_ranking_from_text(full_text) stage2_results.append({ - "model": model, + "model": model_id, "ranking": full_text, "parsed_ranking": parsed }) @@ -157,9 +183,10 @@ async def stage3_synthesize_final( Provide a clear, well-reasoned final answer that represents the council's collective wisdom:""" messages = [{"role": "user", "content": chairman_prompt}] + factory = get_factory() # Query the chairman model - response = await query_model(CHAIRMAN_MODEL, messages) + response = await factory.query_model(CHAIRMAN_MODEL, messages) if response is None: # Fallback if chairman fails @@ -273,9 +300,14 @@ async def generate_conversation_title(user_query: str) -> str: Title:""" messages = [{"role": "user", "content": title_prompt}] + factory = get_factory() - # Use gemini-2.5-flash for title generation (fast and cheap) - response = await query_model("google/gemini-2.5-flash", messages, timeout=30.0) + # Use conversation model for title generation (fast and cheap) + response = await factory.query_model( + CONVERSATION_TITLE_MODEL, + messages, + timeout=30.0 + ) if response is None: # Fallback to a generic title @@ -310,7 +342,7 @@ async def run_full_council(user_query: str) -> Tuple[List, List, Dict, Dict]: if not stage1_results: return [], [], { "model": "error", - "response": "All models failed to respond. Please try again." + "response": "All models failed to respond. Please check your configuration and provider availability." 
}, {} # Stage 2: Collect rankings diff --git a/backend/main.py b/backend/main.py index e33ce59a6..7f5136aed 100644 --- a/backend/main.py +++ b/backend/main.py @@ -193,7 +193,6 @@ async def event_generator(): } ) - if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8001) diff --git a/backend/providers/__init__.py b/backend/providers/__init__.py new file mode 100644 index 000000000..23e0264fa --- /dev/null +++ b/backend/providers/__init__.py @@ -0,0 +1,15 @@ +"""Provider abstraction for LLM APIs.""" + +from .base import Provider +from .openrouter import OpenRouterProvider +from .ollama import OllamaProvider +from .lmstudio import LMStudioProvider +from .factory import ProviderFactory + +__all__ = [ + 'Provider', + 'OpenRouterProvider', + 'OllamaProvider', + 'LMStudioProvider', + 'ProviderFactory', +] diff --git a/backend/providers/base.py b/backend/providers/base.py new file mode 100644 index 000000000..940bb35c6 --- /dev/null +++ b/backend/providers/base.py @@ -0,0 +1,85 @@ +"""Base provider interface for LLM API abstraction.""" + +from abc import ABC, abstractmethod +from typing import List, Dict, Any, Optional + + +class Provider(ABC): + """Abstract base class for LLM providers.""" + + def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None): + """ + Initialize provider. + + Args: + base_url: Optional base URL for the API + api_key: Optional API key for authentication + """ + self.base_url = base_url + self.api_key = api_key + + @abstractmethod + async def query( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0, + **kwargs + ) -> Optional[Dict[str, Any]]: + """ + Query a single model. + + Args: + model: Model identifier (without provider prefix) + messages: List of message dicts with 'role' and 'content' + timeout: Request timeout in seconds + **kwargs: Additional provider-specific parameters + + Returns: + Response dict with 'content' and optional 'reasoning_details', or None if failed + """ + pass + + async def query_batch( + self, + models: List[str], + messages: List[Dict[str, str]], + **kwargs + ) -> Dict[str, Optional[Dict[str, Any]]]: + """ + Query multiple models in parallel. + + Args: + models: List of model identifiers (without provider prefix) + messages: List of message dicts to send to each model + **kwargs: Additional provider-specific parameters + + Returns: + Dict mapping model identifier to response dict (or None if failed) + """ + import asyncio + + # Create tasks for all models + tasks = [self.query(model, messages, **kwargs) for model in models] + + # Wait for all to complete + responses = await asyncio.gather(*tasks) + + # Map models to their responses + return {model: response for model, response in zip(models, responses)} + + @abstractmethod + def validate(self) -> bool: + """ + Validate that the provider is properly configured and reachable. 
+ + Returns: + True if provider is ready to use, False otherwise + """ + pass + + @property + @abstractmethod + def name(self) -> str: + """Return the provider name.""" + pass diff --git a/backend/providers/factory.py b/backend/providers/factory.py new file mode 100644 index 000000000..acda816e9 --- /dev/null +++ b/backend/providers/factory.py @@ -0,0 +1,208 @@ +"""Provider factory for routing model requests to appropriate providers.""" + +from typing import Dict, List, Any, Optional, Tuple +from .base import Provider +from .openrouter import OpenRouterProvider +from .ollama import OllamaProvider +from .lmstudio import LMStudioProvider + + +class ProviderFactory: + """ + Factory class for managing and routing to different LLM providers. + + Supports model identifiers in two formats: + 1. Prefixed: "provider:model" (e.g., "ollama:llama2", "openrouter:gpt-4") + 2. Simple: "model" (uses default_provider) + """ + + def __init__( + self, + default_provider: str = "openrouter", + openrouter_api_key: Optional[str] = None, + ollama_base_url: Optional[str] = None, + lmstudio_base_url: Optional[str] = None + ): + """ + Initialize the provider factory. + + Args: + default_provider: Default provider name for unprefixed model identifiers + openrouter_api_key: API key for OpenRouter + ollama_base_url: Base URL for Ollama server + lmstudio_base_url: Base URL for LMStudio server + """ + self.default_provider = default_provider + self._providers: Dict[str, Provider] = {} + + # Initialize providers + self._providers['openrouter'] = OpenRouterProvider( + api_key=openrouter_api_key + ) + self._providers['ollama'] = OllamaProvider( + base_url=ollama_base_url + ) + self._providers['lmstudio'] = LMStudioProvider( + base_url=lmstudio_base_url + ) + + def parse_model_identifier(self, model_identifier: str) -> Tuple[str, str]: + """ + Parse a model identifier into (provider_name, model_name). + + Args: + model_identifier: Either "provider:model" or "model" + + Returns: + Tuple of (provider_name, model_name) + + Examples: + "ollama:llama2" -> ("ollama", "llama2") + "openrouter:gpt-4" -> ("openrouter", "gpt-4") + "gpt-4" -> ("openrouter", "gpt-4") # uses default_provider + """ + if ':' in model_identifier: + # Prefixed format: "provider:model" + parts = model_identifier.split(':', 1) + provider_name = parts[0].lower() + model_name = parts[1] + return provider_name, model_name + else: + # Simple format: use default provider + return self.default_provider, model_identifier + + def get_provider(self, provider_name: str) -> Optional[Provider]: + """ + Get a provider instance by name. + + Args: + provider_name: Name of the provider ("openrouter", "ollama", "lmstudio") + + Returns: + Provider instance or None if not found + """ + return self._providers.get(provider_name.lower()) + + async def query_model( + self, + model_identifier: str, + messages: List[Dict[str, str]], + timeout: float = 120.0, + **kwargs + ) -> Optional[Dict[str, Any]]: + """ + Query a single model using the appropriate provider. 
+ + Args: + model_identifier: Model identifier (e.g., "ollama:llama2", "gpt-4") + messages: List of message dicts with 'role' and 'content' + timeout: Request timeout in seconds + **kwargs: Additional provider-specific parameters + + Returns: + Response dict with 'content' and optional 'reasoning_details', or None if failed + """ + provider_name, model_name = self.parse_model_identifier(model_identifier) + provider = self.get_provider(provider_name) + + if provider is None: + print(f"[ProviderFactory] Error: Unknown provider '{provider_name}' in model identifier '{model_identifier}'") + return None + + if not provider.validate(): + print(f"[ProviderFactory] Warning: Provider '{provider_name}' is not properly configured") + return None + + return await provider.query(model_name, messages, timeout, **kwargs) + + async def query_models_parallel( + self, + model_identifiers: List[str], + messages: List[Dict[str, str]], + **kwargs + ) -> Dict[str, Optional[Dict[str, Any]]]: + """ + Query multiple models in parallel, automatically routing to appropriate providers. + + Args: + model_identifiers: List of model identifiers (can be mixed providers) + messages: List of message dicts to send to each model + **kwargs: Additional provider-specific parameters + + Returns: + Dict mapping original model identifier to response dict (or None if failed) + + Example: + >>> factory.query_models_parallel([ + ... "ollama:llama2", + ... "openrouter:gpt-4", + ... "lmstudio:mistral" + ... ], messages) + """ + import asyncio + + # Group models by provider for batch optimization + provider_groups: Dict[str, List[Tuple[str, str]]] = {} + for model_id in model_identifiers: + provider_name, model_name = self.parse_model_identifier(model_id) + if provider_name not in provider_groups: + provider_groups[provider_name] = [] + provider_groups[provider_name].append((model_id, model_name)) + + # Query each provider's models in batch + all_tasks = [] + task_to_model_id = {} + + for provider_name, models in provider_groups.items(): + provider = self.get_provider(provider_name) + if provider is None or not provider.validate(): + # Skip invalid providers, return None for their models + for model_id, _ in models: + task_to_model_id[model_id] = None + continue + + # Use provider's batch query if available, otherwise query individually + model_names = [model_name for _, model_name in models] + task = provider.query_batch(model_names, messages, **kwargs) + all_tasks.append((provider_name, models, task)) + + # Execute all provider batches in parallel + results = {} + if all_tasks: + batch_results = await asyncio.gather(*[task for _, _, task in all_tasks]) + + # Map results back to original model identifiers + for (provider_name, models, _), batch_result in zip(all_tasks, batch_results): + for model_id, model_name in models: + results[model_id] = batch_result.get(model_name) + + # Add None results for invalid providers + for model_id, response in task_to_model_id.items(): + if response is None: + results[model_id] = None + + return results + + def validate_all(self) -> Dict[str, bool]: + """ + Validate all providers. + + Returns: + Dict mapping provider name to validation status + """ + return { + name: provider.validate() + for name, provider in self._providers.items() + } + + def get_available_providers(self) -> List[str]: + """ + Get list of available (validated) provider names. 
+ + Returns: + List of provider names that are properly configured + """ + return [ + name for name, provider in self._providers.items() + if provider.validate() + ] diff --git a/backend/providers/lmstudio.py b/backend/providers/lmstudio.py new file mode 100644 index 000000000..705b739ef --- /dev/null +++ b/backend/providers/lmstudio.py @@ -0,0 +1,92 @@ +"""LMStudio API provider implementation.""" + +import httpx +from typing import List, Dict, Any, Optional +from .base import Provider + + +class LMStudioProvider(Provider): + """Provider for LMStudio local models (OpenAI-compatible API).""" + + DEFAULT_BASE_URL = "http://localhost:1234/v1/chat/completions" + + def __init__(self, base_url: Optional[str] = None): + """ + Initialize LMStudio provider. + + Args: + base_url: LMStudio server URL (defaults to http://localhost:1234/v1/chat/completions) + """ + super().__init__(base_url=base_url or self.DEFAULT_BASE_URL) + + @property + def name(self) -> str: + """Return the provider name.""" + return "lmstudio" + + def validate(self) -> bool: + """ + Validate that the provider is properly configured. + + Returns: + True if base_url is set, False otherwise + """ + return self.base_url is not None + + async def query( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0, + **kwargs + ) -> Optional[Dict[str, Any]]: + """ + Query a single model via LMStudio API (OpenAI-compatible). + + Args: + model: Model identifier loaded in LMStudio + messages: List of message dicts with 'role' and 'content' + timeout: Request timeout in seconds + **kwargs: Additional parameters (temperature, max_tokens, etc.) + + Returns: + Response dict with 'content' and optional 'reasoning_details', or None if failed + """ + headers = { + "Content-Type": "application/json", + } + + payload = { + "model": model, + "messages": messages, + **kwargs # Allow additional parameters + } + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + self.base_url, + headers=headers, + json=payload + ) + response.raise_for_status() + + data = response.json() + + # LMStudio uses OpenAI-compatible response format + message = data['choices'][0]['message'] + + return { + 'content': message.get('content'), + 'reasoning_details': message.get('reasoning_details') + } + + except httpx.HTTPStatusError as e: + print(f"[{self.name}] HTTP error querying model {model}: {e.response.status_code} - {e.response.text}") + return None + except httpx.ConnectError: + print(f"[{self.name}] Connection error: Is LMStudio running at {self.base_url}?") + return None + except Exception as e: + print(f"[{self.name}] Error querying model {model}: {e}") + return None diff --git a/backend/providers/ollama.py b/backend/providers/ollama.py new file mode 100644 index 000000000..45bc049a6 --- /dev/null +++ b/backend/providers/ollama.py @@ -0,0 +1,86 @@ +"""Ollama API provider implementation.""" + +from typing import List, Dict, Any, Optional +from .base import Provider + + +class OllamaProvider(Provider): + """Provider for Ollama local models.""" + + DEFAULT_BASE_URL = "http://localhost:11434" + + def __init__(self, base_url: Optional[str] = None): + """ + Initialize Ollama provider. + + Args: + base_url: Ollama server URL (defaults to http://localhost:11434) + """ + super().__init__(base_url=base_url or self.DEFAULT_BASE_URL) + + @property + def name(self) -> str: + """Return the provider name.""" + return "ollama" + + def validate(self) -> bool: + """ + Validate that the Ollama server is reachable. 
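+        (Currently this only checks that a base URL is configured; it does not
+        probe the server.)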
+ + Returns: + True if server is reachable, False otherwise + """ + # For now, assume valid if base_url is set + # Could add actual connectivity check with httpx if needed + return self.base_url is not None + + async def query( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0, + **kwargs + ) -> Optional[Dict[str, Any]]: + """ + Query a single model via Ollama API. + + Args: + model: Ollama model identifier (e.g., "llama2", "mistral") + messages: List of message dicts with 'role' and 'content' + timeout: Request timeout in seconds + **kwargs: Additional Ollama-specific parameters + + Returns: + Response dict with 'content' and optional 'reasoning_details', or None if failed + """ + try: + from ollama import AsyncClient + + # Create client with custom host if specified + client = AsyncClient(host=self.base_url) + + # Use the chat API for conversation-style interactions + response = await client.chat( + model=model, + messages=messages, + stream=False, + options=kwargs.get('options', {}) + ) + + # Extract content from Ollama response format + # Ollama returns: {'model': ..., 'message': {'role': 'assistant', 'content': '...'}, ...} + if 'message' in response and 'content' in response['message']: + return { + 'content': response['message']['content'], + 'reasoning_details': None # Ollama doesn't provide this + } + else: + print(f"[{self.name}] Unexpected response format from model {model}: {response}") + return None + + except ImportError: + print(f"[{self.name}] Error: ollama package not installed. Run: pip install ollama") + return None + except Exception as e: + print(f"[{self.name}] Error querying model {model}: {e}") + return None diff --git a/backend/providers/openrouter.py b/backend/providers/openrouter.py new file mode 100644 index 000000000..b296a5add --- /dev/null +++ b/backend/providers/openrouter.py @@ -0,0 +1,96 @@ +"""OpenRouter API provider implementation.""" + +import httpx +from typing import List, Dict, Any, Optional +from .base import Provider + + +class OpenRouterProvider(Provider): + """Provider for OpenRouter API.""" + + DEFAULT_BASE_URL = "https://openrouter.ai/api/v1/chat/completions" + + def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None): + """ + Initialize OpenRouter provider. + + Args: + api_key: OpenRouter API key + base_url: Optional custom base URL (defaults to OpenRouter API) + """ + super().__init__( + base_url=base_url or self.DEFAULT_BASE_URL, + api_key=api_key + ) + + @property + def name(self) -> str: + """Return the provider name.""" + return "openrouter" + + def validate(self) -> bool: + """ + Validate that the provider is properly configured. + + Returns: + True if API key is present, False otherwise + """ + return self.api_key is not None and len(self.api_key) > 0 + + async def query( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0, + **kwargs + ) -> Optional[Dict[str, Any]]: + """ + Query a single model via OpenRouter API. 
+ + Args: + model: OpenRouter model identifier (e.g., "openai/gpt-4o") + messages: List of message dicts with 'role' and 'content' + timeout: Request timeout in seconds + **kwargs: Additional OpenRouter-specific parameters + + Returns: + Response dict with 'content' and optional 'reasoning_details', or None if failed + """ + if not self.validate(): + print(f"[{self.name}] Error: API key not configured") + return None + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + + payload = { + "model": model, + "messages": messages, + **kwargs # Allow additional parameters like temperature, max_tokens, etc. + } + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + self.base_url, + headers=headers, + json=payload + ) + response.raise_for_status() + + data = response.json() + message = data['choices'][0]['message'] + + return { + 'content': message.get('content'), + 'reasoning_details': message.get('reasoning_details') + } + + except httpx.HTTPStatusError as e: + print(f"[{self.name}] HTTP error querying model {model}: {e.response.status_code} - {e.response.text}") + return None + except Exception as e: + print(f"[{self.name}] Error querying model {model}: {e}") + return None diff --git a/pyproject.toml b/pyproject.toml index 56356ebcb..d7502450f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,4 +10,8 @@ dependencies = [ "python-dotenv>=1.0.0", "httpx>=0.27.0", "pydantic>=2.9.0", + "ollama>=0.6.1", ] + +[tool.setuptools] +packages = ["backend"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..7a9a785a6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +# LLM Council Backend Dependencies + +fastapi>=0.115.0 +uvicorn[standard]>=0.32.0 +python-dotenv>=1.0.0 +httpx>=0.27.0 +pydantic>=2.9.0 +ollama>=0.6.1 diff --git a/test_providers.py b/test_providers.py new file mode 100644 index 000000000..91b21109e --- /dev/null +++ b/test_providers.py @@ -0,0 +1,88 @@ +"""Test script for provider abstraction system.""" + +import asyncio +from backend.providers import ProviderFactory + + +async def test_provider_parsing(): + """Test model identifier parsing.""" + print("\n=== Testing Model Identifier Parsing ===") + + factory = ProviderFactory(default_provider="openrouter") + + test_cases = [ + ("ollama:llama2", ("ollama", "llama2")), + ("openrouter:gpt-4", ("openrouter", "gpt-4")), + ("lmstudio:mistral", ("lmstudio", "mistral")), + ("gpt-4", ("openrouter", "gpt-4")), # Uses default + ("openai/gpt-4o", ("openrouter", "openai/gpt-4o")), # OpenRouter format + ] + + for model_id, expected in test_cases: + result = factory.parse_model_identifier(model_id) + status = "✓" if result == expected else "✗" + print(f"{status} '{model_id}' → {result} (expected: {expected})") + + +async def test_provider_validation(): + """Test provider validation.""" + print("\n=== Testing Provider Validation ===") + + # Test with no configuration + factory = ProviderFactory() + validation = factory.validate_all() + + print("Provider validation status:") + for provider, is_valid in validation.items(): + status = "✓ Valid" if is_valid else "✗ Invalid" + print(f" {provider}: {status}") + + available = factory.get_available_providers() + print(f"\nAvailable providers: {available}") + + +async def test_provider_routing(): + """Test provider routing without actual API calls.""" + print("\n=== Testing Provider Routing ===") + + factory = ProviderFactory(default_provider="openrouter") + + test_models = [ + 
"ollama:llama2", + "openrouter:gpt-4", + "lmstudio:mistral", + ] + + for model_id in test_models: + provider_name, model_name = factory.parse_model_identifier(model_id) + provider = factory.get_provider(provider_name) + + if provider: + print(f"✓ '{model_id}' → {provider.name} provider (model: {model_name})") + else: + print(f"✗ '{model_id}' → No provider found for '{provider_name}'") + + +async def main(): + """Run all tests.""" + print("=" * 60) + print("LLM Council - Provider System Tests") + print("=" * 60) + + await test_provider_parsing() + await test_provider_validation() + await test_provider_routing() + + print("\n" + "=" * 60) + print("Tests Complete!") + print("=" * 60) + print("\nNote: These tests verify the provider abstraction layer.") + print("For actual API testing:") + print(" - OpenRouter: Requires OPENROUTER_API_KEY in .env") + print(" - Ollama: Requires 'ollama serve' running on localhost:11434") + print(" - LMStudio: Requires LMStudio server running on localhost:1234") + print() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/uv.lock b/uv.lock index 079224681..761aac4b7 100644 --- a/uv.lock +++ b/uv.lock @@ -188,6 +188,7 @@ source = { virtual = "." } dependencies = [ { name = "fastapi" }, { name = "httpx" }, + { name = "ollama" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "uvicorn", extra = ["standard"] }, @@ -197,11 +198,25 @@ dependencies = [ requires-dist = [ { name = "fastapi", specifier = ">=0.115.0" }, { name = "httpx", specifier = ">=0.27.0" }, + { name = "ollama", specifier = ">=0.6.1" }, { name = "pydantic", specifier = ">=2.9.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.32.0" }, ] +[[package]] +name = "ollama" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/5a/652dac4b7affc2b37b95386f8ae78f22808af09d720689e3d7a86b6ed98e/ollama-0.6.1.tar.gz", hash = "sha256:478c67546836430034b415ed64fa890fd3d1ff91781a9d548b3325274e69d7c6", size = 51620, upload-time = "2025-11-13T23:02:17.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/4f/4a617ee93d8208d2bcf26b2d8b9402ceaed03e3853c754940e2290fed063/ollama-0.6.1-py3-none-any.whl", hash = "sha256:fc4c984b345735c5486faeee67d8a265214a31cbb828167782dc642ce0a2bf8c", size = 14354, upload-time = "2025-11-13T23:02:16.292Z" }, +] + [[package]] name = "pydantic" version = "2.12.4"