diff --git a/.env.example b/.env.example new file mode 100644 index 000000000..104fbf7ba --- /dev/null +++ b/.env.example @@ -0,0 +1,152 @@ +# ================================================================================ +# LLM COUNCIL - Environment Configuration +# ================================================================================ +# +# SECURITY WARNING: +# - Never commit the actual .env file to version control +# - Keep your API keys secret and secure +# - The .env file is already listed in .gitignore +# +# SETUP INSTRUCTIONS: +# 1. Copy this file to .env: cp .env.example .env +# 2. Fill in your actual values in the .env file +# 3. Choose your provider mode and configure accordingly +# +# ================================================================================ + +# -------------------------------------------------------------------------------- +# LLM PROVIDER MODE +# -------------------------------------------------------------------------------- +# Determines which LLM provider(s) to use for the council +# +# Valid options: +# - openrouter: Use OpenRouter API for all models (cloud-based, requires API key) +# - ollama: Use local Ollama server for all models (self-hosted, free) +# - mixed: Use both providers with explicit prefixes per model +# +# Default: openrouter (for backward compatibility) +# -------------------------------------------------------------------------------- +LLM_PROVIDER=openrouter + + +# -------------------------------------------------------------------------------- +# OPENROUTER CONFIGURATION +# -------------------------------------------------------------------------------- +# Required for: 'openrouter' and 'mixed' modes +# Get your API key from: https://openrouter.ai/keys +# +# SECURITY: This is a secret key - never share it or commit it to git! 
+# -------------------------------------------------------------------------------- +OPENROUTER_API_KEY=your_openrouter_api_key_here + + +# -------------------------------------------------------------------------------- +# OLLAMA CONFIGURATION +# -------------------------------------------------------------------------------- +# Required for: 'ollama' and 'mixed' modes +# Default: http://localhost:11434 (standard Ollama installation) +# +# Change this if: +# - Running Ollama on a different port +# - Using a remote Ollama server +# - Using Docker with custom networking +# -------------------------------------------------------------------------------- +OLLAMA_BASE_URL=http://localhost:11434 + + +# ================================================================================ +# CONFIGURATION EXAMPLES BY MODE +# ================================================================================ +# +# The council models and chairman are configured in backend/config.py, but here +# are examples of how to set up each mode: +# +# -------------------------------------------------------------------------------- +# EXAMPLE 1: OpenRouter Mode (Cloud-based) +# -------------------------------------------------------------------------------- +# LLM_PROVIDER=openrouter +# OPENROUTER_API_KEY=sk-or-v1-your-actual-key-here +# +# In backend/config.py, use models like: +# COUNCIL_MODELS = [ +# "openai/gpt-4", +# "openai/gpt-5.1", +# "google/gemini-3-pro-preview", +# "anthropic/claude-sonnet-4.5", +# "x-ai/grok-4" +# ] +# CHAIRMAN_MODEL = "google/gemini-3-pro-preview" +# +# Available OpenRouter models: https://openrouter.ai/models +# +# -------------------------------------------------------------------------------- +# EXAMPLE 2: Ollama Mode (Local/Self-hosted) +# -------------------------------------------------------------------------------- +# LLM_PROVIDER=ollama +# OLLAMA_BASE_URL=http://localhost:11434 +# +# In backend/config.py, use models like: +# COUNCIL_MODELS = [ +# "llama3.1:8b", +# "mistral:latest", +# "qwen2.5:3b", +# "phi3:latest" +# ] +# CHAIRMAN_MODEL = "llama3.1:8b" +# +# Note: You must have these models installed locally via: +# ollama pull llama3.1:8b +# ollama pull mistral:latest +# (etc.) 
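+#   To check which models are already installed locally: ollama list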
+# +# Available Ollama models: https://ollama.ai/library +# +# -------------------------------------------------------------------------------- +# EXAMPLE 3: Mixed Mode (Hybrid Cloud + Local) +# -------------------------------------------------------------------------------- +# LLM_PROVIDER=mixed +# OPENROUTER_API_KEY=sk-or-v1-your-actual-key-here +# OLLAMA_BASE_URL=http://localhost:11434 +# +# In backend/config.py, prefix each model with provider: +# COUNCIL_MODELS = [ +# "ollama:llama3.1:8b", # Local model (fast, free) +# "ollama:mistral:latest", # Local model (fast, free) +# "openrouter:google/gemini-2.5-flash-lite", # Cloud model (paid) +# "openrouter:anthropic/claude-3.5-haiku" # Cloud model (paid) +# ] +# CHAIRMAN_MODEL = "openrouter:google/gemini-2.5-flash-lite" +# +# Benefits of mixed mode: +# - Use free local models for bulk processing +# - Use premium cloud models for final synthesis +# - Optimize cost vs quality trade-offs +# +# ================================================================================ + + +# -------------------------------------------------------------------------------- +# ADDITIONAL NOTES +# -------------------------------------------------------------------------------- +# +# MODEL NAMING CONVENTIONS: +# - OpenRouter: Uses "provider/model-name" format (e.g., "openai/gpt-4") +# - Ollama: Uses "model-name:tag" format (e.g., "llama3.1:8b") +# - Mixed mode: Uses "provider:model-identifier" format +# +# COST CONSIDERATIONS: +# - OpenRouter charges per token (varies by model) +# - Ollama is free but requires local compute resources +# - Mixed mode allows cost optimization strategies +# +# PERFORMANCE: +# - OpenRouter: Fast API, no local setup required +# - Ollama: Speed depends on hardware, no network latency +# - Mixed mode: Balance based on your infrastructure +# +# PRIVACY: +# - OpenRouter: Data sent to third-party cloud services +# - Ollama: All processing happens locally (fully private) +# - Mixed mode: Be aware which models process sensitive data +# +# ================================================================================ diff --git a/.gitignore b/.gitignore index 4c2041a54..156cf440f 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,11 @@ data/ # Frontend frontend/node_modules/ frontend/dist/ -frontend/.vite/ \ No newline at end of file +frontend/.vite/ + +#superclaude settings +.claude +.serena + +#backlog.md files +backlog/ diff --git a/CLAUDE.md b/CLAUDE.md index b803720fa..8d9c1e114 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,20 +8,71 @@ LLM Council is a 3-stage deliberation system where multiple LLMs collaboratively ## Architecture +### Multi-Provider System + +The application now supports three LLM provider modes via a provider abstraction layer: + +1. **OpenRouter Mode** (default): All models use OpenRouter API (cloud-based, paid) +2. **Ollama Mode**: All models use local Ollama server (self-hosted, free) +3. 
**Mixed Mode**: Combine both providers with explicit prefixes per model + +**Key Design Decisions:** +- **Backward Compatibility**: Existing OpenRouter configs work without changes +- **Zero-Configuration Local**: Ollama mode requires no API keys +- **Cost Optimization**: Mixed mode allows free local + premium cloud models +- **Provider Abstraction**: Clean separation between provider logic and core council logic + ### Backend Structure (`backend/`) **`config.py`** -- Contains `COUNCIL_MODELS` (list of OpenRouter model identifiers) +- Contains `LLM_PROVIDER` (determines provider mode: openrouter/ollama/mixed) +- Contains `COUNCIL_MODELS` (list of model identifiers, format depends on mode) - Contains `CHAIRMAN_MODEL` (model that synthesizes final answer) -- Uses environment variable `OPENROUTER_API_KEY` from `.env` +- Adapts model configurations based on `LLM_PROVIDER` setting +- Environment variables from `.env`: + - `OPENROUTER_API_KEY`: Required for openrouter/mixed modes + - `OLLAMA_BASE_URL`: Required for ollama/mixed modes (default: http://localhost:11434) - Backend runs on **port 8001** (NOT 8000 - user had another app on 8000) -**`openrouter.py`** -- `query_model()`: Single async model query +**Model Specification Formats:** +- **OpenRouter mode**: `"openai/gpt-4o"`, `"google/gemini-2.5-flash-lite"` +- **Ollama mode**: `"llama3.1:8b"`, `"mistral:latest"` +- **Mixed mode**: `"openrouter:openai/gpt-4o"`, `"ollama:llama3.1:8b"` + +**`providers/` directory** - Provider Abstraction Layer + +**`providers/base.py`** +- `LLMProvider` abstract base class defining provider interface +- All providers must implement: `query_model()` and `query_models_parallel()` +- Ensures consistent API across different provider implementations + +**`providers/openrouter.py`** +- `OpenRouterProvider`: Implementation for OpenRouter API +- Handles OpenRouter-specific API format and authentication +- Supports both standard and reasoning models (o1, etc.) 
+- `query_model()`: Single async model query to OpenRouter - `query_models_parallel()`: Parallel queries using `asyncio.gather()` - Returns dict with 'content' and optional 'reasoning_details' - Graceful degradation: returns None on failure, continues with successful responses +**`providers/ollama.py`** +- `OllamaProvider`: Implementation for local Ollama server +- Communicates with Ollama HTTP API (default: localhost:11434) +- Supports streaming and non-streaming responses +- `query_model()`: Single async model query to Ollama +- `query_models_parallel()`: Parallel queries to local Ollama server +- Same return format as OpenRouter for consistency +- Error handling for common issues (server not running, model not found) + +**`providers/__init__.py`** - Factory and Routing Logic +- `get_provider(provider_name)`: Factory function with singleton pattern +- `parse_model_spec(model_spec)`: Parses model specifications into (provider, model_id) +- `query_model(model_spec, messages)`: Routes single query to appropriate provider +- `query_models_parallel(model_specs, messages)`: Intelligent parallel routing +- Supports both simple mode (global provider) and mixed mode (per-model provider) +- Groups queries by provider for efficient batch execution +- All queries execute concurrently (both within and across providers) + **`council.py`** - The Core Logic - `stage1_collect_responses()`: Parallel queries to all council models - `stage2_collect_rankings()`: @@ -109,11 +160,224 @@ This strict format allows reliable parsing while still getting thoughtful evalua - Users can verify system's interpretation of model outputs - This builds trust and allows debugging of edge cases +## Provider Abstraction Pattern + +### Architecture Pattern: Abstract Factory + +The multi-provider system uses the **Abstract Factory** and **Strategy** patterns: + +``` +LLMProvider (Abstract Base Class) + ↓ + ├── OpenRouterProvider (Concrete Implementation) + ├── OllamaProvider (Concrete Implementation) + └── Future providers... (Extensible) + +Factory Function: get_provider(name) → LLMProvider instance +Routing Layer: parse_model_spec() + query_model() + query_models_parallel() +``` + +**Benefits:** +- **Encapsulation**: Provider-specific logic isolated in dedicated classes +- **Extensibility**: Add new providers by implementing `LLMProvider` interface +- **Consistency**: All providers return same response format +- **Testability**: Mock providers easily for testing +- **Performance**: Singleton pattern for provider instances + +### Model Specification Parsing + +The system intelligently routes models to providers: + +1. **Simple Mode** (openrouter or ollama): + - `"llama3.1:8b"` → Uses `LLM_PROVIDER` from config + - No prefix needed, provider determined globally + +2. **Mixed Mode**: + - `"ollama:llama3.1:8b"` → Explicitly routes to Ollama + - `"openrouter:openai/gpt-4"` → Explicitly routes to OpenRouter + - All models MUST have prefix in mixed mode + +3. **Parsing Logic**: + ```python + # Check for explicit prefix + if model_spec.startswith("ollama:"): + return ("ollama", model_spec[7:]) + elif model_spec.startswith("openrouter:"): + return ("openrouter", model_spec[11:]) + + # Fall back to global provider (if not mixed mode) + return (LLM_PROVIDER, model_spec) + ``` + +### Parallel Execution Strategy + +The routing layer optimizes parallel execution: + +1. **Input**: List of model specs (potentially mixed providers) +2. **Grouping**: Group models by provider for batch execution +3. 
**Parallel Provider Queries**: Each provider batch runs concurrently +4. **Result Aggregation**: Map provider results back to original model specs +5. **Output**: Dictionary of all results (including failures as None) + +**Example Flow:** +```python +# Input +model_specs = [ + "ollama:llama3.1:8b", + "ollama:mistral:latest", + "openrouter:openai/gpt-4", + "openrouter:anthropic/claude-3.5-sonnet" +] + +# Grouping +{ + "ollama": ["llama3.1:8b", "mistral:latest"], + "openrouter": ["openai/gpt-4", "anthropic/claude-3.5-sonnet"] +} + +# Execution (concurrent) +await asyncio.gather( + ollama_provider.query_models_parallel([...]), # 2 models + openrouter_provider.query_models_parallel([...]) # 2 models +) + +# Result mapping back to original specs +``` + +### Error Handling Philosophy + +**Graceful Degradation at Every Layer:** + +1. **Provider Level**: + - Individual model failures return None + - Don't propagate exceptions to council logic + - Log errors but continue with successful responses + +2. **Routing Level**: + - Invalid model specs → None result + - Provider instantiation failures → None for affected models + - Parse errors → None result + +3. **Council Level**: + - Continue with successful responses from Stage 1 + - Adapt Stage 2 ranking to available responses + - Stage 3 synthesis uses whatever data is available + +**Rationale**: Better to get partial results than complete failure. The council system is designed to aggregate multiple perspectives, so it's robust to individual model failures. + +## Migration Guide: Single Provider to Multi-Provider + +### For Existing Users (Minimal Changes) + +If you're already using OpenRouter, **no changes required**. The system defaults to `openrouter` mode for backward compatibility. + +**Optional**: Add `LLM_PROVIDER=openrouter` to `.env` for explicitness. + +### Migrating to Ollama (Local) + +1. Install Ollama (see README.md) +2. Pull models: + ```bash + ollama pull llama3.1:8b + ollama pull mistral:latest + ollama pull qwen2.5:3b + ``` +3. Update `.env`: + ```bash + LLM_PROVIDER=ollama + OLLAMA_BASE_URL=http://localhost:11434 + ``` +4. Update `backend/config.py`: + ```python + COUNCIL_MODELS = [ + "llama3.1:8b", + "mistral:latest", + "qwen2.5:3b", + ] + CHAIRMAN_MODEL = "llama3.1:8b" + ``` +5. Restart backend + +### Migrating to Mixed Mode (Hybrid) + +1. Set up both OpenRouter and Ollama +2. Update `.env`: + ```bash + LLM_PROVIDER=mixed + OPENROUTER_API_KEY=sk-or-v1-... + OLLAMA_BASE_URL=http://localhost:11434 + ``` +3. Update `backend/config.py` with prefixes: + ```python + COUNCIL_MODELS = [ + "ollama:llama3.1:8b", # Local, free + "ollama:mistral:latest", # Local, free + "openrouter:google/gemini-2.5-flash-lite", # Cloud, paid + "openrouter:anthropic/claude-3.5-haiku", # Cloud, paid + ] + CHAIRMAN_MODEL = "openrouter:google/gemini-2.5-flash-lite" + ``` +4. Restart backend + +### Adding a New Provider (For Developers) + +To add support for a new LLM provider (e.g., HuggingFace, Anthropic Direct, etc.): + +1. 
Create `backend/providers/your_provider.py`: + ```python + from backend.providers.base import LLMProvider + + class YourProvider(LLMProvider): + def __init__(self, api_key: str): + self.api_key = api_key + + async def query_model(self, model_id, messages, timeout=120.0): + # Implement API call to your provider + # Return {"content": str, "reasoning_details": dict or None} + # Return None on failure + pass + + async def query_models_parallel(self, model_ids, messages): + # Implement parallel queries + # Return {model_id: result_dict} for all models + pass + ``` + +2. Update `backend/providers/__init__.py`: + ```python + from backend.providers.your_provider import YourProvider + + def get_provider(provider_name: str) -> LLMProvider: + if provider_name == "your_provider": + return YourProvider(api_key=YOUR_PROVIDER_API_KEY) + # ... existing code + + def parse_model_spec(model_spec: str): + if model_spec.startswith("your_provider:"): + return ("your_provider", model_spec[14:]) + # ... existing code + ``` + +3. Update `backend/config.py`: + ```python + VALID_PROVIDERS = ["openrouter", "ollama", "mixed", "your_provider"] + YOUR_PROVIDER_API_KEY = os.getenv("YOUR_PROVIDER_API_KEY") + ``` + +4. Test thoroughly with both simple and mixed modes + ## Important Implementation Details ### Relative Imports All backend modules use relative imports (e.g., `from .config import ...`) not absolute imports. This is critical for Python's module system to work correctly when running as `python -m backend.main`. +### Provider Imports in council.py +The core council logic now imports from `backend.providers` instead of `backend.openrouter`: +```python +from backend.providers import query_model, query_models_parallel +``` +This maintains backward compatibility while routing to the appropriate provider. + ### Port Configuration - Backend: 8001 (changed from 8000 to avoid conflict) - Frontend: 5173 (Vite default) diff --git a/README.md b/README.md index 23599b3cf..0b7e9fed4 100644 --- a/README.md +++ b/README.md @@ -32,19 +32,30 @@ npm install cd .. ``` -### 2. Configure API Key +### 2. Choose Your LLM Provider -Create a `.env` file in the project root: +LLM Council supports three provider modes: -```bash -OPENROUTER_API_KEY=sk-or-v1-... -``` +- **OpenRouter** (cloud-based): Access to 100+ models via API, requires paid credits +- **Ollama** (local): Run models on your own hardware, completely free and private +- **Mixed** (hybrid): Combine both providers for cost/quality optimization + +### 3. Provider Configuration + +#### Option A: OpenRouter (Cloud-based) -Get your API key at [openrouter.ai](https://openrouter.ai/). Make sure to purchase the credits you need, or sign up for automatic top up. +**1. Get API Key** -### 3. Configure Models (Optional) +Sign up at [openrouter.ai](https://openrouter.ai/) and get your API key. Make sure to purchase credits or enable automatic top-up. -Edit `backend/config.py` to customize the council: +**2. Create `.env` file** + +```bash +LLM_PROVIDER=openrouter +OPENROUTER_API_KEY=sk-or-v1-your-actual-key-here +``` + +**3. Configure models in `backend/config.py`** (optional) ```python COUNCIL_MODELS = [ @@ -57,6 +68,97 @@ COUNCIL_MODELS = [ CHAIRMAN_MODEL = "google/gemini-3-pro-preview" ``` +See available models at [openrouter.ai/models](https://openrouter.ai/models) + +#### Option B: Ollama (Local) + +**1. Install Ollama** + +```bash +# Linux & Mac +curl -fsSL https://ollama.com/install.sh | sh + +# Windows +# Download from https://ollama.com/download +``` + +**2. 
Start Ollama service** + +```bash +# Ollama typically starts automatically after installation +# To verify it's running: +curl http://localhost:11434/api/version +``` + +**3. Pull models** + +```bash +# Download the models you want to use +ollama pull llama3.1:latest +ollama pull mistral:latest +ollama pull qwen2.5:3b +ollama pull phi3:latest +``` + +See available models at [ollama.com/library](https://ollama.com/library) + +**4. Create `.env` file** + +```bash +LLM_PROVIDER=ollama +OLLAMA_BASE_URL=http://localhost:11434 +``` + +**5. Configure models in `backend/config.py`** (optional) + +```python +COUNCIL_MODELS = [ + "llama3.1:8b", + "mistral:latest", + "qwen2.5:3b", + "phi3:latest", +] + +CHAIRMAN_MODEL = "llama3.1:8b" +``` + +#### Option C: Mixed (Hybrid) + +Combine local and cloud models for cost optimization. + +**1. Set up both providers** + +Follow installation steps for both OpenRouter and Ollama above. + +**2. Create `.env` file** + +```bash +LLM_PROVIDER=mixed +OPENROUTER_API_KEY=sk-or-v1-your-actual-key-here +OLLAMA_BASE_URL=http://localhost:11434 +``` + +**3. Configure models in `backend/config.py`** (optional) + +Prefix each model with its provider: + +```python +COUNCIL_MODELS = [ + "ollama:llama3.1:8b", # Local, fast, free + "ollama:mistral:latest", # Local, fast, free + "openrouter:google/gemini-2.5-flash-lite", # Cloud, paid + "openrouter:anthropic/claude-3.5-haiku", # Cloud, paid +] + +CHAIRMAN_MODEL = "openrouter:google/gemini-2.5-flash-lite" +``` + +**Benefits of mixed mode:** +- Use free local models for council deliberation +- Use premium cloud models for final synthesis +- Optimize cost vs quality trade-offs +- Keep sensitive data local while using cloud for general queries + ## Running the Application **Option 1: Use the start script** @@ -81,7 +183,7 @@ Then open http://localhost:5173 in your browser. ## Tech Stack -- **Backend:** FastAPI (Python 3.10+), async httpx, OpenRouter API +- **Backend:** FastAPI (Python 3.10+), async httpx, multi-provider support (OpenRouter, Ollama) - **Frontend:** React + Vite, react-markdown for rendering - **Storage:** JSON files in `data/conversations/` - **Package Management:** uv for Python, npm for JavaScript diff --git a/backend/config.py b/backend/config.py index a9cf7c473..c202b4caa 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,23 +1,74 @@ -"""Configuration for the LLM Council.""" +"""Configuration for the LLM Council. + +Supports three provider modes: +1. 'openrouter': All models use OpenRouter API (default) +2. 'ollama': All models use local Ollama server +3. 'mixed': Prefix each model with provider (e.g., 'ollama:llama3.1:8b', 'openrouter:google/gemini-2.5-flash-lite') +""" import os from dotenv import load_dotenv load_dotenv() -# OpenRouter API key +# LLM Provider configuration +# Valid values: 'openrouter', 'ollama', 'mixed' +# Default: 'openrouter' for backward compatibility +LLM_PROVIDER = os.getenv("LLM_PROVIDER", "openrouter") +print(f"LLM_PROVIDER set to: {LLM_PROVIDER}") +# Validate LLM_PROVIDER +VALID_PROVIDERS = ["openrouter", "ollama", "mixed"] +if LLM_PROVIDER not in VALID_PROVIDERS: + raise ValueError( + f"Invalid LLM_PROVIDER '{LLM_PROVIDER}'. 
Must be one of {VALID_PROVIDERS}" + ) + +# OpenRouter API key (required for 'openrouter' and 'mixed' modes) OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") -# Council members - list of OpenRouter model identifiers -COUNCIL_MODELS = [ - "openai/gpt-5.1", - "google/gemini-3-pro-preview", - "anthropic/claude-sonnet-4.5", - "x-ai/grok-4", -] +# Ollama base URL (required for 'ollama' and 'mixed' modes) +# Default: http://localhost:11434 +OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434") + +# Council members - adapts based on LLM_PROVIDER +# +# Examples: +# - openrouter mode: ["openai/gpt-5.1", "google/gemini-3-pro-preview"] +# - ollama mode: ["llama3.1:8b", "mistral:latest", "qwen2.5:3b"] +# - mixed mode: ["ollama:llama3.1:8b", "openrouter:google/gemini-2.5-flash-lite"] +if LLM_PROVIDER == "openrouter": + # Original OpenRouter configuration (backward compatible) + COUNCIL_MODELS = [ + "openai/gpt-5.1", + "google/gemini-3-pro-preview", + "anthropic/claude-sonnet-4.5", + "x-ai/grok-4", + ] + # Original OpenRouter configuration (backward compatible) + CHAIRMAN_MODEL = "google/gemini-3-pro-preview" + TITLE_MODEL = "google/gemini-2.5-flash-lite" +elif LLM_PROVIDER == "ollama": + # Local Ollama models + COUNCIL_MODELS = [ + "mistral:latest", + "qwen2.5:3b", + "phi3:latest", + ] + # Local Ollama chairman + CHAIRMAN_MODEL = "mistral:latest" + TITLE_MODEL = "phi3:latest" +else: # mixed mode + # Mixed provider configuration with explicit prefixes + COUNCIL_MODELS = [ + "ollama:phi3:latest", + "ollama:mistral:latest", + "openrouter:google/gemini-3-pro-preview", + "openrouter:anthropic/claude-3.5-haiku", + ] + # Mixed mode chairman (can use either provider) + CHAIRMAN_MODEL = "openrouter:google/gemini-3-pro-preview" + TITLE_MODEL = "openrouter:google/gemini-2.5-flash-lite" -# Chairman model - synthesizes final response -CHAIRMAN_MODEL = "google/gemini-3-pro-preview" # OpenRouter API endpoint OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions" diff --git a/backend/council.py b/backend/council.py index 5069abec9..19b2c7d1c 100644 --- a/backend/council.py +++ b/backend/council.py @@ -1,8 +1,8 @@ """3-stage LLM Council orchestration.""" from typing import List, Dict, Any, Tuple -from .openrouter import query_models_parallel, query_model -from .config import COUNCIL_MODELS, CHAIRMAN_MODEL +from .providers import query_models_parallel, query_model +from .config import COUNCIL_MODELS, CHAIRMAN_MODEL, TITLE_MODEL async def stage1_collect_responses(user_query: str) -> List[Dict[str, Any]]: @@ -95,6 +95,7 @@ async def stage2_collect_rankings( messages = [{"role": "user", "content": ranking_prompt}] # Get rankings from all council models in parallel + print(f"Stage 2: Querying council models for rankings...{COUNCIL_MODELS}") responses = await query_models_parallel(COUNCIL_MODELS, messages) # Format results @@ -274,8 +275,8 @@ async def generate_conversation_title(user_query: str) -> str: messages = [{"role": "user", "content": title_prompt}] - # Use gemini-2.5-flash for title generation (fast and cheap) - response = await query_model("google/gemini-2.5-flash", messages, timeout=30.0) + # Use TITLE_MODEL for title generation (fast and cheap) + response = await query_model(TITLE_MODEL, messages, timeout=30.0) if response is None: # Fallback to a generic title diff --git a/backend/providers/__init__.py b/backend/providers/__init__.py new file mode 100644 index 000000000..be573e97d --- /dev/null +++ b/backend/providers/__init__.py @@ -0,0 +1,286 @@ +"""LLM Provider 
abstractions for the council system. + +This module provides a factory pattern for provider instantiation and intelligent +routing logic for both simple and mixed provider configurations. + +Key Functions: + - get_provider(provider_name): Get singleton provider instance + - parse_model_spec(model_spec): Parse model spec into (provider, model) tuple + - query_model(): Route single query to appropriate provider + - query_models_parallel(): Route parallel queries with cross-provider support +""" + +import asyncio +from typing import Any, Dict, List, Optional, Tuple + +from backend.config import LLM_PROVIDER, OPENROUTER_API_KEY, OLLAMA_BASE_URL +from backend.providers.base import LLMProvider +from backend.providers.ollama import OllamaProvider +from backend.providers.openrouter import OpenRouterProvider + + +# Singleton provider instances +_provider_instances: Dict[str, LLMProvider] = {} + + +def get_provider(provider_name: str) -> LLMProvider: + """Get a singleton instance of the specified provider. + + Args: + provider_name: Provider name ("openrouter" or "ollama") + + Returns: + LLMProvider instance for the specified provider + + Raises: + ValueError: If provider_name is not "openrouter" or "ollama" + + Example: + provider = get_provider("ollama") + result = await provider.query_model("llama3.1:8b", messages) + """ + if provider_name not in ["openrouter", "ollama"]: + raise ValueError( + f"Invalid provider name '{provider_name}'. Must be 'openrouter' or 'ollama'" + ) + + # Return existing instance if already created + if provider_name in _provider_instances: + return _provider_instances[provider_name] + + # Create new instance based on provider type + if provider_name == "openrouter": + if not OPENROUTER_API_KEY: + raise ValueError( + "OPENROUTER_API_KEY is required for openrouter provider. " + "Set it in your .env file." + ) + _provider_instances[provider_name] = OpenRouterProvider( + api_key=OPENROUTER_API_KEY + ) + else: # ollama + _provider_instances[provider_name] = OllamaProvider( + base_url=OLLAMA_BASE_URL + ) + + return _provider_instances[provider_name] + + +def parse_model_spec(model_spec: str) -> Tuple[str, str]: + """Parse a model specification into (provider, model) tuple. + + Handles two formats: + 1. Simple mode: "model_name" -> uses LLM_PROVIDER from config + 2. 
Mixed mode: "provider:model_name" -> explicit provider + + Args: + model_spec: Model specification string + - Simple: "llama3.1:8b" or "openai/gpt-4o" + - Mixed: "ollama:llama3.1:8b" or "openrouter:openai/gpt-4o" + + Returns: + Tuple of (provider_name, model_identifier) + - provider_name: "openrouter" or "ollama" + - model_identifier: The model name to pass to the provider + + Raises: + ValueError: If mixed mode format is invalid or provider is unknown + + Examples: + # Simple mode (uses LLM_PROVIDER from config) + parse_model_spec("llama3.1:8b") -> ("ollama", "llama3.1:8b") + parse_model_spec("openai/gpt-4o") -> ("openrouter", "openai/gpt-4o") + + # Mixed mode (explicit provider prefix) + parse_model_spec("ollama:llama3.1:8b") -> ("ollama", "llama3.1:8b") + parse_model_spec("openrouter:openai/gpt-4o") -> ("openrouter", "openai/gpt-4o") + """ + # Check if this is mixed mode format (provider:model) + if model_spec.startswith("ollama:"): + return ("ollama", model_spec[7:]) # Remove "ollama:" prefix + elif model_spec.startswith("openrouter:"): + return ("openrouter", model_spec[11:]) # Remove "openrouter:" prefix + + # Simple mode - use global LLM_PROVIDER setting + if LLM_PROVIDER == "mixed": + raise ValueError( + f"In mixed mode, model spec '{model_spec}' must include provider prefix " + "(e.g., 'ollama:llama3.1:8b' or 'openrouter:openai/gpt-4o')" + ) + + return (LLM_PROVIDER, model_spec) + + +async def query_model( + model_spec: str, + messages: List[Dict[str, str]], + timeout: float = 120.0 +) -> Optional[Dict[str, Any]]: + """Query a single model, routing to the appropriate provider. + + This is the main routing function for single model queries. It parses the + model specification to determine the provider, gets the provider instance, + and routes the query accordingly. + + Args: + model_spec: Model specification string (see parse_model_spec for format) + messages: List of message dicts with 'role' and 'content' keys + timeout: Maximum time in seconds to wait for response (default: 120.0) + + Returns: + Dictionary with 'content' and optional 'reasoning_details' on success, + None on any failure (provider error, network error, timeout, etc.) + + Example: + # Simple mode + result = await query_model( + "llama3.1:8b", + [{"role": "user", "content": "Hello"}] + ) + + # Mixed mode + result = await query_model( + "openrouter:openai/gpt-4o", + [{"role": "user", "content": "Hello"}] + ) + + Notes: + - Maintains backward compatibility with original openrouter.py API + - Returns same format as original implementation + - Handles all errors gracefully, returns None on failure + """ + try: + provider_name, model_id = parse_model_spec(model_spec) + provider = get_provider(provider_name) + return await provider.query_model(model_id, messages, timeout) + except ValueError as e: + print(f"Error parsing model spec '{model_spec}': {e}") + return None + except Exception as e: + print(f"Unexpected error querying model '{model_spec}': {e}") + return None + + +async def query_models_parallel( + model_specs: List[str], + messages: List[Dict[str, str]] +) -> Dict[str, Optional[Dict[str, Any]]]: + """Query multiple models in parallel, with cross-provider support. + + This function intelligently routes queries to their respective providers and + executes them in parallel. For efficiency, it groups queries by provider to + leverage each provider's native parallel execution capabilities. 
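+
+    Invalid model specifications and provider initialization failures are
+    recorded as None entries in the result rather than raised, so a single
+    bad entry cannot abort the whole batch.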
+ + Args: + model_specs: List of model specification strings (see parse_model_spec) + messages: List of message dictionaries to send to all models + + Returns: + Dictionary mapping model specifications to their responses. + Successful queries return the standard response dict. + Failed queries have None as their value. + + Example: + # Mixed provider parallel queries + results = await query_models_parallel( + [ + "ollama:llama3.1:8b", + "ollama:mistral:latest", + "openrouter:openai/gpt-4o", + "openrouter:anthropic/claude-3.5-sonnet" + ], + [{"role": "user", "content": "Hello"}] + ) + # results = { + # "ollama:llama3.1:8b": {"content": "Hi!", "reasoning_details": None}, + # "ollama:mistral:latest": None, # Failed + # "openrouter:openai/gpt-4o": {"content": "Hello!", "reasoning_details": {...}}, + # "openrouter:anthropic/claude-3.5-sonnet": {"content": "Hi!", "reasoning_details": None} + # } + + Notes: + - Groups queries by provider for efficient execution + - All queries execute concurrently (both within and across providers) + - Individual model failures do not affect other queries + - Returns entry for every model in the input list + - Maintains backward compatibility with original openrouter.py API + """ + # Group models by provider for efficient parallel execution + provider_groups: Dict[str, List[Tuple[str, str]]] = {} # provider -> [(spec, model_id)] + + for model_spec in model_specs: + try: + provider_name, model_id = parse_model_spec(model_spec) + if provider_name not in provider_groups: + provider_groups[provider_name] = [] + provider_groups[provider_name].append((model_spec, model_id)) + except ValueError as e: + print(f"Error parsing model spec '{model_spec}': {e}") + # Add None entry for invalid specs + provider_groups.setdefault("_invalid", []).append((model_spec, None)) + + # Execute queries grouped by provider in parallel + all_results = {} + + # Create tasks for each provider group + provider_tasks = [] + for provider_name, specs_and_models in provider_groups.items(): + if provider_name == "_invalid": + # Handle invalid specs - add None entries + for model_spec, _ in specs_and_models: + all_results[model_spec] = None + continue + + # Get provider instance + try: + provider = get_provider(provider_name) + except ValueError as e: + print(f"Error getting provider '{provider_name}': {e}") + # Mark all models for this provider as failed + for model_spec, _ in specs_and_models: + all_results[model_spec] = None + continue + + # Extract just the model IDs for this provider + model_ids = [model_id for _, model_id in specs_and_models] + + # Create task for this provider's batch query + async def query_provider_batch(prov, model_ids_list, specs_list): + """Helper to query a provider's models and map back to original specs.""" + results = await prov.query_models_parallel(model_ids_list, messages) + # Map back from model_id to original model_spec + return { + spec: results[model_id] + for spec, model_id in specs_list + } + + provider_tasks.append( + query_provider_batch(provider, model_ids, specs_and_models) + ) + + # Wait for all provider batches to complete + if provider_tasks: + provider_results = await asyncio.gather(*provider_tasks) + + # Merge all results + for result_dict in provider_results: + all_results.update(result_dict) + + # Ensure we have an entry for every input model spec + for model_spec in model_specs: + if model_spec not in all_results: + all_results[model_spec] = None + + return all_results + + +# Export public API +__all__ = [ + 'LLMProvider', + 'OllamaProvider', + 
'OpenRouterProvider', + 'get_provider', + 'parse_model_spec', + 'query_model', + 'query_models_parallel', +] diff --git a/backend/providers/base.py b/backend/providers/base.py new file mode 100644 index 000000000..967c52d68 --- /dev/null +++ b/backend/providers/base.py @@ -0,0 +1,109 @@ +"""Abstract base class for LLM providers. + +This module defines the interface that all LLM provider implementations must follow. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + + +class LLMProvider(ABC): + """Abstract base class for LLM providers. + + This class defines the interface contract that all LLM provider implementations + must follow. Providers are responsible for communicating with their respective + LLM APIs and handling errors gracefully. + + Interface Contract: + All methods should handle errors internally and return None on failure + rather than raising exceptions. This allows the system to continue + operating even when individual providers or models fail. + + Return Format: + All query methods must return a dictionary with the following structure: + { + "content": str, # The main response text from the model + "reasoning_details": Optional[Any] # Optional reasoning trace or metadata + } + + Returns None if the query fails for any reason (network error, API error, + timeout, invalid model, etc.). + + Error Handling: + Implementations should: + - Catch all exceptions internally + - Log errors appropriately + - Return None on any failure + - Never raise exceptions to callers + - Implement appropriate timeout handling + """ + + @abstractmethod + async def query_model( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0 + ) -> Optional[Dict[str, Any]]: + """Query a single model with the given messages. + + Args: + model: Model identifier (format depends on provider implementation) + messages: List of message dictionaries with 'role' and 'content' keys + following the standard chat completion format: + [{"role": "user", "content": "..."}, + {"role": "assistant", "content": "..."}, ...] + timeout: Maximum time in seconds to wait for response (default: 120.0) + + Returns: + Dictionary with 'content' and optional 'reasoning_details' on success, + None on any failure (timeout, network error, API error, etc.) + + Example: + result = await provider.query_model( + "gpt-4", + [{"role": "user", "content": "Hello"}] + ) + if result: + print(result["content"]) + """ + pass + + @abstractmethod + async def query_models_parallel( + self, + models: List[str], + messages: List[Dict[str, str]] + ) -> Dict[str, Optional[Dict[str, Any]]]: + """Query multiple models in parallel with the same messages. + + This method should execute all queries concurrently to minimize total + latency. Each model query is independent and failures should not affect + other queries. + + Args: + models: List of model identifiers to query + messages: List of message dictionaries to send to all models + (same format as query_model) + + Returns: + Dictionary mapping model identifiers to their responses. + Successful queries return the standard response dict. + Failed queries have None as their value. 
+ + Example: + results = await provider.query_models_parallel( + ["gpt-4", "claude-3"], + [{"role": "user", "content": "Hello"}] + ) + # results = { + # "gpt-4": {"content": "Hi there!", "reasoning_details": None}, + # "claude-3": None # This query failed + # } + + Notes: + - All queries execute concurrently using asyncio.gather or similar + - Individual model failures should not cause the entire operation to fail + - The returned dict should contain an entry for every model in the input list + """ + pass diff --git a/backend/providers/ollama.py b/backend/providers/ollama.py new file mode 100644 index 000000000..8f2fad443 --- /dev/null +++ b/backend/providers/ollama.py @@ -0,0 +1,185 @@ +"""Ollama provider for local LLM instances. + +This module implements the LLM provider interface for Ollama, which runs +models locally. Ollama exposes an OpenAI-compatible API endpoint, making +integration straightforward. + +Key Features: + - Uses OpenAI-compatible /v1/chat/completions endpoint + - No authentication required (local instance) + - Supports Ollama model tags (e.g., llama3.1:8b, mistral:latest) + - Graceful handling of connection errors and missing models + - Parallel query support via asyncio +""" + +import asyncio +import logging +from typing import Any, Dict, List, Optional + +import httpx + +from .base import LLMProvider + +# Configure logging +logger = logging.getLogger(__name__) + + +class OllamaProvider(LLMProvider): + """Provider implementation for local Ollama instances. + + Ollama is a tool for running LLMs locally. It exposes an OpenAI-compatible + API endpoint, which this provider uses for communication. + + Attributes: + base_url: Base URL for the Ollama instance (default: http://localhost:11434) + + Example: + provider = OllamaProvider() + result = await provider.query_model( + "llama3.1:8b", + [{"role": "user", "content": "Hello"}] + ) + + Common Error Scenarios: + - Connection refused: Ollama service not running + - 404 Not Found: Model not pulled/available locally + - Timeout: Model loading or generation taking too long + - Network errors: Local network issues + """ + + def __init__(self, base_url: str = 'http://localhost:11434'): + """Initialize the Ollama provider. + + Args: + base_url: Base URL for the Ollama instance. Should not include + trailing slash. Default is http://localhost:11434 + """ + self.base_url = base_url.rstrip('/') + self.api_endpoint = f"{self.base_url}/v1/chat/completions" + + async def query_model( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0 + ) -> Optional[Dict[str, Any]]: + """Query a single Ollama model with the given messages. + + This method sends a request to the local Ollama instance using the + OpenAI-compatible chat completions endpoint. + + Args: + model: Ollama model tag (e.g., "llama3.1:8b", "mistral:latest") + messages: List of message dictionaries with 'role' and 'content' keys + timeout: Maximum time in seconds to wait for response (default: 120.0) + + Returns: + Dictionary with 'content' and 'reasoning_details' on success, + None on any failure. 
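+
+            The 'reasoning_details' field is always None here, since the
+            Ollama chat completions endpoint does not return a reasoning trace.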
+ + Error Handling: + - Connection errors → None (logs warning about Ollama not running) + - 404 errors → None (logs warning about model not found) + - Timeouts → None (logs warning about timeout) + - Any other errors → None (logs error details) + """ + headers = { + "Content-Type": "application/json", + } + + payload = { + "model": model, + "messages": messages, + } + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + self.api_endpoint, + headers=headers, + json=payload + ) + response.raise_for_status() + + data = response.json() + message = data['choices'][0]['message'] + + return { + 'content': message.get('content'), + 'reasoning_details': None # Ollama doesn't provide reasoning details + } + + except httpx.ConnectError as e: + logger.warning( + f"Failed to connect to Ollama at {self.base_url}. " + f"Is Ollama running? Error: {e}" + ) + return None + + except httpx.HTTPStatusError as e: + if e.response.status_code == 404: + logger.warning( + f"Model '{model}' not found in Ollama. " + f"Pull it with: ollama pull {model}" + ) + else: + logger.error( + f"HTTP error querying Ollama model {model}: " + f"Status {e.response.status_code}, {e}" + ) + return None + + except httpx.TimeoutException as e: + logger.warning( + f"Timeout querying Ollama model {model} after {timeout}s. " + f"Model may be loading or generation is slow. Error: {e}" + ) + return None + + except Exception as e: + logger.error(f"Unexpected error querying Ollama model {model}: {e}") + return None + + async def query_models_parallel( + self, + models: List[str], + messages: List[Dict[str, str]] + ) -> Dict[str, Optional[Dict[str, Any]]]: + """Query multiple Ollama models in parallel with the same messages. + + This method executes all queries concurrently to minimize total latency. + Each model query is independent, and failures do not affect other queries. + + Args: + models: List of Ollama model tags to query + messages: List of message dictionaries to send to all models + + Returns: + Dictionary mapping model tags to their responses. + Successful queries return the standard response dict. + Failed queries have None as their value. + + Example: + results = await provider.query_models_parallel( + ["llama3.1:8b", "mistral:latest"], + [{"role": "user", "content": "Hello"}] + ) + # results = { + # "llama3.1:8b": {"content": "Hi!", "reasoning_details": None}, + # "mistral:latest": None # This query failed + # } + + Notes: + - All queries execute concurrently using asyncio.gather + - Individual model failures do not cause the entire operation to fail + - The returned dict contains an entry for every model in the input list + """ + # Create tasks for all models + tasks = [self.query_model(model, messages) for model in models] + + # Wait for all to complete (return_exceptions=False means gather will + # not raise, but our query_model already catches all exceptions) + responses = await asyncio.gather(*tasks) + + # Map models to their responses + return {model: response for model, response in zip(models, responses)} diff --git a/backend/providers/openrouter.py b/backend/providers/openrouter.py new file mode 100644 index 000000000..24146f703 --- /dev/null +++ b/backend/providers/openrouter.py @@ -0,0 +1,125 @@ +"""OpenRouter LLM provider implementation. + +This module implements the LLMProvider interface for OpenRouter API, +preserving the exact behavior of the original openrouter.py implementation. 
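+
+Key Features:
+    - Bearer-token authentication against the OpenRouter chat completions endpoint
+    - Passes through optional 'reasoning_details' when a model returns them
+    - Graceful degradation: failed requests return None instead of raising
+    - Parallel query support via asyncio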
+""" + +import asyncio +import httpx +from typing import Any, Dict, List, Optional + +from backend.providers.base import LLMProvider + + +class OpenRouterProvider(LLMProvider): + """OpenRouter API provider implementation. + + This provider communicates with OpenRouter's API to query various LLM models. + It implements graceful degradation - failed requests return None without raising + exceptions, allowing the system to continue with successful responses. + + Args: + api_key: OpenRouter API key for authentication + api_url: OpenRouter API endpoint URL (default: https://openrouter.ai/api/v1/chat/completions) + + Example: + provider = OpenRouterProvider(api_key="sk-...") + result = await provider.query_model( + "openai/gpt-4o", + [{"role": "user", "content": "Hello"}] + ) + """ + + def __init__( + self, + api_key: str, + api_url: str = "https://openrouter.ai/api/v1/chat/completions" + ): + """Initialize the OpenRouter provider. + + Args: + api_key: OpenRouter API key for authentication + api_url: OpenRouter API endpoint URL (default: https://openrouter.ai/api/v1/chat/completions) + """ + self.api_key = api_key + self.api_url = api_url + + async def query_model( + self, + model: str, + messages: List[Dict[str, str]], + timeout: float = 120.0 + ) -> Optional[Dict[str, Any]]: + """Query a single model via OpenRouter API. + + Args: + model: OpenRouter model identifier (e.g., "openai/gpt-4o") + messages: List of message dicts with 'role' and 'content' + timeout: Request timeout in seconds (default: 120.0) + + Returns: + Response dict with 'content' and optional 'reasoning_details', or None if failed + + Notes: + - Handles all errors internally, returns None on failure + - Preserves exact behavior from original implementation + - Prints error messages to console for debugging + """ + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + + payload = { + "model": model, + "messages": messages, + } + + try: + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post( + self.api_url, + headers=headers, + json=payload + ) + response.raise_for_status() + + data = response.json() + message = data['choices'][0]['message'] + + return { + 'content': message.get('content'), + 'reasoning_details': message.get('reasoning_details') + } + + except Exception as e: + print(f"Error querying model {model}: {e}") + return None + + async def query_models_parallel( + self, + models: List[str], + messages: List[Dict[str, str]] + ) -> Dict[str, Optional[Dict[str, Any]]]: + """Query multiple models in parallel. 
+ + Args: + models: List of OpenRouter model identifiers + messages: List of message dicts to send to each model + + Returns: + Dict mapping model identifier to response dict (or None if failed) + + Notes: + - Uses asyncio.gather for concurrent execution + - Individual model failures don't affect other queries + - Returns entry for every model in input list + """ + # Create tasks for all models + tasks = [self.query_model(model, messages) for model in models] + + # Wait for all to complete + responses = await asyncio.gather(*tasks) + + # Map models to their responses + return {model: response for model, response in zip(models, responses)} diff --git a/frontend/package-lock.json b/frontend/package-lock.json index a6a7c3430..adc26cc05 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -2485,9 +2485,10 @@ } }, "node_modules/mdast-util-to-hast": { - "version": "13.2.0", - "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", - "integrity": "sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==", + "version": "13.2.1", + "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.1.tgz", + "integrity": "sha512-cctsq2wp5vTsLIcaymblUriiTcZd0CwWtCbLvrOzYCDZoWyMNV8sZ7krj09FSnsiJi3WVsHLM4k6Dq/yaPyCXA==", + "license": "MIT", "dependencies": { "@types/hast": "^3.0.0", "@types/mdast": "^4.0.0",