diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..d258d2a --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,55 @@ +name: Tests + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + backend-tests: + runs-on: ubuntu-latest + defaults: + run: + working-directory: backend + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Install dependencies + run: uv sync --dev + + - name: Run tests + run: uv run python -m pytest tests/ -v --tb=short + env: + LLM_API_KEY: "test-key" + + frontend-build: + runs-on: ubuntu-latest + defaults: + run: + working-directory: frontend + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install dependencies + run: npm ci + + - name: Build + run: npm run build diff --git a/.gitignore b/.gitignore index 5d1e9ac..b582a97 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ .DS_Store Thumbs.db -# 环境变量(保护敏感信息) +# Environment variables .env .env.local .env.*.local @@ -36,7 +36,7 @@ yarn-error.log* *.swp *.swo -# 测试 +# Test .pytest_cache/ .coverage htmlcov/ @@ -45,17 +45,20 @@ htmlcov/ .cursor/ .claude/ -# 文档与测试程序 +# Documentation and test programs mydoc/ mytest/ -# 日志文件 +# Log files backend/logs/ *.log -# 上传文件 +# Uploads backend/uploads/ -# Docker 数据 +# SQLite data +backend/data/ + +# Docker data/backend/venv311/ backend/venv311/ diff --git a/DESIGN.md b/DESIGN.md new file mode 100644 index 0000000..6c17d48 --- /dev/null +++ b/DESIGN.md @@ -0,0 +1,118 @@ +# MiroFish Design Tokens + +Extracted from `PredictionView.vue`. All new views and components must follow these tokens. 
+ +--- + +## Typography + +| Role | Family | Variable | +|-------------|---------------------------------------------------|------------| +| Monospace | `'JetBrains Mono', 'SF Mono', monospace` | `--mono` | +| Sans-serif | `'Space Grotesk', 'Noto Sans SC', system-ui, sans-serif` | `--sans` | + +### Sizes & Weights + +| Element | Size | Weight | Letter-spacing | +|-------------------|---------|--------|----------------| +| Page title | 28px | 700 | -0.02em | +| Section heading | 16px | 600 | -0.01em | +| Body text | 14px | 400 | normal | +| Small / caption | 12px | 500 | 0.02em | +| Badge label | 11px | 600 | 0.05em | +| Mono data | 13-14px | 500 | normal | + +--- + +## Colors + +| Token | Hex | Usage | +|--------------------|-----------|--------------------------------| +| `--text-primary` | `#000` | Nav, headings, primary text | +| `--orange` | `#FF4500` | Accent, links, active states | +| `--green` | `#10B981` | Success, BUY signal, positive | +| `--red` | `#dc2626` | Error, SELL signal, negative | +| `--amber` | `#F59E0B` | Warning, MEDIUM tier | +| `--amber-bg` | `#FFFBEB` | Amber background fill | +| `--border` | `#EAEAEA` | Panel borders, dividers | +| `--bg-subtle` | `#FAFAFA` | Subtle background fills | +| `--text-secondary` | `#666` | Secondary labels | +| `--text-muted` | `#999` | Muted / tertiary text | + +--- + +## Spacing + +| Token | Value | Usage | +|-------------|---------|----------------------------| +| Max-width | 1400px | Page container | +| Padding | 40px | Container horizontal pad | +| Grid gap | 30px | Between panel columns | + +--- + +## Components + +### Panels +- Border: `1px solid var(--border)` +- Border-radius: **0** (no rounded corners) +- Background: `#fff` +- No box-shadow + +### Badges +- Uppercase text, `11px` font, `600` weight, `0.05em` letter-spacing +- Padding: `4px 10px` +- Border: `1px solid` (color matches text) +- No border-radius + +### Skeleton Loaders +- Background: `var(--bg-subtle)` +- Shimmer animation (left-to-right 
sweep) +- Match the dimensions of the content they replace + +### Empty States +- Centered text, muted color (`var(--text-muted)`) +- Optional icon above text + +### Progress Bars +- Track: `var(--bg-subtle)` +- Fill: `var(--orange)` or signal color +- Height: 4-6px +- No border-radius + +--- + +## Anti-patterns + +Do **not** use: +- Rounded corners (`border-radius`) +- Box shadows (`box-shadow`) +- Gradient fills (`linear-gradient`, `radial-gradient`) + +--- + +## CSS Variables Reference + +```css +:root { + --mono: 'JetBrains Mono', 'SF Mono', monospace; + --sans: 'Space Grotesk', 'Noto Sans SC', system-ui, sans-serif; + --orange: #FF4500; + --green: #10B981; + --red: #dc2626; + --border: #EAEAEA; + --bg-subtle: #FAFAFA; + --text-primary: #000; + --text-secondary: #666; + --text-muted: #999; +} +``` + +--- + +## Responsive Breakpoints + +| Breakpoint | Target | Notes | +|------------|---------|-----------------------------------| +| 1024px | Tablet | Stack grid to single column | +| 768px | Mobile | Reduce padding, smaller type | diff --git a/README.md b/README.md index a457030..d4422d7 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,8 @@ The [original MiroFish](https://github.com/666ghj/MiroFish) was built for the Ch 3. **Simulation** — Agents interact on simulated social platforms: posting, replying, arguing, shifting opinions. The system tracks sentiment evolution, topic propagation, and influence dynamics in real time. 4. **Report** — A ReportAgent analyzes the post-simulation environment, interviews a focus group of agents, searches the knowledge graph for evidence, and generates a structured analysis. 5. **Interaction** — Chat with any agent from the simulated world. Ask them why they posted what they posted. Full memory and personality persists. +6. **Prediction Markets** — Browse live Polymarket markets, run a multi-agent debate simulation, and generate calibrated trading signals (BUY_YES / BUY_NO / HOLD) with edge and confidence scores. +7. 
**Backtesting** — Validate signal quality against resolved markets. Computes accuracy, Brier score, ROI, Sharpe ratio, max drawdown, and calibration RMSE. Paper trading mode simulates execution with slippage. ## Screenshot @@ -137,12 +139,15 @@ This fork introduces a clean abstraction layer between the application and the g ┌─────────────────────────────────────────┐ │ Flask API │ │ graph.py simulation.py report.py │ +│ prediction.py backtest.py │ └──────────────┬──────────────────────────┘ │ app.extensions['neo4j_storage'] ┌──────────────▼──────────────────────────┐ │ Service Layer │ │ EntityReader GraphToolsService │ │ GraphMemoryUpdater ReportAgent │ +│ PredictionManager Backtester │ +│ Calibrator PaperTrader │ └──────────────┬──────────────────────────┘ │ storage: GraphStorage ┌──────────────▼──────────────────────────┐ @@ -171,6 +176,8 @@ This fork introduces a clean abstraction layer between the application and the g - Hybrid search: 0.7 × vector similarity + 0.3 × BM25 keyword search - Synchronous NER/RE extraction via local LLM (replaces Zep's async episodes) - All original dataclasses and LLM tools (InsightForge, Panorama, Agent Interviews) preserved +- Prediction pipeline: market → scenario → LLM debate → calibrated signal (60-90s per market) +- SQLite (WAL mode) for backtest results, paper trading positions, calibration models ## Hardware Requirements @@ -203,3 +210,5 @@ This is a modified fork of [MiroFish](https://github.com/666ghj/MiroFish) by [66 - Entire frontend translated from Chinese to English (20 files, 1,000+ strings) - All Zep references replaced with Neo4j across the UI - Rebranded to MiroFish Offline +- Prediction market signal engine (Polymarket integration, LLM debate simulation) +- Backtesting + paper trading system with SQLite storage and 62-test suite diff --git a/ROADMAP.md b/ROADMAP.md index 0d33a19..dd8c352 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,8 +1,8 @@ # MiroFish-Offline Roadmap -## Current State (v0.2.0) +## Current 
State (v0.2.0+) -Fully local fork running on Neo4j CE + Ollama. All Zep Cloud dependencies removed. Core pipeline works: upload text → build knowledge graph → entity extraction → simulation → report generation. +Fully local fork running on Neo4j CE + Ollama. All Zep Cloud dependencies removed. Core pipeline works: upload text → build knowledge graph → entity extraction → simulation → report generation. Prediction market signal engine with backtesting, paper trading, and SQLite storage. 62-test suite. --- @@ -52,7 +52,7 @@ Fully local fork running on Neo4j CE + Ollama. All Zep Cloud dependencies remove - [ ] Authentication & multi-user support - [ ] Graph versioning: snapshot and restore graph states - [ ] Plugin system for custom NER extractors, search strategies, and report templates -- [ ] Comprehensive test suite (unit + integration + E2E) +- [x] Comprehensive test suite — 62 tests (unit + integration) for prediction/backtest system - [ ] Performance benchmarks: document throughput (texts/min) and latency per hardware tier - [ ] Helm chart for Kubernetes deployment diff --git a/TODOS.md b/TODOS.md new file mode 100644 index 0000000..8ddcf20 --- /dev/null +++ b/TODOS.md @@ -0,0 +1,12 @@ +# MiroFish TODOs + +## Backlog + +(empty) + +## Completed + +- [x] ~~**P2** JSON to SQLite migration for historical prediction runs~~ — prediction_runs table + SQLitePredictionStore + migration script +- [x] ~~**P2** CI/CD pipeline via GitHub Actions~~ — .github/workflows/tests.yml (backend pytest + frontend build) +- [x] ~~**P2** SQLite disk-full error handling~~ — StorageError + _safe_write wrapper on all write methods +- [x] ~~**P2** Extract shared CSS components before Phase 2~~ — Phase 2 shipped without this; CSS components added inline per view diff --git a/backend/app/__init__.py b/backend/app/__init__.py index e584f2f..13d9561 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -1,12 +1,11 @@ """ -MiroFish Backend - Flask Application Factory +MiroFish 
Backend — Flask application factory """ import os import warnings -# Suppress multiprocessing resource_tracker warnings (from third-party libraries like transformers) -# Must be set before all other imports +# Suppress multiprocessing resource_tracker warnings from third-party libs warnings.filterwarnings("ignore", message=".*resource_tracker.*") from flask import Flask, request @@ -17,19 +16,17 @@ def create_app(config_class=Config): - """Flask application factory function""" + """Flask application factory""" app = Flask(__name__) app.config.from_object(config_class) - # Configure JSON encoding: ensure Chinese displays directly (not as \uXXXX) - # Flask >= 2.3 uses app.json.ensure_ascii, older versions use JSON_AS_ASCII config + # JSON encoding: display CJK characters directly (not \uXXXX) if hasattr(app, 'json') and hasattr(app.json, 'ensure_ascii'): app.json.ensure_ascii = False - # Setup logging logger = setup_logger('mirofish') - # Only print startup info in reloader subprocess (avoid printing twice in debug mode) + # Only log startup info in the reloader child process (avoid duplicate logs in debug mode) is_reloader_process = os.environ.get('WERKZEUG_RUN_MAIN') == 'true' debug_mode = app.config.get('DEBUG', False) should_log_startup = not debug_mode or is_reloader_process @@ -54,11 +51,22 @@ def create_app(config_class=Config): # Store None so endpoints can return 503 gracefully app.extensions['neo4j_storage'] = None - # Register simulation process cleanup function (ensure all simulation processes terminate on server shutdown) + # --- Initialize SQLite storage --- + from .storage.sqlite_store import SQLiteStore + try: + sqlite_store = SQLiteStore(Config.SQLITE_DB_PATH) + app.extensions['sqlite'] = sqlite_store + if should_log_startup: + logger.info("SQLiteStore initialized (%s)", Config.SQLITE_DB_PATH) + except Exception as e: + logger.error("SQLiteStore initialization failed: %s", e) + app.extensions['sqlite'] = None + + # Register simulation process cleanup 
from .services.simulation_runner import SimulationRunner SimulationRunner.register_cleanup() if should_log_startup: - logger.info("Simulation process cleanup function registered") + logger.info("Simulation process cleanup registered") # Request logging middleware @app.before_request @@ -66,7 +74,7 @@ def log_request(): logger = get_logger('mirofish.request') logger.debug(f"Request: {request.method} {request.path}") if request.content_type and 'json' in request.content_type: - logger.debug(f"Request body: {request.get_json(silent=True)}") + logger.debug(f"Body: {request.get_json(silent=True)}") @app.after_request def log_response(response): @@ -75,10 +83,12 @@ def log_response(response): return response # Register blueprints - from .api import graph_bp, simulation_bp, report_bp + from .api import graph_bp, simulation_bp, report_bp, prediction_bp, backtest_bp app.register_blueprint(graph_bp, url_prefix='/api/graph') app.register_blueprint(simulation_bp, url_prefix='/api/simulation') app.register_blueprint(report_bp, url_prefix='/api/report') + app.register_blueprint(prediction_bp, url_prefix='/api/prediction') + app.register_blueprint(backtest_bp, url_prefix='/api/backtest') # Health check @app.route('/health') @@ -86,7 +96,6 @@ def health(): return {'status': 'ok', 'service': 'MiroFish-Offline Backend'} if should_log_startup: - logger.info("MiroFish-Offline Backend startup complete") + logger.info("MiroFish-Offline Backend started") return app - diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index 1e5fc76..a851348 100644 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -1,5 +1,5 @@ """ -API Routes Module +API route modules """ from flask import Blueprint @@ -7,8 +7,11 @@ graph_bp = Blueprint('graph', __name__) simulation_bp = Blueprint('simulation', __name__) report_bp = Blueprint('report', __name__) +prediction_bp = Blueprint('prediction', __name__) +backtest_bp = Blueprint('backtest', __name__) from . 
import graph # noqa: E402, F401 from . import simulation # noqa: E402, F401 from . import report # noqa: E402, F401 - +from . import prediction # noqa: E402, F401 +from . import backtest # noqa: E402, F401 diff --git a/backend/app/api/backtest.py b/backend/app/api/backtest.py new file mode 100644 index 0000000..5dfe13a --- /dev/null +++ b/backend/app/api/backtest.py @@ -0,0 +1,143 @@ +""" +Backtest API routes +""" + +import threading +from flask import request, jsonify, current_app + +from . import backtest_bp +from ..models.backtest import BacktestRun +from ..services.backtester import Backtester +from ..utils.logger import get_logger + +logger = get_logger('mirofish.api.backtest') + +# Track running backtests to prevent concurrent starts +_running_backtests = {} +_lock = threading.Lock() + +MAX_MARKETS = 500 + + +@backtest_bp.route('/run', methods=['POST']) +def start_backtest(): + """ + Start a backtest run. + + Request JSON: + { + "num_markets": 50, + "config_overrides": {} (optional) + } + """ + try: + store = current_app.extensions.get('sqlite') + if store is None: + return jsonify({"success": False, "error": "SQLite store not initialized"}), 503 + + data = request.get_json() or {} + num_markets = data.get('num_markets', 50) + config_overrides = data.get('config_overrides', {}) + + # Validate input + if not isinstance(num_markets, int) or num_markets < 1: + return jsonify({"success": False, "error": "num_markets must be a positive integer"}), 400 + num_markets = min(num_markets, MAX_MARKETS) + + # DB-level guard: works across processes (gunicorn) + active_id = store.has_active_backtest() + if active_id: + return jsonify({ + "success": False, + "error": "A backtest is already running", + "active_run_id": active_id, + }), 409 + + with _lock: + + # Create run + register thread atomically inside the lock + bt_run = BacktestRun( + config=config_overrides, + total_markets=num_markets, + ) + store.save_backtest_run(bt_run) + + backtester = Backtester(store) + + def 
run_backtest(): + try: + backtester.run( + num_markets=num_markets, + config_overrides=config_overrides, + bt_run=bt_run, + ) + except Exception as e: + logger.error(f"Backtest thread failed: {e}", exc_info=True) + finally: + with _lock: + _running_backtests.pop(bt_run.id, None) + + thread = threading.Thread(target=run_backtest, daemon=True) + _running_backtests[bt_run.id] = thread + thread.start() + + return jsonify({ + "success": True, + "data": { + "run_id": bt_run.id, + "status": "started", + "message": f"Backtest started with {num_markets} markets", + }, + }) + + except Exception as e: + logger.error(f"Failed to start backtest: {e}", exc_info=True) + return jsonify({"success": False, "error": str(e)}), 500 + + +@backtest_bp.route('/run/', methods=['GET']) +def get_backtest_run(run_id: str): + """Get backtest run status, results, and metrics.""" + try: + store = current_app.extensions.get('sqlite') + if store is None: + return jsonify({"success": False, "error": "SQLite store not initialized"}), 503 + + bt_run = store.get_backtest_run(run_id) + if not bt_run: + return jsonify({"success": False, "error": f"Run not found: {run_id}"}), 404 + + results = store.get_results_by_run(run_id) + + return jsonify({ + "success": True, + "data": { + **bt_run.to_dict(), + "results": [r.to_dict() for r in results], + }, + }) + + except Exception as e: + logger.error(f"Failed to get backtest run: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + +@backtest_bp.route('/runs', methods=['GET']) +def list_backtest_runs(): + """List all backtest runs.""" + try: + store = current_app.extensions.get('sqlite') + if store is None: + return jsonify({"success": False, "error": "SQLite store not initialized"}), 503 + + runs = store.list_backtest_runs() + + return jsonify({ + "success": True, + "data": [r.to_dict() for r in runs], + "count": len(runs), + }) + + except Exception as e: + logger.error(f"Failed to list backtest runs: {e}") + return jsonify({"success": False, 
"error": str(e)}), 500 diff --git a/backend/app/api/prediction.py b/backend/app/api/prediction.py new file mode 100644 index 0000000..aeba8a4 --- /dev/null +++ b/backend/app/api/prediction.py @@ -0,0 +1,276 @@ +""" +Prediction Market API routes +""" + +import traceback +import threading +from flask import request, jsonify, current_app + +from . import prediction_bp +from ..config import Config +from ..models.prediction import PredictionMarket, PredictionRunManager, PredictionRunStatus +from ..services.polymarket_client import PolymarketClient +from ..services.prediction_manager import PredictionManager +from ..storage.prediction_store import SQLitePredictionStore +from ..models.task import TaskManager, TaskStatus +from ..utils.logger import get_logger + +logger = get_logger('mirofish.api.prediction') + + +def _get_pred_store(): + """Get the prediction store — SQLite if available, JSON fallback.""" + sqlite_store = current_app.extensions.get('sqlite') + if sqlite_store: + return SQLitePredictionStore(sqlite_store) + return PredictionRunManager + + +def _find_run(run_id: str): + """Find a prediction run in SQLite first, then JSON fallback.""" + store = _get_pred_store() + if isinstance(store, SQLitePredictionStore): + run = store.get_run(run_id) + if run: + return run + # Fall back to JSON for pre-migration runs + return PredictionRunManager.get_run(run_id) + return store.get_run(run_id) + + +# ============== Market Browsing ============== + +@prediction_bp.route('/markets', methods=['GET']) +def get_markets(): + """ + Fetch active markets from Polymarket. 
+ + Query params: + min_volume: Minimum volume filter (default 10000) + limit: Max results (default 50) + search: Search query (optional) + """ + try: + min_volume = request.args.get('min_volume', 10000, type=float) + limit = request.args.get('limit', 50, type=int) + search = request.args.get('search', None) + + client = PolymarketClient() + markets = client.fetch_active_markets( + min_volume=min_volume, + limit=limit, + search=search, + ) + + return jsonify({ + "success": True, + "data": [m.to_dict() for m in markets], + "count": len(markets), + }) + + except Exception as e: + logger.error(f"Failed to fetch markets: {e}") + return jsonify({ + "success": False, + "error": str(e), + }), 500 + + +# ============== Prediction Runs ============== + +@prediction_bp.route('/run', methods=['POST']) +def start_prediction_run(): + """ + Start a prediction run for a market. + + Request JSON: + { + "market": { ... PredictionMarket dict ... } + } + + Returns run_id + task_id for polling. + """ + try: + data = request.get_json() or {} + market_data = data.get('market') + + if not market_data: + return jsonify({"success": False, "error": "market data required"}), 400 + + market = PredictionMarket.from_dict(market_data) + + if not market.title: + return jsonify({"success": False, "error": "market must have a title"}), 400 + + # Capture store in request context (before thread starts) + sqlite_store = current_app.extensions.get('sqlite') + + # Create run — use SQLite store if available, fall back to JSON files + if sqlite_store: + pred_store = SQLitePredictionStore(sqlite_store) + run = pred_store.create_run() + else: + pred_store = PredictionRunManager + run = PredictionRunManager.create_run() + + # Create async task + task_manager = TaskManager() + task_id = task_manager.create_task( + task_type="prediction_run", + metadata={"run_id": run.run_id, "market_title": market.title}, + ) + + def run_pipeline(): + try: + task_manager.update_task( + task_id, + status=TaskStatus.PROCESSING, 
+ progress=0, + message="Starting prediction pipeline...", + ) + + def progress_callback(stage, message): + stage_progress = { + "fetching_market": 5, + "generating_scenario": 25, + "running_simulation": 60, + "analyzing": 85, + "completed": 100, + } + progress = stage_progress.get(stage, 50) + task_manager.update_task( + task_id, + progress=progress, + message=message, + ) + + manager = PredictionManager(result_store=pred_store, sqlite_store=sqlite_store) + result = manager.run_prediction( + market=market, + run=run, + progress_callback=progress_callback, + ) + + if result.status == PredictionRunStatus.COMPLETED: + task_manager.complete_task(task_id, result={ + "run_id": result.run_id, + "status": "completed", + "signal": result.signal, + }) + else: + task_manager.fail_task(task_id, result.error or "Pipeline failed") + + except Exception as e: + logger.error(f"Prediction pipeline failed: {e}", exc_info=True) + task_manager.fail_task(task_id, str(e)) + + thread = threading.Thread(target=run_pipeline, daemon=True) + thread.start() + + return jsonify({ + "success": True, + "data": { + "run_id": run.run_id, + "task_id": task_id, + "status": "started", + "message": "Prediction pipeline started", + }, + }) + + except Exception as e: + logger.error(f"Failed to start prediction run: {e}") + return jsonify({ + "success": False, + "error": str(e), + "traceback": traceback.format_exc(), + }), 500 + + +@prediction_bp.route('/run//status', methods=['GET']) +def get_run_status(run_id: str): + """Get prediction run status""" + try: + run = _find_run(run_id) + if not run: + return jsonify({"success": False, "error": f"Run not found: {run_id}"}), 404 + + return jsonify({ + "success": True, + "data": { + "run_id": run.run_id, + "status": run.status.value, + "progress_message": run.progress_message, + "error": run.error, + }, + }) + + except Exception as e: + logger.error(f"Failed to get run status: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + 
+@prediction_bp.route('/run/', methods=['GET']) +def get_run(run_id: str): + """Get full prediction run details""" + try: + run = _find_run(run_id) + if not run: + return jsonify({"success": False, "error": f"Run not found: {run_id}"}), 404 + + return jsonify({ + "success": True, + "data": run.to_dict(), + }) + + except Exception as e: + logger.error(f"Failed to get run: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + +@prediction_bp.route('/runs', methods=['GET']) +def list_runs(): + """List all prediction runs""" + try: + limit = request.args.get('limit', 50, type=int) + store = _get_pred_store() + if isinstance(store, SQLitePredictionStore): + runs = store.list_runs(limit=limit) + # Also include any pre-migration JSON runs not yet in SQLite + json_runs = PredictionRunManager.list_runs(limit=limit) + sqlite_ids = {r.run_id for r in runs} + for jr in json_runs: + if jr.run_id not in sqlite_ids: + runs.append(jr) + runs.sort(key=lambda r: r.created_at, reverse=True) + runs = runs[:limit] + else: + runs = store.list_runs(limit=limit) + + return jsonify({ + "success": True, + "data": [r.to_dict() for r in runs], + "count": len(runs), + }) + + except Exception as e: + logger.error(f"Failed to list runs: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + +@prediction_bp.route('/run/', methods=['DELETE']) +def delete_run(run_id: str): + """Delete a prediction run""" + try: + store = _get_pred_store() + if isinstance(store, SQLitePredictionStore): + success = store.delete_run(run_id) or PredictionRunManager.delete_run(run_id) + else: + success = PredictionRunManager.delete_run(run_id) + if not success: + return jsonify({"success": False, "error": f"Run not found: {run_id}"}), 404 + + return jsonify({"success": True, "message": f"Run deleted: {run_id}"}) + + except Exception as e: + logger.error(f"Failed to delete run: {e}") + return jsonify({"success": False, "error": str(e)}), 500 diff --git a/backend/app/config.py 
b/backend/app/config.py index de706ca..a309216 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,60 +1,60 @@ """ -Configuration Management -Loads configuration from .env file in project root directory +Configuration management +Loads config from project root .env file """ import os from dotenv import load_dotenv -# Load .env file from project root +# Load .env from project root # Path: MiroFish/.env (relative to backend/app/config.py) project_root_env = os.path.join(os.path.dirname(__file__), '../../.env') if os.path.exists(project_root_env): load_dotenv(project_root_env, override=True) else: - # If no .env in root, try to load environment variables (for production) + # Fall back to environment variables (for production) load_dotenv(override=True) class Config: - """Flask configuration class""" + """Flask configuration""" - # Flask configuration + # Flask SECRET_KEY = os.environ.get('SECRET_KEY', 'mirofish-secret-key') DEBUG = os.environ.get('FLASK_DEBUG', 'True').lower() == 'true' - # JSON configuration - disable ASCII escaping to display Chinese directly (not as \uXXXX) + # JSON — disable ASCII escaping so CJK characters display directly JSON_AS_ASCII = False - # LLM configuration (unified OpenAI format) + # LLM (unified OpenAI format) LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'http://localhost:11434/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'qwen2.5:32b') - # Neo4j configuration + # Neo4j graph database NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687') NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j') NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'mirofish') - # Embedding configuration + # Embedding EMBEDDING_MODEL = os.environ.get('EMBEDDING_MODEL', 'nomic-embed-text') EMBEDDING_BASE_URL = os.environ.get('EMBEDDING_BASE_URL', 'http://localhost:11434') - # File upload configuration + # File upload MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB UPLOAD_FOLDER = 
os.path.join(os.path.dirname(__file__), '../uploads') ALLOWED_EXTENSIONS = {'pdf', 'md', 'txt', 'markdown'} - # Text processing configuration - DEFAULT_CHUNK_SIZE = 500 # Default chunk size - DEFAULT_CHUNK_OVERLAP = 50 # Default overlap size + # Text processing + DEFAULT_CHUNK_SIZE = 500 + DEFAULT_CHUNK_OVERLAP = 50 - # OASIS simulation configuration + # OASIS simulation OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10')) OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations') - # OASIS platform available actions configuration + # OASIS platform actions OASIS_TWITTER_ACTIONS = [ 'CREATE_POST', 'LIKE_POST', 'REPOST', 'FOLLOW', 'DO_NOTHING', 'QUOTE_POST' ] @@ -64,11 +64,40 @@ class Config: 'TREND', 'REFRESH', 'DO_NOTHING', 'FOLLOW', 'MUTE' ] - # Report Agent configuration + # Report Agent REPORT_AGENT_MAX_TOOL_CALLS = int(os.environ.get('REPORT_AGENT_MAX_TOOL_CALLS', '5')) REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2')) REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5')) + # Prediction Market + POLYMARKET_GAMMA_URL = os.environ.get('POLYMARKET_GAMMA_URL', 'https://gamma-api.polymarket.com') + PREDICTION_DEFAULT_AGENTS = int(os.environ.get('PREDICTION_DEFAULT_AGENTS', '50')) + PREDICTION_DEFAULT_ROUNDS = int(os.environ.get('PREDICTION_DEFAULT_ROUNDS', '2')) + PREDICTION_SIGNAL_THRESHOLD = float(os.environ.get('PREDICTION_SIGNAL_THRESHOLD', '0.10')) + PREDICTION_TRADE_ENABLED = os.environ.get('PREDICTION_TRADE_ENABLED', 'false').lower() == 'true' + PREDICTION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/predictions') + + # Simulation LLM override — OASIS/camel-ai needs OpenAI-compatible API + SIMULATION_LLM_API_KEY = os.environ.get('SIMULATION_LLM_API_KEY', '') + SIMULATION_LLM_BASE_URL = os.environ.get('SIMULATION_LLM_BASE_URL', '') + SIMULATION_LLM_MODEL = os.environ.get('SIMULATION_LLM_MODEL', '') + 
+ # Signal calibration parameters + CALIBRATION_MARKET_REGRESSION = float(os.environ.get('CALIBRATION_MARKET_REGRESSION', '0.30')) + CALIBRATION_DATE_DAMPENING_DAYS = int(os.environ.get('CALIBRATION_DATE_DAMPENING_DAYS', '14')) + CALIBRATION_HIGH_EDGE_THRESHOLD = float(os.environ.get('CALIBRATION_HIGH_EDGE_THRESHOLD', '0.25')) + CALIBRATION_HIGH_EDGE_MAX_REDUCTION = float(os.environ.get('CALIBRATION_HIGH_EDGE_MAX_REDUCTION', '0.40')) + CALIBRATION_SHORT_DATE_PENALTY = float(os.environ.get('CALIBRATION_SHORT_DATE_PENALTY', '0.20')) + + # SQLite storage + SQLITE_DB_PATH = os.environ.get( + 'SQLITE_DB_PATH', + os.path.join(os.path.dirname(__file__), '../data/mirofish.db') + ) + + # Paper trading + PAPER_TRADING_MODE = os.environ.get('PAPER_TRADING_MODE', 'true').lower() == 'true' + @classmethod def validate(cls): """Validate required configuration""" diff --git a/backend/app/models/backtest.py b/backend/app/models/backtest.py new file mode 100644 index 0000000..87eb447 --- /dev/null +++ b/backend/app/models/backtest.py @@ -0,0 +1,163 @@ +""" +Backtest data models for historical prediction evaluation. 
+ +Schema: + BacktestRun — one full backtest execution across N markets + BacktestResult — per-market outcome within a run + BacktestMetrics — aggregate statistics for a completed run +""" + +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from typing import Any, Dict, List, Optional + + +class BacktestRunStatus(str, Enum): + PENDING = "PENDING" + RUNNING = "RUNNING" + COMPUTING_METRICS = "COMPUTING_METRICS" + COMPLETED = "COMPLETED" + FAILED = "FAILED" + + +@dataclass +class BacktestRun: + """One full backtest execution across N markets.""" + + id: str = field(default_factory=lambda: f"bt_{uuid.uuid4().hex[:12]}") + started_at: str = field(default_factory=lambda: datetime.now().isoformat()) + config: Dict[str, Any] = field(default_factory=dict) + status: str = BacktestRunStatus.PENDING.value + metrics: Optional[Dict[str, Any]] = None + total_markets: int = 0 + completed_markets: int = 0 + failed_markets: int = 0 + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "started_at": self.started_at, + "config": self.config, + "status": self.status, + "metrics": self.metrics, + "total_markets": self.total_markets, + "completed_markets": self.completed_markets, + "failed_markets": self.failed_markets, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "BacktestRun": + return cls( + id=data["id"], + started_at=data.get("started_at", ""), + config=data.get("config", {}), + status=data.get("status", BacktestRunStatus.PENDING.value), + metrics=data.get("metrics"), + total_markets=data.get("total_markets", 0), + completed_markets=data.get("completed_markets", 0), + failed_markets=data.get("failed_markets", 0), + ) + + +@dataclass +class BacktestResult: + """Per-market outcome within a backtest run.""" + + id: str = field(default_factory=lambda: f"btr_{uuid.uuid4().hex[:12]}") + run_id: str = "" + market_id: str = "" + market_title: str = "" + predicted_prob: float = 0.0 + 
market_prob: float = 0.0 + actual_outcome: Optional[str] = None + signal_direction: str = "HOLD" + edge: float = 0.0 + brier_score: Optional[float] = None + correct: Optional[int] = None # 0 or 1 + category: Optional[str] = None + confidence_tier: Optional[str] = None # HIGH, MEDIUM, LOW + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "run_id": self.run_id, + "market_id": self.market_id, + "market_title": self.market_title, + "predicted_prob": self.predicted_prob, + "market_prob": self.market_prob, + "actual_outcome": self.actual_outcome, + "signal_direction": self.signal_direction, + "edge": self.edge, + "brier_score": self.brier_score, + "correct": self.correct, + "category": self.category, + "confidence_tier": self.confidence_tier, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "BacktestResult": + return cls( + id=data["id"], + run_id=data.get("run_id", ""), + market_id=data.get("market_id", ""), + market_title=data.get("market_title", ""), + predicted_prob=data.get("predicted_prob", 0.0), + market_prob=data.get("market_prob", 0.0), + actual_outcome=data.get("actual_outcome"), + signal_direction=data.get("signal_direction", "HOLD"), + edge=data.get("edge", 0.0), + brier_score=data.get("brier_score"), + correct=data.get("correct"), + category=data.get("category"), + confidence_tier=data.get("confidence_tier"), + ) + + +@dataclass +class BacktestMetrics: + """Aggregate statistics for a completed backtest run.""" + + accuracy: float = 0.0 + brier_score: float = 0.0 + roi: float = 0.0 + sharpe_ratio: float = 0.0 + max_drawdown: float = 0.0 + calibration_rmse: float = 0.0 + markets_tested: int = 0 + avg_edge: float = 0.0 + category_metrics: Optional[Dict[str, Any]] = None + confidence_tier_metrics: Optional[Dict[str, Any]] = None + + def to_dict(self) -> Dict[str, Any]: + d = { + "accuracy": round(self.accuracy, 4), + "brier_score": round(self.brier_score, 4), + "roi": round(self.roi, 4), + "sharpe_ratio": 
round(self.sharpe_ratio, 4), + "max_drawdown": round(self.max_drawdown, 4), + "calibration_rmse": round(self.calibration_rmse, 4), + "markets_tested": self.markets_tested, + "avg_edge": round(self.avg_edge, 4), + } + if self.category_metrics is not None: + d["category_metrics"] = self.category_metrics + if self.confidence_tier_metrics is not None: + d["confidence_tier_metrics"] = self.confidence_tier_metrics + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "BacktestMetrics": + return cls( + accuracy=data.get("accuracy", 0.0), + brier_score=data.get("brier_score", 0.0), + roi=data.get("roi", 0.0), + sharpe_ratio=data.get("sharpe_ratio", 0.0), + max_drawdown=data.get("max_drawdown", 0.0), + calibration_rmse=data.get("calibration_rmse", 0.0), + markets_tested=data.get("markets_tested", 0), + avg_edge=data.get("avg_edge", 0.0), + category_metrics=data.get("category_metrics"), + confidence_tier_metrics=data.get("confidence_tier_metrics"), + ) diff --git a/backend/app/models/position.py b/backend/app/models/position.py new file mode 100644 index 0000000..f1eb241 --- /dev/null +++ b/backend/app/models/position.py @@ -0,0 +1,99 @@ +""" +Paper trading models for simulated order execution and position tracking. 
+ +Schema: + PaperOrder — a simulated order placed against a prediction market + PaperPosition — the resulting position from a filled order +""" + +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from enum import Enum +from typing import Any, Dict, Optional + + +class PositionStatus(str, Enum): + OPEN = "OPEN" + CLOSED = "CLOSED" + + +@dataclass +class PaperOrder: + """A simulated order placed against a prediction market.""" + + id: str = field(default_factory=lambda: f"ord_{uuid.uuid4().hex[:12]}") + market_id: str = "" + signal_id: str = "" + side: str = "" # BUY_YES, BUY_NO + outcome: str = "" + size: float = 0.0 + fill_price: float = 0.0 + slippage: float = 0.0 + created_at: str = field(default_factory=lambda: datetime.now().isoformat()) + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "market_id": self.market_id, + "signal_id": self.signal_id, + "side": self.side, + "outcome": self.outcome, + "size": self.size, + "fill_price": self.fill_price, + "slippage": self.slippage, + "created_at": self.created_at, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "PaperOrder": + return cls( + id=data["id"], + market_id=data.get("market_id", ""), + signal_id=data.get("signal_id", ""), + side=data.get("side", ""), + outcome=data.get("outcome", ""), + size=data.get("size", 0.0), + fill_price=data.get("fill_price", 0.0), + slippage=data.get("slippage", 0.0), + created_at=data.get("created_at", ""), + ) + + +@dataclass +class PaperPosition: + """The resulting position from a filled paper order.""" + + id: str = field(default_factory=lambda: f"pos_{uuid.uuid4().hex[:12]}") + order_id: str = "" + market_id: str = "" + outcome: str = "" + entry_price: float = 0.0 + cost_basis: float = 0.0 + status: str = PositionStatus.OPEN.value + resolved_pnl: Optional[float] = None + + def to_dict(self) -> Dict[str, Any]: + return { + "id": self.id, + "order_id": self.order_id, + "market_id": self.market_id, + 
"outcome": self.outcome, + "entry_price": self.entry_price, + "cost_basis": self.cost_basis, + "status": self.status, + "resolved_pnl": self.resolved_pnl, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "PaperPosition": + return cls( + id=data["id"], + order_id=data.get("order_id", ""), + market_id=data.get("market_id", ""), + outcome=data.get("outcome", ""), + entry_price=data.get("entry_price", 0.0), + cost_basis=data.get("cost_basis", 0.0), + status=data.get("status", PositionStatus.OPEN.value), + resolved_pnl=data.get("resolved_pnl"), + ) diff --git a/backend/app/models/prediction.py b/backend/app/models/prediction.py new file mode 100644 index 0000000..6c207ad --- /dev/null +++ b/backend/app/models/prediction.py @@ -0,0 +1,281 @@ +""" +Prediction Market data models and persistence +""" + +import os +import json +import uuid +from datetime import datetime +from typing import Dict, Any, List, Optional +from enum import Enum +from dataclasses import dataclass, field + +from ..config import Config + + +class PredictionRunStatus(str, Enum): + FETCHING_MARKET = "fetching_market" + GENERATING_SCENARIO = "generating_scenario" + RUNNING_SIMULATION = "running_simulation" + ANALYZING = "analyzing" + COMPLETED = "completed" + FAILED = "failed" + + +@dataclass +class PredictionMarket: + """Polymarket market data""" + condition_id: str + title: str + slug: str + description: str + outcomes: List[str] + prices: List[float] + volume: float + liquidity: float + end_date: str + active: bool = True + actual_outcome: Optional[str] = None + + def to_dict(self) -> Dict[str, Any]: + d = { + "condition_id": self.condition_id, + "title": self.title, + "slug": self.slug, + "description": self.description, + "outcomes": self.outcomes, + "prices": self.prices, + "volume": self.volume, + "liquidity": self.liquidity, + "end_date": self.end_date, + "active": self.active, + } + if self.actual_outcome is not None: + d["actual_outcome"] = self.actual_outcome + return d + + 
@classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'PredictionMarket': + return cls( + condition_id=data.get('condition_id', ''), + title=data.get('title', ''), + slug=data.get('slug', ''), + description=data.get('description', ''), + outcomes=data.get('outcomes', []), + prices=data.get('prices', []), + volume=data.get('volume', 0), + liquidity=data.get('liquidity', 0), + end_date=data.get('end_date', ''), + active=data.get('active', True), + actual_outcome=data.get('actual_outcome'), + ) + + +@dataclass +class TradingSignal: + """Trading signal from prediction analysis""" + direction: str # BUY_YES, BUY_NO, HOLD + edge: float # simulated_prob - market_prob (signed) + confidence: float # 0-1 + reasoning: str + simulated_probability: float + market_probability: float + category: Optional[str] = None + confidence_tier: Optional[str] = None # HIGH, MEDIUM, LOW + + def to_dict(self) -> Dict[str, Any]: + d = { + "direction": self.direction, + "edge": round(self.edge, 4), + "confidence": round(self.confidence, 4), + "reasoning": self.reasoning, + "simulated_probability": round(self.simulated_probability, 4), + "market_probability": round(self.market_probability, 4), + } + if self.category is not None: + d["category"] = self.category + if self.confidence_tier is not None: + d["confidence_tier"] = self.confidence_tier + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'TradingSignal': + return cls( + direction=data['direction'], + edge=data['edge'], + confidence=data['confidence'], + reasoning=data['reasoning'], + simulated_probability=data['simulated_probability'], + market_probability=data['market_probability'], + category=data.get('category'), + confidence_tier=data.get('confidence_tier'), + ) + + +@dataclass +class SentimentResult: + """Result from sentiment analysis of simulation""" + simulated_probability: float + confidence: float + stance_counts: Dict[str, int] # {for: N, against: N, neutral: N} + key_arguments_for: List[str] + 
key_arguments_against: List[str] + total_posts_analyzed: int + + def to_dict(self) -> Dict[str, Any]: + return { + "simulated_probability": round(self.simulated_probability, 4), + "confidence": round(self.confidence, 4), + "stance_counts": self.stance_counts, + "key_arguments_for": self.key_arguments_for, + "key_arguments_against": self.key_arguments_against, + "total_posts_analyzed": self.total_posts_analyzed, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'SentimentResult': + return cls( + simulated_probability=data['simulated_probability'], + confidence=data['confidence'], + stance_counts=data['stance_counts'], + key_arguments_for=data['key_arguments_for'], + key_arguments_against=data['key_arguments_against'], + total_posts_analyzed=data['total_posts_analyzed'], + ) + + +@dataclass +class PredictionRun: + """Full prediction run state""" + run_id: str + status: PredictionRunStatus + created_at: str + updated_at: str + + # Market info + market: Optional[Dict[str, Any]] = None + + # Pipeline IDs + project_id: Optional[str] = None + graph_id: Optional[str] = None + simulation_id: Optional[str] = None + + # Scenario + scenario: Optional[Dict[str, Any]] = None + + # Results + sentiment: Optional[Dict[str, Any]] = None + signal: Optional[Dict[str, Any]] = None + + # Error + error: Optional[str] = None + progress_message: str = "" + + def to_dict(self) -> Dict[str, Any]: + return { + "run_id": self.run_id, + "status": self.status.value if isinstance(self.status, PredictionRunStatus) else self.status, + "created_at": self.created_at, + "updated_at": self.updated_at, + "market": self.market, + "project_id": self.project_id, + "graph_id": self.graph_id, + "simulation_id": self.simulation_id, + "scenario": self.scenario, + "sentiment": self.sentiment, + "signal": self.signal, + "error": self.error, + "progress_message": self.progress_message, + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'PredictionRun': + status = 
data.get('status', 'fetching_market') + if isinstance(status, str): + status = PredictionRunStatus(status) + return cls( + run_id=data['run_id'], + status=status, + created_at=data.get('created_at', ''), + updated_at=data.get('updated_at', ''), + market=data.get('market'), + project_id=data.get('project_id'), + graph_id=data.get('graph_id'), + simulation_id=data.get('simulation_id'), + scenario=data.get('scenario'), + sentiment=data.get('sentiment'), + signal=data.get('signal'), + error=data.get('error'), + progress_message=data.get('progress_message', ''), + ) + + +class PredictionRunManager: + """Manages prediction run persistence — follows ProjectManager pattern""" + + PREDICTIONS_DIR = Config.PREDICTION_DATA_DIR + + @classmethod + def _ensure_dir(cls): + os.makedirs(cls.PREDICTIONS_DIR, exist_ok=True) + + @classmethod + def _get_run_dir(cls, run_id: str) -> str: + return os.path.join(cls.PREDICTIONS_DIR, run_id) + + @classmethod + def _get_run_path(cls, run_id: str) -> str: + return os.path.join(cls._get_run_dir(run_id), 'run.json') + + @classmethod + def create_run(cls) -> PredictionRun: + cls._ensure_dir() + run_id = f"pred_{uuid.uuid4().hex[:12]}" + now = datetime.now().isoformat() + run = PredictionRun( + run_id=run_id, + status=PredictionRunStatus.FETCHING_MARKET, + created_at=now, + updated_at=now, + ) + run_dir = cls._get_run_dir(run_id) + os.makedirs(run_dir, exist_ok=True) + cls.save_run(run) + return run + + @classmethod + def save_run(cls, run: PredictionRun) -> None: + run.updated_at = datetime.now().isoformat() + run_path = cls._get_run_path(run.run_id) + os.makedirs(os.path.dirname(run_path), exist_ok=True) + with open(run_path, 'w', encoding='utf-8') as f: + json.dump(run.to_dict(), f, ensure_ascii=False, indent=2) + + @classmethod + def get_run(cls, run_id: str) -> Optional[PredictionRun]: + run_path = cls._get_run_path(run_id) + if not os.path.exists(run_path): + return None + with open(run_path, 'r', encoding='utf-8') as f: + data = 
json.load(f) + return PredictionRun.from_dict(data) + + @classmethod + def list_runs(cls, limit: int = 50) -> List[PredictionRun]: + cls._ensure_dir() + runs = [] + for name in os.listdir(cls.PREDICTIONS_DIR): + run = cls.get_run(name) + if run: + runs.append(run) + runs.sort(key=lambda r: r.created_at, reverse=True) + return runs[:limit] + + @classmethod + def delete_run(cls, run_id: str) -> bool: + import shutil + run_dir = cls._get_run_dir(run_id) + if not os.path.exists(run_dir): + return False + shutil.rmtree(run_dir) + return True diff --git a/backend/app/services/backtester.py b/backend/app/services/backtester.py new file mode 100644 index 0000000..72fb451 --- /dev/null +++ b/backend/app/services/backtester.py @@ -0,0 +1,347 @@ +""" +Backtesting engine — runs the prediction pipeline against resolved markets +and computes accuracy metrics. + +State machine: + PENDING → RUNNING → COMPUTING_METRICS → COMPLETED + ↓ ↓ + FAILED FAILED +""" + +import math +from typing import Optional, Callable, Dict, Any, List + +from collections import defaultdict + +from ..config import Config +from ..models.backtest import BacktestRun, BacktestRunStatus, BacktestResult, BacktestMetrics +from ..models.prediction import PredictionMarket, PredictionRun, PredictionRunStatus, PredictionRunManager +from ..services.calibrator import Calibrator +from ..services.market_classifier import MarketClassifier, compute_confidence_tier +from ..services.polymarket_client import PolymarketClient +from ..services.prediction_manager import PredictionManager +from ..storage.sqlite_store import SQLiteStore +from ..utils.logger import get_logger + +logger = get_logger('mirofish.backtester') + + +class Backtester: + """Runs the prediction pipeline against resolved markets for validation.""" + + def __init__(self, store: SQLiteStore, classifier: Optional[MarketClassifier] = None): + self.store = store + self.polymarket = PolymarketClient() + self.classifier = classifier or MarketClassifier(store) + + def 
run( + self, + num_markets: int = 50, + config_overrides: Optional[Dict[str, Any]] = None, + progress_callback: Optional[Callable] = None, + bt_run: Optional[BacktestRun] = None, + ) -> BacktestRun: + """ + Execute a full backtest. + + Args: + num_markets: Number of resolved markets to test + config_overrides: Optional config overrides for calibration params + progress_callback: Called with (market_index, total, title, success_count, fail_count) + bt_run: Optional pre-created BacktestRun (for API use — allows returning the ID before the thread starts) + + Returns: + Completed BacktestRun with metrics + """ + if bt_run is None: + bt_run = BacktestRun( + config=config_overrides or {}, + total_markets=num_markets, + ) + self.store.save_backtest_run(bt_run) + + try: + # Transition to RUNNING + bt_run.status = BacktestRunStatus.RUNNING.value + self.store.save_backtest_run(bt_run) + + # Fetch resolved markets + logger.info(f"Fetching {num_markets} resolved markets...") + markets = self.polymarket.fetch_resolved_markets(limit=num_markets) + bt_run.total_markets = len(markets) + self.store.save_backtest_run(bt_run) + + if not markets: + logger.warning("No resolved markets found") + bt_run.status = BacktestRunStatus.COMPLETED.value + bt_run.metrics = BacktestMetrics(markets_tested=0).to_dict() + self.store.save_backtest_run(bt_run) + return bt_run + + # Check which markets are already completed (for resume) + completed_ids = set(self.store.get_completed_market_ids(bt_run.id)) + + # Process each market + manager = PredictionManager() + success_count = len(completed_ids) + fail_count = 0 + + for i, market in enumerate(markets): + if market.condition_id in completed_ids: + logger.info(f"Skipping already-completed market: {market.title}") + continue + + try: + logger.info(f"[{i+1}/{len(markets)}] Processing: {market.title}") + + if progress_callback: + progress_callback(i + 1, len(markets), market.title, success_count, fail_count) + + # Run prediction pipeline + run = 
PredictionRunManager.create_run() + result_run = manager.run_prediction(market=market, run=run) + + if result_run.status == PredictionRunStatus.COMPLETED and result_run.signal: + # Compare prediction vs actual + bt_result = self._evaluate_result(bt_run.id, market, result_run) + self.store.save_backtest_result(bt_result) + success_count += 1 + else: + fail_count += 1 + logger.warning(f"Pipeline failed for {market.title}: {result_run.error}") + + except Exception as e: + fail_count += 1 + logger.error(f"Error processing market {market.title}: {e}") + + # Update progress + bt_run.completed_markets = success_count + bt_run.failed_markets = fail_count + self.store.save_backtest_run(bt_run) + + # Compute metrics + bt_run.status = BacktestRunStatus.COMPUTING_METRICS.value + self.store.save_backtest_run(bt_run) + + metrics = self.compute_metrics(bt_run.id) + bt_run.metrics = metrics.to_dict() + + # Fit and save category calibration offsets + all_results = self.store.get_results_by_run(bt_run.id) + calibrator = Calibrator(store=self.store) + offsets = calibrator.fit_category_offsets(all_results) + if offsets: + calibrator.save_profiles(bt_run.id, offsets, all_results) + + bt_run.status = BacktestRunStatus.COMPLETED.value + self.store.save_backtest_run(bt_run) + + logger.info(f"Backtest completed: {success_count} success, {fail_count} failed") + return bt_run + + except Exception as e: + logger.error(f"Backtest failed: {e}", exc_info=True) + bt_run.status = BacktestRunStatus.FAILED.value + self.store.save_backtest_run(bt_run) + raise + + def _evaluate_result( + self, + run_id: str, + market: PredictionMarket, + prediction: PredictionRun, + ) -> BacktestResult: + """Compare a prediction against the actual market resolution.""" + signal = prediction.signal + predicted_prob = signal['simulated_probability'] + market_prob = signal['market_probability'] + direction = signal['direction'] + edge = signal['edge'] + + actual_outcome = (market.actual_outcome or '').upper() + + # 
Determine if signal was correct + # YES resolved = probability was 1.0, NO resolved = probability was 0.0 + actual_prob = 1.0 if actual_outcome == 'YES' else 0.0 + + # Signal is correct if direction matches resolution + if direction == 'BUY_YES': + correct = 1 if actual_outcome == 'YES' else 0 + elif direction == 'BUY_NO': + correct = 1 if actual_outcome == 'NO' else 0 + else: + correct = None # HOLD — not evaluated + + # Brier score: (predicted_prob - actual_binary)^2 + brier = (predicted_prob - actual_prob) ** 2 + + # Classify market category and confidence tier + category = self.classifier.classify( + market.condition_id, market.title, market.description or "" + ) + confidence_tier = compute_confidence_tier(edge) + + return BacktestResult( + run_id=run_id, + market_id=market.condition_id, + market_title=market.title, + predicted_prob=predicted_prob, + market_prob=market_prob, + actual_outcome=actual_outcome, + signal_direction=direction, + edge=edge, + brier_score=brier, + correct=correct, + category=category, + confidence_tier=confidence_tier, + ) + + def compute_metrics(self, run_id: str) -> BacktestMetrics: + """Compute aggregate metrics from backtest results.""" + results = self.store.get_results_by_run(run_id) + + if not results: + return BacktestMetrics(markets_tested=0) + + # Filter to actionable signals (non-HOLD) for accuracy + actionable = [r for r in results if r.correct is not None] + all_brier = [r.brier_score for r in results if r.brier_score is not None] + all_edges = [r.edge for r in results] + + # Accuracy + if actionable: + accuracy = sum(r.correct for r in actionable) / len(actionable) + else: + accuracy = 0.0 + + # Brier score (mean) + brier_score = sum(all_brier) / len(all_brier) if all_brier else 0.0 + + # ROI: simple model — bet $1 on each signal, win pays 1/market_prob, lose pays 0 + total_invested = 0.0 + total_return = 0.0 + returns_list = [] + + for r in actionable: + bet = 1.0 + total_invested += bet + if r.correct: + payout = bet / 
max(r.market_prob if r.signal_direction == 'BUY_YES' else (1 - r.market_prob), 0.01) + profit = payout - bet + else: + profit = -bet + total_return += profit + returns_list.append(profit / bet) + + roi = total_return / total_invested if total_invested > 0 else 0.0 + + # Sharpe ratio (annualized, assuming daily bets) + if len(returns_list) >= 2: + mean_return = sum(returns_list) / len(returns_list) + variance = sum((r - mean_return) ** 2 for r in returns_list) / (len(returns_list) - 1) + std_return = math.sqrt(variance) if variance > 0 else 0.0 + sharpe_ratio = (mean_return / std_return) * math.sqrt(252) if std_return > 0 else 0.0 + else: + sharpe_ratio = 0.0 + + # Max drawdown + cumulative = 0.0 + peak = 0.0 + max_drawdown = 0.0 + for ret in returns_list: + cumulative += ret + if cumulative > peak: + peak = cumulative + dd = peak - cumulative + if dd > max_drawdown: + max_drawdown = dd + + # Calibration RMSE — bin predictions into 10 buckets, compare predicted vs actual + calibration_rmse = self._compute_calibration_rmse(results) + + # Average edge + avg_edge = sum(all_edges) / len(all_edges) if all_edges else 0.0 + + # Per-category metrics + category_metrics = self._compute_group_metrics(results, key_fn=lambda r: r.category or "other") + + # Per-confidence-tier metrics + tier_metrics = self._compute_group_metrics(results, key_fn=lambda r: r.confidence_tier or "LOW") + + return BacktestMetrics( + accuracy=accuracy, + brier_score=brier_score, + roi=roi, + sharpe_ratio=sharpe_ratio, + max_drawdown=max_drawdown, + calibration_rmse=calibration_rmse, + markets_tested=len(results), + avg_edge=avg_edge, + category_metrics=category_metrics, + confidence_tier_metrics=tier_metrics, + ) + + def _compute_group_metrics( + self, results: List[BacktestResult], key_fn + ) -> Dict[str, Any]: + """Compute mini-metrics grouped by an arbitrary key function.""" + groups: Dict[str, List[BacktestResult]] = defaultdict(list) + for r in results: + groups[key_fn(r)].append(r) + + out = {} + 
for group_name, group_results in sorted(groups.items()): + actionable = [r for r in group_results if r.correct is not None] + briers = [r.brier_score for r in group_results if r.brier_score is not None] + edges = [r.edge for r in group_results] + + acc = sum(r.correct for r in actionable) / len(actionable) if actionable else 0.0 + + # ROI per group + invested = 0.0 + returns = 0.0 + for r in actionable: + invested += 1.0 + if r.correct: + payout = 1.0 / max( + r.market_prob if r.signal_direction == 'BUY_YES' else (1 - r.market_prob), + 0.01, + ) + returns += payout - 1.0 + else: + returns -= 1.0 + group_roi = returns / invested if invested > 0 else 0.0 + + out[group_name] = { + "accuracy": round(acc, 4), + "brier_score": round(sum(briers) / len(briers), 4) if briers else 0.0, + "roi": round(group_roi, 4), + "markets_tested": len(group_results), + "avg_edge": round(sum(edges) / len(edges), 4) if edges else 0.0, + } + return out + + def _compute_calibration_rmse(self, results: List[BacktestResult]) -> float: + """Compute calibration RMSE by binning predictions.""" + if not results: + return 0.0 + + # Bin predictions into 10 buckets + bins: Dict[int, List[tuple]] = {i: [] for i in range(10)} + for r in results: + if r.predicted_prob is not None and r.actual_outcome is not None: + bucket = min(int(r.predicted_prob * 10), 9) + actual = 1.0 if r.actual_outcome == 'YES' else 0.0 + bins[bucket].append((r.predicted_prob, actual)) + + # RMSE across non-empty bins + squared_errors = [] + for bucket_items in bins.values(): + if bucket_items: + mean_pred = sum(p for p, _ in bucket_items) / len(bucket_items) + mean_actual = sum(a for _, a in bucket_items) / len(bucket_items) + squared_errors.append((mean_pred - mean_actual) ** 2) + + if not squared_errors: + return 0.0 + return math.sqrt(sum(squared_errors) / len(squared_errors)) diff --git a/backend/app/services/calibrator.py b/backend/app/services/calibrator.py new file mode 100644 index 0000000..6467cf5 --- /dev/null +++ 
b/backend/app/services/calibrator.py @@ -0,0 +1,212 @@ +""" +Calibration service — fits Platt scaling or isotonic regression on backtest results +to improve probability estimates. +""" + +import base64 +import hashlib +import hmac +import pickle +from collections import defaultdict +from typing import Dict, Any, Optional, List + +import numpy as np +from sklearn.linear_model import LogisticRegression + +from ..config import Config +from ..models.backtest import BacktestResult +from ..storage.sqlite_store import SQLiteStore +from ..utils.logger import get_logger + +logger = get_logger('mirofish.calibrator') + +MIN_DATAPOINTS = 20 + +# HMAC key derived from the app secret — used to sign/verify pickle blobs +_HMAC_KEY = hashlib.sha256(Config.SECRET_KEY.encode()).digest() + + +def _sign_blob(blob: bytes) -> str: + """Serialize blob + HMAC signature as base64.""" + sig = hmac.new(_HMAC_KEY, blob, hashlib.sha256).digest() + # Format: base64(signature + blob) + return base64.b64encode(sig + blob).decode('ascii') + + +def _verify_and_load(data: str) -> bytes: + """Verify HMAC signature and return raw blob. Raises ValueError on tampering.""" + raw = base64.b64decode(data) + if len(raw) < 32: + raise ValueError("Calibration model data too short — corrupt or tampered") + sig = raw[:32] + blob = raw[32:] + expected = hmac.new(_HMAC_KEY, blob, hashlib.sha256).digest() + if not hmac.compare_digest(sig, expected): + raise ValueError("Calibration model HMAC verification failed — data may be tampered") + return blob + + +class Calibrator: + """Probability calibration via Platt scaling (logistic regression).""" + + def __init__(self, store: Optional[SQLiteStore] = None): + self.store = store + self.model: Optional[LogisticRegression] = None + + def fit(self, results: List[BacktestResult]) -> bool: + """ + Fit calibration model on backtest results. 
+ + Args: + results: List of BacktestResult with predicted_prob and actual_outcome + + Returns: + True if model was fitted, False if insufficient data + """ + # Filter to results with valid data + valid = [ + r for r in results + if r.predicted_prob is not None and r.actual_outcome is not None + ] + + if len(valid) < MIN_DATAPOINTS: + logger.warning( + f"Insufficient data for calibration: {len(valid)} < {MIN_DATAPOINTS}. Skipping." + ) + return False + + X = np.array([r.predicted_prob for r in valid]).reshape(-1, 1) + y = np.array([1.0 if r.actual_outcome == 'YES' else 0.0 for r in valid]) + + # Check for degenerate data (all same class) + if len(np.unique(y)) < 2: + logger.warning("Degenerate data: all outcomes are the same class. Skipping calibration.") + return False + + self.model = LogisticRegression(C=1.0, solver='lbfgs', max_iter=1000) + self.model.fit(X, y) + + logger.info(f"Calibration model fitted on {len(valid)} data points") + return True + + def transform(self, probability: float) -> float: + """ + Apply fitted calibration model to a raw probability. 
+ + Args: + probability: Raw probability from the pipeline + + Returns: + Calibrated probability (or original if no model fitted) + """ + if self.model is None: + return probability + + X = np.array([[probability]]) + calibrated = self.model.predict_proba(X)[0, 1] + return float(calibrated) + + def save(self, run_id: str) -> None: + """Persist fitted model to SQLite as an HMAC-signed pickle blob.""" + if self.model is None or self.store is None: + return + + blob = pickle.dumps(self.model) + model_data = _sign_blob(blob) + + run = self.store.get_backtest_run(run_id) + if run: + config = run.config or {} + config['calibration_model'] = model_data + self.store.update_backtest_run(run_id, config=config) + logger.info(f"Calibration model saved for run {run_id}") + + def load(self, run_id: str) -> bool: + """Load a previously fitted model from SQLite, verifying HMAC signature.""" + if self.store is None: + return False + + run = self.store.get_backtest_run(run_id) + if not run or not run.config: + return False + + model_data = run.config.get('calibration_model') + if not model_data: + return False + + try: + blob = _verify_and_load(model_data) + except ValueError as e: + logger.error(f"Calibration model verification failed for run {run_id}: {e}") + return False + + self.model = pickle.loads(blob) + logger.info(f"Calibration model loaded from run {run_id}") + return True + + # ── Category-specific calibration ───────────────────────────── + + def fit_category_offsets( + self, results: List[BacktestResult] + ) -> Dict[str, float]: + """ + Compute per-category calibration offsets. + offset = mean(predicted) - mean(actual) for each category with ≥20 results. + A positive offset means the model overestimates; subtract it to correct. 
+        """
+        groups: Dict[str, List[BacktestResult]] = defaultdict(list)
+        for r in results:
+            if r.predicted_prob is not None and r.actual_outcome is not None:
+                cat = r.category or "other"
+                groups[cat].append(r)
+
+        offsets = {}
+        for cat, cat_results in groups.items():
+            if len(cat_results) < MIN_DATAPOINTS:
+                logger.info(
+                    f"Category '{cat}': {len(cat_results)} results < {MIN_DATAPOINTS}, skipping offset"
+                )
+                continue
+
+            mean_pred = sum(r.predicted_prob for r in cat_results) / len(cat_results)
+            mean_actual = sum(
+                1.0 if r.actual_outcome == "YES" else 0.0 for r in cat_results
+            ) / len(cat_results)
+            offset = mean_pred - mean_actual
+            offsets[cat] = offset
+            logger.info(
+                f"Category '{cat}': offset={offset:.4f} "
+                f"(mean_pred={mean_pred:.4f}, mean_actual={mean_actual:.4f}, n={len(cat_results)})"
+            )
+
+        return offsets
+
+    def save_profiles(self, run_id: str, offsets: Dict[str, float], results: List[BacktestResult]) -> None:
+        """Save category offsets to SQLite calibration_profiles table."""
+        if self.store is None:
+            return
+
+        # Count samples per category
+        counts: Dict[str, int] = defaultdict(int)
+        for r in results:
+            if r.predicted_prob is not None and r.actual_outcome is not None:
+                counts[r.category or "other"] += 1
+
+        for cat, offset in offsets.items():
+            self.store.save_calibration_profile(run_id, cat, offset, counts.get(cat, 0))
+        logger.info(f"Saved {len(offsets)} category calibration profiles for run {run_id}")
+
+    def load_profiles(self, run_id: str) -> Dict[str, Dict[str, Any]]:
+        """Load category calibration profiles from SQLite."""
+        if self.store is None:
+            return {}
+        return self.store.load_calibration_profiles(run_id)
+
+    def transform_with_category(self, probability: float, category: str, profiles: Dict[str, Dict[str, Any]]) -> float:
+        """Apply category-specific offset to a probability."""
+        profile = profiles.get(category)
+        if profile is None:
+            return probability
+        offset = profile["offset"]
+        adjusted = probability - offset
+        return max(0.01, min(0.99, adjusted))
diff --git a/backend/app/services/debate_simulator.py b/backend/app/services/debate_simulator.py
new file mode 100644
index 0000000..76e675a
--- /dev/null
+++ b/backend/app/services/debate_simulator.py
@@ -0,0 +1,200 @@
+"""
+Direct Debate Simulator — replaces OASIS multi-agent simulation with a single
+LLM call that simulates a structured multi-perspective debate.
+
+~30 seconds per market instead of ~30 minutes with OASIS.
+"""
+
+from typing import Optional, List, Dict, Any
+from dataclasses import dataclass
+
+from ..models.prediction import PredictionMarket, SentimentResult
+from ..utils.llm_client import LLMClient
+from ..utils.logger import get_logger
+
+logger = get_logger('mirofish.debate_simulator')
+
+DEBATE_SYSTEM_PROMPT = """You are simulating a realistic online debate about a prediction market question.
+
+Generate a Reddit-style discussion with 20 posts from diverse participants. The distribution of stances should reflect the ACTUAL WEIGHT OF EVIDENCE — do NOT force a 50/50 split.
+
+KEY RULES:
+1. If the evidence strongly favors one outcome, most posts should reflect that. A question with a 90% likely NO should have most participants arguing NO.
+2. Each participant should argue based on real evidence, data, precedent, and domain knowledge — not just opinions.
+3. Include domain experts, general public, contrarians, and analysts.
+4. Contrarians exist in every debate — include 2-3 posts arguing the minority position even if the evidence is lopsided.
+5. Confidence scores should reflect argument strength: a weak contrarian argument gets 0.3, a strong evidence-backed argument gets 0.9.
+6. Consider: base rates, historical precedent, structural factors, incentives, and known constraints.
+7. Think step by step about what would ACTUALLY happen based on the evidence before generating the debate.
+
+BEFORE generating posts, internally assess: given all available evidence, what is the realistic probability of YES? Then generate a debate whose stance distribution roughly matches that assessment.
+
+Output JSON:
+{
+  "estimated_probability": 0.XX,
+  "reasoning": "Brief explanation of your probability estimate before the debate",
+  "posts": [
+    {
+      "author": "username",
+      "author_type": "expert|general_public|stakeholder|analyst|contrarian",
+      "stance": "for|against|neutral",
+      "confidence": 0.8,
+      "content": "The full post text with substantive argument...",
+      "key_argument": "One-sentence summary of the core argument"
+    }
+  ],
+  "debate_summary": "Brief summary of the overall debate dynamics",
+  "strongest_for": "The single strongest argument for YES",
+  "strongest_against": "The single strongest argument for NO"
+}"""
+
+
+class DebateSimulator:
+    """Simulates multi-perspective debate via direct LLM call"""
+
+    def __init__(self, llm_client: Optional[LLMClient] = None):
+        self.llm_client = llm_client or LLMClient()
+
+    def simulate_debate(
+        self,
+        market: PredictionMarket,
+        context_document: str,
+    ) -> SentimentResult:
+        """
+        Run a simulated debate and return sentiment analysis.
+
+        Args:
+            market: The prediction market question
+            context_document: Background context from scenario generator
+
+        Returns:
+            SentimentResult with probability and breakdown
+        """
+        user_prompt = self._build_prompt(market, context_document)
+
+        messages = [
+            {"role": "system", "content": DEBATE_SYSTEM_PROMPT},
+            {"role": "user", "content": user_prompt},
+        ]
+
+        logger.info(f"Running direct debate simulation for: {market.title}")
+
+        result = self.llm_client.chat_json(
+            messages=messages,
+            temperature=0.6,
+            max_tokens=8192,
+        )
+
+        posts = result.get("posts", [])
+        llm_estimate = result.get("estimated_probability")
+        logger.info(f"Debate generated {len(posts)} posts, LLM estimate: {llm_estimate}")
+
+        return self._analyze_posts(
+            posts,
+            strongest_for=result.get("strongest_for", ""),
+            strongest_against=result.get("strongest_against", ""),
+            llm_estimate=llm_estimate,
+        )
+
+    def _build_prompt(self, market: PredictionMarket, context: str) -> str:
+        parts = [
+            "# Prediction Market Question",
+            f"**Question:** {market.title}",
+            f"**Current Market Prices:** YES {market.prices[0]:.0%} / NO {market.prices[1]:.0%}",
+            f"**Trading Volume:** ${market.volume:,.0f}",
+            f"**End Date:** {market.end_date}",
+        ]
+
+        if market.description:
+            parts.append(f"\n**Market Description:**\n{market.description[:2000]}")
+
+        if context:
+            parts.append(f"\n**Background Context:**\n{context[:3000]}")
+
+        parts.append(
+            "\nSimulate a realistic Reddit debate thread about this question. "
+            "Include 15-25 posts from diverse participants with substantive arguments."
+        )
+
+        return "\n".join(parts)
+
+    def _analyze_posts(
+        self,
+        posts: List[Dict[str, Any]],
+        strongest_for: str = "",
+        strongest_against: str = "",
+        llm_estimate: float = None,
+    ) -> SentimentResult:
+        """Compute probability from debate posts + LLM direct estimate"""
+        stance_counts = {"for": 0, "against": 0, "neutral": 0}
+        weighted_for = 0.0
+        weighted_against = 0.0
+        args_for = []
+        args_against = []
+
+        for post in posts:
+            stance = post.get("stance", "neutral")
+            confidence = float(post.get("confidence", 0.5))
+            key_arg = post.get("key_argument", "")
+
+            if stance in stance_counts:
+                stance_counts[stance] += 1
+            else:
+                stance_counts["neutral"] += 1
+                stance = "neutral"
+
+            if stance == "for":
+                weighted_for += confidence
+                if key_arg:
+                    args_for.append(key_arg)
+            elif stance == "against":
+                weighted_against += confidence
+                if key_arg:
+                    args_against.append(key_arg)
+
+        # Stance-based probability
+        total_opinionated = weighted_for + weighted_against
+        if total_opinionated > 0:
+            stance_prob = weighted_for / total_opinionated
+        else:
+            stance_prob = 0.5
+
+        # Blend: 50% LLM direct estimate + 50% stance-derived probability
+        # The LLM estimate captures base rates and domain knowledge
+        # The stance distribution captures the argument quality
+        if llm_estimate is not None and 0 <= llm_estimate <= 1:
+            sim_prob = 0.5 * llm_estimate + 0.5 * stance_prob
+        else:
+            sim_prob = stance_prob
+
+        # Confidence based on agreement strength
+        total_classified = stance_counts["for"] + stance_counts["against"]
+        if total_classified > 0:
+            agreement = max(stance_counts["for"], stance_counts["against"]) / total_classified
+            sample_factor = min(total_classified / 10, 1.0)
+            result_confidence = agreement * sample_factor
+        else:
+            result_confidence = 0.0
+
+        # Add strongest arguments at the top
+        if strongest_for and strongest_for not in args_for:
+            args_for.insert(0, strongest_for)
+        if strongest_against and strongest_against not in args_against:
+            args_against.insert(0, strongest_against)
+
+        # Deduplicate
+        args_for = list(dict.fromkeys(args_for))[:5]
+        args_against = list(dict.fromkeys(args_against))[:5]
+
+        # Clamp values to [0, 1]
+        sim_prob = max(0.0, min(1.0, sim_prob))
+        result_confidence = max(0.0, min(1.0, result_confidence))
+
+        return SentimentResult(
+            simulated_probability=sim_prob,
+            confidence=result_confidence,
+            stance_counts=stance_counts,
+            key_arguments_for=args_for,
+            key_arguments_against=args_against,
+            total_posts_analyzed=len(posts),
+        )
diff --git a/backend/app/services/market_classifier.py b/backend/app/services/market_classifier.py
new file mode 100644
index 0000000..c6eb469
--- /dev/null
+++ b/backend/app/services/market_classifier.py
@@ -0,0 +1,97 @@
+"""
+Market classifier — assigns a category to each market via LLM.
+Results are cached in SQLite to avoid re-classifying the same market.
+"""
+
+from typing import Dict, List, Optional
+
+from ..models.prediction import PredictionMarket
+from ..storage.sqlite_store import SQLiteStore
+from ..utils.llm_client import LLMClient
+from ..utils.logger import get_logger
+
+logger = get_logger('mirofish.market_classifier')
+
+CATEGORIES = [
+    "politics", "sports", "crypto", "economics",
+    "science", "entertainment", "other",
+]
+
+CLASSIFY_SYSTEM_PROMPT = f"""You are a market classifier. Given a prediction market title and description,
+classify it into exactly ONE of these categories: {', '.join(CATEGORIES)}.
+
+Respond with JSON: {{"category": ""}}
+
+Rules:
+- "politics" = elections, legislation, government policy, geopolitics
+- "sports" = athletic competitions, tournaments, player performance
+- "crypto" = cryptocurrency prices, blockchain events, DeFi
+- "economics" = economic indicators, interest rates, stock market, commodities
+- "science" = scientific discoveries, space, climate, health/medicine
+- "entertainment" = movies, music, awards, celebrity events, TV, gaming
+- "other" = anything that doesn't clearly fit above"""
+
+# Confidence tier thresholds based on absolute edge size
+TIER_THRESHOLD_HIGH = 0.15  # |edge| >= 15%
+TIER_THRESHOLD_MEDIUM = 0.08  # |edge| >= 8%
+
+
+def compute_confidence_tier(edge: float) -> str:
+    """Assign HIGH/MEDIUM/LOW tier based on absolute edge magnitude."""
+    abs_edge = abs(edge)
+    if abs_edge >= TIER_THRESHOLD_HIGH:
+        return "HIGH"
+    elif abs_edge >= TIER_THRESHOLD_MEDIUM:
+        return "MEDIUM"
+    return "LOW"
+
+
+class MarketClassifier:
+    """Classifies prediction markets into categories using LLM with SQLite caching."""
+
+    def __init__(self, store: SQLiteStore, llm_client: Optional[LLMClient] = None):
+        self.store = store
+        self.llm_client = llm_client or LLMClient()
+
+    def classify(self, market_id: str, title: str, description: str = "") -> str:
+        """Classify a single market. Returns cached result if available."""
+        cached = self.store.get_market_category(market_id)
+        if cached:
+            return cached
+
+        category = self._llm_classify(title, description)
+        self.store.save_market_category(market_id, category)
+        logger.info(f"Classified market '{title[:50]}' as '{category}'")
+        return category
+
+    def classify_batch(self, markets: List[PredictionMarket]) -> Dict[str, str]:
+        """Classify a batch of markets. Only LLM-calls uncached ones."""
+        results = {}
+        for market in markets:
+            results[market.condition_id] = self.classify(
+                market.condition_id, market.title, market.description or ""
+            )
+        return results
+
+    def _llm_classify(self, title: str, description: str) -> str:
+        """Call LLM to classify a market."""
+        user_msg = f"Title: {title}"
+        if description:
+            user_msg += f"\nDescription: {description[:500]}"
+
+        try:
+            result = self.llm_client.chat_json(
+                messages=[
+                    {"role": "system", "content": CLASSIFY_SYSTEM_PROMPT},
+                    {"role": "user", "content": user_msg},
+                ],
+                temperature=0.1,
+            )
+            category = result.get("category", "other").lower().strip()
+            if category not in CATEGORIES:
+                logger.warning(f"LLM returned unknown category '{category}', defaulting to 'other'")
+                return "other"
+            return category
+        except Exception as e:
+            logger.error(f"LLM classification failed: {e}")
+            return "other"
diff --git a/backend/app/services/paper_trader.py b/backend/app/services/paper_trader.py
new file mode 100644
index 0000000..9e03a46
--- /dev/null
+++ b/backend/app/services/paper_trader.py
@@ -0,0 +1,82 @@
+"""
+Paper trading service — simulates order execution for prediction market signals.
+"""
+
+import random
+from typing import Optional
+
+from ..models.prediction import PredictionMarket, TradingSignal
+from ..models.position import PaperOrder, PaperPosition, PositionStatus
+from ..storage.sqlite_store import SQLiteStore
+from ..utils.logger import get_logger
+
+logger = get_logger('mirofish.paper_trader')
+
+DEFAULT_BET_SIZE = 10.0  # $10 per trade
+
+
+class PaperTrader:
+    """Simulates order execution with slippage for paper trading."""
+
+    def __init__(self, store: SQLiteStore, bet_size: float = DEFAULT_BET_SIZE):
+        self.store = store
+        self.bet_size = bet_size
+
+    def execute(
+        self,
+        signal: TradingSignal,
+        market: PredictionMarket,
+        signal_id: str = "",
+    ) -> Optional[PaperOrder]:
+        """
+        Execute a paper trade based on a signal.
+
+        Args:
+            signal: Trading signal from prediction pipeline
+            market: Market data
+            signal_id: Optional reference to the prediction run
+
+        Returns:
+            PaperOrder if trade was executed, None for HOLD signals
+        """
+        if signal.direction == "HOLD":
+            return None
+
+        # Simulate 1-2% slippage
+        slippage = random.uniform(0.01, 0.02)
+
+        if signal.direction == "BUY_YES":
+            base_price = market.prices[0] if market.prices else 0.5
+            fill_price = min(base_price * (1 + slippage), 0.99)
+            outcome = "Yes"
+        else:  # BUY_NO
+            base_price = market.prices[1] if len(market.prices) > 1 else 0.5
+            fill_price = min(base_price * (1 + slippage), 0.99)
+            outcome = "No"
+
+        order = PaperOrder(
+            market_id=market.condition_id,
+            signal_id=signal_id,
+            side=signal.direction,
+            outcome=outcome,
+            size=self.bet_size,
+            fill_price=fill_price,
+            slippage=slippage,
+        )
+        self.store.save_paper_order(order)
+
+        position = PaperPosition(
+            order_id=order.id,
+            market_id=market.condition_id,
+            outcome=outcome,
+            entry_price=fill_price,
+            cost_basis=self.bet_size * fill_price,
+            status=PositionStatus.OPEN.value,
+        )
+        self.store.save_paper_position(position)
+
+        logger.info(
+            f"Paper trade: {signal.direction} {outcome} @ {fill_price:.4f} "
+            f"(slippage {slippage:.2%}) for {market.title}"
+        )
+        return order
diff --git a/backend/app/services/polymarket_client.py b/backend/app/services/polymarket_client.py
new file mode 100644
index 0000000..8193b02
--- /dev/null
+++ b/backend/app/services/polymarket_client.py
@@ -0,0 +1,218 @@
+"""
+Polymarket client — fetches markets from the Gamma API
+"""
+
+import time
+import requests
+from typing import List, Optional, Dict, Any
+
+from ..config import Config
+from ..models.prediction import PredictionMarket
+from ..utils.logger import get_logger
+from ..utils.retry import retry_with_backoff
+
+logger = get_logger('mirofish.polymarket')
+
+
+class PolymarketClient:
+    """Fetches prediction market data from Polymarket's Gamma API"""
+
+    def __init__(self, base_url: Optional[str] = None):
+        self.base_url = base_url or Config.POLYMARKET_GAMMA_URL
+
+    @retry_with_backoff(max_retries=3, exceptions=(requests.RequestException,))
+    def fetch_active_markets(
+        self,
+        min_volume: float = 10000,
+        limit: int = 50,
+        search: Optional[str] = None,
+    ) -> List[PredictionMarket]:
+        """
+        Fetch active binary markets from Polymarket.
+
+        Args:
+            min_volume: Minimum trading volume filter
+            limit: Max markets to return
+            search: Optional search query
+
+        Returns:
+            List of PredictionMarket objects
+        """
+        params: Dict[str, Any] = {
+            "limit": min(limit, 100),
+            "active": True,
+            "closed": False,
+            "order": "volume",
+            "ascending": False,
+        }
+
+        url = f"{self.base_url}/markets"
+        logger.info(f"Fetching markets from {url}")
+
+        resp = requests.get(url, params=params, timeout=30)
+        resp.raise_for_status()
+        raw_markets = resp.json()
+
+        if not isinstance(raw_markets, list):
+            logger.warning(f"Unexpected response format: {type(raw_markets)}")
+            return []
+
+        markets = []
+        for item in raw_markets:
+            market = self._parse_market(item)
+            if market is None:
+                continue
+            if market.volume < min_volume:
+                continue
+            if search and search.lower() not in market.title.lower():
+                continue
+            markets.append(market)
+            if len(markets) >= limit:
+                break
+
+        logger.info(f"Fetched {len(markets)} markets (filtered from {len(raw_markets)})")
+        return markets
+
+    @retry_with_backoff(max_retries=3, exceptions=(requests.RequestException,))
+    def get_market(self, condition_id: str) -> Optional[PredictionMarket]:
+        """Fetch a single market by condition_id"""
+        url = f"{self.base_url}/markets/{condition_id}"
+        resp = requests.get(url, timeout=30)
+        resp.raise_for_status()
+        data = resp.json()
+        return self._parse_market(data)
+
+    def fetch_resolved_markets(self, limit: int = 200) -> List[PredictionMarket]:
+        """
+        Fetch resolved (closed) markets from Polymarket.
+
+        Args:
+            limit: Max markets to return
+
+        Returns:
+            List of PredictionMarket objects with actual_outcome set
+        """
+        markets = []
+        offset = 0
+        page_size = min(limit, 100)
+
+        while len(markets) < limit:
+            try:
+                params: Dict[str, Any] = {
+                    "limit": page_size,
+                    "closed": True,
+                    "order": "volume",
+                    "ascending": False,
+                    "offset": offset,
+                }
+
+                url = f"{self.base_url}/markets"
+                logger.info(f"Fetching resolved markets (offset={offset})")
+
+                resp = requests.get(url, params=params, timeout=30)
+                resp.raise_for_status()
+                raw_markets = resp.json()
+
+                if not isinstance(raw_markets, list) or len(raw_markets) == 0:
+                    break
+
+                for item in raw_markets:
+                    market = self._parse_resolved_market(item)
+                    if market is not None:
+                        markets.append(market)
+                        if len(markets) >= limit:
+                            break
+
+                offset += page_size
+
+                # Courtesy delay between paginated fetches
+                if len(markets) < limit and len(raw_markets) == page_size:
+                    time.sleep(1.0)
+                else:
+                    break
+
+            except requests.RequestException as e:
+                logger.error(f"Failed to fetch resolved markets at offset {offset}: {e}")
+                break
+
+        logger.info(f"Fetched {len(markets)} resolved markets")
+        return markets
+
+    def _parse_resolved_market(self, data: Dict[str, Any]) -> Optional[PredictionMarket]:
+        """Parse a resolved market, extracting actual outcome from resolution data."""
+        market = self._parse_market(data)
+        if market is None:
+            return None
+
+        # Determine actual outcome from tokens or resolution data
+        tokens = data.get('tokens', [])
+        actual_outcome = None
+
+        if tokens:
+            for token in tokens:
+                winner = token.get('winner', False)
+                if winner:
+                    actual_outcome = token.get('outcome', '').upper()
+                    break
+
+        # If no winner token, check resolved status
+        if actual_outcome is None:
+            resolved = data.get('resolved', False)
+            resolution = data.get('resolution', '')
+            if resolved and resolution:
+                actual_outcome = resolution.upper()
+
+        if actual_outcome is None:
+            logger.debug(f"Skipping unresolved market: {market.title}")
+            return None
+
+        market.active = False
+        market.actual_outcome = actual_outcome
+        return market
+
+    def _parse_market(self, data: Dict[str, Any]) -> Optional[PredictionMarket]:
+        """Parse raw Gamma API response into PredictionMarket"""
+        try:
+            import json as _json
+
+            # Gamma API returns tokens with prices for each outcome
+            tokens = data.get('tokens', [])
+            outcomes = []
+            prices = []
+
+            if tokens:
+                for token in tokens:
+                    outcomes.append(token.get('outcome', 'Unknown'))
+                    prices.append(float(token.get('price', 0)))
+            else:
+                # Gamma API returns outcomes/outcomePrices as JSON strings
+                raw_outcomes = data.get('outcomes', '["Yes", "No"]')
+                raw_prices = data.get('outcomePrices', '["0.5", "0.5"]')
+
+                if isinstance(raw_outcomes, str):
+                    outcomes = _json.loads(raw_outcomes)
+                else:
+                    outcomes = raw_outcomes or ['Yes', 'No']
+
+                if isinstance(raw_prices, str):
+                    prices = [float(p) for p in _json.loads(raw_prices)]
+                elif isinstance(raw_prices, list):
+                    prices = [float(p) for p in raw_prices]
+                else:
+                    prices = [0.5, 0.5]
+
+            return PredictionMarket(
+                condition_id=data.get('conditionId', data.get('condition_id', '')),
+                title=data.get('question', data.get('title', 'Unknown')),
+                slug=data.get('slug', ''),
+                description=data.get('description', ''),
+                outcomes=outcomes,
+                prices=prices,
+                volume=float(data.get('volume', 0) or 0),
+                liquidity=float(data.get('liquidity', 0) or 0),
+                end_date=data.get('endDate', data.get('end_date', '')),
+                active=data.get('active', True),
+            )
+        except (KeyError, ValueError, TypeError) as e:
+            logger.warning(f"Failed to parse market data: {e} — raw keys: {list(data.keys())}")
+            return None
diff --git a/backend/app/services/prediction_manager.py b/backend/app/services/prediction_manager.py
new file mode 100644
index 0000000..d7ec35c
--- /dev/null
+++ b/backend/app/services/prediction_manager.py
@@ -0,0 +1,243 @@
+"""
+Prediction Manager — orchestrates the prediction pipeline:
+market → scenario → direct debate → signal
+
+Uses direct LLM debate simulation instead of OASIS multi-agent framework.
+Pipeline completes in ~60-90 seconds per market.
+"""
+
+import requests
+import json
+from typing import Dict, Any, Optional, Callable
+
+from ..config import Config
+from ..models.prediction import (
+    PredictionMarket, PredictionRun, PredictionRunStatus,
+    PredictionRunManager, TradingSignal, SentimentResult,
+)
+from ..services.calibrator import Calibrator
+from ..services.market_classifier import MarketClassifier, TIER_THRESHOLD_HIGH, TIER_THRESHOLD_MEDIUM
+from ..services.scenario_generator import ScenarioGenerator
+from ..services.debate_simulator import DebateSimulator
+from ..storage.sqlite_store import SQLiteStore
+from ..utils.llm_client import LLMClient
+from ..utils.logger import get_logger
+
+logger = get_logger('mirofish.prediction_manager')
+
+
+class PredictionManager:
+    """Orchestrates the prediction pipeline"""
+
+    def __init__(self, result_store=None, sqlite_store: Optional[SQLiteStore] = None):
+        self.llm_client = LLMClient()
+        self.scenario_gen = ScenarioGenerator(self.llm_client)
+        self.debate_sim = DebateSimulator(self.llm_client)
+        self.result_store = result_store or PredictionRunManager
+        self.sqlite_store = sqlite_store
+        self.classifier = MarketClassifier(sqlite_store, self.llm_client) if sqlite_store else None
+        self.category_profiles = self._load_category_profiles()
+
+    def _load_category_profiles(self):
+        """Load category calibration profiles from the latest completed backtest."""
+        if not self.sqlite_store:
+            return {}
+        try:
+            run_id = self.sqlite_store.get_latest_completed_run_id()
+            if not run_id:
+                return {}
+            calibrator = Calibrator(store=self.sqlite_store)
+            profiles = calibrator.load_profiles(run_id)
+            if profiles:
+                logger.info(f"Loaded {len(profiles)} category calibration profiles from run {run_id}")
+            return profiles
+        except Exception as e:
+            logger.warning(f"Could not load category profiles: {e}")
+            return {}
+
+    def run_prediction(
+        self,
+        market: PredictionMarket,
+        run: PredictionRun,
+        progress_callback: Optional[Callable] = None,
+    ) -> PredictionRun:
+        """
+        Execute the prediction pipeline:
+        1. Generate balanced scenario context
+        2. Run direct debate simulation via LLM
+        3. Compute probability from debate stances
+        4. Generate trading signal
+        """
+        try:
+            run.market = market.to_dict()
+            self._update(run, PredictionRunStatus.FETCHING_MARKET, "Market data loaded", progress_callback)
+
+            # Step 1: Generate scenario (balanced context document)
+            self._update(run, PredictionRunStatus.GENERATING_SCENARIO, "Generating simulation scenario...", progress_callback)
+            scenario = self.scenario_gen.generate_scenario(market)
+            run.scenario = scenario.to_dict()
+            self.result_store.save_run(run)
+
+            # Step 2: Run direct debate simulation
+            self._update(run, PredictionRunStatus.RUNNING_SIMULATION, "Simulating multi-perspective debate...", progress_callback)
+            sentiment = self.debate_sim.simulate_debate(
+                market=market,
+                context_document=scenario.context_document,
+            )
+            run.sentiment = sentiment.to_dict()
+            self.result_store.save_run(run)
+
+            # Step 2.5: Classify market category
+            category = None
+            if self.classifier:
+                category = self.classifier.classify(
+                    market.condition_id, market.title, market.description or ""
+                )
+
+            # Step 3: Generate trading signal
+            self._update(run, PredictionRunStatus.ANALYZING, "Computing trading signal...", progress_callback)
+            signal = self._generate_signal(market, sentiment, category=category)
+            run.signal = signal.to_dict()
+
+            self._update(run, PredictionRunStatus.COMPLETED, "Prediction complete", progress_callback)
+            return run
+
+        except (requests.RequestException, ValueError, json.JSONDecodeError) as e:
+            logger.error(f"Prediction pipeline failed (recoverable): {e}", exc_info=True)
+            run.status = PredictionRunStatus.FAILED
+            run.error = str(e)
+            run.progress_message = f"Failed: {str(e)}"
+            self.result_store.save_run(run)
+            return run
+        except RuntimeError as e:
+            logger.error(f"Prediction pipeline runtime error: {e}", exc_info=True)
+            run.status = PredictionRunStatus.FAILED
+            run.error = str(e)
+            run.progress_message = f"Failed: {str(e)}"
+            self.result_store.save_run(run)
+            return run
+        except Exception as e:
+            logger.error(f"Prediction pipeline unexpected error: {e}", exc_info=True)
+            run.status = PredictionRunStatus.FAILED
+            run.error = str(e)
+            run.progress_message = f"Failed: {str(e)}"
+            self.result_store.save_run(run)
+            return run
+
+    def _update(self, run: PredictionRun, status: PredictionRunStatus, message: str, callback=None):
+        run.status = status
+        run.progress_message = message
+        self.result_store.save_run(run)
+        if callback:
+            callback(status.value, message)
+        logger.info(f"[{run.run_id}] {status.value}: {message}")
+
+    def _generate_signal(
+        self, market: PredictionMarket, sentiment: SentimentResult, category: Optional[str] = None
+    ) -> TradingSignal:
+        """Compare simulated probability vs market price to generate trading signal.
+
+        Applies calibration corrections learned from backtesting:
+        1. Market regression: blend SimP toward market price (markets are informative)
+        2. Confidence penalty for large edges (huge disagreements usually = model error)
+        3. Short-dated market dampening (less time for unlikely events)
+        4. Category-specific offset (from per-category calibration profiles)
+        """
+        from datetime import datetime
+
+        market_prob = market.prices[0] if market.prices else 0.5
+        raw_sim_prob = sentiment.simulated_probability
+
+        if sentiment.total_posts_analyzed == 0 or sentiment.confidence < 0.05:
+            return TradingSignal(
+                direction="HOLD",
+                edge=0.0,
+                confidence=0.0,
+                reasoning="Insufficient debate data for signal generation.",
+                simulated_probability=raw_sim_prob,
+                market_probability=market_prob,
+            )
+
+        # Calibration 1: Regress toward market price
+        MARKET_WEIGHT = Config.CALIBRATION_MARKET_REGRESSION
+        sim_prob = (1 - MARKET_WEIGHT) * raw_sim_prob + MARKET_WEIGHT * market_prob
+
+        # Calibration 2: Short-dated dampening
+        days_to_end = None
+        if market.end_date:
+            try:
+                end_dt = datetime.fromisoformat(market.end_date.replace('Z', '+00:00'))
+                days_to_end = (end_dt - datetime.now(end_dt.tzinfo)).days
+                if days_to_end is not None and days_to_end < Config.CALIBRATION_DATE_DAMPENING_DAYS:
+                    penalty = Config.CALIBRATION_SHORT_DATE_PENALTY
+                    sim_prob = (1 - penalty) * sim_prob + penalty * market_prob
+            except (ValueError, TypeError):
+                pass
+
+        # Calibration 4: Category-specific offset
+        category_offset_applied = False
+        if category and self.category_profiles:
+            profile = self.category_profiles.get(category)
+            if profile:
+                offset = profile["offset"]
+                sim_prob = max(0.01, min(0.99, sim_prob - offset))
+                category_offset_applied = True
+
+        edge = sim_prob - market_prob
+        threshold = Config.PREDICTION_SIGNAL_THRESHOLD
+
+        # Calibration 3: Confidence penalty for large edges
+        base_confidence = sentiment.confidence
+        abs_edge = abs(edge)
+        if abs_edge > Config.CALIBRATION_HIGH_EDGE_MAX_REDUCTION:
+            confidence = base_confidence * 0.2  # Massive discount
+        elif abs_edge > Config.CALIBRATION_HIGH_EDGE_THRESHOLD:
+            confidence = base_confidence * 0.5
+        elif abs_edge > 0.15:
+            confidence = base_confidence * 0.8
+        else:
+            confidence = base_confidence
+
+        # Build reasoning
+        parts = []
+        if edge > threshold:
+            direction = "BUY_YES"
+            parts.append(
+                f"Calibrated probability ({sim_prob:.1%}) is {edge:.1%} above "
+                f"market ({market_prob:.1%})."
+            )
+        elif edge < -threshold:
+            direction = "BUY_NO"
+            parts.append(
+                f"Calibrated probability ({sim_prob:.1%}) is {abs(edge):.1%} below "
+                f"market ({market_prob:.1%})."
+            )
+        else:
+            direction = "HOLD"
+            parts.append(
+                f"Calibrated probability ({sim_prob:.1%}) is within threshold of "
+                f"market ({market_prob:.1%}). No clear edge."
+            )
+
+        if raw_sim_prob != sim_prob:
+            parts.append(f"Raw debate estimate was {raw_sim_prob:.1%}, adjusted via market regression.")
+        if days_to_end is not None and days_to_end < Config.CALIBRATION_DATE_DAMPENING_DAYS:
+            parts.append(f"Short-dated market ({days_to_end}d remaining) — extra dampening applied.")
+        if abs_edge > Config.CALIBRATION_HIGH_EDGE_THRESHOLD:
+            parts.append(f"Large edge penalized — confidence reduced (markets are usually right).")
+        if category_offset_applied and category:
+            offset = self.category_profiles[category]["offset"]
+            parts.append(f"Category '{category}' offset ({offset:+.3f}) applied.")
+
+        confidence_tier = "HIGH" if abs_edge >= TIER_THRESHOLD_HIGH else ("MEDIUM" if abs_edge >= TIER_THRESHOLD_MEDIUM else "LOW")
+
+        return TradingSignal(
+            direction=direction,
+            edge=edge,
+            confidence=confidence,
+            reasoning=" ".join(parts),
+            simulated_probability=sim_prob,
+            market_probability=market_prob,
+            category=category,
+            confidence_tier=confidence_tier,
+        )
diff --git a/backend/app/services/scenario_generator.py b/backend/app/services/scenario_generator.py
new file mode 100644
index 0000000..6f1060b
--- /dev/null
+++ b/backend/app/services/scenario_generator.py
@@ -0,0 +1,132 @@
+"""
+Scenario Generator — converts a prediction market question into a simulation scenario
+"""
+
+from typing import Optional, Dict, Any
+from dataclasses import dataclass
+
+from ..models.prediction import PredictionMarket
+from ..utils.llm_client import LLMClient
+from ..utils.logger import get_logger
+
+logger = get_logger('mirofish.scenario_generator')
+
+
+@dataclass
+class ScenarioConfig:
+    """Generated simulation scenario from a market question"""
+    simulation_requirement: str
+    context_document: str
+    suggested_agent_count: int
+    stance_distribution: Dict[str, float]  # {supportive: 0.4, opposing: 0.4, neutral: 0.2}
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "simulation_requirement": self.simulation_requirement,
+            "context_document": self.context_document,
+            "suggested_agent_count": self.suggested_agent_count,
+            "stance_distribution": self.stance_distribution,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> 'ScenarioConfig':
+        return cls(
+            simulation_requirement=data['simulation_requirement'],
+            context_document=data['context_document'],
+            suggested_agent_count=data.get('suggested_agent_count', 50),
+            stance_distribution=data.get('stance_distribution', {
+                "supportive": 0.4, "opposing": 0.4, "neutral": 0.2
+            }),
+        )
+
+
+SCENARIO_SYSTEM_PROMPT = """You are a research analyst preparing a factual briefing for prediction market analysis.
+
+Given a prediction market question, create a comprehensive factual context document. Your goal is ACCURACY, not balance — if the evidence overwhelmingly favors one outcome, say so clearly.
+
+CRITICAL RULES:
+1. The context document must be FACTUAL and evidence-based — include real data, precedents, and structural constraints
+2. Clearly state which outcome the evidence favors and why
+3. Include the strongest arguments for BOTH sides, but weight them by evidence quality
+4. Note base rates, historical precedents, and known constraints that affect probability
+5. Identify what would need to happen for the less likely outcome to occur
+
+Output JSON with these fields:
+{
+  "simulation_requirement": "A clear framing of the question with key factors to consider.",
+  "context_document": "A 500-1000 word factual briefing covering: current situation, key evidence for and against, base rates, historical precedents, structural constraints, stakeholder positions, and what would need to change for each outcome. Be honest about which side the evidence favors.",
+  "suggested_agent_count": 20,
+  "stance_distribution": {
+    "supportive": 0.3,
+    "opposing": 0.4,
+    "neutral": 0.3
+  }
+}"""
+
+
+class ScenarioGenerator:
+    """Converts a prediction market question into a simulation scenario"""
+
+    def __init__(self, llm_client: Optional[LLMClient] = None):
+        self.llm_client = llm_client or LLMClient()
+
+    def generate_scenario(self, market: PredictionMarket) -> ScenarioConfig:
+        """
+        Generate a balanced simulation scenario from a market question.
+
+        Args:
+            market: PredictionMarket with question and context
+
+        Returns:
+            ScenarioConfig ready for the simulation pipeline
+        """
+        user_message = self._build_prompt(market)
+
+        messages = [
+            {"role": "system", "content": SCENARIO_SYSTEM_PROMPT},
+            {"role": "user", "content": user_message},
+        ]
+
+        logger.info(f"Generating scenario for market: {market.title}")
+
+        result = self.llm_client.chat_json(
+            messages=messages,
+            temperature=0.4,
+            max_tokens=4096,
+        )
+
+        scenario = ScenarioConfig(
+            simulation_requirement=result.get('simulation_requirement', ''),
+            context_document=result.get('context_document', ''),
+            suggested_agent_count=result.get('suggested_agent_count', 50),
+            stance_distribution=result.get('stance_distribution', {
+                "supportive": 0.35, "opposing": 0.35, "neutral": 0.30
+            }),
+        )
+
+        logger.info(f"Scenario generated: {len(scenario.context_document)} chars context")
+        return scenario
+
+    def _build_prompt(self, market: PredictionMarket) -> str:
+        """Build the user prompt from market data"""
+        parts = [
+            f"# Prediction Market Question",
+            f"**Question:** {market.title}",
+            f"**Outcomes:** {', '.join(market.outcomes)}",
+            f"**Current Prices:** {', '.join(f'{o}: ${p:.2f}' for o, p in zip(market.outcomes, market.prices))}",
+            f"**Trading Volume:** ${market.volume:,.0f}",
+            f"**End Date:** {market.end_date}",
+        ]
+
+        if market.description:
+            # Truncate very long descriptions
+            desc = market.description[:3000]
+            parts.append(f"\n**Market Description:**\n{desc}")
+
+        parts.append(
+            "\nCreate a balanced simulation scenario for this market. "
+            "The simulation should produce organic discourse that reveals "
+            "the collective intelligence of diverse agents debating this question."
+        )
+
+        return '\n'.join(parts)
diff --git a/backend/app/storage/migrate_predictions.py b/backend/app/storage/migrate_predictions.py
new file mode 100644
index 0000000..314202b
--- /dev/null
+++ b/backend/app/storage/migrate_predictions.py
@@ -0,0 +1,60 @@
+"""
+One-time migration: move prediction runs from JSON files to SQLite.
+
+Usage:
+    cd backend && uv run python -m app.storage.migrate_predictions
+"""
+
+import json
+import os
+
+from ..config import Config
+from ..models.prediction import PredictionRun
+from ..storage.sqlite_store import SQLiteStore
+
+
+def migrate(db_path: str = None, predictions_dir: str = None):
+    """Migrate JSON prediction runs to SQLite."""
+    db_path = db_path or Config.SQLITE_DB_PATH
+    predictions_dir = predictions_dir or Config.PREDICTION_DATA_DIR
+
+    if not os.path.isdir(predictions_dir):
+        print(f"No predictions directory found at {predictions_dir}")
+        return 0
+
+    store = SQLiteStore(db_path=db_path)
+    migrated = 0
+    skipped = 0
+    errors = 0
+
+    for name in sorted(os.listdir(predictions_dir)):
+        run_path = os.path.join(predictions_dir, name, "run.json")
+        if not os.path.isfile(run_path):
+            continue
+
+        try:
+            with open(run_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+
+            run = PredictionRun.from_dict(data)
+
+            # Check if already migrated
+            existing = store.get_prediction_run(run.run_id)
+            if existing:
+                skipped += 1
+                continue
+
+            store.save_prediction_run(run)
+            migrated += 1
+            print(f"  Migrated: {run.run_id} ({run.market.get('title', '?')[:50] if run.market else '?'})")
+
+        except Exception as e:
+            errors += 1
+            print(f"  Error migrating {name}: {e}")
+
+    print(f"\nMigration complete: {migrated} migrated, {skipped} skipped, {errors} errors")
+    return migrated
+
+
+if __name__ == "__main__":
+    migrate()
diff --git a/backend/app/storage/prediction_store.py b/backend/app/storage/prediction_store.py
new file mode 100644
index 0000000..b67f59f
--- /dev/null
+++ b/backend/app/storage/prediction_store.py
@@ -0,0 +1,49 @@
+"""
+SQLite-backed prediction run store — drop-in replacement for PredictionRunManager.
+
+Implements the same classmethod-style interface (create_run, save_run, get_run,
+list_runs, delete_run) but persists to SQLite instead of JSON files.
+"""
+
+import uuid
+from datetime import datetime
+from typing import List, Optional
+
+from ..models.prediction import PredictionRun, PredictionRunStatus
+from ..storage.sqlite_store import SQLiteStore
+
+
+class SQLitePredictionStore:
+    """SQLite-backed prediction run persistence.
+
+    Drop-in replacement for PredictionRunManager. Unlike the classmethod-based
+    original, this requires a store instance — matching PredictionManager's DI pattern.
+    """
+
+    def __init__(self, store: SQLiteStore):
+        self.store = store
+
+    def create_run(self) -> PredictionRun:
+        run_id = f"pred_{uuid.uuid4().hex[:12]}"
+        now = datetime.now().isoformat()
+        run = PredictionRun(
+            run_id=run_id,
+            status=PredictionRunStatus.FETCHING_MARKET,
+            created_at=now,
+            updated_at=now,
+        )
+        self.store.save_prediction_run(run)
+        return run
+
+    def save_run(self, run: PredictionRun) -> None:
+        run.updated_at = datetime.now().isoformat()
+        self.store.save_prediction_run(run)
+
+    def get_run(self, run_id: str) -> Optional[PredictionRun]:
+        return self.store.get_prediction_run(run_id)
+
+    def list_runs(self, limit: int = 50) -> List[PredictionRun]:
+        return self.store.list_prediction_runs(limit=limit)
+
+    def delete_run(self, run_id: str) -> bool:
+        return self.store.delete_prediction_run(run_id)
diff --git a/backend/app/storage/sqlite_store.py b/backend/app/storage/sqlite_store.py
new file mode 100644
index 0000000..ac65ecc
--- /dev/null
+++ b/backend/app/storage/sqlite_store.py
@@ -0,0 +1,482 @@
+"""
+SQLAlchemy Core-based SQLite storage for backtesting and paper trading.
"""
SQLAlchemy Core-based SQLite storage for backtesting and paper trading.

Tables (the Table definitions below are the authoritative schema):

    backtest_runs         — one row per backtest run; config/metrics as JSON text
    backtest_results      — per-market results, run_id FK → backtest_runs.id
    market_categories     — cached market → category classifications
    calibration_profiles  — per-(run, category) probability offsets,
                            run_id FK → backtest_runs.id, UNIQUE(run_id, category)
    prediction_runs       — prediction pipeline runs; nested objects as JSON text
    paper_orders          — simulated order fills
    paper_positions       — open/resolved positions, order_id FK → paper_orders.id
"""

import json
import logging
import os
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional

from sqlalchemy import (
    Column,
    Float,
    ForeignKey,
    Integer,
    MetaData,
    String,
    Table,
    UniqueConstraint,
    create_engine,
    text,
)
from sqlalchemy.engine import Engine
from sqlalchemy.exc import OperationalError

from ..models.backtest import BacktestResult, BacktestRun
from ..models.prediction import PredictionRun, PredictionRunStatus
from ..models.position import PaperOrder, PaperPosition

logger = logging.getLogger(__name__)


class StorageError(Exception):
    """Raised when a storage operation fails (disk full, I/O error, etc.)."""
    pass


metadata = MetaData()

backtest_runs = Table(
    "backtest_runs",
    metadata,
    Column("id", String, primary_key=True),
    Column("started_at", String),
    Column("config", String),   # JSON
    Column("status", String),
    Column("metrics", String),  # JSON
    Column("total_markets", Integer, default=0),
    Column("completed_markets", Integer, default=0),
    Column("failed_markets", Integer, default=0),
)

backtest_results = Table(
    "backtest_results",
    metadata,
    Column("id", String, primary_key=True),
    Column("run_id", String, ForeignKey("backtest_runs.id")),
    Column("market_id", String),
    Column("market_title", String),
    Column("predicted_prob", Float),
    Column("market_prob", Float),
    Column("actual_outcome", String),
    Column("signal_direction", String),
    Column("edge", Float),
    Column("brier_score", Float),
    Column("correct", Integer),
    Column("category", String),
    Column("confidence_tier", String),
)

market_categories = Table(
    "market_categories",
    metadata,
    Column("market_id", String, primary_key=True),
    Column("category", String, nullable=False),
    Column("classified_at", String),
)

calibration_profiles = Table(
    "calibration_profiles",
    metadata,
    Column("id", String, primary_key=True),
    Column("run_id", String, ForeignKey("backtest_runs.id")),
    Column("category", String, nullable=False),
    Column("offset", Float, nullable=False),
    Column("sample_size", Integer),
    Column("created_at", String),
    UniqueConstraint("run_id", "category", name="uq_run_category"),
)

prediction_runs = Table(
    "prediction_runs",
    metadata,
    Column("run_id", String, primary_key=True),
    Column("status", String),
    Column("created_at", String),
    Column("updated_at", String),
    Column("market", String),     # JSON
    Column("project_id", String),
    Column("graph_id", String),
    Column("simulation_id", String),
    Column("scenario", String),   # JSON
    Column("sentiment", String),  # JSON
    Column("signal", String),     # JSON
    Column("error", String),
    Column("progress_message", String),
)

paper_orders = Table(
    "paper_orders",
    metadata,
    Column("id", String, primary_key=True),
    Column("market_id", String),
    Column("signal_id", String),
    Column("side", String),
    Column("outcome", String),
    Column("size", Float),
    Column("fill_price", Float),
    Column("slippage", Float),
    Column("created_at", String),
)

paper_positions = Table(
    "paper_positions",
    metadata,
    Column("id", String, primary_key=True),
    Column("order_id", String, ForeignKey("paper_orders.id")),
    Column("market_id", String),
    Column("outcome", String),
    Column("entry_price", Float),
    Column("cost_basis", Float),
    Column("status", String),
    Column("resolved_pnl", Float),
)


class SQLiteStore:
    """SQLite repository for backtest runs, results, and paper trading."""

    def __init__(self, db_path: str = "data/mirofish.db"):
        """Open (creating if needed) the SQLite database at ``db_path``."""
        # Ensure the parent directory exists before SQLite creates the file.
        db_dir = os.path.dirname(db_path)
        if db_dir:
            os.makedirs(db_dir, exist_ok=True)

        self.engine: Engine = create_engine(
            f"sqlite:///{db_path}",
            # Flask handlers may touch the store from multiple threads.
            connect_args={"check_same_thread": False},
            echo=False,
        )
        metadata.create_all(self.engine)
        # WAL allows concurrent reads during writes; also enforce FKs.
        with self.engine.connect() as conn:
            conn.execute(text("PRAGMA journal_mode=WAL"))
            conn.execute(text("PRAGMA foreign_keys=ON"))
            conn.commit()
        self._migrate_add_columns()
        logger.info("SQLiteStore initialized: %s (WAL mode)", db_path)

    def _safe_write(self, operation: str, fn):
        """Execute a write operation with disk-full error handling.

        Args:
            operation: Human-readable description for error messages
            fn: Callable that receives a connection and performs the write

        Raises:
            StorageError: When the failure looks like a disk/I-O problem.
        """
        try:
            with self.engine.connect() as conn:
                fn(conn)
                conn.commit()
        except OperationalError as e:
            err_msg = str(e).lower()
            # Heuristic match on SQLite's disk/readonly error strings.
            if "disk" in err_msg or "i/o" in err_msg or "full" in err_msg or "readonly" in err_msg:
                logger.error(f"Storage I/O error during {operation}: {e}")
                raise StorageError(f"Disk I/O error during {operation}: {e}") from e
            raise

    def _migrate_add_columns(self):
        """Add new columns to existing tables (idempotent)."""
        migrations = [
            "ALTER TABLE backtest_results ADD COLUMN category TEXT",
            "ALTER TABLE backtest_results ADD COLUMN confidence_tier TEXT",
        ]
        with self.engine.connect() as conn:
            for sql in migrations:
                try:
                    conn.execute(text(sql))
                except Exception:
                    pass  # Column already exists
            conn.commit()

    # ── Backtest Runs ────────────────────────────────────────────────

    def save_backtest_run(self, run: BacktestRun) -> None:
        """Insert or replace a backtest run (nested dicts stored as JSON)."""
        d = run.to_dict()
        d["config"] = json.dumps(d["config"]) if d["config"] else None
        d["metrics"] = json.dumps(d["metrics"]) if d["metrics"] else None
        self._safe_write("save_backtest_run", lambda conn: conn.execute(
            backtest_runs.insert().prefix_with("OR REPLACE"), d,
        ))

    def get_backtest_run(self, run_id: str) -> Optional[BacktestRun]:
        """Fetch a single backtest run by ID, or None."""
        with self.engine.connect() as conn:
            row = conn.execute(
                backtest_runs.select().where(backtest_runs.c.id == run_id)
            ).mappings().first()
        if row is None:
            return None
        return self._row_to_backtest_run(row)

    def list_backtest_runs(self) -> List[BacktestRun]:
        """All backtest runs, most recently started first."""
        with self.engine.connect() as conn:
            rows = conn.execute(
                backtest_runs.select().order_by(backtest_runs.c.started_at.desc())
            ).mappings().all()
        return [self._row_to_backtest_run(r) for r in rows]

    def has_active_backtest(self) -> Optional[str]:
        """Return the ID of any PENDING/RUNNING backtest, or None."""
        with self.engine.connect() as conn:
            row = conn.execute(
                backtest_runs.select()
                .where(backtest_runs.c.status.in_(["PENDING", "RUNNING", "COMPUTING_METRICS"]))
                .limit(1)
            ).mappings().first()
        return row["id"] if row else None

    def update_backtest_run(self, run_id: str, **kwargs: Any) -> None:
        """Partially update a backtest run; dict values are JSON-serialized."""
        updates: Dict[str, Any] = {}
        for key, value in kwargs.items():
            if key in ("config", "metrics") and value is not None:
                updates[key] = json.dumps(value)
            else:
                updates[key] = value
        if not updates:
            # .values() with no arguments raises in SQLAlchemy — nothing to do.
            return
        self._safe_write("update_backtest_run", lambda conn: conn.execute(
            backtest_runs.update().where(backtest_runs.c.id == run_id).values(**updates)
        ))

    @staticmethod
    def _row_to_backtest_run(row: Any) -> BacktestRun:
        """Decode a DB row (JSON text columns) back into a BacktestRun."""
        d = dict(row)
        d["config"] = json.loads(d["config"]) if d.get("config") else {}
        d["metrics"] = json.loads(d["metrics"]) if d.get("metrics") else None
        return BacktestRun.from_dict(d)

    # ── Backtest Results ─────────────────────────────────────────────

    def save_backtest_result(self, result: BacktestResult) -> None:
        """Insert or replace a single per-market backtest result."""
        d = result.to_dict()
        self._safe_write("save_backtest_result", lambda conn: conn.execute(
            backtest_results.insert().prefix_with("OR REPLACE"), d,
        ))

    def get_results_by_run(self, run_id: str) -> List[BacktestResult]:
        """All results belonging to one backtest run."""
        with self.engine.connect() as conn:
            rows = conn.execute(
                backtest_results.select().where(backtest_results.c.run_id == run_id)
            ).mappings().all()
        return [BacktestResult.from_dict(dict(r)) for r in rows]

    def get_completed_market_ids(self, run_id: str) -> List[str]:
        """Market IDs already processed for ``run_id`` (for resume support)."""
        with self.engine.connect() as conn:
            rows = conn.execute(
                backtest_results.select()
                .with_only_columns(backtest_results.c.market_id)
                .where(backtest_results.c.run_id == run_id)
            ).all()
        return [r[0] for r in rows]

    # ── Paper Orders ─────────────────────────────────────────────────

    def save_paper_order(self, order: PaperOrder) -> None:
        """Insert or replace a simulated order fill."""
        d = order.to_dict()
        self._safe_write("save_paper_order", lambda conn: conn.execute(
            paper_orders.insert().prefix_with("OR REPLACE"), d,
        ))

    def get_orders(self) -> List[PaperOrder]:
        """All paper orders (unordered)."""
        with self.engine.connect() as conn:
            rows = conn.execute(paper_orders.select()).mappings().all()
        return [PaperOrder.from_dict(dict(r)) for r in rows]

    # ── Paper Positions ──────────────────────────────────────────────

    def save_paper_position(self, position: PaperPosition) -> None:
        """Insert or replace a paper position."""
        d = position.to_dict()
        self._safe_write("save_paper_position", lambda conn: conn.execute(
            paper_positions.insert().prefix_with("OR REPLACE"), d,
        ))

    def get_positions(self) -> List[PaperPosition]:
        """All paper positions (unordered)."""
        with self.engine.connect() as conn:
            rows = conn.execute(paper_positions.select()).mappings().all()
        return [PaperPosition.from_dict(dict(r)) for r in rows]

    # ── Market Categories ─────────────────────────────────────────

    def get_market_category(self, market_id: str) -> Optional[str]:
        """Return cached category for a market, or None."""
        with self.engine.connect() as conn:
            row = conn.execute(
                market_categories.select().where(market_categories.c.market_id == market_id)
            ).mappings().first()
        return row["category"] if row else None

    def save_market_category(self, market_id: str, category: str) -> None:
        """Cache a market's category classification."""
        row = {
            "market_id": market_id,
            "category": category,
            "classified_at": datetime.now().isoformat(),
        }
        self._safe_write("save_market_category", lambda conn: conn.execute(
            market_categories.insert().prefix_with("OR REPLACE"), row,
        ))

    # ── Calibration Profiles ──────────────────────────────────────

    def save_calibration_profile(
        self, run_id: str, category: str, offset: float, sample_size: int
    ) -> None:
        """Save a per-category calibration offset for a backtest run."""
        row = {
            "id": f"cp_{uuid.uuid4().hex[:12]}",
            "run_id": run_id,
            "category": category,
            "offset": offset,
            "sample_size": sample_size,
            "created_at": datetime.now().isoformat(),
        }
        self._safe_write("save_calibration_profile", lambda conn: conn.execute(
            calibration_profiles.insert().prefix_with("OR REPLACE"), row,
        ))

    def load_calibration_profiles(self, run_id: str) -> Dict[str, Dict[str, Any]]:
        """Load all category offsets for a run. Returns {category: {offset, sample_size}}."""
        with self.engine.connect() as conn:
            rows = conn.execute(
                calibration_profiles.select().where(calibration_profiles.c.run_id == run_id)
            ).mappings().all()
        return {
            row["category"]: {"offset": row["offset"], "sample_size": row["sample_size"]}
            for row in rows
        }

    def get_latest_completed_run_id(self) -> Optional[str]:
        """Return the ID of the most recent COMPLETED backtest run, or None."""
        with self.engine.connect() as conn:
            row = conn.execute(
                backtest_runs.select()
                .where(backtest_runs.c.status == "COMPLETED")
                .order_by(backtest_runs.c.started_at.desc())
                .limit(1)
            ).mappings().first()
        return row["id"] if row else None

    # ── Prediction Runs ───────────────────────────────────────────

    def save_prediction_run(self, run: PredictionRun) -> None:
        """Save or update a prediction run."""
        d = run.to_dict()
        # Serialize nested dicts as JSON text columns.
        for key in ("market", "scenario", "sentiment", "signal"):
            if d.get(key) is not None:
                d[key] = json.dumps(d[key])
        self._safe_write("save_prediction_run", lambda conn: conn.execute(
            prediction_runs.insert().prefix_with("OR REPLACE"), d,
        ))

    def get_prediction_run(self, run_id: str) -> Optional[PredictionRun]:
        """Get a prediction run by ID."""
        with self.engine.connect() as conn:
            row = conn.execute(
                prediction_runs.select().where(prediction_runs.c.run_id == run_id)
            ).mappings().first()
        if row is None:
            return None
        return self._row_to_prediction_run(row)

    def list_prediction_runs(self, limit: int = 50) -> List[PredictionRun]:
        """List prediction runs, most recent first."""
        with self.engine.connect() as conn:
            rows = conn.execute(
                prediction_runs.select()
                .order_by(prediction_runs.c.created_at.desc())
                .limit(limit)
            ).mappings().all()
        return [self._row_to_prediction_run(r) for r in rows]

    def delete_prediction_run(self, run_id: str) -> bool:
        """Delete a prediction run. Returns True if a row was deleted."""
        deleted = False

        def _do_delete(conn):
            nonlocal deleted
            result = conn.execute(
                prediction_runs.delete().where(prediction_runs.c.run_id == run_id)
            )
            deleted = result.rowcount > 0

        self._safe_write("delete_prediction_run", _do_delete)
        return deleted

    @staticmethod
    def _row_to_prediction_run(row: Any) -> PredictionRun:
        """Decode a DB row (JSON text columns) back into a PredictionRun."""
        d = dict(row)
        for key in ("market", "scenario", "sentiment", "signal"):
            if d.get(key):
                d[key] = json.loads(d[key])
        return PredictionRun.from_dict(d)
+ # Ollama context window size — prevents prompt truncation self._num_ctx = int(os.environ.get('OLLAMA_NUM_CTX', '8192')) + def _is_anthropic(self) -> bool: + """Check if we're using an Anthropic Claude model.""" + return (self.model or '').startswith('claude') + def _is_ollama(self) -> bool: """Check if we're talking to an Ollama server.""" return '11434' in (self.base_url or '') + def _get_anthropic_client(self): + """Lazy-init Anthropic client.""" + if self._anthropic_client is None: + import anthropic + self._anthropic_client = anthropic.Anthropic( + api_key=self.api_key, + timeout=self._timeout, + ) + return self._anthropic_client + + def _get_openai_client(self): + """Lazy-init OpenAI client.""" + if self._openai_client is None: + self._openai_client = OpenAI( + api_key=self.api_key, + base_url=self.base_url, + timeout=self._timeout, + ) + return self._openai_client + def chat( self, messages: List[Dict[str, str]], @@ -52,17 +73,75 @@ def chat( response_format: Optional[Dict] = None ) -> str: """ - Send chat request + Send a chat request. Args: messages: Message list - temperature: Temperature parameter - max_tokens: Max token count - response_format: Response format (e.g., JSON mode) + temperature: Sampling temperature + max_tokens: Maximum tokens in response + response_format: Response format (e.g. 
JSON mode) Returns: Model response text """ + if self._is_anthropic(): + return self._chat_anthropic(messages, temperature, max_tokens, response_format) + return self._chat_openai(messages, temperature, max_tokens, response_format) + + def _chat_anthropic( + self, + messages: List[Dict[str, str]], + temperature: float, + max_tokens: int, + response_format: Optional[Dict] = None + ) -> str: + """Send chat request via Anthropic SDK.""" + client = self._get_anthropic_client() + + # Extract system message (Anthropic uses a separate system param) + system = None + user_messages = [] + for msg in messages: + if msg["role"] == "system": + system = (system + "\n\n" + msg["content"]) if system else msg["content"] + else: + user_messages.append(msg) + + # If response_format is JSON, add instruction to system prompt + if response_format and response_format.get("type") == "json_object": + json_instruction = "\n\nIMPORTANT: You must respond with valid JSON only. No markdown, no explanation, just the JSON object." 
+ system = (system + json_instruction) if system else json_instruction + + kwargs = { + "model": self.model, + "messages": user_messages, + "temperature": temperature, + "max_tokens": max_tokens, + } + if system: + kwargs["system"] = system + + response = client.messages.create(**kwargs) + + content = "" + for block in response.content: + if block.type == "text": + content += block.text + + # Remove tags from some models + content = re.sub(r'[\s\S]*?', '', content).strip() + return content + + def _chat_openai( + self, + messages: List[Dict[str, str]], + temperature: float, + max_tokens: int, + response_format: Optional[Dict] = None + ) -> str: + """Send chat request via OpenAI SDK.""" + client = self._get_openai_client() + kwargs = { "model": self.model, "messages": messages, @@ -79,9 +158,9 @@ def chat( "options": {"num_ctx": self._num_ctx} } - response = self.client.chat.completions.create(**kwargs) + response = client.chat.completions.create(**kwargs) content = response.choices[0].message.content - # Some models (like MiniMax M2.5) include thinking content in response, need to remove + # Some models include reasoning — remove it content = re.sub(r'[\s\S]*?', '', content).strip() return content @@ -92,12 +171,12 @@ def chat_json( max_tokens: int = 4096 ) -> Dict[str, Any]: """ - Send chat request and return JSON + Send a chat request and return parsed JSON. 
    Args:
        messages: Message list
-        temperature: Temperature parameter
-        max_tokens: Max token count
+        temperature: Sampling temperature
+        max_tokens: Maximum tokens in response
 
     Returns:
         Parsed JSON object
@@ -117,4 +196,4 @@ def chat_json(
     try:
         return json.loads(cleaned_response)
     except json.JSONDecodeError:
-        raise ValueError(f"Invalid JSON format from LLM: {cleaned_response}")
+        raise ValueError(f"LLM returned invalid JSON: {cleaned_response}")
diff --git a/backend/app/utils/retry.py b/backend/app/utils/retry.py
index 2a0eacb..9d9747b 100644
--- a/backend/app/utils/retry.py
+++ b/backend/app/utils/retry.py
@@ -1,6 +1,5 @@
 """
-API Call Retry Mechanism
-Handles retry logic for external API calls like LLM
+API call retry mechanism with exponential backoff
 """
 
 import time
@@ -22,15 +21,15 @@ def retry_with_backoff(
     on_retry: Optional[Callable[[Exception, int], None]] = None
 ):
     """
-    Retry decorator with exponential backoff
+    Retry decorator with exponential backoff.
 
     Args:
-        max_retries: Max retry attempts
-        initial_delay: Initial delay (seconds)
-        max_delay: Max delay (seconds)
-        backoff_factor: Backoff factor
+        max_retries: Maximum retry attempts
+        initial_delay: Initial delay in seconds
+        max_delay: Maximum delay in seconds
+        backoff_factor: Multiplier for delay growth
         jitter: Whether to add random jitter
-        exceptions: Exception types to retry
+        exceptions: Exception types that trigger retry
         on_retry: Callback on retry (exception, retry_count)
 
     Usage:
@@ -43,36 +42,36 @@ def decorator(func: Callable) -> Callable:
         def wrapper(*args, **kwargs) -> Any:
             last_exception = None
             delay = initial_delay
-
+
             for attempt in range(max_retries + 1):
                 try:
                     return func(*args, **kwargs)
-
+
                 except exceptions as e:
                     last_exception = e
-
+
                     if attempt == max_retries:
-                        logger.error(f"Function {func.__name__} still failed after {max_retries} retries: {str(e)}")
+                        logger.error(f"Function {func.__name__} failed after {max_retries} retries: {str(e)}")
                         raise
-
-                    # Calculate delay
+
+                    # Calculate delay
current_delay = min(delay, max_delay) if jitter: current_delay = current_delay * (0.5 + random.random()) - + logger.warning( f"Function {func.__name__} attempt {attempt + 1} failed: {str(e)}, " - f"retrying in {current_delay:.1f} seconds..." + f"retrying in {current_delay:.1f}s..." ) - + if on_retry: on_retry(e, attempt + 1) - + time.sleep(current_delay) delay *= backoff_factor - + raise last_exception - + return wrapper return decorator @@ -87,7 +86,7 @@ def retry_with_backoff_async( on_retry: Optional[Callable[[Exception, int], None]] = None ): """ - Async version of retry decorator + Async version of the retry decorator with exponential backoff. """ import asyncio @@ -96,143 +95,35 @@ def decorator(func: Callable) -> Callable: async def wrapper(*args, **kwargs) -> Any: last_exception = None delay = initial_delay - + for attempt in range(max_retries + 1): try: return await func(*args, **kwargs) - + except exceptions as e: last_exception = e - + if attempt == max_retries: - logger.error(f"Async function {func.__name__} still failed after {max_retries} retries: {str(e)}") + logger.error(f"Async function {func.__name__} failed after {max_retries} retries: {str(e)}") raise - + current_delay = min(delay, max_delay) if jitter: current_delay = current_delay * (0.5 + random.random()) - + logger.warning( f"Async function {func.__name__} attempt {attempt + 1} failed: {str(e)}, " - f"retrying in {current_delay:.1f} seconds..." + f"retrying in {current_delay:.1f}s..." 
) - + if on_retry: on_retry(e, attempt + 1) - + await asyncio.sleep(current_delay) delay *= backoff_factor - + raise last_exception - + return wrapper return decorator - -class RetryableAPIClient: - """ - Retryable API client wrapper - """ - - def __init__( - self, - max_retries: int = 3, - initial_delay: float = 1.0, - max_delay: float = 30.0, - backoff_factor: float = 2.0 - ): - self.max_retries = max_retries - self.initial_delay = initial_delay - self.max_delay = max_delay - self.backoff_factor = backoff_factor - - def call_with_retry( - self, - func: Callable, - *args, - exceptions: Tuple[Type[Exception], ...] = (Exception,), - **kwargs - ) -> Any: - """ - Execute function call with retry on failure - - Args: - func: Function to call - *args: Function arguments - exceptions: Exception types to retry - **kwargs: Function keyword arguments - - Returns: - Function return value - """ - last_exception = None - delay = self.initial_delay - - for attempt in range(self.max_retries + 1): - try: - return func(*args, **kwargs) - - except exceptions as e: - last_exception = e - - if attempt == self.max_retries: - logger.error(f"API call still failed after {self.max_retries} retries: {str(e)}") - raise - - current_delay = min(delay, self.max_delay) - current_delay = current_delay * (0.5 + random.random()) - - logger.warning( - f"API call attempt {attempt + 1} failed: {str(e)}, " - f"retrying in {current_delay:.1f} seconds..." - ) - - time.sleep(current_delay) - delay *= self.backoff_factor - - raise last_exception - - def call_batch_with_retry( - self, - items: list, - process_func: Callable, - exceptions: Tuple[Type[Exception], ...] 
= (Exception,), - continue_on_failure: bool = True - ) -> Tuple[list, list]: - """ - Batch call with individual retry for each failed item - - Args: - items: List of items to process - process_func: Processing function, accepts single item as parameter - exceptions: Exception types to retry - continue_on_failure: Whether to continue processing other items after failure - - Returns: - (successful results list, failed items list) - """ - results = [] - failures = [] - - for idx, item in enumerate(items): - try: - result = self.call_with_retry( - process_func, - item, - exceptions=exceptions - ) - results.append(result) - - except Exception as e: - logger.error(f"Failed to process item {idx + 1}: {str(e)}") - failures.append({ - "index": idx, - "item": item, - "error": str(e) - }) - - if not continue_on_failure: - raise - - return results, failures - diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 233ef23..fc3dc5d 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -9,29 +9,35 @@ authors = [ ] dependencies = [ - # 核心框架 + # Core framework "flask>=3.0.0", "flask-cors>=6.0.0", - - # LLM 相关 + + # LLM "openai>=1.0.0", - + # Neo4j graph database driver "neo4j>=5.15.0", - - # OASIS 社交媒体模拟 + + # OASIS social media simulation "camel-oasis==0.2.5", "camel-ai==0.2.78", - - # 文件处理 + + # File processing "PyMuPDF>=1.24.0", - # 编码检测(支持非UTF-8编码的文本文件) + # Encoding detection (non-UTF-8 text files) "charset-normalizer>=3.0.0", "chardet>=5.0.0", - - # 工具库 + + # Utilities "python-dotenv>=1.0.0", "pydantic>=2.0.0", + + # SQLite storage (SQLAlchemy Core) + "sqlalchemy>=2.0.0", + + # Calibration (Platt scaling) + "scikit-learn>=1.4.0", ] [project.optional-dependencies] diff --git a/backend/requirements.txt b/backend/requirements.txt index 5cffdbf..e29d220 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -12,6 +12,8 @@ flask-cors>=6.0.0 # ============= LLM 相关 ============= # OpenAI SDK(统一使用 OpenAI 格式调用 LLM / Ollama) openai>=1.0.0 +# 
Anthropic SDK(支持 Claude 模型) +anthropic>=0.39.0 # HTTP client for Ollama embedding API requests>=2.28.0 diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 0000000..1ede8c7 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,120 @@ +""" +Shared pytest fixtures for the MiroFish backtesting test suite. +""" + +import os +import tempfile +from unittest.mock import MagicMock, patch + +import pytest + +from app.models.prediction import PredictionMarket + + +# --------------------------------------------------------------------------- +# SQLite store (in-memory) +# --------------------------------------------------------------------------- + +@pytest.fixture +def sqlite_store(tmp_path): + """In-memory SQLite store backed by a tmpdir file so WAL works correctly.""" + from app.storage.sqlite_store import SQLiteStore + + db_path = str(tmp_path / "test.db") + store = SQLiteStore(db_path=db_path) + return store + + +# --------------------------------------------------------------------------- +# Flask app / test client +# --------------------------------------------------------------------------- + +@pytest.fixture +def app(sqlite_store): + """Flask test app with mocked Neo4j and in-memory SQLite.""" + with patch("app.storage.Neo4jStorage"): + with patch("app.services.simulation_runner.SimulationRunner.register_cleanup"): + from app import create_app + + class TestConfig: + SECRET_KEY = "test-secret" + DEBUG = False + JSON_AS_ASCII = False + TESTING = True + NEO4J_URI = "bolt://localhost:7687" + NEO4J_USER = "neo4j" + NEO4J_PASSWORD = "test" + SQLITE_DB_PATH = ":memory:" + LLM_API_KEY = "test-key" + + test_app = create_app(config_class=TestConfig) + + # Replace the sqlite extension with our fixture store + test_app.extensions["sqlite"] = sqlite_store + + yield test_app + + +@pytest.fixture +def client(app): + """Flask 
test client.""" + return app.test_client() + + +# --------------------------------------------------------------------------- +# Mock LLM client +# --------------------------------------------------------------------------- + +@pytest.fixture +def mock_llm_client(): + """Mock LLM client that returns predetermined JSON responses.""" + client = MagicMock() + client.chat.return_value = { + "choices": [ + { + "message": { + "content": '{"probability": 0.65, "confidence": 0.7, "reasoning": "Test reasoning"}' + } + } + ] + } + return client + + +# --------------------------------------------------------------------------- +# Sample markets +# --------------------------------------------------------------------------- + +@pytest.fixture +def sample_market(): + """A PredictionMarket fixture for testing.""" + return PredictionMarket( + condition_id="cond_abc123", + title="Will BTC exceed $100k by end of 2025?", + slug="btc-100k-2025", + description="Whether Bitcoin will exceed $100,000 USD", + outcomes=["Yes", "No"], + prices=[0.60, 0.40], + volume=500000.0, + liquidity=100000.0, + end_date="2025-12-31T23:59:59Z", + active=True, + ) + + +@pytest.fixture +def sample_resolved_market(): + """A resolved PredictionMarket with actual_outcome set.""" + return PredictionMarket( + condition_id="cond_resolved_001", + title="Will ETH merge complete by 2023?", + slug="eth-merge-2023", + description="Whether Ethereum will complete the merge", + outcomes=["Yes", "No"], + prices=[0.85, 0.15], + volume=1000000.0, + liquidity=250000.0, + end_date="2023-12-31T23:59:59Z", + active=False, + actual_outcome="YES", + ) diff --git a/backend/tests/test_backtest_api.py b/backend/tests/test_backtest_api.py new file mode 100644 index 0000000..76504a7 --- /dev/null +++ b/backend/tests/test_backtest_api.py @@ -0,0 +1,145 @@ +""" +Tests for app.api.backtest — Flask API endpoints. 
+""" + +import threading +import time +from unittest.mock import patch, MagicMock + +import pytest + +from app.models.backtest import BacktestRun, BacktestRunStatus + + +class TestStartBacktest: + + def test_start_backtest(self, client, sqlite_store): + with patch("app.api.backtest.Backtester") as MockBT: + mock_bt = MagicMock() + MockBT.return_value = mock_bt + + resp = client.post( + "/api/backtest/run", + json={"num_markets": 5}, + ) + + data = resp.get_json() + assert resp.status_code == 200 + assert data["success"] is True + assert data["data"]["status"] == "started" + assert "run_id" in data["data"] + + # Clean up: clear running backtests + import app.api.backtest as bt_mod + with bt_mod._lock: + bt_mod._running_backtests.clear() + + +class TestGetBacktestStatus: + + def test_get_backtest_status(self, client, sqlite_store): + run = BacktestRun( + id="bt_api_status", + status=BacktestRunStatus.COMPLETED.value, + metrics={"accuracy": 0.75}, + ) + sqlite_store.save_backtest_run(run) + + resp = client.get("/api/backtest/run/bt_api_status") + data = resp.get_json() + + assert resp.status_code == 200 + assert data["success"] is True + assert data["data"]["id"] == "bt_api_status" + assert data["data"]["status"] == "COMPLETED" + + +class TestGetBacktestNotFound: + + def test_get_backtest_not_found(self, client, sqlite_store): + resp = client.get("/api/backtest/run/nonexistent_id") + data = resp.get_json() + + assert resp.status_code == 404 + assert data["success"] is False + + +class TestListBacktestsEmpty: + + def test_list_backtests_empty(self, client, sqlite_store): + resp = client.get("/api/backtest/runs") + data = resp.get_json() + + assert resp.status_code == 200 + assert data["success"] is True + assert data["count"] == 0 + assert data["data"] == [] + + +class TestGetBacktestWithCategoryMetrics: + + def test_category_metrics_in_response(self, client, sqlite_store): + """API response includes category_metrics when present.""" + from app.models.backtest import 
BacktestResult + + run = BacktestRun( + id="bt_api_cat", + status=BacktestRunStatus.COMPLETED.value, + metrics={ + "accuracy": 0.75, + "brier_score": 0.20, + "roi": 0.10, + "sharpe_ratio": 1.5, + "max_drawdown": 0.05, + "calibration_rmse": 0.08, + "markets_tested": 10, + "avg_edge": 0.05, + "category_metrics": { + "politics": {"accuracy": 0.80, "markets_tested": 5}, + "crypto": {"accuracy": 0.60, "markets_tested": 5}, + }, + "confidence_tier_metrics": { + "HIGH": {"accuracy": 0.90, "markets_tested": 4}, + "LOW": {"accuracy": 0.50, "markets_tested": 6}, + }, + }, + ) + sqlite_store.save_backtest_run(run) + + # Add a result with category + sqlite_store.save_backtest_result(BacktestResult( + id="btr_api_cat_1", run_id="bt_api_cat", + market_id="mkt_1", category="politics", confidence_tier="HIGH", + predicted_prob=0.7, market_prob=0.5, + )) + + resp = client.get("/api/backtest/run/bt_api_cat") + data = resp.get_json() + + assert resp.status_code == 200 + assert data["data"]["metrics"]["category_metrics"]["politics"]["accuracy"] == 0.80 + assert data["data"]["metrics"]["confidence_tier_metrics"]["HIGH"]["accuracy"] == 0.90 + assert data["data"]["results"][0]["category"] == "politics" + assert data["data"]["results"][0]["confidence_tier"] == "HIGH" + + +class TestConcurrentBacktestRejected: + + def test_concurrent_backtest_rejected(self, client, sqlite_store): + """Second backtest should be rejected with 409 via DB-level guard.""" + # Insert a RUNNING backtest into the DB + run = BacktestRun(id="bt_already_running", status="RUNNING") + sqlite_store.save_backtest_run(run) + + resp = client.post( + "/api/backtest/run", + json={"num_markets": 5}, + ) + data = resp.get_json() + + assert resp.status_code == 409 + assert data["success"] is False + assert "already running" in data["error"] + + # Clean up + sqlite_store.update_backtest_run("bt_already_running", status="COMPLETED") diff --git a/backend/tests/test_backtester.py b/backend/tests/test_backtester.py new file mode 
100644 index 0000000..e9dda20 --- /dev/null +++ b/backend/tests/test_backtester.py @@ -0,0 +1,195 @@ +""" +Tests for app.services.backtester.Backtester (integration-level with mocked externals). +""" + +from unittest.mock import patch, MagicMock + +import pytest + +from app.models.backtest import BacktestRun, BacktestResult, BacktestRunStatus +from app.models.prediction import ( + PredictionMarket, + PredictionRun, + PredictionRunStatus, + TradingSignal, +) +from app.services.backtester import Backtester + + +def _make_resolved_market(idx: int, actual_outcome: str = "YES") -> PredictionMarket: + return PredictionMarket( + condition_id=f"cond_{idx}", + title=f"Test Market {idx}", + slug=f"test-market-{idx}", + description=f"Description {idx}", + outcomes=["Yes", "No"], + prices=[0.60, 0.40], + volume=100000.0, + liquidity=50000.0, + end_date="2025-12-31T23:59:59Z", + active=False, + actual_outcome=actual_outcome, + ) + + +def _make_completed_prediction_run(sim_prob: float = 0.70, market_prob: float = 0.60): + run = PredictionRun( + run_id="pred_test", + status=PredictionRunStatus.COMPLETED, + created_at="2025-01-01", + updated_at="2025-01-01", + signal=TradingSignal( + direction="BUY_YES", + edge=sim_prob - market_prob, + confidence=0.8, + reasoning="test", + simulated_probability=sim_prob, + market_probability=market_prob, + ).to_dict(), + ) + return run + + +class TestBacktestFullPipeline: + + @patch("app.services.backtester.PredictionManager") + @patch("app.services.backtester.PredictionRunManager") + @patch("app.services.backtester.PolymarketClient") + def test_backtest_full_pipeline( + self, MockPolyClient, MockRunMgr, MockPredMgr, sqlite_store + ): + """Mock 3 resolved markets, verify results and metrics.""" + markets = [_make_resolved_market(i, "YES") for i in range(3)] + + mock_poly = MagicMock() + mock_poly.fetch_resolved_markets.return_value = markets + MockPolyClient.return_value = mock_poly + + mock_run = _make_completed_prediction_run() + 
MockRunMgr.create_run.return_value = mock_run + + mock_mgr = MagicMock() + mock_mgr.run_prediction.return_value = mock_run + MockPredMgr.return_value = mock_mgr + + backtester = Backtester(sqlite_store) + backtester.polymarket = mock_poly + + result_run = backtester.run(num_markets=3) + + assert result_run.status == BacktestRunStatus.COMPLETED.value + assert result_run.metrics is not None + assert result_run.completed_markets == 3 + assert result_run.failed_markets == 0 + + results = sqlite_store.get_results_by_run(result_run.id) + assert len(results) == 3 + + +class TestBacktestResumeAfterCrash: + + @patch("app.services.backtester.PredictionManager") + @patch("app.services.backtester.PredictionRunManager") + @patch("app.services.backtester.PolymarketClient") + def test_backtest_resume_after_crash( + self, MockPolyClient, MockRunMgr, MockPredMgr, sqlite_store + ): + """Pre-populate some results, verify they are skipped on resume.""" + markets = [_make_resolved_market(i, "YES") for i in range(3)] + + mock_poly = MagicMock() + mock_poly.fetch_resolved_markets.return_value = markets + MockPolyClient.return_value = mock_poly + + mock_run = _make_completed_prediction_run() + MockRunMgr.create_run.return_value = mock_run + + mock_mgr = MagicMock() + mock_mgr.run_prediction.return_value = mock_run + MockPredMgr.return_value = mock_mgr + + backtester = Backtester(sqlite_store) + backtester.polymarket = mock_poly + + # Run once fully + first_run = backtester.run(num_markets=3) + + # Pre-populate result for market 0 in a NEW run + new_bt_run = BacktestRun(id="bt_resume_test", total_markets=3) + sqlite_store.save_backtest_run(new_bt_run) + + pre_result = BacktestResult( + id="btr_pre", + run_id="bt_resume_test", + market_id="cond_0", + market_title="Test Market 0", + predicted_prob=0.70, + market_prob=0.60, + actual_outcome="YES", + signal_direction="BUY_YES", + edge=0.10, + brier_score=0.09, + correct=1, + ) + sqlite_store.save_backtest_result(pre_result) + + completed_ids = 
sqlite_store.get_completed_market_ids("bt_resume_test") + assert "cond_0" in completed_ids + + +class TestBacktestZeroMarkets: + + @patch("app.services.backtester.PredictionManager") + @patch("app.services.backtester.PolymarketClient") + def test_backtest_zero_markets(self, MockPolyClient, MockPredMgr, sqlite_store): + """Empty list from polymarket should return completed with zero metrics.""" + mock_poly = MagicMock() + mock_poly.fetch_resolved_markets.return_value = [] + MockPolyClient.return_value = mock_poly + + backtester = Backtester(sqlite_store) + backtester.polymarket = mock_poly + + result_run = backtester.run(num_markets=10) + + assert result_run.status == BacktestRunStatus.COMPLETED.value + assert result_run.metrics is not None + assert result_run.metrics["markets_tested"] == 0 + + +class TestBacktestAllFailures: + + @patch("app.services.backtester.PredictionManager") + @patch("app.services.backtester.PredictionRunManager") + @patch("app.services.backtester.PolymarketClient") + def test_backtest_all_failures( + self, MockPolyClient, MockRunMgr, MockPredMgr, sqlite_store + ): + """All pipeline runs fail — should still complete with 0 success.""" + markets = [_make_resolved_market(i) for i in range(3)] + + mock_poly = MagicMock() + mock_poly.fetch_resolved_markets.return_value = markets + MockPolyClient.return_value = mock_poly + + failed_run = PredictionRun( + run_id="pred_fail", + status=PredictionRunStatus.FAILED, + created_at="2025-01-01", + updated_at="2025-01-01", + error="LLM timeout", + ) + MockRunMgr.create_run.return_value = failed_run + + mock_mgr = MagicMock() + mock_mgr.run_prediction.return_value = failed_run + MockPredMgr.return_value = mock_mgr + + backtester = Backtester(sqlite_store) + backtester.polymarket = mock_poly + + result_run = backtester.run(num_markets=3) + + assert result_run.status == BacktestRunStatus.COMPLETED.value + assert result_run.completed_markets == 0 + assert result_run.failed_markets == 3 diff --git 
a/backend/tests/test_backtester_categories.py b/backend/tests/test_backtester_categories.py new file mode 100644 index 0000000..13f44b4 --- /dev/null +++ b/backend/tests/test_backtester_categories.py @@ -0,0 +1,113 @@ +""" +Tests for per-category and per-tier metrics in backtester. +""" + +from app.models.backtest import BacktestRun, BacktestResult, BacktestMetrics +from app.services.backtester import Backtester + + +class TestPerCategoryMetrics: + + def test_category_metrics_computed(self, sqlite_store): + """compute_metrics returns per-category breakdown.""" + run = BacktestRun(id="bt_cat_test") + sqlite_store.save_backtest_run(run) + + # Create results across two categories + for i in range(5): + sqlite_store.save_backtest_result(BacktestResult( + id=f"btr_pol_{i}", run_id="bt_cat_test", + market_id=f"mkt_pol_{i}", market_title=f"Politics {i}", + predicted_prob=0.7, market_prob=0.5, + actual_outcome="YES", signal_direction="BUY_YES", + edge=0.2, brier_score=0.09, correct=1, + category="politics", confidence_tier="HIGH", + )) + + for i in range(3): + sqlite_store.save_backtest_result(BacktestResult( + id=f"btr_spt_{i}", run_id="bt_cat_test", + market_id=f"mkt_spt_{i}", market_title=f"Sports {i}", + predicted_prob=0.6, market_prob=0.5, + actual_outcome="NO", signal_direction="BUY_YES", + edge=0.1, brier_score=0.36, correct=0, + category="sports", confidence_tier="MEDIUM", + )) + + backtester = Backtester(store=sqlite_store) + metrics = backtester.compute_metrics("bt_cat_test") + + assert metrics.category_metrics is not None + assert "politics" in metrics.category_metrics + assert "sports" in metrics.category_metrics + + pol = metrics.category_metrics["politics"] + assert pol["accuracy"] == 1.0 + assert pol["markets_tested"] == 5 + + spt = metrics.category_metrics["sports"] + assert spt["accuracy"] == 0.0 + assert spt["markets_tested"] == 3 + + def test_confidence_tier_metrics_computed(self, sqlite_store): + """compute_metrics returns per-tier breakdown.""" + run = 
BacktestRun(id="bt_tier_test") + sqlite_store.save_backtest_run(run) + + # HIGH tier + for i in range(4): + sqlite_store.save_backtest_result(BacktestResult( + id=f"btr_h_{i}", run_id="bt_tier_test", + market_id=f"mkt_h_{i}", predicted_prob=0.8, market_prob=0.5, + actual_outcome="YES", signal_direction="BUY_YES", + edge=0.3, brier_score=0.04, correct=1, + category="crypto", confidence_tier="HIGH", + )) + + # LOW tier + for i in range(2): + sqlite_store.save_backtest_result(BacktestResult( + id=f"btr_l_{i}", run_id="bt_tier_test", + market_id=f"mkt_l_{i}", predicted_prob=0.52, market_prob=0.5, + actual_outcome="NO", signal_direction="HOLD", + edge=0.02, brier_score=0.27, correct=None, + category="other", confidence_tier="LOW", + )) + + backtester = Backtester(store=sqlite_store) + metrics = backtester.compute_metrics("bt_tier_test") + + assert metrics.confidence_tier_metrics is not None + assert "HIGH" in metrics.confidence_tier_metrics + assert "LOW" in metrics.confidence_tier_metrics + assert metrics.confidence_tier_metrics["HIGH"]["markets_tested"] == 4 + + def test_empty_results_no_category_metrics(self, sqlite_store): + """compute_metrics with no results returns None category_metrics.""" + run = BacktestRun(id="bt_empty_cat") + sqlite_store.save_backtest_run(run) + + backtester = Backtester(store=sqlite_store) + metrics = backtester.compute_metrics("bt_empty_cat") + + assert metrics.markets_tested == 0 + # Empty results return default None for category/tier metrics + assert metrics.category_metrics is None + assert metrics.confidence_tier_metrics is None + + +class TestCategoryInResults: + + def test_result_has_category_fields(self): + """BacktestResult serializes category and confidence_tier.""" + result = BacktestResult( + id="btr_x", run_id="bt_x", market_id="mkt_x", + category="crypto", confidence_tier="HIGH", + ) + d = result.to_dict() + assert d["category"] == "crypto" + assert d["confidence_tier"] == "HIGH" + + restored = BacktestResult.from_dict(d) + 
assert restored.category == "crypto" + assert restored.confidence_tier == "HIGH" diff --git a/backend/tests/test_backtester_metrics.py b/backend/tests/test_backtester_metrics.py new file mode 100644 index 0000000..b2c632b --- /dev/null +++ b/backend/tests/test_backtester_metrics.py @@ -0,0 +1,216 @@ +""" +Tests for Backtester.compute_metrics — isolated metric calculations. +""" + +import math + +import pytest + +from app.models.backtest import BacktestResult, BacktestRun +from app.services.backtester import Backtester + + +def _result( + run_id: str, + idx: int, + predicted: float, + market: float, + actual: str, + direction: str, + correct: int = None, + edge: float = 0.0, +) -> BacktestResult: + actual_prob = 1.0 if actual == "YES" else 0.0 + brier = (predicted - actual_prob) ** 2 + return BacktestResult( + id=f"btr_{idx}", + run_id=run_id, + market_id=f"mkt_{idx}", + market_title=f"Market {idx}", + predicted_prob=predicted, + market_prob=market, + actual_outcome=actual, + signal_direction=direction, + edge=edge, + brier_score=brier, + correct=correct, + ) + + +class TestAccuracyPerfect: + + def test_accuracy_perfect(self, sqlite_store): + run = BacktestRun(id="bt_perf") + sqlite_store.save_backtest_run(run) + + for i in range(5): + r = _result("bt_perf", i, 0.80, 0.55, "YES", "BUY_YES", correct=1, edge=0.25) + sqlite_store.save_backtest_result(r) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_perf") + assert metrics.accuracy == 1.0 + + +class TestAccuracyWorst: + + def test_accuracy_worst(self, sqlite_store): + run = BacktestRun(id="bt_worst") + sqlite_store.save_backtest_run(run) + + for i in range(5): + r = _result("bt_worst", i, 0.80, 0.55, "NO", "BUY_YES", correct=0, edge=0.25) + sqlite_store.save_backtest_result(r) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_worst") + assert metrics.accuracy == 0.0 + + +class TestAccuracyAllHold: + + def test_accuracy_all_hold(self, sqlite_store): + run = 
BacktestRun(id="bt_hold") + sqlite_store.save_backtest_run(run) + + for i in range(5): + r = _result("bt_hold", i, 0.50, 0.50, "YES", "HOLD", correct=None, edge=0.0) + sqlite_store.save_backtest_result(r) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_hold") + # No actionable signals — accuracy should be 0 + assert metrics.accuracy == 0.0 + + +class TestBrierScoreCalculation: + + def test_brier_score_calculation(self, sqlite_store): + run = BacktestRun(id="bt_brier") + sqlite_store.save_backtest_run(run) + + # predicted=0.8, actual=YES => brier = (0.8-1.0)^2 = 0.04 + r1 = _result("bt_brier", 0, 0.80, 0.55, "YES", "BUY_YES", correct=1) + # predicted=0.3, actual=NO => brier = (0.3-0.0)^2 = 0.09 + r2 = _result("bt_brier", 1, 0.30, 0.55, "NO", "BUY_NO", correct=1) + sqlite_store.save_backtest_result(r1) + sqlite_store.save_backtest_result(r2) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_brier") + expected_brier = (0.04 + 0.09) / 2 + assert abs(metrics.brier_score - expected_brier) < 1e-6 + + +class TestROICalculation: + + def test_roi_calculation(self, sqlite_store): + run = BacktestRun(id="bt_roi") + sqlite_store.save_backtest_run(run) + + # Win at market_prob=0.60, payout = 1/0.60 = 1.667, profit = 0.667 + r1 = _result("bt_roi", 0, 0.80, 0.60, "YES", "BUY_YES", correct=1) + # Loss, profit = -1.0 + r2 = _result("bt_roi", 1, 0.80, 0.60, "NO", "BUY_YES", correct=0) + sqlite_store.save_backtest_result(r1) + sqlite_store.save_backtest_result(r2) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_roi") + + # total_invested=2, total_return = 0.667 + (-1.0) = -0.333 + expected_roi = ((1 / 0.60 - 1) + (-1.0)) / 2.0 + assert abs(metrics.roi - expected_roi) < 1e-4 + + +class TestSharpeRatioZeroVariance: + + def test_sharpe_ratio_zero_variance(self, sqlite_store): + """All same return should give sharpe=0 (zero std dev).""" + run = BacktestRun(id="bt_sharpe0") + sqlite_store.save_backtest_run(run) + + # All wins at 
same market_prob => same return + for i in range(5): + r = _result("bt_sharpe0", i, 0.80, 0.60, "YES", "BUY_YES", correct=1) + sqlite_store.save_backtest_result(r) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_sharpe0") + assert metrics.sharpe_ratio == 0.0 + + +class TestSharpeRatioNormal: + + def test_sharpe_ratio_normal(self, sqlite_store): + """Mixed wins/losses should produce non-zero sharpe.""" + run = BacktestRun(id="bt_sharpe_n") + sqlite_store.save_backtest_run(run) + + # Win + r1 = _result("bt_sharpe_n", 0, 0.80, 0.60, "YES", "BUY_YES", correct=1) + # Loss + r2 = _result("bt_sharpe_n", 1, 0.80, 0.60, "NO", "BUY_YES", correct=0) + # Win + r3 = _result("bt_sharpe_n", 2, 0.80, 0.60, "YES", "BUY_YES", correct=1) + sqlite_store.save_backtest_result(r1) + sqlite_store.save_backtest_result(r2) + sqlite_store.save_backtest_result(r3) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_sharpe_n") + # Should be a real number, not zero or infinity + assert metrics.sharpe_ratio != 0.0 + assert math.isfinite(metrics.sharpe_ratio) + + +class TestMaxDrawdown: + + def test_max_drawdown(self, sqlite_store): + run = BacktestRun(id="bt_dd") + sqlite_store.save_backtest_run(run) + + # Win, Win, Loss, Loss — drawdown = 2 losses from peak + for i, (actual, correct) in enumerate([ + ("YES", 1), ("YES", 1), ("NO", 0), ("NO", 0) + ]): + r = _result("bt_dd", i, 0.80, 0.60, actual, "BUY_YES", correct=correct) + sqlite_store.save_backtest_result(r) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_dd") + assert metrics.max_drawdown > 0.0 + + +class TestCalibrationRMSE: + + def test_calibration_rmse(self, sqlite_store): + run = BacktestRun(id="bt_cal") + sqlite_store.save_backtest_run(run) + + # Well-calibrated: predicted ~0.8 and 80% resolve YES + for i in range(10): + actual = "YES" if i < 8 else "NO" + r = _result("bt_cal", i, 0.80, 0.55, actual, "BUY_YES", correct=1 if actual == "YES" else 0) + 
sqlite_store.save_backtest_result(r) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_cal") + # Should be reasonably small for well-calibrated predictions + assert metrics.calibration_rmse < 0.5 + + +class TestEmptyResults: + + def test_empty_results(self, sqlite_store): + run = BacktestRun(id="bt_empty") + sqlite_store.save_backtest_run(run) + + bt = Backtester(sqlite_store) + metrics = bt.compute_metrics("bt_empty") + assert metrics.markets_tested == 0 + assert metrics.accuracy == 0.0 + assert metrics.brier_score == 0.0 + assert metrics.roi == 0.0 + assert metrics.sharpe_ratio == 0.0 + assert metrics.max_drawdown == 0.0 diff --git a/backend/tests/test_calibrator.py b/backend/tests/test_calibrator.py new file mode 100644 index 0000000..5fa4960 --- /dev/null +++ b/backend/tests/test_calibrator.py @@ -0,0 +1,131 @@ +""" +Tests for app.services.calibrator.Calibrator +""" + +import pytest + +from app.models.backtest import BacktestResult, BacktestRun +from app.services.calibrator import Calibrator, MIN_DATAPOINTS + + +def _make_result(idx: int, predicted: float, actual: str) -> BacktestResult: + return BacktestResult( + id=f"btr_cal_{idx}", + run_id="bt_cal_test", + market_id=f"mkt_{idx}", + predicted_prob=predicted, + actual_outcome=actual, + ) + + +class TestFitInsufficientData: + + def test_fit_insufficient_data(self): + cal = Calibrator() + results = [_make_result(i, 0.5, "YES") for i in range(10)] + assert cal.fit(results) is False + assert cal.model is None + + +class TestFitNormal: + + def test_fit_normal(self): + cal = Calibrator() + results = [] + for i in range(35): + if i < 20: + results.append(_make_result(i, 0.7 + (i % 5) * 0.05, "YES")) + else: + results.append(_make_result(i, 0.3 + (i % 5) * 0.05, "NO")) + + assert cal.fit(results) is True + assert cal.model is not None + + +class TestTransformNoModel: + + def test_transform_no_model(self): + cal = Calibrator() + assert cal.model is None + assert cal.transform(0.65) == 0.65 + + 
+class TestTransformWithModel: + + def test_transform_with_model(self): + cal = Calibrator() + results = [] + for i in range(35): + if i < 20: + results.append(_make_result(i, 0.7 + (i % 5) * 0.05, "YES")) + else: + results.append(_make_result(i, 0.3 + (i % 5) * 0.05, "NO")) + + cal.fit(results) + calibrated = cal.transform(0.75) + # Should return a valid probability between 0 and 1 + assert 0.0 <= calibrated <= 1.0 + # Should differ from input (model was fitted) + # (Not guaranteed to differ much, but should be a float) + assert isinstance(calibrated, float) + + +class TestDegenerateData: + + def test_degenerate_data(self): + """All same outcome should return False.""" + cal = Calibrator() + results = [_make_result(i, 0.5 + i * 0.01, "YES") for i in range(30)] + assert cal.fit(results) is False + assert cal.model is None + + +class TestSaveAndLoad: + + def test_save_and_load(self, sqlite_store): + run = BacktestRun(id="bt_cal_persist", config={}) + sqlite_store.save_backtest_run(run) + + # Fit a model + cal = Calibrator(store=sqlite_store) + results = [] + for i in range(35): + if i < 20: + results.append(_make_result(i, 0.7 + (i % 5) * 0.05, "YES")) + else: + results.append(_make_result(i, 0.3 + (i % 5) * 0.05, "NO")) + cal.fit(results) + + original_output = cal.transform(0.75) + + # Save + cal.save("bt_cal_persist") + + # Load into a new calibrator + cal2 = Calibrator(store=sqlite_store) + assert cal2.model is None + loaded = cal2.load("bt_cal_persist") + assert loaded is True + assert cal2.model is not None + + # Should produce same output + loaded_output = cal2.transform(0.75) + assert abs(original_output - loaded_output) < 1e-6 + + def test_load_nonexistent(self, sqlite_store): + cal = Calibrator(store=sqlite_store) + assert cal.load("nonexistent_run") is False + + def test_load_tampered_data_rejected(self, sqlite_store): + """Tampered model data should fail HMAC verification.""" + import base64 + run = BacktestRun(id="bt_cal_tamper", config={}) + 
sqlite_store.save_backtest_run(run) + + # Store a fake model blob (not properly signed) + fake_data = base64.b64encode(b"\x00" * 64).decode('ascii') + sqlite_store.update_backtest_run("bt_cal_tamper", config={"calibration_model": fake_data}) + + cal = Calibrator(store=sqlite_store) + assert cal.load("bt_cal_tamper") is False + assert cal.model is None diff --git a/backend/tests/test_calibrator_categories.py b/backend/tests/test_calibrator_categories.py new file mode 100644 index 0000000..8042b81 --- /dev/null +++ b/backend/tests/test_calibrator_categories.py @@ -0,0 +1,104 @@ +""" +Tests for category-specific calibration in calibrator. +""" + +from app.models.backtest import BacktestRun, BacktestResult +from app.services.calibrator import Calibrator + + +def _make_results(category, n, predicted_prob, actual_outcome): + """Helper to create n BacktestResult instances.""" + return [ + BacktestResult( + id=f"btr_{category}_{i}", + run_id="bt_cal", + market_id=f"mkt_{category}_{i}", + predicted_prob=predicted_prob, + actual_outcome=actual_outcome, + category=category, + ) + for i in range(n) + ] + + +class TestFitCategoryOffsets: + + def test_computes_offsets(self, sqlite_store): + """fit_category_offsets computes mean(pred) - mean(actual) per category.""" + # Politics: predicted=0.7, actual=YES (1.0) → offset = 0.7 - 1.0 = -0.3 + results = _make_results("politics", 25, 0.7, "YES") + # Crypto: predicted=0.8, actual=NO (0.0) → offset = 0.8 - 0.0 = 0.8 + results += _make_results("crypto", 25, 0.8, "NO") + + calibrator = Calibrator(store=sqlite_store) + offsets = calibrator.fit_category_offsets(results) + + assert "politics" in offsets + assert "crypto" in offsets + assert abs(offsets["politics"] - (-0.3)) < 0.001 + assert abs(offsets["crypto"] - 0.8) < 0.001 + + def test_skips_small_categories(self, sqlite_store): + """Categories with < 20 results are skipped.""" + results = _make_results("politics", 25, 0.7, "YES") + results += _make_results("sports", 5, 0.6, "NO") # Too 
few + + calibrator = Calibrator(store=sqlite_store) + offsets = calibrator.fit_category_offsets(results) + + assert "politics" in offsets + assert "sports" not in offsets + + def test_empty_results(self, sqlite_store): + """Empty results produce empty offsets.""" + calibrator = Calibrator(store=sqlite_store) + offsets = calibrator.fit_category_offsets([]) + assert offsets == {} + + +class TestSaveAndLoadProfiles: + + def test_save_and_load_profiles(self, sqlite_store): + """Profiles round-trip through SQLite.""" + run = BacktestRun(id="bt_cal_prof") + sqlite_store.save_backtest_run(run) + + results = _make_results("politics", 25, 0.7, "YES") + calibrator = Calibrator(store=sqlite_store) + offsets = {"politics": -0.3, "crypto": 0.8} + + calibrator.save_profiles("bt_cal_prof", offsets, results) + + loaded = calibrator.load_profiles("bt_cal_prof") + assert "politics" in loaded + assert "crypto" in loaded + assert abs(loaded["politics"]["offset"] - (-0.3)) < 0.001 + assert abs(loaded["crypto"]["offset"] - 0.8) < 0.001 + + +class TestTransformWithCategory: + + def test_applies_offset(self, sqlite_store): + """transform_with_category subtracts the category offset.""" + calibrator = Calibrator(store=sqlite_store) + profiles = {"politics": {"offset": 0.1, "sample_size": 30}} + + # prob=0.7, offset=0.1 → adjusted = 0.7 - 0.1 = 0.6 + result = calibrator.transform_with_category(0.7, "politics", profiles) + assert abs(result - 0.6) < 0.001 + + def test_clamps_to_valid_range(self, sqlite_store): + """Adjusted probability is clamped to [0.01, 0.99].""" + calibrator = Calibrator(store=sqlite_store) + profiles = {"crypto": {"offset": 0.95, "sample_size": 30}} + + result = calibrator.transform_with_category(0.1, "crypto", profiles) + assert result == 0.01 # 0.1 - 0.95 = -0.85 → clamped to 0.01 + + def test_unknown_category_passthrough(self, sqlite_store): + """Unknown category returns probability unchanged.""" + calibrator = Calibrator(store=sqlite_store) + profiles = {"politics": 
{"offset": 0.1, "sample_size": 30}} + + result = calibrator.transform_with_category(0.7, "sports", profiles) + assert result == 0.7 diff --git a/backend/tests/test_config.py b/backend/tests/test_config.py new file mode 100644 index 0000000..20990fc --- /dev/null +++ b/backend/tests/test_config.py @@ -0,0 +1,65 @@ +""" +Tests for app.config.Config +""" + +import os +from unittest.mock import patch + +import pytest + + +class TestCalibrationDefaults: + + def test_calibration_defaults(self): + from app.config import Config + + assert Config.CALIBRATION_MARKET_REGRESSION == 0.30 + assert Config.CALIBRATION_DATE_DAMPENING_DAYS == 14 + assert Config.CALIBRATION_HIGH_EDGE_THRESHOLD == 0.25 + assert Config.CALIBRATION_HIGH_EDGE_MAX_REDUCTION == 0.40 + assert Config.CALIBRATION_SHORT_DATE_PENALTY == 0.20 + + +class TestCalibrationFromEnv: + + def test_calibration_from_env(self): + """Set env vars and verify overrides by re-evaluating the expressions.""" + env_overrides = { + "CALIBRATION_MARKET_REGRESSION": "0.50", + "CALIBRATION_DATE_DAMPENING_DAYS": "7", + "CALIBRATION_HIGH_EDGE_THRESHOLD": "0.30", + "CALIBRATION_HIGH_EDGE_MAX_REDUCTION": "0.50", + "CALIBRATION_SHORT_DATE_PENALTY": "0.40", + } + + with patch.dict(os.environ, env_overrides): + # Re-evaluate config values from env + market_reg = float(os.environ.get("CALIBRATION_MARKET_REGRESSION", "0.30")) + dampening_days = int(os.environ.get("CALIBRATION_DATE_DAMPENING_DAYS", "14")) + high_edge = float(os.environ.get("CALIBRATION_HIGH_EDGE_THRESHOLD", "0.25")) + max_reduction = float(os.environ.get("CALIBRATION_HIGH_EDGE_MAX_REDUCTION", "0.40")) + short_penalty = float(os.environ.get("CALIBRATION_SHORT_DATE_PENALTY", "0.20")) + + assert market_reg == 0.50 + assert dampening_days == 7 + assert high_edge == 0.30 + assert max_reduction == 0.50 + assert short_penalty == 0.40 + + +class TestSQLiteDBPathDefault: + + def test_sqlite_db_path_default(self): + from app.config import Config + + # Should contain 'mirofish.db' in the 
default path + assert "mirofish.db" in Config.SQLITE_DB_PATH + + +class TestPaperTradingModeDefault: + + def test_paper_trading_mode_default(self): + from app.config import Config + + # Default is 'true' + assert Config.PAPER_TRADING_MODE is True diff --git a/backend/tests/test_market_classifier.py b/backend/tests/test_market_classifier.py new file mode 100644 index 0000000..3b281c2 --- /dev/null +++ b/backend/tests/test_market_classifier.py @@ -0,0 +1,102 @@ +""" +Tests for app.services.market_classifier — LLM classification with SQLite caching. +""" + +from unittest.mock import MagicMock, patch + +from app.models.prediction import PredictionMarket +from app.services.market_classifier import MarketClassifier, compute_confidence_tier, CATEGORIES + + +class TestComputeConfidenceTier: + + def test_high_tier(self): + assert compute_confidence_tier(0.20) == "HIGH" + assert compute_confidence_tier(-0.15) == "HIGH" + + def test_medium_tier(self): + assert compute_confidence_tier(0.10) == "MEDIUM" + assert compute_confidence_tier(-0.08) == "MEDIUM" + + def test_low_tier(self): + assert compute_confidence_tier(0.05) == "LOW" + assert compute_confidence_tier(0.0) == "LOW" + + +class TestMarketClassifierCaching: + + def test_returns_cached_category(self, sqlite_store): + """classify() returns cached result without calling LLM.""" + sqlite_store.save_market_category("mkt_cached", "sports") + + mock_llm = MagicMock() + classifier = MarketClassifier(store=sqlite_store, llm_client=mock_llm) + + result = classifier.classify("mkt_cached", "Will team X win?", "") + assert result == "sports" + mock_llm.chat_json.assert_not_called() + + def test_llm_called_on_cache_miss(self, sqlite_store): + """classify() calls LLM and caches result on miss.""" + mock_llm = MagicMock() + mock_llm.chat_json.return_value = {"category": "politics"} + + classifier = MarketClassifier(store=sqlite_store, llm_client=mock_llm) + result = classifier.classify("mkt_new", "Will candidate X win?", "Election 
question") + + assert result == "politics" + mock_llm.chat_json.assert_called_once() + + # Verify it was cached + cached = sqlite_store.get_market_category("mkt_new") + assert cached == "politics" + + def test_unknown_category_defaults_to_other(self, sqlite_store): + """LLM returning unknown category falls back to 'other'.""" + mock_llm = MagicMock() + mock_llm.chat_json.return_value = {"category": "weather"} + + classifier = MarketClassifier(store=sqlite_store, llm_client=mock_llm) + result = classifier.classify("mkt_unknown", "Will it rain?", "") + + assert result == "other" + + def test_llm_error_defaults_to_other(self, sqlite_store): + """LLM error falls back to 'other'.""" + mock_llm = MagicMock() + mock_llm.chat_json.side_effect = RuntimeError("LLM down") + + classifier = MarketClassifier(store=sqlite_store, llm_client=mock_llm) + result = classifier.classify("mkt_err", "Will X happen?", "") + + assert result == "other" + + +class TestBatchClassification: + + def test_classify_batch(self, sqlite_store): + """classify_batch classifies multiple markets, using cache where available.""" + sqlite_store.save_market_category("mkt_a", "crypto") + + mock_llm = MagicMock() + mock_llm.chat_json.return_value = {"category": "sports"} + + classifier = MarketClassifier(store=sqlite_store, llm_client=mock_llm) + + markets = [ + PredictionMarket( + condition_id="mkt_a", title="BTC price", slug="btc", + description="", outcomes=["Yes", "No"], prices=[0.5, 0.5], + volume=1000.0, liquidity=500.0, end_date="2025-12-31", + ), + PredictionMarket( + condition_id="mkt_b", title="Team wins", slug="team", + description="", outcomes=["Yes", "No"], prices=[0.5, 0.5], + volume=1000.0, liquidity=500.0, end_date="2025-12-31", + ), + ] + + results = classifier.classify_batch(markets) + assert results == {"mkt_a": "crypto", "mkt_b": "sports"} + # Only mkt_b should trigger LLM call + assert mock_llm.chat_json.call_count == 1 diff --git a/backend/tests/test_paper_trader.py 
b/backend/tests/test_paper_trader.py new file mode 100644 index 0000000..e3e53b2 --- /dev/null +++ b/backend/tests/test_paper_trader.py @@ -0,0 +1,110 @@ +""" +Tests for app.services.paper_trader.PaperTrader +""" + +from unittest.mock import patch + +import pytest + +from app.models.prediction import PredictionMarket, TradingSignal +from app.services.paper_trader import PaperTrader + + +def _make_signal(direction: str) -> TradingSignal: + return TradingSignal( + direction=direction, + edge=0.15, + confidence=0.80, + reasoning="Test signal", + simulated_probability=0.70, + market_probability=0.55, + ) + + +class TestExecuteBuyYes: + + def test_execute_buy_yes(self, sqlite_store, sample_market): + trader = PaperTrader(sqlite_store) + signal = _make_signal("BUY_YES") + + with patch("app.services.paper_trader.random.uniform", return_value=0.015): + order = trader.execute(signal, sample_market, signal_id="sig_001") + + assert order is not None + assert order.side == "BUY_YES" + assert order.outcome == "Yes" + assert order.size == 10.0 + assert order.slippage == 0.015 + + # Verify persisted + orders = sqlite_store.get_orders() + assert len(orders) == 1 + + positions = sqlite_store.get_positions() + assert len(positions) == 1 + assert positions[0].order_id == order.id + assert positions[0].outcome == "Yes" + + +class TestExecuteBuyNo: + + def test_execute_buy_no(self, sqlite_store, sample_market): + trader = PaperTrader(sqlite_store) + signal = _make_signal("BUY_NO") + + with patch("app.services.paper_trader.random.uniform", return_value=0.015): + order = trader.execute(signal, sample_market, signal_id="sig_002") + + assert order is not None + assert order.side == "BUY_NO" + assert order.outcome == "No" + + +class TestExecuteHold: + + def test_execute_hold(self, sqlite_store, sample_market): + trader = PaperTrader(sqlite_store) + signal = _make_signal("HOLD") + + order = trader.execute(signal, sample_market) + assert order is None + + assert sqlite_store.get_orders() == 
[] + assert sqlite_store.get_positions() == [] + + +class TestSlippageRange: + + def test_slippage_range(self, sqlite_store, sample_market): + """Verify slippage is in the 1-2% range.""" + trader = PaperTrader(sqlite_store) + signal = _make_signal("BUY_YES") + + slippages = [] + for _ in range(50): + order = trader.execute(signal, sample_market, signal_id="sig_slip") + slippages.append(order.slippage) + + assert all(0.01 <= s <= 0.02 for s in slippages) + + +class TestSQLiteWrite: + + def test_sqlite_write(self, sqlite_store, sample_market): + """Verify records are actually persisted in SQLite.""" + trader = PaperTrader(sqlite_store) + signal = _make_signal("BUY_YES") + + with patch("app.services.paper_trader.random.uniform", return_value=0.015): + order = trader.execute(signal, sample_market, signal_id="sig_write") + + orders = sqlite_store.get_orders() + assert len(orders) == 1 + assert orders[0].market_id == sample_market.condition_id + assert orders[0].signal_id == "sig_write" + + positions = sqlite_store.get_positions() + assert len(positions) == 1 + assert positions[0].market_id == sample_market.condition_id + assert positions[0].status == "OPEN" + assert positions[0].cost_basis == pytest.approx(10.0 * order.fill_price) diff --git a/backend/tests/test_polymarket_client.py b/backend/tests/test_polymarket_client.py new file mode 100644 index 0000000..5ca96a4 --- /dev/null +++ b/backend/tests/test_polymarket_client.py @@ -0,0 +1,147 @@ +""" +Tests for app.services.polymarket_client.PolymarketClient (mocked HTTP). 
+""" + +from unittest.mock import patch, MagicMock + +import pytest +import requests + +from app.services.polymarket_client import PolymarketClient + + +SAMPLE_MARKET_JSON = { + "conditionId": "cond_001", + "question": "Will it rain tomorrow?", + "slug": "rain-tomorrow", + "description": "Rain forecast market", + "tokens": [ + {"outcome": "Yes", "price": "0.65", "winner": False}, + {"outcome": "No", "price": "0.35", "winner": False}, + ], + "volume": "50000", + "liquidity": "10000", + "endDate": "2025-12-31", + "active": True, +} + +SAMPLE_RESOLVED_JSON = { + "conditionId": "cond_resolved", + "question": "Did it rain?", + "slug": "did-it-rain", + "description": "Resolved rain market", + "tokens": [ + {"outcome": "Yes", "price": "1.0", "winner": True}, + {"outcome": "No", "price": "0.0", "winner": False}, + ], + "volume": "80000", + "liquidity": "20000", + "endDate": "2025-06-01", + "active": False, + "resolved": True, +} + + +class TestFetchActiveMarketsSuccess: + + @patch("app.services.polymarket_client.requests.get") + def test_fetch_active_markets_success(self, mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = [SAMPLE_MARKET_JSON] + mock_resp.raise_for_status.return_value = None + mock_get.return_value = mock_resp + + client = PolymarketClient(base_url="http://fake-api") + markets = client.fetch_active_markets(min_volume=1000, limit=10) + + assert len(markets) == 1 + assert markets[0].condition_id == "cond_001" + assert markets[0].title == "Will it rain tomorrow?" 
+ assert markets[0].prices[0] == 0.65 + + +class TestFetchActiveMarketsRetryOnTimeout: + + @patch("app.services.polymarket_client.time.sleep") + @patch("app.services.polymarket_client.requests.get") + def test_fetch_active_markets_retry_on_timeout(self, mock_get, mock_sleep): + """First call times out, second succeeds.""" + mock_resp_ok = MagicMock() + mock_resp_ok.json.return_value = [SAMPLE_MARKET_JSON] + mock_resp_ok.raise_for_status.return_value = None + + mock_get.side_effect = [ + requests.Timeout("Connection timed out"), + mock_resp_ok, + ] + + client = PolymarketClient(base_url="http://fake-api") + markets = client.fetch_active_markets(min_volume=1000, limit=10) + + assert len(markets) == 1 + assert mock_get.call_count == 2 + + +class TestFetchActiveMarketsMalformedJson: + + @patch("app.services.polymarket_client.requests.get") + def test_fetch_active_markets_malformed_json(self, mock_get): + """Non-list response returns empty list.""" + mock_resp = MagicMock() + mock_resp.json.return_value = {"error": "bad request"} + mock_resp.raise_for_status.return_value = None + mock_get.return_value = mock_resp + + client = PolymarketClient(base_url="http://fake-api") + markets = client.fetch_active_markets(min_volume=1000, limit=10) + + assert markets == [] + + +class TestFetchActiveMarketsEmpty: + + @patch("app.services.polymarket_client.requests.get") + def test_fetch_active_markets_empty(self, mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = [] + mock_resp.raise_for_status.return_value = None + mock_get.return_value = mock_resp + + client = PolymarketClient(base_url="http://fake-api") + markets = client.fetch_active_markets(min_volume=1000, limit=10) + + assert markets == [] + + +class TestFetchResolvedMarkets: + + @patch("app.services.polymarket_client.requests.get") + def test_fetch_resolved_markets(self, mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = [SAMPLE_RESOLVED_JSON] + mock_resp.raise_for_status.return_value = 
None + mock_get.return_value = mock_resp + + client = PolymarketClient(base_url="http://fake-api") + markets = client.fetch_resolved_markets(limit=10) + + assert len(markets) == 1 + assert markets[0].actual_outcome == "YES" + assert markets[0].active is False + + +class TestGetMarketSuccess: + + @patch("app.services.polymarket_client.requests.get") + def test_get_market_success(self, mock_get): + mock_resp = MagicMock() + mock_resp.json.return_value = SAMPLE_MARKET_JSON + mock_resp.raise_for_status.return_value = None + mock_get.return_value = mock_resp + + client = PolymarketClient(base_url="http://fake-api") + market = client.get_market("cond_001") + + assert market is not None + assert market.condition_id == "cond_001" + mock_get.assert_called_once() diff --git a/backend/tests/test_prediction_manager_categories.py b/backend/tests/test_prediction_manager_categories.py new file mode 100644 index 0000000..598963d --- /dev/null +++ b/backend/tests/test_prediction_manager_categories.py @@ -0,0 +1,94 @@ +""" +Tests for category offset integration in PredictionManager._generate_signal(). 
+""" + +from unittest.mock import MagicMock, patch + +from app.models.backtest import BacktestRun +from app.models.prediction import PredictionMarket, SentimentResult +from app.services.prediction_manager import PredictionManager + + +def _make_market(prices=None): + return PredictionMarket( + condition_id="cond_test", + title="Test market", + slug="test", + description="Test", + outcomes=["Yes", "No"], + prices=prices or [0.50, 0.50], + volume=100000.0, + liquidity=50000.0, + end_date="2027-12-31T23:59:59Z", + active=True, + ) + + +def _make_sentiment(simulated_probability=0.70, confidence=0.8, total_posts=20): + return SentimentResult( + simulated_probability=simulated_probability, + confidence=confidence, + stance_counts={"for": 12, "against": 6, "neutral": 2}, + key_arguments_for=["arg1"], + key_arguments_against=["arg2"], + total_posts_analyzed=total_posts, + ) + + +class TestCategoryOffsetAppliedInSignal: + + @patch("app.services.prediction_manager.LLMClient") + def test_category_offset_adjusts_probability(self, MockLLM, sqlite_store): + """When category profiles exist, _generate_signal applies the offset.""" + # Setup: completed run with calibration profile + run = BacktestRun(id="bt_pm_test", status="COMPLETED", started_at="2025-06-01T00:00:00") + sqlite_store.save_backtest_run(run) + sqlite_store.save_calibration_profile("bt_pm_test", "politics", 0.10, 30) + + manager = PredictionManager(sqlite_store=sqlite_store) + + # Verify profiles were loaded + assert "politics" in manager.category_profiles + assert manager.category_profiles["politics"]["offset"] == 0.10 + + market = _make_market(prices=[0.50, 0.50]) + sentiment = _make_sentiment(simulated_probability=0.70) + + # Without category offset + signal_no_cat = manager._generate_signal(market, sentiment, category=None) + + # With category offset (politics offset = +0.10 → subtract from sim_prob) + signal_with_cat = manager._generate_signal(market, sentiment, category="politics") + + # The sim_prob should be 
lower with category offset applied + assert signal_with_cat.simulated_probability < signal_no_cat.simulated_probability + assert signal_with_cat.category == "politics" + assert signal_with_cat.confidence_tier is not None + assert "Category 'politics' offset" in signal_with_cat.reasoning + + @patch("app.services.prediction_manager.LLMClient") + def test_no_profiles_no_adjustment(self, MockLLM, sqlite_store): + """When no completed backtest exists, no offset is applied.""" + manager = PredictionManager(sqlite_store=sqlite_store) + assert manager.category_profiles == {} + + market = _make_market(prices=[0.50, 0.50]) + sentiment = _make_sentiment(simulated_probability=0.70) + + signal = manager._generate_signal(market, sentiment, category="politics") + assert "offset" not in signal.reasoning.lower() + + @patch("app.services.prediction_manager.LLMClient") + def test_unknown_category_no_adjustment(self, MockLLM, sqlite_store): + """Category not in profiles passes through without adjustment.""" + run = BacktestRun(id="bt_pm_unk", status="COMPLETED", started_at="2025-06-01T00:00:00") + sqlite_store.save_backtest_run(run) + sqlite_store.save_calibration_profile("bt_pm_unk", "politics", 0.10, 30) + + manager = PredictionManager(sqlite_store=sqlite_store) + + market = _make_market(prices=[0.50, 0.50]) + sentiment = _make_sentiment(simulated_probability=0.70) + + signal = manager._generate_signal(market, sentiment, category="sports") + assert "offset" not in signal.reasoning.lower() diff --git a/backend/tests/test_prediction_manager_di.py b/backend/tests/test_prediction_manager_di.py new file mode 100644 index 0000000..25ac2e4 --- /dev/null +++ b/backend/tests/test_prediction_manager_di.py @@ -0,0 +1,78 @@ +""" +Tests for PredictionManager dependency injection of result_store. 
+""" + +from unittest.mock import patch, MagicMock + +import pytest + +from app.models.prediction import PredictionRunManager + + +class TestDefaultStoreIsPredictionRunManager: + + @patch("app.services.prediction_manager.DebateSimulator") + @patch("app.services.prediction_manager.ScenarioGenerator") + @patch("app.services.prediction_manager.LLMClient") + def test_default_store_is_prediction_run_manager( + self, MockLLM, MockScenGen, MockDebate + ): + from app.services.prediction_manager import PredictionManager + + mgr = PredictionManager() + assert mgr.result_store is PredictionRunManager + + +class TestCustomStoreUsed: + + @patch("app.services.prediction_manager.DebateSimulator") + @patch("app.services.prediction_manager.ScenarioGenerator") + @patch("app.services.prediction_manager.LLMClient") + def test_custom_store_used(self, MockLLM, MockScenGen, MockDebate): + from app.services.prediction_manager import PredictionManager + from app.models.prediction import ( + PredictionRun, + PredictionRunStatus, + PredictionMarket, + ) + + custom_store = MagicMock() + mgr = PredictionManager(result_store=custom_store) + assert mgr.result_store is custom_store + + # Create a market and run prediction — save_run should be called on custom_store + market = PredictionMarket( + condition_id="cond_di", + title="DI test", + slug="di-test", + description="test", + outcomes=["Yes", "No"], + prices=[0.5, 0.5], + volume=10000, + liquidity=5000, + end_date="2025-12-31", + ) + run = PredictionRun( + run_id="pred_di", + status=PredictionRunStatus.FETCHING_MARKET, + created_at="2025-01-01", + updated_at="2025-01-01", + ) + + # Make the scenario generator and debate simulator return mocks + mock_scenario = MagicMock() + mock_scenario.to_dict.return_value = {} + mock_scenario.context_document = "test context" + MockScenGen.return_value.generate_scenario.return_value = mock_scenario + + mock_sentiment = MagicMock() + mock_sentiment.to_dict.return_value = {} + 
mock_sentiment.simulated_probability = 0.6 + mock_sentiment.confidence = 0.7 + mock_sentiment.total_posts_analyzed = 10 + MockDebate.return_value.simulate_debate.return_value = mock_sentiment + + mgr.run_prediction(market=market, run=run) + + # Verify the custom store's save_run was called (multiple times during pipeline) + assert custom_store.save_run.call_count >= 1 diff --git a/backend/tests/test_prediction_store.py b/backend/tests/test_prediction_store.py new file mode 100644 index 0000000..d6fdb28 --- /dev/null +++ b/backend/tests/test_prediction_store.py @@ -0,0 +1,88 @@ +""" +Tests for SQLite-backed prediction run storage. +""" + +from app.models.prediction import PredictionRun, PredictionRunStatus +from app.storage.prediction_store import SQLitePredictionStore + + +class TestSQLitePredictionStore: + + def test_create_and_get_run(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + run = store.create_run() + + assert run.run_id.startswith("pred_") + assert run.status == PredictionRunStatus.FETCHING_MARKET + + loaded = store.get_run(run.run_id) + assert loaded is not None + assert loaded.run_id == run.run_id + + def test_save_and_get_with_nested_data(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + run = store.create_run() + + run.market = {"title": "Test market", "prices": [0.5, 0.5]} + run.signal = {"direction": "BUY_YES", "edge": 0.15} + run.sentiment = {"confidence": 0.8, "stance_counts": {"for": 10}} + run.scenario = {"context_document": "Test context"} + run.status = PredictionRunStatus.COMPLETED + store.save_run(run) + + loaded = store.get_run(run.run_id) + assert loaded.market["title"] == "Test market" + assert loaded.signal["direction"] == "BUY_YES" + assert loaded.sentiment["confidence"] == 0.8 + assert loaded.scenario["context_document"] == "Test context" + assert loaded.status == PredictionRunStatus.COMPLETED + + def test_list_runs_ordered(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + + 
runs = [] + for _ in range(3): + runs.append(store.create_run()) + + listed = store.list_runs() + assert len(listed) == 3 + # Most recent first + assert listed[0].run_id == runs[-1].run_id + + def test_list_runs_with_limit(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + for _ in range(5): + store.create_run() + + listed = store.list_runs(limit=2) + assert len(listed) == 2 + + def test_delete_run(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + run = store.create_run() + + assert store.delete_run(run.run_id) is True + assert store.get_run(run.run_id) is None + + def test_delete_nonexistent(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + assert store.delete_run("nonexistent") is False + + def test_get_nonexistent(self, sqlite_store): + store = SQLitePredictionStore(sqlite_store) + assert store.get_run("nonexistent") is None + + def test_update_preserves_data(self, sqlite_store): + """save_run updates existing run without losing fields.""" + store = SQLitePredictionStore(sqlite_store) + run = store.create_run() + run.market = {"title": "Test"} + store.save_run(run) + + run.status = PredictionRunStatus.COMPLETED + run.error = "something failed" + store.save_run(run) + + loaded = store.get_run(run.run_id) + assert loaded.market["title"] == "Test" + assert loaded.error == "something failed" diff --git a/backend/tests/test_retry.py b/backend/tests/test_retry.py new file mode 100644 index 0000000..e40d937 --- /dev/null +++ b/backend/tests/test_retry.py @@ -0,0 +1,106 @@ +""" +Tests for app.utils.retry.retry_with_backoff +""" + +from unittest.mock import patch, MagicMock + +import pytest + +from app.utils.retry import retry_with_backoff + + +class TestRetrySuccessFirstAttempt: + + def test_retry_success_first_attempt(self): + call_count = 0 + + @retry_with_backoff(max_retries=3, exceptions=(ValueError,)) + def succeed_immediately(): + nonlocal call_count + call_count += 1 + return "ok" + + result = 
succeed_immediately() + assert result == "ok" + assert call_count == 1 + + +class TestRetrySuccessAfterFailures: + + @patch("app.utils.retry.time.sleep") + def test_retry_success_after_failures(self, mock_sleep): + call_count = 0 + + @retry_with_backoff(max_retries=3, initial_delay=1.0, jitter=False, exceptions=(ValueError,)) + def fail_then_succeed(): + nonlocal call_count + call_count += 1 + if call_count < 3: + raise ValueError("transient error") + return "recovered" + + result = fail_then_succeed() + assert result == "recovered" + assert call_count == 3 + assert mock_sleep.call_count == 2 + + +class TestRetryMaxExceeded: + + @patch("app.utils.retry.time.sleep") + def test_retry_max_exceeded(self, mock_sleep): + + @retry_with_backoff(max_retries=2, initial_delay=0.01, jitter=False, exceptions=(ValueError,)) + def always_fail(): + raise ValueError("permanent error") + + with pytest.raises(ValueError, match="permanent error"): + always_fail() + + # initial attempt + 2 retries = 3 calls total, 2 sleeps + assert mock_sleep.call_count == 2 + + +class TestExponentialBackoffTiming: + + @patch("app.utils.retry.time.sleep") + def test_exponential_backoff_timing(self, mock_sleep): + call_count = 0 + + @retry_with_backoff( + max_retries=3, + initial_delay=1.0, + backoff_factor=2.0, + jitter=False, + exceptions=(RuntimeError,), + ) + def fail_three_times(): + nonlocal call_count + call_count += 1 + if call_count <= 3: + raise RuntimeError("fail") + return "ok" + + result = fail_three_times() + assert result == "ok" + + # Delays should be 1.0, 2.0, 4.0 (without jitter) + delays = [c.args[0] for c in mock_sleep.call_args_list] + assert delays == [1.0, 2.0, 4.0] + + +class TestNonRetryableException: + + @patch("app.utils.retry.time.sleep") + def test_non_retryable_exception(self, mock_sleep): + """Exception not in the exceptions tuple should raise immediately.""" + + @retry_with_backoff(max_retries=3, exceptions=(ValueError,)) + def raise_type_error(): + raise TypeError("not 
retryable") + + with pytest.raises(TypeError, match="not retryable"): + raise_type_error() + + # Should not sleep at all — raised immediately + mock_sleep.assert_not_called() diff --git a/backend/tests/test_sqlite_store.py b/backend/tests/test_sqlite_store.py new file mode 100644 index 0000000..33432c6 --- /dev/null +++ b/backend/tests/test_sqlite_store.py @@ -0,0 +1,316 @@ +""" +Tests for app.storage.sqlite_store.SQLiteStore +""" + +import pytest +from unittest.mock import patch, MagicMock + +from app.models.backtest import BacktestRun, BacktestResult, BacktestRunStatus +from app.models.position import PaperOrder, PaperPosition, PositionStatus +from app.storage.sqlite_store import StorageError +from sqlalchemy import text +from sqlalchemy.exc import OperationalError + + +class TestSaveAndGetBacktestRun: + + def test_save_and_get_backtest_run(self, sqlite_store): + run = BacktestRun( + id="bt_test001", + status=BacktestRunStatus.RUNNING.value, + config={"num_markets": 10}, + total_markets=10, + ) + sqlite_store.save_backtest_run(run) + + loaded = sqlite_store.get_backtest_run("bt_test001") + assert loaded is not None + assert loaded.id == "bt_test001" + assert loaded.status == BacktestRunStatus.RUNNING.value + assert loaded.config == {"num_markets": 10} + assert loaded.total_markets == 10 + + def test_get_nonexistent_run_returns_none(self, sqlite_store): + assert sqlite_store.get_backtest_run("nonexistent") is None + + +class TestListBacktestRunsOrdered: + + def test_list_backtest_runs_ordered(self, sqlite_store): + run_a = BacktestRun(id="bt_a", started_at="2025-01-01T00:00:00") + run_b = BacktestRun(id="bt_b", started_at="2025-06-01T00:00:00") + run_c = BacktestRun(id="bt_c", started_at="2025-03-01T00:00:00") + + sqlite_store.save_backtest_run(run_a) + sqlite_store.save_backtest_run(run_b) + sqlite_store.save_backtest_run(run_c) + + runs = sqlite_store.list_backtest_runs() + assert len(runs) == 3 + # Should be ordered by started_at descending + assert runs[0].id 
== "bt_b" + assert runs[1].id == "bt_c" + assert runs[2].id == "bt_a" + + +class TestUpdateBacktestRun: + + def test_update_backtest_run(self, sqlite_store): + run = BacktestRun(id="bt_upd", status=BacktestRunStatus.PENDING.value) + sqlite_store.save_backtest_run(run) + + sqlite_store.update_backtest_run( + "bt_upd", + status=BacktestRunStatus.COMPLETED.value, + metrics={"accuracy": 0.75}, + completed_markets=5, + ) + + loaded = sqlite_store.get_backtest_run("bt_upd") + assert loaded.status == BacktestRunStatus.COMPLETED.value + assert loaded.metrics == {"accuracy": 0.75} + assert loaded.completed_markets == 5 + + +class TestBacktestResults: + + def test_save_and_get_backtest_result(self, sqlite_store): + # Need a parent run first + run = BacktestRun(id="bt_res_run") + sqlite_store.save_backtest_run(run) + + result = BacktestResult( + id="btr_001", + run_id="bt_res_run", + market_id="mkt_abc", + market_title="Test Market", + predicted_prob=0.70, + market_prob=0.55, + actual_outcome="YES", + signal_direction="BUY_YES", + edge=0.15, + brier_score=0.09, + correct=1, + ) + sqlite_store.save_backtest_result(result) + + results = sqlite_store.get_results_by_run("bt_res_run") + assert len(results) == 1 + r = results[0] + assert r.id == "btr_001" + assert r.market_id == "mkt_abc" + assert r.predicted_prob == 0.70 + assert r.correct == 1 + + def test_get_results_by_run(self, sqlite_store): + run = BacktestRun(id="bt_multi") + sqlite_store.save_backtest_run(run) + + for i in range(5): + result = BacktestResult( + id=f"btr_m{i}", + run_id="bt_multi", + market_id=f"mkt_{i}", + ) + sqlite_store.save_backtest_result(result) + + results = sqlite_store.get_results_by_run("bt_multi") + assert len(results) == 5 + + # Different run should return empty + assert sqlite_store.get_results_by_run("bt_other") == [] + + def test_get_completed_market_ids(self, sqlite_store): + run = BacktestRun(id="bt_cids") + sqlite_store.save_backtest_run(run) + + for mid in ["mkt_a", "mkt_b", "mkt_c"]: + 
result = BacktestResult( + id=f"btr_{mid}", + run_id="bt_cids", + market_id=mid, + ) + sqlite_store.save_backtest_result(result) + + ids = sqlite_store.get_completed_market_ids("bt_cids") + assert set(ids) == {"mkt_a", "mkt_b", "mkt_c"} + + +class TestHasActiveBacktest: + + def test_no_active_backtest(self, sqlite_store): + assert sqlite_store.has_active_backtest() is None + + def test_running_backtest_detected(self, sqlite_store): + run = BacktestRun(id="bt_active", status="RUNNING") + sqlite_store.save_backtest_run(run) + assert sqlite_store.has_active_backtest() == "bt_active" + + def test_completed_not_detected(self, sqlite_store): + run = BacktestRun(id="bt_done", status="COMPLETED") + sqlite_store.save_backtest_run(run) + assert sqlite_store.has_active_backtest() is None + + +class TestPaperOrders: + + def test_save_and_get_paper_order(self, sqlite_store): + order = PaperOrder( + id="ord_test001", + market_id="mkt_xyz", + signal_id="sig_001", + side="BUY_YES", + outcome="Yes", + size=10.0, + fill_price=0.62, + slippage=0.015, + ) + sqlite_store.save_paper_order(order) + + orders = sqlite_store.get_orders() + assert len(orders) == 1 + o = orders[0] + assert o.id == "ord_test001" + assert o.side == "BUY_YES" + assert o.fill_price == 0.62 + + +class TestPaperPositions: + + def test_save_and_get_paper_position(self, sqlite_store): + # Save the order first (FK dependency) + order = PaperOrder(id="ord_pos_test", market_id="mkt_pos") + sqlite_store.save_paper_order(order) + + position = PaperPosition( + id="pos_test001", + order_id="ord_pos_test", + market_id="mkt_pos", + outcome="Yes", + entry_price=0.60, + cost_basis=6.0, + status=PositionStatus.OPEN.value, + ) + sqlite_store.save_paper_position(position) + + positions = sqlite_store.get_positions() + assert len(positions) == 1 + p = positions[0] + assert p.id == "pos_test001" + assert p.entry_price == 0.60 + assert p.status == "OPEN" + + +class TestMarketCategories: + + def test_save_and_get_market_category(self, 
sqlite_store): + sqlite_store.save_market_category("mkt_cat_001", "politics") + assert sqlite_store.get_market_category("mkt_cat_001") == "politics" + + def test_get_nonexistent_returns_none(self, sqlite_store): + assert sqlite_store.get_market_category("nonexistent") is None + + def test_upsert_overwrites(self, sqlite_store): + sqlite_store.save_market_category("mkt_cat_002", "sports") + sqlite_store.save_market_category("mkt_cat_002", "entertainment") + assert sqlite_store.get_market_category("mkt_cat_002") == "entertainment" + + +class TestCalibrationProfiles: + + def test_save_and_load_profiles(self, sqlite_store): + run = BacktestRun(id="bt_cp_test") + sqlite_store.save_backtest_run(run) + + sqlite_store.save_calibration_profile("bt_cp_test", "politics", -0.05, 30) + sqlite_store.save_calibration_profile("bt_cp_test", "crypto", 0.12, 25) + + profiles = sqlite_store.load_calibration_profiles("bt_cp_test") + assert len(profiles) == 2 + assert profiles["politics"]["offset"] == -0.05 + assert profiles["politics"]["sample_size"] == 30 + assert profiles["crypto"]["offset"] == 0.12 + + def test_empty_profiles(self, sqlite_store): + profiles = sqlite_store.load_calibration_profiles("nonexistent") + assert profiles == {} + + +class TestLatestCompletedRunId: + + def test_returns_latest_completed(self, sqlite_store): + sqlite_store.save_backtest_run(BacktestRun( + id="bt_old", started_at="2025-01-01T00:00:00", status="COMPLETED" + )) + sqlite_store.save_backtest_run(BacktestRun( + id="bt_new", started_at="2025-06-01T00:00:00", status="COMPLETED" + )) + sqlite_store.save_backtest_run(BacktestRun( + id="bt_running", started_at="2025-07-01T00:00:00", status="RUNNING" + )) + assert sqlite_store.get_latest_completed_run_id() == "bt_new" + + def test_returns_none_when_no_completed(self, sqlite_store): + sqlite_store.save_backtest_run(BacktestRun(id="bt_pending", status="PENDING")) + assert sqlite_store.get_latest_completed_run_id() is None + + +class 
TestBacktestResultCategoryColumns: + + def test_category_columns_persisted(self, sqlite_store): + run = BacktestRun(id="bt_cat_col") + sqlite_store.save_backtest_run(run) + + result = BacktestResult( + id="btr_cat_001", run_id="bt_cat_col", + market_id="mkt_x", category="sports", confidence_tier="HIGH", + ) + sqlite_store.save_backtest_result(result) + + results = sqlite_store.get_results_by_run("bt_cat_col") + assert len(results) == 1 + assert results[0].category == "sports" + assert results[0].confidence_tier == "HIGH" + + +class TestDiskFullErrorHandling: + + def test_disk_full_raises_storage_error(self, sqlite_store): + """Disk I/O errors during writes raise StorageError.""" + run = BacktestRun(id="bt_disk_test") + + with patch.object(sqlite_store.engine, "connect") as mock_connect: + mock_conn = MagicMock() + mock_conn.__enter__ = MagicMock(return_value=mock_conn) + mock_conn.__exit__ = MagicMock(return_value=False) + mock_conn.execute.side_effect = OperationalError( + "INSERT", {}, Exception("disk I/O error") + ) + mock_connect.return_value = mock_conn + + with pytest.raises(StorageError, match="Disk I/O error"): + sqlite_store.save_backtest_run(run) + + def test_non_disk_error_propagates_as_operational(self, sqlite_store): + """Non-disk OperationalErrors propagate unchanged.""" + run = BacktestRun(id="bt_other_err") + + with patch.object(sqlite_store.engine, "connect") as mock_connect: + mock_conn = MagicMock() + mock_conn.__enter__ = MagicMock(return_value=mock_conn) + mock_conn.__exit__ = MagicMock(return_value=False) + mock_conn.execute.side_effect = OperationalError( + "INSERT", {}, Exception("table not found") + ) + mock_connect.return_value = mock_conn + + with pytest.raises(OperationalError): + sqlite_store.save_backtest_run(run) + + +class TestWALMode: + + def test_wal_mode_enabled(self, sqlite_store): + with sqlite_store.engine.connect() as conn: + result = conn.execute(text("PRAGMA journal_mode")).scalar() + assert result == "wal" diff --git 
a/backend/uv.lock b/backend/uv.lock index f1ce4b6..ad9093f 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -592,6 +592,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" }, ] +[[package]] +name = "greenlet" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/51/1664f6b78fc6ebbd98019a1fd730e83fa78f2db7058f72b1463d3612b8db/greenlet-3.3.2.tar.gz", hash = "sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2", size = 188267, upload-time = "2026-02-20T20:54:15.531Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, + { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, + { url = "https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/83/3e06a52aca8128bdd4dcd67e932b809e76a96ab8c232a8b025b2850264c5/greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358", size = 594156, upload-time = "2026-02-20T20:20:59.955Z" }, + { url = "https://files.pythonhosted.org/packages/70/79/0de5e62b873e08fe3cef7dbe84e5c4bc0e8ed0c7ff131bccb8405cd107c8/greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99", size = 1554649, upload-time = "2026-02-20T20:49:32.293Z" }, + { url = "https://files.pythonhosted.org/packages/5a/00/32d30dee8389dc36d42170a9c66217757289e2afb0de59a3565260f38373/greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be", size = 1619472, upload-time = "2026-02-20T20:21:07.966Z" }, + { url = "https://files.pythonhosted.org/packages/f1/3a/efb2cf697fbccdf75b24e2c18025e7dfa54c4f31fab75c51d0fe79942cef/greenlet-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e692b2dae4cc7077cbb11b47d258533b48c8fde69a33d0d8a82e2fe8d8531d5", size = 230389, upload-time = "2026-02-20T20:17:18.772Z" }, + { url = "https://files.pythonhosted.org/packages/e1/a1/65bbc059a43a7e2143ec4fc1f9e3f673e04f9c7b371a494a101422ac4fd5/greenlet-3.3.2-cp311-cp311-win_arm64.whl", hash = "sha256:02b0a8682aecd4d3c6c18edf52bc8e51eacdd75c8eac52a790a210b06aa295fd", size = 229645, upload-time = "2026-02-20T20:18:18.695Z" }, + { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, + { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, + { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, + { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, + { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, + { url = "https://files.pythonhosted.org/packages/9b/40/cc802e067d02af8b60b6771cea7d57e21ef5e6659912814babb42b864713/greenlet-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:34308836d8370bddadb41f5a7ce96879b72e2fdfb4e87729330c6ab52376409f", size = 231081, upload-time = "2026-02-20T20:17:28.121Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/2e/fe7f36ff1982d6b10a60d5e0740c759259a7d6d2e1dc41da6d96de32fff6/greenlet-3.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:d3a62fa76a32b462a97198e4c9e99afb9ab375115e74e9a83ce180e7a496f643", size = 230331, upload-time = "2026-02-20T20:17:23.34Z" }, + { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = "2026-02-20T20:19:01.9Z" }, + { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, + { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, + { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" }, + { url = "https://files.pythonhosted.org/packages/91/39/5ef5aa23bc545aa0d31e1b9b55822b32c8da93ba657295840b6b34124009/greenlet-3.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:a7945dd0eab63ded0a48e4dcade82939783c172290a7903ebde9e184333ca124", size = 230961, upload-time = "2026-02-20T20:16:58.461Z" }, + { url = "https://files.pythonhosted.org/packages/62/6b/a89f8456dcb06becff288f563618e9f20deed8dd29beea14f9a168aef64b/greenlet-3.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:394ead29063ee3515b4e775216cb756b2e3b4a7e55ae8fd884f17fa579e6b327", size = 230221, upload-time = "2026-02-20T20:17:37.152Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, + { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, + { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, + { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ca/2101ca3d9223a1dc125140dbc063644dca76df6ff356531eb27bc267b446/greenlet-3.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:8c4dd0f3997cf2512f7601563cc90dfb8957c0cff1e3a1b23991d4ea1776c492", size = 232034, upload-time = "2026-02-20T20:20:08.186Z" }, + { url = "https://files.pythonhosted.org/packages/f6/4a/ecf894e962a59dea60f04877eea0fd5724618da89f1867b28ee8b91e811f/greenlet-3.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:cd6f9e2bbd46321ba3bbb4c8a15794d32960e3b0ae2cc4d49a1a53d314805d71", size = 231437, upload-time = "2026-02-20T20:18:59.722Z" }, + { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, + { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, + { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, + { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, + { url = "https://files.pythonhosted.org/packages/29/4b/45d90626aef8e65336bed690106d1382f7a43665e2249017e9527df8823b/greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a", size = 237086, upload-time = "2026-02-20T20:20:45.786Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1238,8 
+1285,8 @@ wheels = [ ] [[package]] -name = "mirofish-backend" -version = "0.1.0" +name = "mirofish-offline-backend" +version = "0.2.0" source = { editable = "." } dependencies = [ { name = "camel-ai" }, @@ -1248,11 +1295,13 @@ dependencies = [ { name = "charset-normalizer" }, { name = "flask" }, { name = "flask-cors" }, + { name = "neo4j" }, { name = "openai" }, { name = "pydantic" }, { name = "pymupdf" }, { name = "python-dotenv" }, - { name = "zep-cloud" }, + { name = "scikit-learn" }, + { name = "sqlalchemy" }, ] [package.optional-dependencies] @@ -1276,6 +1325,7 @@ requires-dist = [ { name = "charset-normalizer", specifier = ">=3.0.0" }, { name = "flask", specifier = ">=3.0.0" }, { name = "flask-cors", specifier = ">=6.0.0" }, + { name = "neo4j", specifier = ">=5.15.0" }, { name = "openai", specifier = ">=1.0.0" }, { name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" }, { name = "pydantic", specifier = ">=2.0.0" }, @@ -1283,7 +1333,8 @@ requires-dist = [ { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, - { name = "zep-cloud", specifier = "==3.13.0" }, + { name = "scikit-learn", specifier = ">=1.4.0" }, + { name = "sqlalchemy", specifier = ">=2.0.0" }, ] provides-extras = ["dev"] @@ -2926,6 +2977,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, ] +[[package]] +name = "sqlalchemy" +version = "2.0.48" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 
'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/73/b4a9737255583b5fa858e0bb8e116eb94b88c910164ed2ed719147bde3de/sqlalchemy-2.0.48.tar.gz", hash = "sha256:5ca74f37f3369b45e1f6b7b06afb182af1fd5dde009e4ffd831830d98cbe5fe7", size = 9886075, upload-time = "2026-03-02T15:28:51.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/6d/b8b78b5b80f3c3ab3f7fa90faa195ec3401f6d884b60221260fd4d51864c/sqlalchemy-2.0.48-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b4c575df7368b3b13e0cebf01d4679f9a28ed2ae6c1cd0b1d5beffb6b2007dc", size = 2157184, upload-time = "2026-03-02T15:38:28.161Z" }, + { url = "https://files.pythonhosted.org/packages/21/4b/4f3d4a43743ab58b95b9ddf5580a265b593d017693df9e08bd55780af5bb/sqlalchemy-2.0.48-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e83e3f959aaa1c9df95c22c528096d94848a1bc819f5d0ebf7ee3df0ca63db6c", size = 3313555, upload-time = "2026-03-02T15:58:57.21Z" }, + { url = "https://files.pythonhosted.org/packages/21/dd/3b7c53f1dbbf736fd27041aee68f8ac52226b610f914085b1652c2323442/sqlalchemy-2.0.48-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b7243850edd0b8b97043f04748f31de50cf426e939def5c16bedb540698f7", size = 3313057, upload-time = "2026-03-02T15:52:29.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/cc/3e600a90ae64047f33313d7d32e5ad025417f09d2ded487e8284b5e21a15/sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82745b03b4043e04600a6b665cb98697c4339b24e34d74b0a2ac0a2488b6f94d", size = 3265431, upload-time = "2026-03-02T15:58:59.096Z" }, + { url = "https://files.pythonhosted.org/packages/8b/19/780138dacfe3f5024f4cf96e4005e91edf6653d53d3673be4844578faf1d/sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:e5e088bf43f6ee6fec7dbf1ef7ff7774a616c236b5c0cb3e00662dd71a56b571", size = 3287646, upload-time = "2026-03-02T15:52:31.569Z" }, + { url = "https://files.pythonhosted.org/packages/40/fd/f32ced124f01a23151f4777e4c705f3a470adc7bd241d9f36a7c941a33bf/sqlalchemy-2.0.48-cp311-cp311-win32.whl", hash = "sha256:9c7d0a77e36b5f4b01ca398482230ab792061d243d715299b44a0b55c89fe617", size = 2116956, upload-time = "2026-03-02T15:46:54.535Z" }, + { url = "https://files.pythonhosted.org/packages/58/d5/dd767277f6feef12d05651538f280277e661698f617fa4d086cce6055416/sqlalchemy-2.0.48-cp311-cp311-win_amd64.whl", hash = "sha256:583849c743e0e3c9bb7446f5b5addeacedc168d657a69b418063dfdb2d90081c", size = 2141627, upload-time = "2026-03-02T15:46:55.849Z" }, + { url = "https://files.pythonhosted.org/packages/ef/91/a42ae716f8925e9659df2da21ba941f158686856107a61cc97a95e7647a3/sqlalchemy-2.0.48-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:348174f228b99f33ca1f773e85510e08927620caa59ffe7803b37170df30332b", size = 2155737, upload-time = "2026-03-02T15:49:13.207Z" }, + { url = "https://files.pythonhosted.org/packages/b9/52/f75f516a1f3888f027c1cfb5d22d4376f4b46236f2e8669dcb0cddc60275/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53667b5f668991e279d21f94ccfa6e45b4e3f4500e7591ae59a8012d0f010dcb", size = 3337020, upload-time = "2026-03-02T15:50:34.547Z" }, + { url = "https://files.pythonhosted.org/packages/37/9a/0c28b6371e0cdcb14f8f1930778cb3123acfcbd2c95bb9cf6b4a2ba0cce3/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34634e196f620c7a61d18d5cf7dc841ca6daa7961aed75d532b7e58b309ac894", size = 3349983, upload-time = "2026-03-02T15:53:25.542Z" }, + { url = "https://files.pythonhosted.org/packages/1c/46/0aee8f3ff20b1dcbceb46ca2d87fcc3d48b407925a383ff668218509d132/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:546572a1793cc35857a2ffa1fe0e58571af1779bcc1ffa7c9fb0839885ed69a9", size = 3279690, upload-time = "2026-03-02T15:50:36.277Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8c/a957bc91293b49181350bfd55e6dfc6e30b7f7d83dc6792d72043274a390/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07edba08061bc277bfdc772dd2a1a43978f5a45994dd3ede26391b405c15221e", size = 3314738, upload-time = "2026-03-02T15:53:27.519Z" }, + { url = "https://files.pythonhosted.org/packages/4b/44/1d257d9f9556661e7bdc83667cc414ba210acfc110c82938cb3611eea58f/sqlalchemy-2.0.48-cp312-cp312-win32.whl", hash = "sha256:908a3fa6908716f803b86896a09a2c4dde5f5ce2bb07aacc71ffebb57986ce99", size = 2115546, upload-time = "2026-03-02T15:54:31.591Z" }, + { url = "https://files.pythonhosted.org/packages/f2/af/c3c7e1f3a2b383155a16454df62ae8c62a30dd238e42e68c24cebebbfae6/sqlalchemy-2.0.48-cp312-cp312-win_amd64.whl", hash = "sha256:68549c403f79a8e25984376480959975212a670405e3913830614432b5daa07a", size = 2142484, upload-time = "2026-03-02T15:54:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c6/569dc8bf3cd375abc5907e82235923e986799f301cd79a903f784b996fca/sqlalchemy-2.0.48-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e3070c03701037aa418b55d36532ecb8f8446ed0135acb71c678dbdf12f5b6e4", size = 2152599, upload-time = "2026-03-02T15:49:14.41Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ff/f4e04a4bd5a24304f38cb0d4aa2ad4c0fb34999f8b884c656535e1b2b74c/sqlalchemy-2.0.48-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2645b7d8a738763b664a12a1542c89c940daa55196e8d73e55b169cc5c99f65f", size = 3278825, upload-time = "2026-03-02T15:50:38.269Z" }, + { url = "https://files.pythonhosted.org/packages/fe/88/cb59509e4668d8001818d7355d9995be90c321313078c912420603a7cb95/sqlalchemy-2.0.48-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:b19151e76620a412c2ac1c6f977ab1b9fa7ad43140178345136456d5265b32ed", size = 3295200, upload-time = "2026-03-02T15:53:29.366Z" }, + { url = "https://files.pythonhosted.org/packages/87/dc/1609a4442aefd750ea2f32629559394ec92e89ac1d621a7f462b70f736ff/sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b193a7e29fd9fa56e502920dca47dffe60f97c863494946bd698c6058a55658", size = 3226876, upload-time = "2026-03-02T15:50:39.802Z" }, + { url = "https://files.pythonhosted.org/packages/37/c3/6ae2ab5ea2fa989fbac4e674de01224b7a9d744becaf59bb967d62e99bed/sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:36ac4ddc3d33e852da9cb00ffb08cea62ca05c39711dc67062ca2bb1fae35fd8", size = 3265045, upload-time = "2026-03-02T15:53:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/6f/82/ea4665d1bb98c50c19666e672f21b81356bd6077c4574e3d2bbb84541f53/sqlalchemy-2.0.48-cp313-cp313-win32.whl", hash = "sha256:389b984139278f97757ea9b08993e7b9d1142912e046ab7d82b3fbaeb0209131", size = 2113700, upload-time = "2026-03-02T15:54:35.825Z" }, + { url = "https://files.pythonhosted.org/packages/b7/2b/b9040bec58c58225f073f5b0c1870defe1940835549dafec680cbd58c3c3/sqlalchemy-2.0.48-cp313-cp313-win_amd64.whl", hash = "sha256:d612c976cbc2d17edfcc4c006874b764e85e990c29ce9bd411f926bbfb02b9a2", size = 2139487, upload-time = "2026-03-02T15:54:37.079Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/7b17bd50244b78a49d22cc63c969d71dc4de54567dc152a9b46f6fae40ce/sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69f5bc24904d3bc3640961cddd2523e361257ef68585d6e364166dfbe8c78fae", size = 3558851, upload-time = "2026-03-02T15:57:48.607Z" }, + { url = "https://files.pythonhosted.org/packages/20/0d/213668e9aca61d370f7d2a6449ea4ec699747fac67d4bda1bb3d129025be/sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:fd08b90d211c086181caed76931ecfa2bdfc83eea3cfccdb0f82abc6c4b876cb", size = 3525525, upload-time = "2026-03-02T16:04:38.058Z" }, + { url = "https://files.pythonhosted.org/packages/85/d7/a84edf412979e7d59c69b89a5871f90a49228360594680e667cb2c46a828/sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1ccd42229aaac2df431562117ac7e667d702e8e44afdb6cf0e50fa3f18160f0b", size = 3466611, upload-time = "2026-03-02T15:57:50.759Z" }, + { url = "https://files.pythonhosted.org/packages/86/55/42404ce5770f6be26a2b0607e7866c31b9a4176c819e9a7a5e0a055770be/sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0dcbc588cd5b725162c076eb9119342f6579c7f7f55057bb7e3c6ff27e13121", size = 3475812, upload-time = "2026-03-02T16:04:40.092Z" }, + { url = "https://files.pythonhosted.org/packages/ae/ae/29b87775fadc43e627cf582fe3bda4d02e300f6b8f2747c764950d13784c/sqlalchemy-2.0.48-cp313-cp313t-win32.whl", hash = "sha256:9764014ef5e58aab76220c5664abb5d47d5bc858d9debf821e55cfdd0f128485", size = 2141335, upload-time = "2026-03-02T15:52:51.518Z" }, + { url = "https://files.pythonhosted.org/packages/91/44/f39d063c90f2443e5b46ec4819abd3d8de653893aae92df42a5c4f5843de/sqlalchemy-2.0.48-cp313-cp313t-win_amd64.whl", hash = "sha256:e2f35b4cccd9ed286ad62e0a3c3ac21e06c02abc60e20aa51a3e305a30f5fa79", size = 2173095, upload-time = "2026-03-02T15:52:52.79Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b3/f437eaa1cf028bb3c927172c7272366393e73ccd104dcf5b6963f4ab5318/sqlalchemy-2.0.48-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e2d0d88686e3d35a76f3e15a34e8c12d73fc94c1dea1cd55782e695cc14086dd", size = 2154401, upload-time = "2026-03-02T15:49:17.24Z" }, + { url = "https://files.pythonhosted.org/packages/6c/1c/b3abdf0f402aa3f60f0df6ea53d92a162b458fca2321d8f1f00278506402/sqlalchemy-2.0.48-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49b7bddc1eebf011ea5ab722fdbe67a401caa34a350d278cc7733c0e88fecb1f", 
size = 3274528, upload-time = "2026-03-02T15:50:41.489Z" }, + { url = "https://files.pythonhosted.org/packages/f2/5e/327428a034407651a048f5e624361adf3f9fbac9d0fa98e981e9c6ff2f5e/sqlalchemy-2.0.48-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:426c5ca86415d9b8945c7073597e10de9644802e2ff502b8e1f11a7a2642856b", size = 3279523, upload-time = "2026-03-02T15:53:32.962Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ca/ece73c81a918add0965b76b868b7b5359e068380b90ef1656ee995940c02/sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:288937433bd44e3990e7da2402fabc44a3c6c25d3704da066b85b89a85474ae0", size = 3224312, upload-time = "2026-03-02T15:50:42.996Z" }, + { url = "https://files.pythonhosted.org/packages/88/11/fbaf1ae91fa4ee43f4fe79661cead6358644824419c26adb004941bdce7c/sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8183dc57ae7d9edc1346e007e840a9f3d6aa7b7f165203a99e16f447150140d2", size = 3246304, upload-time = "2026-03-02T15:53:34.937Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5fb0deb13930b4f2f698c5541ae076c18981173e27dd00376dbaea7a9c82/sqlalchemy-2.0.48-cp314-cp314-win32.whl", hash = "sha256:1182437cb2d97988cfea04cf6cdc0b0bb9c74f4d56ec3d08b81e23d621a28cc6", size = 2116565, upload-time = "2026-03-02T15:54:38.321Z" }, + { url = "https://files.pythonhosted.org/packages/95/7e/e83615cb63f80047f18e61e31e8e32257d39458426c23006deeaf48f463b/sqlalchemy-2.0.48-cp314-cp314-win_amd64.whl", hash = "sha256:144921da96c08feb9e2b052c5c5c1d0d151a292c6135623c6b2c041f2a45f9e0", size = 2142205, upload-time = "2026-03-02T15:54:39.831Z" }, + { url = "https://files.pythonhosted.org/packages/83/e3/69d8711b3f2c5135e9cde5f063bc1605860f0b2c53086d40c04017eb1f77/sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5aee45fd2c6c0f2b9cdddf48c48535e7471e42d6fb81adfde801da0bd5b93241", size = 3563519, upload-time = 
"2026-03-02T15:57:52.387Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4f/a7cce98facca73c149ea4578981594aaa5fd841e956834931de503359336/sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cddca31edf8b0653090cbb54562ca027c421c58ddde2c0685f49ff56a1690e0", size = 3528611, upload-time = "2026-03-02T16:04:42.097Z" }, + { url = "https://files.pythonhosted.org/packages/cd/7d/5936c7a03a0b0cb0fa0cc425998821c6029756b0855a8f7ee70fba1de955/sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7a936f1bb23d370b7c8cc079d5fce4c7d18da87a33c6744e51a93b0f9e97e9b3", size = 3472326, upload-time = "2026-03-02T15:57:54.423Z" }, + { url = "https://files.pythonhosted.org/packages/f4/33/cea7dfc31b52904efe3dcdc169eb4514078887dff1f5ae28a7f4c5d54b3c/sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e004aa9248e8cb0a5f9b96d003ca7c1c0a5da8decd1066e7b53f59eb8ce7c62b", size = 3478453, upload-time = "2026-03-02T16:04:44.584Z" }, + { url = "https://files.pythonhosted.org/packages/c8/95/32107c4d13be077a9cae61e9ae49966a35dc4bf442a8852dd871db31f62e/sqlalchemy-2.0.48-cp314-cp314t-win32.whl", hash = "sha256:b8438ec5594980d405251451c5b7ea9aa58dda38eb7ac35fb7e4c696712ee24f", size = 2147209, upload-time = "2026-03-02T15:52:54.274Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d7/1e073da7a4bc645eb83c76067284a0374e643bc4be57f14cc6414656f92c/sqlalchemy-2.0.48-cp314-cp314t-win_amd64.whl", hash = "sha256:d854b3970067297f3a7fbd7a4683587134aa9b3877ee15aa29eea478dc68f933", size = 2182198, upload-time = "2026-03-02T15:52:55.606Z" }, + { url = "https://files.pythonhosted.org/packages/46/2c/9664130905f03db57961b8980b05cab624afd114bf2be2576628a9f22da4/sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096", size = 1940202, upload-time = "2026-03-02T15:52:43.285Z" }, +] + [[package]] name = "sse-starlette" version = "3.0.4" @@ 
-3488,19 +3592,3 @@ sdist = { url = "https://files.pythonhosted.org/packages/d4/c8/cc640404a0981e6c1 wheels = [ { url = "https://files.pythonhosted.org/packages/8b/90/89a2ff242ccab6a24fbab18dbbabc67c51a6f0ed01f9a0f41689dc177419/yarg-0.1.9-py2.py3-none-any.whl", hash = "sha256:4f9cebdc00fac946c9bf2783d634e538a71c7d280a4d806d45fd4dc0ef441492", size = 19162, upload-time = "2014-08-11T22:01:41.104Z" }, ] - -[[package]] -name = "zep-cloud" -version = "3.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "httpx" }, - { name = "pydantic" }, - { name = "pydantic-core" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/32/c7/c835debf13302f8aaf8d0561ac6ff5a9bc15cc140cd692a1330fb1900c55/zep_cloud-3.13.0.tar.gz", hash = "sha256:c55d9c511773bb2177ae8e08546141404f87d2099affafabd7ec4b4505763e48", size = 63116, upload-time = "2025-11-20T15:25:40.745Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/e1/bbf03c6c8007c0cb238780e7fc6d8e1a52633893933a41aa09678618985a/zep_cloud-3.13.0-py3-none-any.whl", hash = "sha256:b2fbdeef73e262194c8f67b58f76471de6ee87e1a629541a09d8f7bbf475f12b", size = 110601, upload-time = "2025-11-20T15:25:38.484Z" }, -] diff --git a/claude.md b/claude.md new file mode 100644 index 0000000..31844f3 --- /dev/null +++ b/claude.md @@ -0,0 +1,73 @@ +## Workflow Orchestration + +### 1. Plan Mode Default +* Enter plan mode for ANY non-trivial task (3+ steps or architectural decisions) +* If something goes sideways, STOP and re-plan immediately - don't keep pushing +* Use plan mode for verification steps, not just building +* Write detailed specs upfront to reduce ambiguity + +### 2. 
Subagent Strategy +* Use subagents liberally to keep main context window clean +* Offload research, exploration, and parallel analysis to subagents +* For complex problems, throw more compute at it via subagents +* One tack per subagent for focused execution + +### 3. Self-Improvement Loop +* After ANY correction from the user: update 'tasks/lessons.md' with the pattern +* Write rules for yourself that prevent the same mistake +* Ruthlessly iterate on these lessons until mistake rate drops +* Review lessons at session start for relevant project + +### 4. Verification Before Done +* Never mark a task complete without proving it works +* Diff behavior between main and your changes when relevant +* Ask yourself: "Would a staff engineer approve this?" +* Run tests, check logs, demonstrate corrections + +### 5. Demand Elegance (Balanced) +* For non-trivial changes: pause and ask "is there a more elegant way?" +* If a fix feels tacky: "Knowing everything I know now, implement the elegant solution" +* Skip this for simple, obvious fixes - don't over-engineer +* Challenge your own work before presenting it + +### 6. Autonomous Bug Fixing +* When given a bug report: just fix it. Don't ask for hand-holding +* Point at logs, errors, failing tests - then resolve them +* Zero context switching required from the user +* Go fix failing CI tests without being told how + +## Task Management + +1. **Plan First**: Write plan to 'tasks/todo.md' with checkable items +2. **Verify Plan**: Check in before starting implementation +3. **Track Progress**: Mark items complete as you go +4. **Explain Changes**: High-level summary at each step +5. **Document Results**: Add review section to 'tasks/todo.md' +6. 
**Capture Lessons**: Update 'tasks/lessons.md' after corrections + +## gstack + +### Web Browsing +* Use the `/browse` skill from gstack for ALL web browsing +* NEVER use `mcp__chrome-devtools__*` tools — always use `/browse` instead + +### Available Skills +* `/plan-ceo-review` — CEO review of implementation plans +* `/plan-eng-review` — Engineering review of implementation plans +* `/plan-design-review` — Design review of implementation plans +* `/design-consultation` — Interactive design consultation +* `/review` — Pre-landing PR review +* `/ship` — Ship the current branch (push, PR, merge) +* `/browse` — Web browsing and research +* `/qa` — Quality assurance testing +* `/qa-only` — QA without fixing issues +* `/qa-design-review` — QA focused on design review +* `/setup-browser-cookies` — Configure browser cookies for authenticated browsing +* `/retro` — Post-ship retrospective +* `/document-release` — Generate release documentation + +## Core Principles + +* **Simplicity First**: Make every change as simple as possible. Impact minimal code. +* **No Laziness**: Find root causes. No temporary fixes. Senior developer standards. +* **Minimal Impact**: Changes should only touch what's necessary. Avoid introducing bugs. diff --git a/docs/PRD_polymarket_monetization.md b/docs/PRD_polymarket_monetization.md new file mode 100644 index 0000000..11011d7 --- /dev/null +++ b/docs/PRD_polymarket_monetization.md @@ -0,0 +1,613 @@ +# PRD: MiroFish Polymarket Monetization Engine + +## 1. Executive Summary + +MiroFish is a multi-agent swarm intelligence engine that simulates public opinion to generate prediction market trading signals. The existing prototype fetches Polymarket markets, runs agent-based simulations, analyzes sentiment, and outputs BUY_YES / BUY_NO / HOLD signals — but cannot execute trades and lacks the infrastructure needed to monetize reliably. 
+ +This PRD defines everything required to turn MiroFish into a **fully autonomous Polymarket trading system** that generates revenue from prediction market alpha. + +--- + +## 2. Current State (v0.2.0) + +### What Works +| Component | Status | Notes | +|-----------|--------|-------| +| Polymarket market fetching | Working | Via Gamma API, binary markets only | +| Scenario generation | Working | LLM converts market question → balanced simulation scenario | +| Knowledge graph construction | Working | Neo4j CE, entity/relationship extraction | +| Agent persona generation | Working | 50 default agents with personality profiles | +| OASIS simulation (Reddit) | Working | 5 rounds default, CREATE_POST + CREATE_COMMENT | +| Sentiment analysis | Working | LLM classifies stance, computes weighted P(YES) | +| Signal generation | Working | Edge = simulated_prob - market_prob, 10% threshold | +| Frontend dashboard | Working | Market browser, run progress, signal display | + +### What's Missing for Monetization +| Gap | Impact | Priority | +|-----|--------|----------| +| No trade execution | Cannot act on signals | P0 | +| No wallet / key management | Cannot interact with Polymarket contracts | P0 | +| No position tracking / P&L | Cannot measure performance | P0 | +| No backtesting framework | Cannot validate signal quality before risking capital | P0 | +| Single-platform simulation | Reddit-only limits signal diversity | P1 | +| No market filtering intelligence | Runs on any market, no selectivity | P1 | +| No confidence calibration | Raw confidence scores are uncalibrated | P1 | +| No risk management | No position sizing, stop-loss, or exposure limits | P1 | +| No scheduling / automation | Manual trigger only, no continuous scanning | P1 | +| Signal accuracy unknown | No historical performance data | P2 | +| No multi-market correlation | Treats each market independently | P2 | +| No real-time market price monitoring | Stale prices between runs | P2 | + +--- + +## 3. 
Target Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ MiroFish Engine │ +│ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Market │→ │ Signal │→ │ Risk │ │ +│ │ Scanner │ │ Pipeline │ │ Manager │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ │ │ │ +│ ▼ ▼ ▼ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Market │ │ Backtest │ │ Position │ │ +│ │ Filter │ │ Engine │ │ Tracker │ │ +│ └──────────┘ └──────────┘ └──────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌──────────┐ ┌──────────┐ │ +│ │ Trade │ │ P&L │ │ +│ │ Executor │ │ Dashboard│ │ +│ └──────────┘ └──────────┘ │ +│ │ │ +└──────────────────────│───────────────────────────────────┘ + ▼ + Polymarket CLOB API + (Polygon / USDC) +``` + +--- + +## 4. Feature Requirements + +### 4.1 — Trade Execution (P0) + +**Goal:** Execute trades on Polymarket based on generated signals. + +#### 4.1.1 Polymarket CLOB Client +- Integrate with Polymarket's CLOB (Central Limit Order Book) API +- Support order types: market order, limit order (GTC, GTD) +- Handle Polygon network interactions (USDC approvals, CTF contract) +- API endpoint: `https://clob.polymarket.com` + +#### 4.1.2 Wallet Management +- Secure private key storage (encrypted at rest, environment variable or keyfile) +- Polymarket API key + secret generation (derived from wallet signature) +- USDC balance checking on Polygon +- Support for Polymarket's proxy wallet system (allowances) +- Config additions to `.env`: + ``` + POLYMARKET_PRIVATE_KEY= # Polygon wallet private key (encrypted) + POLYMARKET_API_KEY= # CLOB API key + POLYMARKET_API_SECRET= # CLOB API secret + POLYMARKET_API_PASSPHRASE= # CLOB API passphrase + POLYMARKET_CHAIN_ID=137 # Polygon mainnet + POLYMARKET_FUNDER_ADDRESS= # Proxy wallet address + ``` + +#### 4.1.3 Order Lifecycle +- Create order from signal (direction → outcome token, edge → size) +- Monitor order fill status +- Cancel stale unfilled orders (configurable timeout) +- Persist order history (order_id, market, 
side, size, price, fill, timestamp) +- New model: `Order` dataclass in `models/prediction.py` + +#### 4.1.4 New Files +- `backend/app/services/polymarket_trader.py` — CLOB client, order placement, fill monitoring +- `backend/app/services/wallet_manager.py` — Key management, balance queries, approvals + +--- + +### 4.2 — Risk Management (P0) + +**Goal:** Prevent catastrophic losses through position limits and sizing rules. + +#### 4.2.1 Position Sizing +- Kelly criterion-based sizing: `f = edge / odds` (capped at half-Kelly) +- Maximum position size per market (configurable, default: $50 USDC) +- Maximum total exposure across all markets (configurable, default: $500 USDC) +- Minimum edge threshold before trade (configurable, default: 10%) +- Minimum confidence threshold (configurable, default: 0.4) + +#### 4.2.2 Exposure Tracking +- Track all open positions with current market prices +- Real-time P&L calculation (unrealized + realized) +- Daily drawdown limit (configurable, default: 20% of bankroll) +- Auto-pause trading if drawdown limit hit + +#### 4.2.3 Config Additions +``` +RISK_MAX_POSITION_SIZE=50 # Max USDC per market +RISK_MAX_TOTAL_EXPOSURE=500 # Max USDC across all markets +RISK_MIN_EDGE=0.10 # Minimum edge to trade +RISK_MIN_CONFIDENCE=0.40 # Minimum signal confidence +RISK_KELLY_FRACTION=0.5 # Half-Kelly +RISK_MAX_DAILY_DRAWDOWN=0.20 # 20% daily drawdown limit +RISK_COOLDOWN_MINUTES=60 # Cooldown after hitting drawdown limit +``` + +#### 4.2.4 New Files +- `backend/app/services/risk_manager.py` — Position sizing, exposure limits, drawdown tracking +- `backend/app/models/position.py` — Position, PortfolioState dataclasses + +--- + +### 4.3 — Backtesting Engine (P0) + +**Goal:** Validate signal quality on historical data before risking real capital. 
+
+#### 4.3.1 Historical Data Collection
+- Fetch resolved Polymarket markets via Gamma API (`closed=true`)
+- Store market snapshots: prices at discovery time, resolution outcome, resolution time
+- Minimum dataset: 200+ resolved binary markets
+
+#### 4.3.2 Backtest Pipeline
+- For each historical market:
+  1. Run prediction pipeline (scenario → simulation → sentiment → signal)
+  2. Compare signal vs. actual resolution
+  3. Record: predicted_prob, market_prob_at_time, actual_outcome, edge, would_have_traded
+- Metrics: accuracy, Brier score, ROI (simulated), Sharpe ratio, max drawdown
+- Output: backtest report (JSON + markdown summary)
+
+#### 4.3.3 Calibration
+- Plot calibration curve: predicted probability vs. actual frequency
+- Apply Platt scaling or isotonic regression if miscalibrated
+- Store calibration model for live signal adjustment
+
+#### 4.3.4 New Files
+- `backend/app/services/backtester.py` — Backtest orchestration, metrics computation
+- `backend/app/services/calibrator.py` — Probability calibration
+- `backend/app/api/backtest.py` — API endpoints for running/viewing backtests
+- `backend/app/models/backtest.py` — BacktestRun, BacktestResult dataclasses
+
+#### 4.3.5 API Endpoints
+- `POST /api/backtest/run` — Start backtest on N historical markets
+- `GET /api/backtest/run/{run_id}/status` — Poll progress
+- `GET /api/backtest/run/{run_id}` — Get results with metrics
+- `GET /api/backtest/runs` — List all backtests
+
+---
+
+### 4.4 — Market Scanner & Filtering (P1)
+
+**Goal:** Automatically identify high-value trading opportunities. 
+ +#### 4.4.1 Market Selection Criteria +- Minimum volume: $50K (configurable) +- Minimum liquidity: $10K (configurable) +- Time to resolution: 1-30 days (avoid too short or too long) +- Binary markets only (YES/NO outcomes) +- Price range filter: 0.10 - 0.90 (avoid near-certain markets) +- Category filters: politics, crypto, sports, science, culture + +#### 4.4.2 Continuous Scanning +- Scheduled market scan every N hours (configurable, default: 6 hours) +- New market detection: compare against previously seen condition_ids +- Re-scan existing positions: check for price movement > 5% +- Priority queue: score markets by (volume × liquidity × time_remaining) + +#### 4.4.3 Config Additions +``` +SCANNER_INTERVAL_HOURS=6 +SCANNER_MIN_VOLUME=50000 +SCANNER_MIN_LIQUIDITY=10000 +SCANNER_MIN_DAYS_TO_RESOLUTION=1 +SCANNER_MAX_DAYS_TO_RESOLUTION=30 +SCANNER_PRICE_MIN=0.10 +SCANNER_PRICE_MAX=0.90 +SCANNER_MAX_CONCURRENT_RUNS=3 +``` + +#### 4.4.4 New Files +- `backend/app/services/market_scanner.py` — Scheduled scanning, filtering, prioritization +- Modify `backend/app/services/polymarket_client.py` — Add category filters, pagination + +--- + +### 4.5 — Dual-Platform Simulation (P1) + +**Goal:** Run both Reddit and Twitter simulations for richer signal diversity. + +#### 4.5.1 Changes +- Modify `PredictionManager.run_prediction()` to run both platforms in parallel +- Aggregate sentiment from both platforms (weighted average: 50/50 or configurable) +- Compare platform agreement as a confidence signal (high agreement → higher confidence) +- Track per-platform stance breakdown in `SentimentResult` + +#### 4.5.2 Config Additions +``` +PREDICTION_PLATFORMS=reddit,twitter # Platforms to simulate +PREDICTION_PLATFORM_WEIGHTS=0.5,0.5 # Aggregation weights +PREDICTION_REQUIRE_AGREEMENT=false # Only trade if platforms agree on direction +``` + +--- + +### 4.6 — Scheduling & Automation (P1) + +**Goal:** Fully autonomous operation — scan, predict, trade, repeat. 
+ +#### 4.6.1 Scheduler +- Cron-based or interval-based job scheduler (APScheduler or Celery Beat) +- Jobs: + - `scan_markets` — Every 6h: fetch new markets, filter, queue for prediction + - `run_predictions` — Process queued markets (max 3 concurrent) + - `execute_trades` — Convert completed signals to orders + - `monitor_positions` — Every 1h: update P&L, check stop-loss + - `cleanup` — Daily: archive old runs, purge expired market data + +#### 4.6.2 Job Persistence +- Store job state in filesystem (consistent with existing pattern) +- Resume incomplete jobs on restart +- Dead-letter queue for failed runs (retry up to 3 times) + +#### 4.6.3 New Files +- `backend/app/services/scheduler.py` — Job scheduling, queue management +- `backend/app/services/trade_executor.py` — Signal-to-order conversion with risk checks + +--- + +### 4.7 — Portfolio Dashboard (P1) + +**Goal:** Real-time visibility into positions, P&L, and signal performance. + +#### 4.7.1 Backend API Endpoints +- `GET /api/portfolio/summary` — Total value, P&L, open positions count +- `GET /api/portfolio/positions` — All positions with current prices and unrealized P&L +- `GET /api/portfolio/history` — Trade history with realized P&L +- `GET /api/portfolio/metrics` — Win rate, avg edge, ROI, Sharpe, max drawdown +- `GET /api/portfolio/signals` — Signal performance log (signal vs. outcome) + +#### 4.7.2 Frontend View +- New route: `/portfolio` +- Components: + - Portfolio summary card (total value, daily P&L, win rate) + - Open positions table (market, direction, entry price, current price, P&L) + - Trade history table with filters + - Performance chart (cumulative P&L over time) + - Signal accuracy chart (calibration curve) + - Risk gauge (current exposure vs. 
limits) + +#### 4.7.3 New Files +- `backend/app/api/portfolio.py` — Portfolio API endpoints +- `backend/app/services/portfolio_tracker.py` — Position aggregation, P&L calculation +- `frontend/src/views/PortfolioView.vue` — Dashboard UI +- `frontend/src/api/portfolio.js` — API client + +--- + +### 4.8 — Signal Quality Improvements (P2) + +#### 4.8.1 Multi-Run Consensus +- Run N simulations per market (default: 3) with different random seeds +- Average the simulated probabilities across runs +- Standard deviation as uncertainty measure → feeds into confidence +- Only trade if all N runs agree on direction + +#### 4.8.2 Web Research Augmentation +- Before simulation, fetch recent news articles related to the market question +- Inject news summaries into the context document alongside the LLM-generated scenario +- Sources: news APIs (NewsAPI, GDELT), Wikipedia current events +- Improves agent grounding in real-world facts + +#### 4.8.3 Agent Diversity Tuning +- Vary agent expertise levels: 20% domain experts, 30% informed observers, 50% general public +- Add contrarian agents (10%) to stress-test consensus +- Scale agent count with market complexity (higher volume/liquidity → more agents) + +#### 4.8.4 Temporal Weighting +- Weight later simulation rounds higher than earlier rounds (agents refine opinions over time) +- Detect opinion shift direction (converging or diverging) as meta-signal + +--- + +### 4.9 — Monitoring & Alerting (P2) + +#### 4.9.1 Alerts +- Telegram/Discord webhook for: + - New signal generated (market, direction, edge, confidence) + - Trade executed (market, side, size, price) + - Position resolved (market, outcome, P&L) + - Drawdown limit approaching (>15% of limit) + - System errors (pipeline failure, API timeout) + +#### 4.9.2 Health Checks +- `GET /api/status` — System health (Neo4j, Ollama, Polymarket API, wallet balance) +- Log aggregation with structured JSON logs +- Pipeline execution time tracking (per stage) + +#### 4.9.3 Config Additions 
+``` +ALERT_WEBHOOK_URL= # Telegram/Discord webhook +ALERT_ON_SIGNAL=true +ALERT_ON_TRADE=true +ALERT_ON_RESOLUTION=true +ALERT_ON_DRAWDOWN=true +``` + +--- + +## 5. Data Models (New & Modified) + +### 5.1 New: `Order` +```python +@dataclass +class Order: + order_id: str # Polymarket CLOB order ID + run_id: str # Linked prediction run + market_condition_id: str + market_title: str + side: str # BUY or SELL + outcome: str # YES or NO + size: float # USDC amount + price: float # Limit price (0-1) + status: str # PENDING, FILLED, PARTIAL, CANCELLED, FAILED + filled_size: float + avg_fill_price: float + created_at: str + updated_at: str +``` + +### 5.2 New: `Position` +```python +@dataclass +class Position: + position_id: str + market_condition_id: str + market_title: str + outcome: str # YES or NO + entry_price: float + current_price: float + size: float # Number of outcome tokens + cost_basis: float # Total USDC spent + unrealized_pnl: float + status: str # OPEN, CLOSED, RESOLVED + resolution: Optional[str] # YES, NO (after market resolves) + realized_pnl: Optional[float] # Final P&L after resolution + opened_at: str + closed_at: Optional[str] +``` + +### 5.3 New: `PortfolioState` +```python +@dataclass +class PortfolioState: + total_value: float # Cash + unrealized position value + cash_balance: float # Available USDC + total_exposure: float # Sum of open position cost bases + unrealized_pnl: float + realized_pnl: float + total_pnl: float + win_rate: float # Resolved positions only + total_trades: int + open_positions: int + daily_drawdown: float # Current day's drawdown % +``` + +### 5.4 Modified: `PredictionRun` +Add fields: +```python + order_id: Optional[str] = None # Linked trade order + position_id: Optional[str] = None # Linked position + calibrated_probability: Optional[float] = None # Post-calibration probability + consensus_runs: Optional[int] = None # Number of consensus runs + consensus_std: Optional[float] = None # Cross-run standard deviation +``` + 
+--- + +## 6. API Endpoints (New) + +| Method | Path | Description | +|--------|------|-------------| +| POST | `/api/trade/execute` | Execute trade from signal | +| GET | `/api/trade/orders` | List all orders | +| GET | `/api/trade/orders/` | Get order details | +| DELETE | `/api/trade/orders/` | Cancel order | +| GET | `/api/portfolio/summary` | Portfolio overview | +| GET | `/api/portfolio/positions` | Open positions | +| GET | `/api/portfolio/history` | Trade history | +| GET | `/api/portfolio/metrics` | Performance metrics | +| POST | `/api/backtest/run` | Start backtest | +| GET | `/api/backtest/run/` | Backtest results | +| GET | `/api/backtest/runs` | List backtests | +| POST | `/api/scanner/start` | Start market scanner | +| POST | `/api/scanner/stop` | Stop market scanner | +| GET | `/api/scanner/status` | Scanner state + queue | +| GET | `/api/status` | System health check | + +--- + +## 7. Frontend Routes (New) + +| Route | View | Description | +|-------|------|-------------| +| `/portfolio` | PortfolioView.vue | Positions, P&L, performance charts | +| `/backtest` | BacktestView.vue | Backtest runs, metrics, calibration | +| `/settings` | SettingsView.vue | Risk params, wallet, scanner config | + +--- + +## 8. Implementation Phases + +### Phase 1: Backtesting & Validation (2 weeks) +**Goal:** Prove signal quality before risking capital. +- [ ] Historical market data collector (resolved markets from Gamma API) +- [ ] Backtest pipeline (run prediction on historical markets, compare to resolution) +- [ ] Metrics computation (accuracy, Brier score, simulated ROI, calibration curve) +- [ ] Calibration service (Platt scaling on predicted probabilities) +- [ ] Backtest API endpoints + basic frontend view +- **Exit criteria:** 200+ markets backtested, documented accuracy & ROI + +### Phase 2: Trade Execution & Risk (2 weeks) +**Goal:** Safely execute trades with guardrails. 
+- [ ] Polymarket CLOB client (py-clob-client integration) +- [ ] Wallet manager (key storage, balance queries, USDC approvals) +- [ ] Risk manager (position sizing, exposure limits, drawdown tracking) +- [ ] Order lifecycle (create, monitor fill, cancel stale) +- [ ] Position tracker (open/close positions, P&L computation) +- [ ] Trade execution API endpoints +- **Exit criteria:** Successfully place and fill a $1 test trade on Polymarket + +### Phase 3: Automation & Monitoring (1 week) +**Goal:** Autonomous operation with visibility. +- [ ] Market scanner with filtering and priority queue +- [ ] Job scheduler (scan → predict → trade → monitor cycle) +- [ ] Portfolio dashboard (frontend view with positions, P&L, charts) +- [ ] Alert webhooks (Telegram/Discord notifications) +- [ ] Health check endpoint +- **Exit criteria:** System runs autonomously for 48h, placing trades and reporting results + +### Phase 4: Signal Optimization (ongoing) +**Goal:** Improve edge over time. +- [ ] Multi-run consensus (3 runs per market, average probabilities) +- [ ] Dual-platform simulation (Reddit + Twitter) +- [ ] Web research augmentation (inject real news into scenario) +- [ ] Agent diversity tuning +- [ ] A/B test simulation parameters (rounds, agent count, platform weights) +- **Exit criteria:** Measurable improvement in backtest ROI vs. Phase 1 baseline + +--- + +## 9. 
Configuration Summary + +All new config via `.env` (following existing pattern): + +```env +# === Trade Execution === +POLYMARKET_PRIVATE_KEY= +POLYMARKET_API_KEY= +POLYMARKET_API_SECRET= +POLYMARKET_API_PASSPHRASE= +POLYMARKET_CHAIN_ID=137 +POLYMARKET_FUNDER_ADDRESS= +PREDICTION_TRADE_ENABLED=false # Master kill switch (already exists) + +# === Risk Management === +RISK_MAX_POSITION_SIZE=50 +RISK_MAX_TOTAL_EXPOSURE=500 +RISK_MIN_EDGE=0.10 +RISK_MIN_CONFIDENCE=0.40 +RISK_KELLY_FRACTION=0.5 +RISK_MAX_DAILY_DRAWDOWN=0.20 +RISK_COOLDOWN_MINUTES=60 + +# === Market Scanner === +SCANNER_INTERVAL_HOURS=6 +SCANNER_MIN_VOLUME=50000 +SCANNER_MIN_LIQUIDITY=10000 +SCANNER_MIN_DAYS_TO_RESOLUTION=1 +SCANNER_MAX_DAYS_TO_RESOLUTION=30 +SCANNER_PRICE_MIN=0.10 +SCANNER_PRICE_MAX=0.90 +SCANNER_MAX_CONCURRENT_RUNS=3 + +# === Signal Quality === +PREDICTION_PLATFORMS=reddit,twitter +PREDICTION_PLATFORM_WEIGHTS=0.5,0.5 +PREDICTION_CONSENSUS_RUNS=3 +PREDICTION_REQUIRE_AGREEMENT=false + +# === Alerts === +ALERT_WEBHOOK_URL= +ALERT_ON_SIGNAL=true +ALERT_ON_TRADE=true +ALERT_ON_RESOLUTION=true +ALERT_ON_DRAWDOWN=true +``` + +--- + +## 10. Dependencies (New) + +| Package | Purpose | Version | +|---------|---------|---------| +| `py-clob-client` | Polymarket CLOB API client | latest | +| `web3` | Polygon blockchain interaction | ^6.0 | +| `eth-account` | Wallet key management | ^0.11 | +| `apscheduler` | Job scheduling | ^3.10 | +| `scikit-learn` | Probability calibration (isotonic regression) | ^1.3 | +| `requests` | HTTP (already present) | existing | + +--- + +## 11. Risk Considerations + +### Financial Risk +- **Start small:** $1-5 trades during Phase 2 validation +- **Half-Kelly sizing** prevents ruin from miscalibrated signals +- **Drawdown circuit breaker** auto-pauses trading during bad streaks +- **PREDICTION_TRADE_ENABLED=false** by default — explicit opt-in required + +### Technical Risk +- **LLM quality:** Signal quality is bottlenecked by local LLM (qwen2.5). 
Consider testing with stronger models via API (Claude, GPT-4) for higher-stakes markets +- **Simulation time:** Full pipeline takes 10-30 min per market. Scanner must prioritize +- **API rate limits:** Polymarket CLOB API has rate limits — implement backoff +- **Network reliability:** Polygon RPC can be flaky — use redundant RPC endpoints + +### Regulatory Risk +- Polymarket operates under different regulatory frameworks by jurisdiction +- This system is for research and personal use +- Users are responsible for compliance with their local laws + +--- + +## 12. Success Metrics + +| Metric | Target (Phase 1) | Target (Phase 4) | +|--------|------------------|------------------| +| Backtest accuracy (binary) | >55% | >60% | +| Brier score | <0.25 | <0.22 | +| Simulated ROI (backtest) | >5% | >15% | +| Calibration RMSE | <0.15 | <0.10 | +| Avg edge on traded markets | >8% | >12% | +| Win rate (live trades) | N/A | >55% | +| Max drawdown | N/A | <25% | +| Markets scanned per day | N/A | 20+ | +| Avg pipeline time per market | <30min | <20min | + +--- + +## 13. 
File Structure (New Files) + +``` +backend/app/ +├── api/ +│ ├── backtest.py # Backtest API endpoints +│ ├── portfolio.py # Portfolio/P&L API endpoints +│ └── trade.py # Trade execution API endpoints +├── models/ +│ ├── backtest.py # BacktestRun, BacktestResult +│ └── position.py # Order, Position, PortfolioState +├── services/ +│ ├── backtester.py # Backtest orchestration +│ ├── calibrator.py # Probability calibration +│ ├── market_scanner.py # Market filtering & scheduling +│ ├── polymarket_trader.py # CLOB order execution +│ ├── portfolio_tracker.py # Position & P&L tracking +│ ├── risk_manager.py # Sizing, limits, drawdown +│ ├── scheduler.py # Job scheduling +│ ├── trade_executor.py # Signal → order pipeline +│ └── wallet_manager.py # Key management, balances +frontend/src/ +├── api/ +│ ├── backtest.js # Backtest API client +│ └── portfolio.js # Portfolio API client +├── views/ +│ ├── BacktestView.vue # Backtest dashboard +│ ├── PortfolioView.vue # Portfolio dashboard +│ └── SettingsView.vue # Configuration UI +``` diff --git a/docs/designs/polymarket-monetization-expansion.md b/docs/designs/polymarket-monetization-expansion.md new file mode 100644 index 0000000..db51ec2 --- /dev/null +++ b/docs/designs/polymarket-monetization-expansion.md @@ -0,0 +1,116 @@ +--- +status: ACTIVE +--- +# CEO Plan: Polymarket Monetization Engine +Generated by /plan-ceo-review on 2026-03-18 +Branch: main | Mode: SCOPE EXPANSION +Repo: nikmcfly/MiroFish-Offline + +## Vision + +### 10x Check +Transform MiroFish from a single-strategy signal prototype into a **multi-strategy prediction market alpha platform**: +- Multiple signal sources (LLM debate + news sentiment + market microstructure + cross-market correlation) ensembled with learned weights +- Multi-market coverage (Polymarket + Kalshi + Manifold Markets) +- Self-improving feedback loop that tracks per-category, per-persona, per-strategy accuracy +- Signal marketplace potential — publish signals as a service +- Paper trading 
mode for zero-risk validation before real capital deployment + +### Platonic Ideal +A real-time conviction dashboard where the system communicates its own edge, limitations, and decision reasoning transparently. The user sees a heat map of high-confidence opportunities, clicks into a full "conviction trail" for any signal, and trusts the system because it shows honest accuracy metrics and knows its own blind spots. The emotional arc: curiosity -> understanding -> trust -> confidence. + +## Scope Decisions + +| # | Proposal | Effort | Decision | Reasoning | +|---|----------|--------|----------|-----------| +| 1 | Paper Trading Mode | M | ACCEPTED | De-risks everything else. Validates full pipeline without financial risk. | +| 2 | Self-Improving Feedback Loop | L | ACCEPTED | Compounds intelligence over time. Per-category/persona accuracy tracking. | +| 3 | Conviction Trail & Audit UI | M | ACCEPTED | Trust and transparency. Critical for debugging bad trades. | +| 4 | Multi-Strategy Signal Ensemble | XL | ACCEPTED | Biggest differentiation. News sentiment + microstructure + cross-market. | +| 5 | Category Heatmap + Signal Badges | S | ACCEPTED | Low-effort, high-delight. Visual edge communication. | +| 6 | Multi-Market Coverage (Kalshi + Manifold) | L | ACCEPTED | Manifold = free calibration. Kalshi = US-regulated. Cross-market arb. 
| + +## Accepted Scope (added to this plan) +- Paper trading / shadow mode (phantom P&L, no blockchain interaction) +- Self-improving feedback loop (per-category, per-persona, per-strategy accuracy tracking + auto-adjustment) +- Conviction trail UI (decision tree from signal -> debate -> evidence -> probability) +- Multi-strategy signal ensemble framework (debate + news + microstructure + cross-market) +- Category performance heatmap + signal quality badges (gold/silver/bronze) +- Multi-market coverage (Polymarket + Kalshi + Manifold Markets) + +## Architecture Decisions + +| Decision | Choice | Rationale | +|---|---|---| +| Storage | SQLite (WAL) for trading data, Neo4j for knowledge graph | Concurrent access, relational queries, zero-config | +| Market abstraction | MarketAdapter interface | Platform-agnostic upstream code | +| API security | Bearer token auth on financial endpoints | Prevent unauthorized trade execution | +| Key storage | Encrypted keyfile (AES-256) | Better than plain .env for private keys | +| Error handling | Auto-pause on trade execution errors | Conservative for real money | +| Stale prices | Re-check live price at execution time | Prevent trading on outdated edge | +| Tests | Full P0+P1 suite (~50-70 test cases) | Financial code requires comprehensive tests | +| Observability | SQLite metrics + file logs + webhooks | Local-first, zero new infrastructure | +| Dashboard UX | Single-page with tabbed sections | Glanceable trading dashboard | + +## System Architecture + +``` ++---------------------------------------------------------------------------+ +| MiroFish Alpha Platform | +| | +| +-------------------------------------------------------+ | +| | SIGNAL STRATEGIES (Ensemble) | | +| | +----------+ +----------+ +----------+ +--------+ | | +| | | Debate | | News | | Market | | Cross- | | | +| | | Simulator| | Sentiment| | Micro- | | Market | | | +| | | (exists) | | (new) | | structure| | Correl.| | | +| | +----+-----+ +----+-----+ 
+----+-----+ +---+----+ | | +| | +------+------+------+-----+ | | | +| | v v | | | +| | +---------------------+ | | | +| | | Ensemble Model |<-------------+ | | +| | | (learned weights) | | | +| | +---------+-----------+ | | +| +-------------------|------------------------------------+ | +| v | +| +----------+ +----------+ +----------+ +----------+ | +| | Market |->| Risk |->| Trade |->| Position | | +| | Scanner | | Manager | | Executor | | Tracker | | +| +----------+ +----------+ +----+-----+ +----------+ | +| | | | | +| | +----+----+ | | +| | | Paper | Live | | +| | | vs Live | | | +| | +----+----+ | | +| | +----+--------------+ | +| | v v | +| +----------+ +-----------------+ +-----------------+ | +| | Feedback | | Market Adapters | | Portfolio/P&L | | +| | Loop | | +----++----++--+| | + Conviction | | +| | (learns) | | |Poly||Kal.||MF|| | Trail UI | | +| +----------+ | +----++----++--+| | + Heatmap | | +| +-----------------+ +-----------------+ | +| | | ++---------------------------------------------------------------------------+ + v + Polymarket CLOB / Kalshi API / Manifold API +``` + +## Implementation Phases + +1. SQLite schema + Backtesting + Paper Trading (safe, no real money) +2. Market Adapters + Risk Manager + Encrypted Keyfile (still paper mode) +3. Enable live Polymarket ($1 test trades) +4. Multi-strategy ensemble + Kalshi/Manifold +5. 
Feedback loop + heatmap + conviction trail + +## Deferred to TODOS.md +- JSON->SQLite migration for historical prediction runs (P2) +- CI/CD pipeline via GitHub Actions (P2) + +## Not In Scope +- Signal marketplace / publishing signals as a service +- LLM fine-tuning on prediction market data +- Multi-user support / user accounts +- Mobile app +- Cloud deployment / Docker compose diff --git a/docs/progress.md b/docs/progress.md index b07423f..45791ad 100644 --- a/docs/progress.md +++ b/docs/progress.md @@ -39,6 +39,17 @@ Migration from Zep Cloud + DashScope (Alibaba Qwen API) to local Neo4j CE + Olla ## PHASE 7 — Publish (TODO) - **TASK-019**: Rename to MiroFish-Offline, add AGPL-3.0 license, publish to GitHub +## PHASE 8 — Prediction Markets + Backtesting (COMPLETE) +- **Prediction Engine**: Polymarket client, scenario generator, LLM debate simulator, calibrated signal generation +- **Backtesting**: runs pipeline against resolved markets, computes accuracy/Brier/ROI/Sharpe/drawdown/calibration RMSE +- **Paper Trading**: simulated order execution with 1-2% slippage, positions tracked in SQLite +- **Calibration**: Platt scaling via LogisticRegression, HMAC-signed persistence +- **SQLite Storage**: SQLAlchemy Core, WAL mode, FK enforcement, 4 tables (backtest_runs, backtest_results, paper_orders, paper_positions) +- **API**: POST /api/backtest/run, GET /run/:id, GET /runs with DB-level concurrent guard +- **Frontend**: BacktestView (metrics grid, sortable results table, live polling), PredictionView (market browser, signal display) +- **Tests**: 62 tests covering all new code paths +- **Cleanup**: deleted dead sentiment_analyzer.py, translated Chinese comments, extracted calibration config + ## Files Created (New) | File | Replaces | Status | |------|----------|--------| diff --git a/frontend/src/api/backtest.js b/frontend/src/api/backtest.js new file mode 100644 index 0000000..c57614b --- /dev/null +++ b/frontend/src/api/backtest.js @@ -0,0 +1,16 @@ +import service 
from './index' + +// Start a backtest run +export const startBacktest = (numMarkets = 50, configOverrides = {}) => { + return service.post('/api/backtest/run', { num_markets: numMarkets, config_overrides: configOverrides }) +} + +// Get a specific backtest run +export const getBacktestRun = (runId) => { + return service.get(`/api/backtest/run/${runId}`) +} + +// List all backtest runs +export const listBacktests = () => { + return service.get('/api/backtest/runs') +} diff --git a/frontend/src/api/prediction.js b/frontend/src/api/prediction.js new file mode 100644 index 0000000..b7e5588 --- /dev/null +++ b/frontend/src/api/prediction.js @@ -0,0 +1,35 @@ +import service, { requestWithRetry } from './index' + +// Fetch active markets from Polymarket +export const fetchMarkets = (params = {}) => { + return service.get('/api/prediction/markets', { params }) +} + +// Start a prediction run +export const startPredictionRun = (market) => { + return requestWithRetry( + () => service.post('/api/prediction/run', { market }), + 3, + 1000 + ) +} + +// Get prediction run status +export const getRunStatus = (runId) => { + return service.get(`/api/prediction/run/${runId}/status`) +} + +// Get full prediction run details +export const getRun = (runId) => { + return service.get(`/api/prediction/run/${runId}`) +} + +// List all prediction runs +export const listRuns = (limit = 50) => { + return service.get('/api/prediction/runs', { params: { limit } }) +} + +// Delete a prediction run +export const deleteRun = (runId) => { + return service.delete(`/api/prediction/run/${runId}`) +} diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js index 62d2320..28e6e9b 100644 --- a/frontend/src/router/index.js +++ b/frontend/src/router/index.js @@ -5,6 +5,8 @@ import SimulationView from '../views/SimulationView.vue' import SimulationRunView from '../views/SimulationRunView.vue' import ReportView from '../views/ReportView.vue' import InteractionView from 
'../views/InteractionView.vue' +import PredictionView from '../views/PredictionView.vue' +import BacktestView from '../views/BacktestView.vue' const routes = [ { @@ -12,6 +14,16 @@ const routes = [ name: 'Home', component: Home }, + { + path: '/prediction', + name: 'Prediction', + component: PredictionView + }, + { + path: '/backtest', + name: 'Backtest', + component: BacktestView + }, { path: '/process/:projectId', name: 'Process', diff --git a/frontend/src/views/BacktestView.vue b/frontend/src/views/BacktestView.vue new file mode 100644 index 0000000..857b08f --- /dev/null +++ b/frontend/src/views/BacktestView.vue @@ -0,0 +1,1207 @@ + + + + + diff --git a/frontend/src/views/Home.vue b/frontend/src/views/Home.vue index 36bb714..85cc8e0 100644 --- a/frontend/src/views/Home.vue +++ b/frontend/src/views/Home.vue @@ -4,6 +4,8 @@