RyanmartinCRE · RyanmartinCRE · Oct 9, 2025 · Oct 9, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,25 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+
+# Virtual environments
+.env
+.venv
+venv/
+ENV/
+env/
+
+# Distribution / packaging
+*.egg-info/
+*.egg
+*.zip
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Logs and artifacts
+data/raw/*.tmp
+data/artifacts/*.log
diff --git a/cerberus/LICENSE b/cerberus/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Project Cerberus
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/cerberus/README.md b/cerberus/README.md
@@ -0,0 +1,103 @@
+# Project Cerberus
+
+Project Cerberus is a research-focused algorithmic trading stack that demonstrates a
+survivorship-aware, look-ahead-safe equity momentum + profitability strategy with
+institutional-style plumbing. The repository contains utilities for data acquisition,
+backtesting, risk management, paper trading, and interactive monitoring via Streamlit.
+
+> **Disclaimer:** This project is for research and educational purposes only. It does
+> not constitute investment advice, an offer, or solicitation to buy or sell securities.
+
+## Features
+- Survivorship-aware S&P 500 universe with 90-day-lagged fundamentals in free mode.
+- Monthly momentum (12-1 and 6-1 variants) combined with profitability filters.
+- Randomized month-end rebalances, tiered transaction costs, capacity constraints,
+  volatility-regime sizing, tax-drag toggle, and risk kill switches.
+- Robust evaluation metrics including Deflated Sharpe Ratio and CSCV-style PBO.
+- Paper trading integration with Alpaca and fill-quality logging.
+- Streamlit dashboard with performance visualization and alerting.
+- Clean abstraction between the strategy engine and data providers, so a paid data
+  source (Tiingo/Sharadar) can be swapped in without touching strategy logic.
+
+## Installation
+
+```bash
+python -m venv .venv
+# macOS/Linux
+source .venv/bin/activate
+# Windows PowerShell
+.venv\Scripts\Activate.ps1
+pip install -r requirements.txt
+```
+
+## Configuration
+
+All runtime configuration lives in `conf/cerberus.yaml`. Key sections:
+
+- `data`: Source selection (`free`, `tiingo`, `sharadar`), start date, caching options,
+  and optional ticker overrides for fast iteration.
+- `portfolio`: Number of holdings and per-name weight and ADV caps.
+- `rebalance`: Random rebalance window and reproducibility seed.
+- `signals`: Momentum lookback/skip structure and robustness grids for PBO.
+- `risk`: Regime filters using VIX/realized volatility.
+- `costs`: Tiered transaction cost settings by %ADV bucket.
+- `tax`: Short-term capital gains drag toggle and rate.
+- `evaluation`: Required performance gates and PBO sampling density.
+- `paper`: Alpaca paper trading configuration.
+- `dashboard`: Alert thresholds for drawdown and slippage.
+
+## Commands
+
+All commands are executed from the project root (the `cerberus/` directory that contains `src/` and `conf/`):
+
+```bash
+python -m src.cli load          # Download data for the configured provider
+python -m src.cli backtest      # Run the Cerberus v2 backtest and write artifacts
+python -m src.cli dual          # Run the ETF dual-momentum demo
+python -m src.cli paper --date YYYY-MM-DD  # Execute a paper-trade rebalance via Alpaca
+streamlit run src/dashboard.py  # Launch the monitoring dashboard
+```
+
+## Data Model
+
+The data loaders return five aligned panels:
+
+1. `adj_close_daily` – Adjusted close prices by trading day and ticker.
+2. `volume_daily` – Corresponding daily share volume.
+3. `gp_monthly_lag90` – Gross profit per ticker, lagged 90 days and sampled monthly.
+4. `assets_monthly_lag90` – Total assets with the same lag/alignment.
+5. `sp500_membership_monthly` – Boolean panel of S&P 500 membership by month end.
+
+These panels are persisted to CSV in `data/raw/` and consumed by the strategy engine.
+
+## Performance Gates
+
+Backtests are expected to meet the following minimums before capital deployment:
+
+- Annualized Sharpe Ratio ≥ 1.0
+- Calmar Ratio ≥ 0.5
+- Deflated Sharpe Ratio > 0
+- CSCV-style Probability of Backtest Overfitting (PBO) < 0.5
+
+If the backtest fails to meet the Sharpe or Calmar thresholds the CLI will exit with
+an error to prevent accidental promotion to paper/live trading.
+
+## Swapping Data Providers
+
+The strategy interacts with the data layer through a stable interface returning the
+five panels listed above. To upgrade to Tiingo or Sharadar, implement the TODOs in
+`src/data_tiingo.py` or `src/data_sharadar.py` and update `data.mode` in the config.
+No changes are required in the strategy or evaluation code.
+
+## Known Limitations
+
+- Free-mode fundamentals rely on Yahoo Finance reporting dates, which may differ from
+  official SEC filing dates. A 90-day lag is applied as a conservative approximation.
+- Historical S&P 500 membership from community-maintained sources may include small
+  errors. Validation guards ensure monthly membership counts remain within reasonable
+  bounds, but institutional users should license high-quality datasets for production.
+- Yahoo Finance occasionally rate-limits requests; caching is enabled by default to
+  avoid repeated downloads.
+
+Despite these limitations, the free stack provides a solid foundation for iterative
+research before investing in commercial data feeds.
diff --git a/cerberus/conf/cerberus.yaml b/cerberus/conf/cerberus.yaml
@@ -0,0 +1,42 @@
+data:  # data source selection, start date, caching options, ticker overrides
+  mode: "free"  # one of "free", "tiingo", "sharadar"
+  start: "2000-01-01"
+  sp500_changes_csv: "https://raw.githubusercontent.com/fja05680/sp500/master/S%26P%20500%20Historical%20Components%20%26%20Changes(08-15-2024).csv"
+  cache_raw: true
+  cache_interim: true
+  tickers_override: []  # optional ticker overrides for fast iteration
+portfolio:  # number of holdings plus per-name weight and ADV caps
+  top_n: 30
+  max_pos_weight: 0.10
+  adv_cap_pct: 0.05
+rebalance:  # random rebalance window and reproducibility seed
+  window_last_days: 3
+  seed: 42
+signals:  # momentum lookback/skip structure and robustness grids for PBO
+  lookback_months: 12
+  skip_months: 1
+  robustness_lookbacks: [6, 12]
+  robustness_topn: [20, 25, 30]
+risk:  # regime filters using VIX / realized volatility
+  use_vix_filter: true
+  vix_threshold: 30
+  realized_vol_threshold: 0.25
+costs:  # tiered transaction cost settings by %ADV bucket
+  small_threshold: 0.01
+  med_threshold: 0.02
+  rate_small_bps: 10
+  rate_med_bps: 20
+  rate_large_bps: 50
+tax:  # short-term capital gains drag toggle and rate
+  apply_tax_drag: false
+  stcg_rate: 0.35
+evaluation:  # required performance gates and PBO sampling density
+  pbo_trials: 10
+  min_sharpe: 1.0  # README gate: annualized Sharpe ratio >= 1.0
+  min_calmar: 0.5  # README gate: Calmar ratio >= 0.5
+paper:  # Alpaca paper trading configuration
+  alpaca_base_url: "https://paper-api.alpaca.markets"
+  symbols_blocklist: []
+dashboard:  # alert thresholds for drawdown and slippage
+  dd_alert: -0.20
+  slip_alert_multiplier: 4
diff --git a/cerberus/pyproject.toml b/cerberus/pyproject.toml
@@ -0,0 +1,15 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.black]
+line-length = 100
+target-version = ["py310"]
+
+[tool.ruff]
+line-length = 100
+lint.select = ["E", "F", "B", "I"]  # linter options belong under tool.ruff.lint
+lint.ignore = ["E501"]  # top-level select/ignore are deprecated in current Ruff
+
+[tool.pytest.ini_options]
+addopts = "-q"
diff --git a/cerberus/requirements.txt b/cerberus/requirements.txt
@@ -0,0 +1,11 @@
+pandas>=2.0
+numpy>=1.24
+yfinance>=0.2
+requests>=2.31
+beautifulsoup4>=4.12
+lxml>=4.9
+streamlit>=1.31
+pyyaml>=6.0
+typer>=0.9
+scipy>=1.10
+pytest>=7.4
diff --git a/cerberus/src/__init__.py b/cerberus/src/__init__.py
@@ -0,0 +1,64 @@
+"""Top-level package for Project Cerberus.
+
+This module exposes convenience attributes so that downstream code can
+import the major Cerberus components without drilling into the package
+structure. The heavy imports are intentionally avoided at module import
+so that clients can cherry-pick submodules on demand without incurring
+side effects (e.g., network calls during configuration loading).
+"""
+
+from __future__ import annotations
+
+from importlib import import_module
+from typing import Any
+
+__all__ = [
+    "get_module",
+    "AVAILABLE_MODULES",
+]
+
+# Registry of friendly names -> absolute dotted import paths used by :func:`get_module`.
+AVAILABLE_MODULES = {  # NOTE: paths are written against the ``cerberus.src`` package name
+    "cli": "cerberus.src.cli",
+    "strategy": "cerberus.src.strategy",
+    "metrics": "cerberus.src.metrics",
+    "data_free": "cerberus.src.data_free",
+    "data_tiingo": "cerberus.src.data_tiingo",
+    "data_sharadar": "cerberus.src.data_sharadar",
+    "dual_momentum": "cerberus.src.dual_momentum",
+    "paper_alpaca": "cerberus.src.paper_alpaca",
+    "utils.io": "cerberus.src.utils.io",
+    "utils.dates": "cerberus.src.utils.dates",
+    "utils.logging": "cerberus.src.utils.logging",
+}
+
+
+def get_module(name: str) -> Any:
+    """Dynamically import and return a Cerberus submodule.
+
+    Registry paths are rebased onto the name this package was imported under.
+
+    Parameters
+    ----------
+    name:
+        Friendly identifier defined in :data:`AVAILABLE_MODULES`.
+
+    Returns
+    -------
+    Any
+        The imported module object.
+
+    Raises
+    ------
+    KeyError
+        If *name* is not registered.
+    ImportError
+        If the underlying module cannot be imported.
+    """
+    if name not in AVAILABLE_MODULES:
+        known = sorted(AVAILABLE_MODULES)
+        raise KeyError(f"Unknown Cerberus module '{name}'. Available keys: {known}")
+    root, path = "cerberus.src", AVAILABLE_MODULES[name]
+    if __package__ and __package__ != root and path.startswith(root + "."):
+        path = __package__ + path[len(root) :]  # e.g. "src.cli" under `python -m src.cli`
+    return import_module(path)