diff --git a/backend/app/__init__.py b/backend/app/__init__.py index e584f2f..bdb341e 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -80,11 +80,22 @@ def log_response(response): app.register_blueprint(simulation_bp, url_prefix='/api/simulation') app.register_blueprint(report_bp, url_prefix='/api/report') - # Health check + # Register ops/health blueprints (Prometheus monitoring layer) + from .routes import health_bp, ops_bp + app.register_blueprint(health_bp) + app.register_blueprint(ops_bp) + + # Legacy simple health route (keep for backward compatibility) @app.route('/health') def health(): return {'status': 'ok', 'service': 'MiroFish-Offline Backend'} + # Run startup preflight checks (logs warnings; never blocks startup) + from .preflight import run_preflight_checks + with app.app_context(): + preflight_results = run_preflight_checks() + app.extensions['preflight'] = preflight_results + if should_log_startup: logger.info("MiroFish-Offline Backend startup complete") diff --git a/backend/app/api/report.py b/backend/app/api/report.py index c25c895..eb30084 100644 --- a/backend/app/api/report.py +++ b/backend/app/api/report.py @@ -6,11 +6,12 @@ import os import traceback import threading -from flask import request, jsonify, send_file +from flask import request, jsonify, send_file, current_app from . 
import report_bp from ..config import Config from ..services.report_agent import ReportAgent, ReportManager, ReportStatus +from ..services.graph_tools import GraphToolsService from ..services.simulation_manager import SimulationManager from ..models.project import ProjectManager from ..models.task import TaskManager, TaskStatus @@ -109,6 +110,14 @@ def generate_report(): import uuid report_id = f"report_{uuid.uuid4().hex[:12]}" + # Capture GraphStorage before background thread starts + storage = current_app.extensions.get('neo4j_storage') + if not storage: + return jsonify({ + "success": False, + "error": "GraphStorage not initialized - check Neo4j connection" + }), 500 + # Create async task task_manager = TaskManager() task_id = task_manager.create_task( @@ -130,11 +139,13 @@ def run_generate(): message="Initializing Report Agent..." ) - # Create Report Agent + # Create Report Agent with injected graph tools + tools = GraphToolsService(storage=storage) agent = ReportAgent( graph_id=graph_id, simulation_id=simulation_id, - simulation_requirement=simulation_requirement + simulation_requirement=simulation_requirement, + graph_tools=tools ) # Progress callback diff --git a/backend/app/config.py b/backend/app/config.py index de706ca..d9a3178 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -31,6 +31,7 @@ class Config: LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'http://localhost:11434/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'qwen2.5:32b') + NER_MODEL = os.environ.get('NER_MODEL', 'minimax-m2.7:cloud') # Neo4j configuration NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687') diff --git a/backend/app/preflight.py b/backend/app/preflight.py new file mode 100644 index 0000000..ef38402 --- /dev/null +++ b/backend/app/preflight.py @@ -0,0 +1,147 @@ +""" +MiroFish Startup Preflight Checks +Run on app startup to detect missing services/config early. 
+Logs warnings but never blocks startup. +""" + +import os +import time +import requests +import logging + +from .config import Config + +logger = logging.getLogger('mirofish.preflight') + +# Required environment variable names (as strings, checked against os.environ) +_REQUIRED_ENV_VARS = [ + 'NEO4J_URI', + 'NEO4J_PASSWORD', + 'LLM_BASE_URL', + 'LLM_MODEL_NAME', +] + + +# --------------------------------------------------------------------------- +# Individual checks +# --------------------------------------------------------------------------- + +def _check_neo4j() -> dict: + """Verify Neo4j HTTP endpoint is reachable.""" + try: + uri = Config.NEO4J_URI or 'bolt://localhost:7687' + http_uri = uri.replace('bolt://', 'http://').replace(':7687', ':7474') + r = requests.get(http_uri, timeout=5) + if r.status_code == 200: + return {"ok": True, "url": http_uri} + return {"ok": False, "url": http_uri, "error": f"HTTP {r.status_code}"} + except Exception as e: + return {"ok": False, "error": str(e)} + + +def _check_ollama() -> dict: + """Verify Ollama /api/tags endpoint is reachable.""" + try: + base = (Config.LLM_BASE_URL or 'http://localhost:11434/v1').rstrip('/') + base = base.replace('host.docker.internal', 'localhost') + if base.endswith('/v1'): + base = base[:-3] + url = f"{base}/api/tags" + r = requests.get(url, timeout=5) + if r.status_code == 200: + return {"ok": True, "url": url} + return {"ok": False, "url": url, "error": f"HTTP {r.status_code}"} + except Exception as e: + return {"ok": False, "error": str(e)} + + +def _check_required_model() -> dict: + """Verify the configured LLM model is present in Ollama.""" + try: + base = (Config.LLM_BASE_URL or 'http://localhost:11434/v1').rstrip('/') + base = base.replace('host.docker.internal', 'localhost') + if base.endswith('/v1'): + base = base[:-3] + r = requests.get(f"{base}/api/tags", timeout=5) + models = [m['name'] for m in r.json().get('models', [])] + required = Config.LLM_MODEL_NAME + ok = required in 
models + result = {"ok": ok, "required": required} + if not ok: + result["available"] = models + return result + except Exception as e: + return {"ok": False, "error": str(e)} + + +def _check_uploads_dir() -> dict: + """Verify uploads directory exists and is writable.""" + folder = os.path.abspath(Config.UPLOAD_FOLDER) + try: + os.makedirs(folder, exist_ok=True) + test_path = os.path.join(folder, '.preflight') + with open(test_path, 'w') as f: + f.write('ok') + os.remove(test_path) + return {"ok": True, "path": folder} + except Exception as e: + return {"ok": False, "path": folder, "error": str(e)} + + +def _check_required_env_vars() -> dict: + """Check that required environment variables are set.""" + missing = [] + for var in _REQUIRED_ENV_VARS: + # Accept values set either via os.environ or via Config class attributes + env_val = os.environ.get(var) + config_attr = var.replace('LLM_BASE_URL', 'LLM_BASE_URL') \ + .replace('LLM_MODEL_NAME', 'LLM_MODEL_NAME') + config_val = getattr(Config, config_attr, None) + if not env_val and not config_val: + missing.append(var) + return {"ok": len(missing) == 0, "missing": missing} + + +# --------------------------------------------------------------------------- +# Main entry point +# --------------------------------------------------------------------------- + +def run_preflight_checks() -> dict: + """ + Run all startup preflight checks. + + Logs warnings for any failures but does NOT prevent app startup. + Returns a structured dict that the app can store and expose via /api/health. 
+ + Example return value:: + + { + "neo4j": {"ok": True, "url": "http://localhost:7474"}, + "ollama": {"ok": False, "error": "Connection refused"}, + "required_model": {"ok": True, "required": "qwen2.5:32b"}, + "uploads_dir": {"ok": True, "path": "/app/uploads"}, + "env_vars": {"ok": True, "missing": []}, + } + """ + results: dict = {} + + logger.info("Running startup preflight checks...") + + results['neo4j'] = _check_neo4j() + results['ollama'] = _check_ollama() + results['required_model'] = _check_required_model() + results['uploads_dir'] = _check_uploads_dir() + results['env_vars'] = _check_required_env_vars() + + failed = [k for k, v in results.items() if not v.get('ok', False)] + + if not failed: + logger.info("✅ Preflight checks passed — all systems nominal") + else: + logger.warning("⚠️ Preflight warnings: %s", failed) + for key in failed: + detail = results[key] + error_msg = detail.get('error') or detail.get('missing') or 'check failed' + logger.warning(" %s: %s", key, error_msg) + + return results diff --git a/backend/app/routes/__init__.py b/backend/app/routes/__init__.py new file mode 100644 index 0000000..8e3e799 --- /dev/null +++ b/backend/app/routes/__init__.py @@ -0,0 +1,9 @@ +""" +MiroFish Routes Package +Ops and health endpoints for Prometheus monitoring. +""" + +from .health import health_bp +from .ops import ops_bp + +__all__ = ['health_bp', 'ops_bp'] diff --git a/backend/app/routes/health.py b/backend/app/routes/health.py new file mode 100644 index 0000000..076d2cb --- /dev/null +++ b/backend/app/routes/health.py @@ -0,0 +1,154 @@ +""" +MiroFish Health Endpoints +Prometheus-ready health checks for Neo4j, Ollama, and uploads directory. 
+""" + +import os +import time +import requests +from datetime import datetime +from flask import Blueprint, jsonify, current_app + +from ..config import Config +from ..utils.logger import get_logger + +health_bp = Blueprint('health', __name__) +logger = get_logger('mirofish.health') + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +def _check_neo4j() -> tuple: + """Ping the Neo4j HTTP browser endpoint. Returns (ok: bool, latency_ms: int).""" + try: + # Convert bolt://host:7687 → http://host:7474 + uri = Config.NEO4J_URI or 'bolt://localhost:7687' + http_uri = uri.replace('bolt://', 'http://').replace(':7687', ':7474') + start = time.monotonic() + r = requests.get(http_uri, timeout=5) + ms = int((time.monotonic() - start) * 1000) + return r.status_code == 200, ms + except Exception as e: + logger.debug(f"Neo4j health check failed: {e}") + return False, -1 + + +def _check_ollama() -> tuple: + """Ping Ollama /api/tags. 
Returns (ok: bool, latency_ms: int)."""
+    try:
+        base = (Config.LLM_BASE_URL or 'http://localhost:11434/v1').rstrip('/')
+        # Normalise docker-internal hostname when running outside container
+        base = base.replace('host.docker.internal', 'localhost')
+        # If base ends with /v1, strip it
+        if base.endswith('/v1'):
+            base = base[:-3]
+        start = time.monotonic()
+        r = requests.get(f"{base}/api/tags", timeout=5)
+        ms = int((time.monotonic() - start) * 1000)
+        return r.status_code == 200, ms
+    except Exception as e:
+        logger.debug(f"Ollama health check failed: {e}")
+        return False, -1
+
+
+def _check_uploads_dir() -> bool:
+    """Verify the uploads directory exists and is writable."""
+    try:
+        folder = os.path.abspath(Config.UPLOAD_FOLDER)
+        os.makedirs(folder, exist_ok=True)
+        test_path = os.path.join(folder, '.health_check')
+        with open(test_path, 'w') as f:
+            f.write('ok')
+        os.remove(test_path)
+        return True
+    except Exception as e:
+        logger.debug(f"Uploads dir check failed: {e}")
+        return False
+
+
+# ---------------------------------------------------------------------------
+# Routes
+# ---------------------------------------------------------------------------
+
+@health_bp.route('/api/health', methods=['GET'])
+def health_check():
+    """
+    Core health check — returns status of all stack components.
+
+    Response schema::
+
+        {
+            "status": "healthy" | "degraded" | "unhealthy",
+            "timestamp": "<ISO8601>",
+            "components": {
+                "backend": {"status": "up", "latency_ms": 0},
+                "neo4j": {"status": "up", "latency_ms": 42},
+                "ollama": {"status": "down", "latency_ms": -1},
+                "uploads_dir": {"status": "ok", "writable": true}
+            }
+        }
+
+    HTTP 200 when healthy/degraded, 503 when unhealthy.
+ """ + components = {} + overall = "healthy" + + # Neo4j — critical; down → unhealthy + neo4j_ok, neo4j_ms = _check_neo4j() + components["neo4j"] = {"status": "up" if neo4j_ok else "down", "latency_ms": neo4j_ms} + if not neo4j_ok: + overall = "unhealthy" + + # Ollama — important but not fatal; down → degraded (unless already unhealthy) + ollama_ok, ollama_ms = _check_ollama() + components["ollama"] = {"status": "up" if ollama_ok else "down", "latency_ms": ollama_ms} + if not ollama_ok and overall == "healthy": + overall = "degraded" + + # Uploads dir — needed for artifacts; unavailable → degraded + uploads_ok = _check_uploads_dir() + components["uploads_dir"] = {"status": "ok" if uploads_ok else "error", "writable": uploads_ok} + if not uploads_ok and overall == "healthy": + overall = "degraded" + + # Backend is trivially up (we got here) + components["backend"] = {"status": "up", "latency_ms": 0} + + # Surface any stored preflight results + preflight = current_app.extensions.get('preflight') + + body = { + "status": overall, + "timestamp": datetime.now().isoformat(), + "components": components, + } + if preflight is not None: + body["preflight"] = preflight + + http_code = 503 if overall == "unhealthy" else 200 + return jsonify(body), http_code + + +@health_bp.route('/api/health/ready', methods=['GET']) +def readiness_check(): + """ + Readiness check — are ALL components up and ready for simulation? + + Prometheus should call this before starting any simulation run. + Returns HTTP 200 when ready, 503 when not. 
+ """ + neo4j_ok, _ = _check_neo4j() + ollama_ok, _ = _check_ollama() + uploads_ok = _check_uploads_dir() + + ready = neo4j_ok and ollama_ok and uploads_ok + + return jsonify({ + "ready": ready, + "timestamp": datetime.now().isoformat(), + "neo4j": neo4j_ok, + "ollama": ollama_ok, + "uploads": uploads_ok, + }), 200 if ready else 503 diff --git a/backend/app/routes/ops.py b/backend/app/routes/ops.py new file mode 100644 index 0000000..652f8ae --- /dev/null +++ b/backend/app/routes/ops.py @@ -0,0 +1,114 @@ +""" +MiroFish Ops Endpoints +Prometheus-friendly polling endpoints for simulation lifecycle monitoring. +""" + +from datetime import datetime +from flask import Blueprint, jsonify + +from ..utils.logger import get_logger +from ..services.simulation_runner import SimulationRunner, RunnerStatus + +ops_bp = Blueprint('ops', __name__) +logger = get_logger('mirofish.ops') + +# Terminal runner states +_TERMINAL_STATUSES = {RunnerStatus.COMPLETED, RunnerStatus.STOPPED, RunnerStatus.FAILED} + +# Stall threshold: no state update in this many seconds → stalled +_STALL_THRESHOLD_SECONDS = 600 # 10 minutes + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _is_stalled(run_state) -> bool: + """ + Return True if simulation is RUNNING but hasn't updated in 10+ minutes. 
+    """
+    if run_state.runner_status != RunnerStatus.RUNNING:
+        return False
+    if not run_state.updated_at:
+        return False
+    try:
+        last = datetime.fromisoformat(run_state.updated_at)
+        elapsed = (datetime.now() - last).total_seconds()
+        return elapsed > _STALL_THRESHOLD_SECONDS
+    except Exception:
+        return False
+
+
+def _percent_complete(run_state) -> float:
+    total = run_state.total_rounds or 0
+    current = run_state.current_round or 0
+    if total <= 0:
+        return 0.0
+    return round(min(current / total * 100, 100.0), 2)
+
+
+# ---------------------------------------------------------------------------
+# Routes
+# ---------------------------------------------------------------------------
+
+@ops_bp.route('/api/ops/simulation/<simulation_id>/poll', methods=['GET'])
+def poll_simulation(simulation_id: str):
+    """
+    Prometheus-friendly simulation status poll.
+
+    Returns a compact, machine-readable status object. No human-only text.
+    No ambiguous states. Prometheus should call this every 60 seconds during
+    an active simulation.
+
+    Response schema::
+
+        {
+            "simulation_id": "sim_abc123",
+            "terminal": false,           // true = final state reached
+            "success": null,             // true/false once terminal, null while running
+            "status": "running",         // running | completed | failed | stopped | stalled | idle
+            "percent_complete": 42.5,
+            "rounds_done": 42,
+            "rounds_total": 100,
+            "stalled": false,            // true = RUNNING but no progress for 10+ min
+            "stalled_since": null,       // ISO8601 timestamp of last update when stalled
+            "last_updated": "2026-01-01T00:00:00"
+        }
+
+    HTTP 200 on success, 404 if simulation not found.
+ """ + try: + run_state = SimulationRunner.get_run_state(simulation_id) + except Exception as e: + logger.error(f"ops/poll: error fetching run state for {simulation_id}: {e}") + return jsonify({"error": "Internal error fetching simulation state"}), 500 + + if run_state is None: + return jsonify({"error": f"Simulation '{simulation_id}' not found"}), 404 + + status = run_state.runner_status + terminal = status in _TERMINAL_STATUSES + + # Determine success + if status == RunnerStatus.COMPLETED: + success = True + elif status in (RunnerStatus.FAILED, RunnerStatus.STOPPED): + success = False + else: + success = None + + stalled = _is_stalled(run_state) + status_label = "stalled" if stalled else status.value + + return jsonify({ + "simulation_id": simulation_id, + "terminal": terminal, + "success": success, + "status": status_label, + "percent_complete": _percent_complete(run_state), + "rounds_done": run_state.current_round, + "rounds_total": run_state.total_rounds, + "stalled": stalled, + "stalled_since": run_state.updated_at if stalled else None, + "last_updated": run_state.updated_at, + }), 200 diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py index 3aae398..e0a6f52 100644 --- a/backend/app/services/__init__.py +++ b/backend/app/services/__init__.py @@ -16,6 +16,8 @@ EventConfig, PlatformConfig ) +from .platform_simulators import PlatformSimulator, TwitterSimulator, RedditSimulator +from .outcome_scorer import SimulationOutcome, OutcomeScorer from .simulation_runner import ( SimulationRunner, SimulationRunState, @@ -55,6 +57,11 @@ 'TimeSimulationConfig', 'EventConfig', 'PlatformConfig', + 'PlatformSimulator', + 'TwitterSimulator', + 'RedditSimulator', + 'SimulationOutcome', + 'OutcomeScorer', 'SimulationRunner', 'SimulationRunState', 'RunnerStatus', diff --git a/backend/app/services/entity_reader.py b/backend/app/services/entity_reader.py index fcc3096..4923076 100644 --- a/backend/app/services/entity_reader.py +++ 
b/backend/app/services/entity_reader.py @@ -5,9 +5,12 @@ Replaces zep_entity_reader.py — all Zep Cloud calls replaced by GraphStorage. """ +from difflib import SequenceMatcher +import json from typing import Dict, Any, List, Optional, Set from dataclasses import dataclass, field +from ..config import Config from ..utils.logger import get_logger from ..storage import GraphStorage @@ -73,8 +76,15 @@ class EntityReader: 3. Get related edges and linked node information for each entity """ - def __init__(self, storage: GraphStorage): + def __init__( + self, + storage: GraphStorage, + ner_model: Optional[str] = None, + deduplication_threshold: float = 0.85, + ): self.storage = storage + self.ner_model = ner_model or Config.NER_MODEL + self.deduplication_threshold = deduplication_threshold def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]: """ @@ -230,8 +240,20 @@ def filter_defined_entities( filtered_entities.append(entity) - logger.info(f"Filter completed: total nodes {total_count}, matched {len(filtered_entities)}, " - f"entity types: {entity_types_found}") + filtered_entities = self._deduplicate_entities( + filtered_entities, + similarity_threshold=self.deduplication_threshold, + ) + entity_types_found = { + entity.get_entity_type() + for entity in filtered_entities + if entity.get_entity_type() + } + + logger.info( + f"Filter completed: total nodes {total_count}, matched {len(filtered_entities)}, " + f"entity types: {entity_types_found}" + ) return FilteredEntities( entities=filtered_entities, @@ -338,3 +360,88 @@ def get_entities_by_type( enrich_with_edges=enrich_with_edges ) return result.entities + + def _deduplicate_entities( + self, + entities: List[EntityNode], + similarity_threshold: float = 0.85, + ) -> List[EntityNode]: + """ + Collapse near-duplicate entities using fuzzy name matching. + + Only entities with the same resolved entity type are considered duplicates. + The first entity encountered remains canonical and absorbs merged context. 
+ """ + if len(entities) < 2: + return entities + + deduplicated: List[EntityNode] = [] + + for entity in entities: + canonical_match = None + entity_type = entity.get_entity_type() + normalized_name = self._normalize_entity_name(entity.name) + + for candidate in deduplicated: + candidate_type = candidate.get_entity_type() + if entity_type != candidate_type: + continue + + candidate_name = self._normalize_entity_name(candidate.name) + similarity = SequenceMatcher(None, normalized_name, candidate_name).ratio() + if similarity >= similarity_threshold: + canonical_match = candidate + break + + if canonical_match is None: + deduplicated.append(entity) + continue + + self._merge_entity_into_canonical(canonical_match, entity) + + return deduplicated + + @staticmethod + def _normalize_entity_name(name: str) -> str: + return " ".join((name or "").lower().split()) + + def _merge_entity_into_canonical(self, canonical: EntityNode, duplicate: EntityNode) -> None: + """Merge duplicate entity details into the canonical entity in place.""" + canonical.labels = sorted(set(canonical.labels) | set(duplicate.labels)) + + if not canonical.summary and duplicate.summary: + canonical.summary = duplicate.summary + elif duplicate.summary and len(duplicate.summary) > len(canonical.summary): + canonical.summary = duplicate.summary + + merged_attributes = dict(canonical.attributes) + for key, value in duplicate.attributes.items(): + if key not in merged_attributes or not merged_attributes[key]: + merged_attributes[key] = value + canonical.attributes = merged_attributes + + canonical.related_edges = self._merge_unique_dicts( + canonical.related_edges, + duplicate.related_edges, + ) + canonical.related_nodes = self._merge_unique_dicts( + canonical.related_nodes, + duplicate.related_nodes, + ) + + @staticmethod + def _merge_unique_dicts( + existing_items: List[Dict[str, Any]], + incoming_items: List[Dict[str, Any]], + ) -> List[Dict[str, Any]]: + seen = set() + merged_items: List[Dict[str, Any]] 
= [] + + for item in existing_items + incoming_items: + marker = json.dumps(item, sort_keys=True, ensure_ascii=False) + if marker in seen: + continue + seen.add(marker) + merged_items.append(item) + + return merged_items diff --git a/backend/app/services/executive_summary.py b/backend/app/services/executive_summary.py new file mode 100644 index 0000000..1adb0f1 --- /dev/null +++ b/backend/app/services/executive_summary.py @@ -0,0 +1,329 @@ +""" +Executive Summary Generator + +Generates a structured executive summary from a completed simulation report. +Saves both machine-readable JSON and human-readable Markdown formats. + +Usage: + generator = ExecutiveSummaryGenerator(llm_client, simulation_data) + summary = generator.generate(full_report_path) + generator.save(summary, output_dir) +""" + +import os +import json +from dataclasses import dataclass, field, asdict +from typing import List, Optional, Any +from datetime import datetime + +from ..utils.logger import get_logger + +logger = get_logger('mirofish.executive_summary') + +# ─── Prompt ─────────────────────────────────────────────────────────────────── + +EXECUTIVE_SUMMARY_SYSTEM_PROMPT = """\ +You are an expert analyst extracting a structured executive summary from a simulation report. + +CRITICAL LANGUAGE REQUIREMENT: You MUST respond ENTIRELY in English. +Do NOT use Chinese, Japanese, Korean, or any non-English language anywhere in your output. +Output ONLY valid JSON — no prose, no markdown, no code fences. +""" + +EXECUTIVE_SUMMARY_USER_PROMPT_TEMPLATE = """\ +Given the following simulation report, extract a structured executive summary as JSON. 
+
+--- REPORT START ---
+{report_excerpt}
+--- REPORT END ---
+
+Extract the following fields and return ONLY valid JSON (no other text):
+{{
+  "viral_outcome": "<1 sentence describing how the story spread>",
+  "dominant_platform": "<twitter|reddit|both|none>",
+  "backlash_pattern": "<1 sentence describing sentiment arc>",
+  "risk_level": "<LOW|MEDIUM|HIGH>",
+  "risk_summary": "<2-3 sentences on key risks identified>",
+  "narrative_shift": <float>,
+  "polarization_score": <float>,
+  "consensus_reached": <true|false>,
+  "winning_narrative": "<narrative-or-null>",
+  "top_supporters": ["<name>", "<name>"],
+  "top_critics": ["<name>", "<name>"],
+  "key_neutral_voices": ["<name>"],
+  "key_events": [{{"round": <int>, "event": "<description>"}}],
+  "twitter_sentiment": <float>,
+  "reddit_sentiment": <float>,
+  "strategic_recommendations": ["<rec>", "<rec>", "<rec>"]
+}}
+
+Rules:
+- All string values must be in English
+- numeric fields must be actual numbers (not strings)
+- Output ONLY the JSON object — no explanation, no markdown, no code fences
+"""
+
+# ─── Dataclass ───────────────────────────────────────────────────────────────
+
+@dataclass
+class ExecutiveSummary:
+    """Structured executive summary for a simulation report."""
+
+    simulation_id: str
+    report_id: str
+    generated_at: str
+
+    # Outcome
+    viral_outcome: str = ""
+    dominant_platform: str = ""
+    backlash_pattern: str = ""
+    risk_level: str = "MEDIUM"
+    risk_summary: str = ""
+
+    # Narrative metrics
+    narrative_shift: float = 0.0
+    polarization_score: float = 0.0
+    consensus_reached: bool = False
+    winning_narrative: Optional[str] = None
+
+    # Key actors
+    top_supporters: List[str] = field(default_factory=list)
+    top_critics: List[str] = field(default_factory=list)
+    key_neutral_voices: List[str] = field(default_factory=list)
+
+    # Timeline
+    key_events: List[dict] = field(default_factory=list)
+
+    # Platform sentiment
+    twitter_sentiment: float = 0.0
+    reddit_sentiment: float = 0.0
+
+    # Recommendations
+    strategic_recommendations: List[str] = field(default_factory=list)
+
+
+# ─── Generator ───────────────────────────────────────────────────────────────
+
+class
ExecutiveSummaryGenerator: + """Generates a structured executive summary from simulation data and a full report.""" + + # Maximum characters from report to include in the LLM prompt + REPORT_EXCERPT_CHARS = 8000 + + def __init__(self, llm_client: Any, simulation_data: dict): + """ + Args: + llm_client: An LLMClient instance (must expose .chat() and/or .chat_json()) + simulation_data: Dict containing at least 'simulation_id' and 'report_id' + """ + self.llm = llm_client + self.sim_data = simulation_data + + def generate(self, full_report_path: str) -> ExecutiveSummary: + """ + Generate an ExecutiveSummary from the full report markdown file. + + Args: + full_report_path: Absolute path to full_report.md + + Returns: + ExecutiveSummary dataclass populated with extracted values + """ + if not os.path.exists(full_report_path): + raise FileNotFoundError(f"Report file not found: {full_report_path}") + + with open(full_report_path, 'r', encoding='utf-8', errors='replace') as f: + full_report = f.read() + + # Truncate to avoid excessive token usage + report_excerpt = full_report[:self.REPORT_EXCERPT_CHARS] + if len(full_report) > self.REPORT_EXCERPT_CHARS: + report_excerpt += "\n\n... [report truncated for summary extraction] ..." 
+ + logger.info(f"Generating executive summary for report: {full_report_path}") + + prompt = EXECUTIVE_SUMMARY_USER_PROMPT_TEMPLATE.format(report_excerpt=report_excerpt) + + # Prefer chat_json if available, fall back to chat + manual parse + summary_data: dict = {} + try: + if hasattr(self.llm, 'chat_json'): + summary_data = self.llm.chat_json( + messages=[ + {"role": "system", "content": EXECUTIVE_SUMMARY_SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ], + temperature=0.2, + ) + else: + raw = self.llm.chat( + messages=[ + {"role": "system", "content": EXECUTIVE_SUMMARY_SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ], + temperature=0.2, + ) + # Strip possible markdown code fences + raw = raw.strip() + if raw.startswith("```"): + raw = raw.split("```", 2)[1] + if raw.startswith("json"): + raw = raw[4:] + raw = raw.rsplit("```", 1)[0] + summary_data = json.loads(raw.strip()) + except Exception as e: + logger.error(f"Executive summary LLM extraction failed: {e}") + summary_data = {} + + # Build the dataclass — use defaults for any missing/malformed fields + summary = ExecutiveSummary( + simulation_id=self.sim_data.get('simulation_id', ''), + report_id=self.sim_data.get('report_id', ''), + generated_at=datetime.now().isoformat(), + viral_outcome=str(summary_data.get('viral_outcome', '')), + dominant_platform=str(summary_data.get('dominant_platform', '')), + backlash_pattern=str(summary_data.get('backlash_pattern', '')), + risk_level=str(summary_data.get('risk_level', 'MEDIUM')).upper(), + risk_summary=str(summary_data.get('risk_summary', '')), + narrative_shift=self._safe_float(summary_data.get('narrative_shift', 0.0)), + polarization_score=self._safe_float(summary_data.get('polarization_score', 0.0)), + consensus_reached=bool(summary_data.get('consensus_reached', False)), + winning_narrative=summary_data.get('winning_narrative'), + top_supporters=self._safe_list(summary_data.get('top_supporters', [])), + 
top_critics=self._safe_list(summary_data.get('top_critics', [])), + key_neutral_voices=self._safe_list(summary_data.get('key_neutral_voices', [])), + key_events=self._safe_events(summary_data.get('key_events', [])), + twitter_sentiment=self._safe_float(summary_data.get('twitter_sentiment', 0.0)), + reddit_sentiment=self._safe_float(summary_data.get('reddit_sentiment', 0.0)), + strategic_recommendations=self._safe_list(summary_data.get('strategic_recommendations', [])), + ) + + logger.info( + f"Executive summary generated: risk={summary.risk_level}, " + f"polarization={summary.polarization_score:.2f}, " + f"consensus={summary.consensus_reached}" + ) + return summary + + def save(self, summary: ExecutiveSummary, output_dir: str): + """ + Save the executive summary as both JSON and Markdown. + + Args: + summary: ExecutiveSummary dataclass + output_dir: Directory to write files into (created if absent) + + Returns: + Tuple[str, str]: (json_path, md_path) + """ + os.makedirs(output_dir, exist_ok=True) + + # JSON — machine-readable + json_path = os.path.join(output_dir, 'executive_summary.json') + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(asdict(summary), f, indent=2, ensure_ascii=False) + + # Markdown — human-readable card + md_path = os.path.join(output_dir, 'executive_summary.md') + with open(md_path, 'w', encoding='utf-8') as f: + f.write(self._render_markdown(summary)) + + logger.info(f"Executive summary saved: {json_path}, {md_path}") + return json_path, md_path + + # ─── Private helpers ────────────────────────────────────────────────────── + + @staticmethod + def _safe_float(value: Any) -> float: + try: + return float(value) + except (TypeError, ValueError): + return 0.0 + + @staticmethod + def _safe_list(value: Any) -> List[str]: + if isinstance(value, list): + return [str(v) for v in value] + return [] + + @staticmethod + def _safe_events(value: Any) -> List[dict]: + if not isinstance(value, list): + return [] + cleaned = [] + for item in 
value: + if isinstance(item, dict): + cleaned.append({ + "round": int(item.get("round", 0)), + "event": str(item.get("event", "")), + }) + return cleaned + + @staticmethod + def _render_markdown(summary: ExecutiveSummary) -> str: + """Render the executive summary as a clean Markdown card.""" + risk_emoji = {"HIGH": "🔴", "MEDIUM": "🟡", "LOW": "🟢"}.get(summary.risk_level, "⚪") + + supporters_str = ", ".join(summary.top_supporters) if summary.top_supporters else "—" + critics_str = ", ".join(summary.top_critics) if summary.top_critics else "—" + neutrals_str = ", ".join(summary.key_neutral_voices) if summary.key_neutral_voices else "—" + + events_lines = "\n".join( + f"- **Round {e['round']}:** {e['event']}" for e in summary.key_events + ) if summary.key_events else "- No key events recorded" + + recommendations_lines = "\n".join( + f"{i + 1}. {r}" for i, r in enumerate(summary.strategic_recommendations) + ) if summary.strategic_recommendations else "1. No recommendations available" + + return f"""\ +# Executive Summary — Simulation {summary.simulation_id} +*Report ID: {summary.report_id}* +*Generated: {summary.generated_at}* + +--- + +## 📊 Outcome + +**{summary.viral_outcome}** + +| Metric | Value | +|--------|-------| +| Dominant Platform | {summary.dominant_platform} | +| Backlash Pattern | {summary.backlash_pattern} | +| Risk Level | {risk_emoji} {summary.risk_level} | +| Narrative Shift | {summary.narrative_shift:+.2f} | +| Polarization Score | {summary.polarization_score:.2f} | +| Consensus Reached | {"Yes" if summary.consensus_reached else "No"} | +| Winning Narrative | {summary.winning_narrative or "None — ongoing fracture"} | +| Twitter Sentiment | {summary.twitter_sentiment:+.2f} | +| Reddit Sentiment | {summary.reddit_sentiment:+.2f} | + +--- + +## 🎭 Key Actors + +**Top Supporters:** {supporters_str} +**Top Critics:** {critics_str} +**Key Neutrals:** {neutrals_str} + +--- + +## ⚠️ Risk Assessment + +{risk_emoji} **{summary.risk_level}** + 
+{summary.risk_summary} + +--- + +## 📅 Key Timeline Events + +{events_lines} + +--- + +## 💡 Strategic Recommendations + +{recommendations_lines} +""" diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 2555997..1b1a288 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -56,6 +56,16 @@ class OasisAgentProfile: source_entity_type: Optional[str] = None created_at: str = field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d")) + + # --- Phase 2: Behavioral variance fields --- + # How likely the agent follows trends vs. holds personal views (0=contrarian, 1=total follower) + conformity: float = 0.5 + # How quickly the agent responds to trending topics (0=slow, 1=instant) + reactiveness: float = 0.5 + # How likely the agent changes stance when exposed to counter-arguments (0=stubborn, 1=easily swayed) + persuadability: float = 0.3 + # Random behavior variance preventing fully mechanical simulation (0=deterministic, 1=chaotic) + behavioral_noise: float = 0.1 def to_reddit_format(self) -> Dict[str, Any]: """Convert to Reddit platform format""" @@ -1138,3 +1148,432 @@ def save_profiles_to_json( logger.warning("save_profiles_to_json is deprecated, please use save_profiles method") self.save_profiles(profiles, file_path, platform) + +# ============================================================================= +# Phase 2: FatigueState — agent engagement fatigue tracking +# ============================================================================= + +@dataclass +class FatigueState: + """ + Tracks an agent's engagement fatigue over simulation rounds. + + Fatigue accumulates as the agent posts/comments and decays when the agent + is inactive. The effective activity level is the agent's base rate minus + a fatigue penalty, clamped to a minimum so agents never go fully silent. 
# =============================================================================
# Phase 2: FatigueState — agent engagement fatigue tracking
# =============================================================================

@dataclass
class FatigueState:
    """
    Tracks an agent's engagement fatigue over simulation rounds.

    Fatigue accumulates as the agent posts/comments and decays when the agent
    is inactive. The effective activity level is the agent's base rate reduced
    by a fatigue penalty, clamped to a minimum so agents never go fully silent.

    Usage example::

        state = FatigueState(base_activity_level=0.8)
        effective = state.get_current_activity_level(current_round=5)
        # After acting: state.record_activity(current_round=5)
    """

    # Intrinsic activity level; does not change over the simulation (0.0-1.0)
    base_activity_level: float = 0.5

    # Accumulated fatigue in the current session; rises when acting, decays at rest (0.0-1.0)
    fatigue_accumulation: float = 0.0

    # Round number of the agent's most recent action (-1 = has never acted)
    last_activity_round: int = -1

    # How fast fatigue decays per idle round (0.05 ≈ full recovery in ~20 rounds)
    fatigue_decay_rate: float = 0.05

    # How much fatigue accumulates per action
    fatigue_per_action: float = 0.10

    # Minimum effective activity (prevents agents going permanently silent)
    min_activity_floor: float = 0.05

    def get_current_activity_level(self, current_round: int) -> float:
        """
        Compute the effective activity level for *current_round*.

        Fatigue decays linearly with idle time since the last action.
        The result is clamped to [min_activity_floor, base_activity_level].

        Args:
            current_round: The simulation round number being evaluated.

        Returns:
            Effective activity level in [min_activity_floor, base_activity_level].
        """
        if self.last_activity_round < 0:
            # Agent has never acted; no fatigue yet
            return self.base_activity_level

        rounds_idle = max(0, current_round - self.last_activity_round)
        current_fatigue = max(0.0, self.fatigue_accumulation - rounds_idle * self.fatigue_decay_rate)

        effective = self.base_activity_level * (1.0 - current_fatigue)
        return max(self.min_activity_floor, min(self.base_activity_level, effective))

    def record_activity(self, current_round: int) -> None:
        """
        Update fatigue state after the agent acts in *current_round*.

        Should be called once per action taken.

        Args:
            current_round: The simulation round in which the action occurred.
        """
        # Decay any existing fatigue first for the idle period
        if self.last_activity_round >= 0:
            rounds_idle = max(0, current_round - self.last_activity_round)
            self.fatigue_accumulation = max(
                0.0, self.fatigue_accumulation - rounds_idle * self.fatigue_decay_rate
            )

        # Then add new fatigue for this action and mark the round
        self.fatigue_accumulation = min(1.0, self.fatigue_accumulation + self.fatigue_per_action)
        self.last_activity_round = current_round

    def record_rest(self, current_round: int) -> None:
        """
        Passive fatigue decay when the agent skips a round.

        Args:
            current_round: The simulation round being skipped.
        """
        if self.last_activity_round >= 0:
            rounds_idle = max(0, current_round - self.last_activity_round)
            self.fatigue_accumulation = max(
                0.0, self.fatigue_accumulation - rounds_idle * self.fatigue_decay_rate
            )


# =============================================================================
# Phase 2: ArchetypePersonaGenerator — synthetic personas from seed archetypes
# =============================================================================

# Default archetype definitions; each entry maps archetype name → behavioral defaults
_ARCHETYPE_DEFAULTS: Dict[str, Dict[str, Any]] = {
    "Enthusiast": {
        "conformity": 0.7,
        "reactiveness": 0.8,
        "persuadability": 0.5,
        "behavioral_noise": 0.15,
        "sentiment_bias_range": (0.4, 0.9),
        "stance": "supportive",
        "activity_level_range": (0.6, 0.9),
    },
    "Skeptic": {
        "conformity": 0.3,
        "reactiveness": 0.5,
        "persuadability": 0.2,
        "behavioral_noise": 0.1,
        "sentiment_bias_range": (-0.7, -0.2),
        "stance": "opposing",
        "activity_level_range": (0.4, 0.7),
    },
    "Neutral": {
        "conformity": 0.5,
        "reactiveness": 0.4,
        "persuadability": 0.5,
        "behavioral_noise": 0.1,
        "sentiment_bias_range": (-0.2, 0.2),
        "stance": "neutral",
        "activity_level_range": (0.3, 0.6),
    },
    "Influencer": {
        "conformity": 0.4,
        "reactiveness": 0.9,
        "persuadability": 0.3,
        "behavioral_noise": 0.2,
        "sentiment_bias_range": (0.1, 0.7),
        "stance": "supportive",
        "activity_level_range": (0.7, 1.0),
    },
    "Contrarian": {
        "conformity": 0.1,
        "reactiveness": 0.6,
        "persuadability": 0.1,
        "behavioral_noise": 0.25,
        "sentiment_bias_range": (-0.9, -0.3),
        "stance": "opposing",
        "activity_level_range": (0.5, 0.8),
    },
}


class ArchetypePersonaGenerator:
    """
    Generates synthetic OASIS agent personas from a fixed set of seed archetypes.

    Two-tier persona model
    ──────────────────────
    * **Seed personas** (real): high-fidelity, named, derived from document entities.
    * **Archetype personas** (synthetic): generated here from behavioral templates,
      scaled to populate a realistic crowd around the seed population.

    Archetypes
    ──────────
    Enthusiast, Skeptic, Neutral, Influencer, Contrarian — each has predefined
    behavioral defaults (conformity, reactiveness, persuadability, behavioral_noise)
    with small random variance applied so no two synthetic agents are identical.

    Usage::

        gen = ArchetypePersonaGenerator(seed_profiles, archetype_count=20)
        synthetic = gen.generate_synthetic_personas(next_user_id_start=len(seed_profiles))

    Args:
        seed_personas: Profiles already created from document entities.
        archetype_count: Total number of synthetic personas to generate (split evenly
            across archetypes; remainder goes to the earliest archetypes in order).
        archetypes: Override the list of archetype names to use. Defaults to all five.
        llm_client: Optional OpenAI-compatible client for LLM-assisted name/bio
            generation. If None, rule-based generation is used.
        model_name: LLM model name (used only when llm_client is provided).

    Raises:
        ValueError: If any name in *archetypes* is not a known archetype.
    """

    DEFAULT_ARCHETYPES = list(_ARCHETYPE_DEFAULTS.keys())

    def __init__(
        self,
        seed_personas: List[OasisAgentProfile],
        archetype_count: int = 10,
        archetypes: Optional[List[str]] = None,
        llm_client=None,
        model_name: Optional[str] = None,
    ):
        self.seed_personas = seed_personas
        self.archetype_count = max(1, archetype_count)
        self.archetypes = archetypes or self.DEFAULT_ARCHETYPES
        self.llm_client = llm_client
        self.model_name = model_name

        # Validate archetype names
        for name in self.archetypes:
            if name not in _ARCHETYPE_DEFAULTS:
                raise ValueError(
                    f"Unknown archetype '{name}'. "
                    f"Valid options: {list(_ARCHETYPE_DEFAULTS.keys())}"
                )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def generate_synthetic_personas(
        self, next_user_id_start: int = 0
    ) -> List[OasisAgentProfile]:
        """
        Generate synthetic personas distributed across all configured archetypes.

        Args:
            next_user_id_start: user_id to assign to the first generated persona.
                Subsequent personas get sequential IDs.

        Returns:
            List of synthetic OasisAgentProfile instances.
        """
        num_archetypes = len(self.archetypes)
        per_archetype = self.archetype_count // num_archetypes
        remainder = self.archetype_count % num_archetypes

        synthetic: List[OasisAgentProfile] = []
        uid = next_user_id_start

        for i, archetype in enumerate(self.archetypes):
            # Distribute remainder to earlier archetypes
            count = per_archetype + (1 if i < remainder else 0)
            for variant in range(count):
                profile = self._generate_persona_for_archetype(archetype, variant, uid)
                synthetic.append(profile)
                uid += 1

        logger.info(
            f"ArchetypePersonaGenerator: generated {len(synthetic)} synthetic personas "
            f"across {num_archetypes} archetypes"
        )
        return synthetic

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _generate_persona_for_archetype(
        self, archetype: str, variant: int, user_id: int
    ) -> OasisAgentProfile:
        """
        Create a single synthetic persona for *archetype*.

        Behavioral fields are drawn from the archetype template with small
        Gaussian noise applied so agents in the same archetype differ.

        Args:
            archetype: One of the archetype names in _ARCHETYPE_DEFAULTS.
            variant: Zero-based index within the archetype batch (used for
                seeding name/username variance).
            user_id: Integer user_id for the new profile.

        Returns:
            A fully populated OasisAgentProfile.
        """
        defaults = _ARCHETYPE_DEFAULTS[archetype]

        # --- Behavioral variance with small noise ---
        def jitter(value: float, noise: float = 0.08) -> float:
            return float(max(0.0, min(1.0, value + random.gauss(0, noise))))

        conformity = jitter(defaults["conformity"])
        reactiveness = jitter(defaults["reactiveness"])
        persuadability = jitter(defaults["persuadability"])
        behavioral_noise = jitter(defaults["behavioral_noise"], noise=0.04)

        # Sentiment bias sampled uniformly within archetype range.
        # NOTE(review): sentiment_bias and activity_level are computed but never
        # attached to the profile — presumably consumed by AgentActivityConfig
        # elsewhere; confirm and wire through, or drop. Kept here so the RNG
        # call sequence stays unchanged.
        s_lo, s_hi = defaults["sentiment_bias_range"]
        sentiment_bias = round(random.uniform(s_lo, s_hi), 3)

        # Activity level
        a_lo, a_hi = defaults["activity_level_range"]
        activity_level = round(random.uniform(a_lo, a_hi), 3)

        # Social graph metrics scaled loosely by archetype
        follower_multiplier = 3.0 if archetype == "Influencer" else 1.0
        follower_count = int(random.randint(100, 1000) * follower_multiplier)
        friend_count = random.randint(50, 500)
        statuses_count = random.randint(100, 3000)

        # Archetype-flavoured name/bio
        name, username, bio, persona_text = self._generate_identity(archetype, variant, user_id)

        profile = OasisAgentProfile(
            user_id=user_id,
            user_name=username,
            name=name,
            bio=bio,
            persona=persona_text,
            karma=random.randint(200, 8000),
            friend_count=friend_count,
            follower_count=follower_count,
            statuses_count=statuses_count,
            age=random.randint(18, 60),
            gender=random.choice(["male", "female"]),
            mbti=random.choice(OasisProfileGenerator.MBTI_TYPES),
            country=random.choice(OasisProfileGenerator.COUNTRIES),
            profession=self._archetype_profession(archetype),
            interested_topics=self._archetype_topics(archetype),
            source_entity_uuid=None,
            source_entity_type=f"synthetic:{archetype}",
            # Phase 2 behavioral variance
            conformity=round(conformity, 3),
            reactiveness=round(reactiveness, 3),
            persuadability=round(persuadability, 3),
            behavioral_noise=round(behavioral_noise, 3),
        )
        return profile

    def _generate_identity(
        self, archetype: str, variant: int, user_id: int
    ) -> tuple:
        """
        Return (name, username, bio, persona) for a synthetic persona.

        Uses LLM when a client is available; falls back to rule-based templates.
        """
        if self.llm_client and self.model_name:
            return self._llm_identity(archetype, variant)
        return self._rule_based_identity(archetype, variant, user_id)

    def _llm_identity(self, archetype: str, variant: int) -> tuple:
        """Generate identity fields via LLM; falls back to rule-based on any error."""
        prompt = (
            f"Generate a realistic social media persona for a '{archetype}' archetype.\n"
            f"This is variant #{variant + 1}.\n"
            f"Return JSON with keys: name, username, bio (≤160 chars), persona (≤400 chars).\n"
            f"The persona should reflect the '{archetype}' behavioral pattern authentically."
        )
        try:
            resp = self.llm_client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": "You generate realistic social media user profiles. Return pure JSON only."},
                    {"role": "user", "content": prompt},
                ],
                response_format={"type": "json_object"},
                temperature=0.85,
            )
            data = json.loads(resp.choices[0].message.content)
            name = data.get("name", f"User_{archetype}_{variant}")
            username = data.get("username", f"user_{archetype.lower()}_{variant}_{random.randint(100,999)}")
            bio = data.get("bio", f"A {archetype.lower()} observer of current events.")
            persona = data.get("persona", f"Synthetic {archetype} persona.")
            return name, username, bio, persona
        except Exception as e:
            logger.warning(f"LLM identity generation failed for archetype {archetype}: {e}")
            return self._rule_based_identity(archetype, variant, random.randint(1000, 9999))

    def _rule_based_identity(self, archetype: str, variant: int, user_id: int) -> tuple:
        """Deterministic rule-based identity for a synthetic persona (names keyed off user_id)."""
        first_names = [
            "Alex", "Jordan", "Morgan", "Taylor", "Casey",
            "Riley", "Cameron", "Avery", "Quinn", "Blake",
        ]
        last_names = [
            "Smith", "Chen", "Rivera", "Patel", "Kim",
            "Brown", "Davis", "Wilson", "Martinez", "Lee",
        ]
        first = first_names[user_id % len(first_names)]
        last = last_names[(user_id // len(first_names)) % len(last_names)]
        name = f"{first} {last}"
        username = f"{first.lower()}_{last.lower()}_{random.randint(10, 99)}"

        # FIX: the Skeptic/Neutral/Contrarian entries were f-strings without
        # placeholders (ruff F541); plain literals are equivalent.
        bio_templates = {
            "Enthusiast": f"Passionate about innovation and progress. Always excited to discuss new ideas. #{archetype}",
            "Skeptic": "I question everything. Critical thinker. Not buying the hype.",
            "Neutral": "Just here to read and stay informed. Opinions are my own.",
            "Influencer": f"Content creator | thought leader | {random.randint(1, 50)}K followers. Let's connect!",
            "Contrarian": "If everyone agrees, someone is wrong. Usually them.",
        }
        persona_templates = {
            "Enthusiast": (
                f"{name} is an avid follower of current trends and jumps at the chance to engage with new ideas. "
                f"They share content frequently, tend to amplify positive narratives, and often recruit others to their views."
            ),
            "Skeptic": (
                f"{name} approaches every claim with healthy scepticism. They ask probing questions, "
                f"challenge official narratives, and rarely change their mind without hard evidence."
            ),
            "Neutral": (
                f"{name} reads widely but posts sparingly. They prefer to understand all sides before "
                f"forming an opinion and rarely take public stances on divisive issues."
            ),
            "Influencer": (
                f"{name} has a significant online following and knows how to craft messages that resonate. "
                f"They are highly reactive to trends and their posts often kickstart broader conversations."
            ),
            "Contrarian": (
                f"{name} instinctively pushes back against consensus. They relish pointing out flaws in "
                f"popular positions and are unafraid of controversy, though this sometimes alienates allies."
            ),
        }
        bio = bio_templates.get(archetype, f"Synthetic {archetype} persona #{variant}")
        persona = persona_templates.get(archetype, f"{name} is a synthetic {archetype.lower()} agent.")
        return name, username, bio, persona

    @staticmethod
    def _archetype_profession(archetype: str) -> str:
        """Map an archetype to a representative profession label."""
        professions = {
            "Enthusiast": "Tech enthusiast / early adopter",
            "Skeptic": "Journalist / researcher",
            "Neutral": "General public",
            "Influencer": "Content creator / blogger",
            "Contrarian": "Independent commentator",
        }
        return professions.get(archetype, "Social media user")

    @staticmethod
    def _archetype_topics(archetype: str) -> List[str]:
        """Map an archetype to its default interest topics."""
        topics = {
            "Enthusiast": ["Technology", "Innovation", "Science", "Startups"],
            "Skeptic": ["Politics", "Media criticism", "Fact-checking", "Research"],
            "Neutral": ["General news", "Local events", "Lifestyle"],
            "Influencer": ["Trending topics", "Social issues", "Pop culture", "Tech"],
            "Contrarian": ["Politics", "Economics", "Philosophy", "Conspiracy theories"],
        }
        return topics.get(archetype, ["General"])
"""
Simulation outcome scoring primitives.
"""

from collections import Counter, defaultdict
from dataclasses import dataclass, field
import math
from typing import Any, Dict, List, Tuple


@dataclass
class SimulationOutcome:
    """Core quantitative metrics describing a simulation run."""

    total_posts: int = 0
    total_engagement: int = 0
    average_post_reach: float = 0.0
    viral_posts_count: int = 0
    sentiment_standard_deviation: float = 0.0
    platform_distribution: Dict[str, float] = field(default_factory=dict)
    top_influencers: List[Tuple[str, float]] = field(default_factory=list)
    peak_activity_round: int = 0


class OutcomeScorer:
    """Compute additive core metrics from simulation post-like records.

    Posts may be dicts or objects; field access falls back through
    ``_value`` so both styles are supported uniformly.
    """

    def score_simulation(
        self,
        posts: List[Any],
        viral_threshold: int = 1000,
        influencer_limit: int = 5,
    ) -> SimulationOutcome:
        """
        Score a full simulation run.

        Args:
            posts: Post-like records (dicts or objects).
            viral_threshold: Engagement at or above which a post counts as viral.
            influencer_limit: How many top authors to report.

        Returns:
            A populated SimulationOutcome.
        """
        # PERF: compute each post's engagement once and reuse it for the
        # total and the viral count (previously computed twice per post).
        engagements = [self._engagement_value(post) for post in posts]

        return SimulationOutcome(
            total_posts=len(posts),
            total_engagement=sum(engagements),
            average_post_reach=self._compute_average_reach(posts),
            viral_posts_count=sum(1 for value in engagements if value >= viral_threshold),
            sentiment_standard_deviation=self._compute_sentiment_standard_deviation(posts),
            platform_distribution=self._compute_platform_distribution(posts),
            top_influencers=self._find_top_influencers(posts, influencer_limit=influencer_limit),
            peak_activity_round=self._find_peak_activity_round(posts),
        )

    @staticmethod
    def _value(source: Any, key: str, default: Any = 0) -> Any:
        """Uniform field access for dicts and objects."""
        if isinstance(source, dict):
            return source.get(key, default)
        return getattr(source, key, default)

    def _engagement_value(self, post: Any) -> int:
        """Total engagement: explicit 'engagement' field, else sum of components."""
        direct_value = self._value(post, "engagement", None)
        if direct_value is not None:
            return int(direct_value)

        components = (
            self._value(post, "likes", 0),
            self._value(post, "upvotes", 0),
            self._value(post, "comments", 0),
            self._value(post, "replies", 0),
            self._value(post, "shares", 0),
            self._value(post, "retweets", 0),
            self._value(post, "reposts", 0),
        )
        return int(sum(int(component or 0) for component in components))

    def _sum_engagement(self, posts: List[Any]) -> int:
        """Sum engagement over all posts (kept for external callers)."""
        return sum(self._engagement_value(post) for post in posts)

    def _compute_average_reach(self, posts: List[Any]) -> float:
        """Mean reach; falls back to impressions, then engagement, per post."""
        if not posts:
            return 0.0

        total_reach = 0.0
        for post in posts:
            reach = self._value(post, "reach", None)
            if reach is None:
                reach = self._value(post, "impressions", self._engagement_value(post))
            total_reach += float(reach or 0.0)

        return total_reach / len(posts)

    def _compute_sentiment_standard_deviation(self, posts: List[Any]) -> float:
        """Population standard deviation of posts that carry a sentiment value."""
        sentiments = [
            float(self._value(post, "sentiment"))
            for post in posts
            if self._value(post, "sentiment", None) is not None
        ]
        if len(sentiments) < 2:
            return 0.0

        mean = sum(sentiments) / len(sentiments)
        variance = sum((value - mean) ** 2 for value in sentiments) / len(sentiments)
        return math.sqrt(variance)

    def _compute_platform_distribution(self, posts: List[Any]) -> Dict[str, float]:
        """Fraction of posts per (lowercased) platform name."""
        if not posts:
            return {}

        platform_counts = Counter(
            str(self._value(post, "platform", "unknown")).lower()
            for post in posts
        )
        total = sum(platform_counts.values())
        return {
            platform: count / total
            for platform, count in platform_counts.items()
        }

    def _find_top_influencers(
        self,
        posts: List[Any],
        influencer_limit: int = 5,
    ) -> List[Tuple[str, float]]:
        """Rank authors by summed engagement; author_name → agent_name → author_id."""
        influence_scores: Dict[str, float] = defaultdict(float)

        for post in posts:
            author_name = self._value(post, "author_name", None)
            if author_name is None:
                author_name = self._value(post, "agent_name", self._value(post, "author_id", "unknown"))
            influence_scores[str(author_name)] += float(self._engagement_value(post))

        ranked = sorted(
            influence_scores.items(),
            key=lambda item: item[1],
            reverse=True,
        )
        return ranked[:influencer_limit]

    def _find_peak_activity_round(self, posts: List[Any]) -> int:
        """Round with most posts; ties resolved toward the earlier round."""
        if not posts:
            return 0

        rounds = Counter(int(self._value(post, "created_round", 0) or 0) for post in posts)
        peak_round, _ = max(rounds.items(), key=lambda item: (item[1], -item[0]))
        return peak_round


# =============================================================================
# Platform-specific simulation scoring helpers.
# =============================================================================

from abc import ABC, abstractmethod


class PlatformSimulator(ABC):
    """Base scoring interface for platform-specific feed behavior."""

    platform_name = "generic"

    @abstractmethod
    def score_post_visibility(self, post: Any, agent: Any, round_num: int) -> float:
        """Return a normalized visibility score for a candidate post."""

    @abstractmethod
    def compute_engagement_multiplier(self, post: Any, round_num: int) -> float:
        """Return a platform-shaped engagement multiplier for a post."""

    @staticmethod
    def _value(source: Any, key: str, default: float = 0.0) -> float:
        """Float field access tolerant of dicts, objects, and bad values."""
        if isinstance(source, dict):
            value = source.get(key, default)
        else:
            value = getattr(source, key, default)

        try:
            return float(value)
        except (TypeError, ValueError):
            return float(default)

    @staticmethod
    def _clamp(value: float, minimum: float = 0.0, maximum: float = 1.0) -> float:
        """Clamp *value* into [minimum, maximum]."""
        return max(minimum, min(maximum, value))


class TwitterSimulator(PlatformSimulator):
    """Twitter/X-like behavior: fast decay, strong engagement amplification."""

    platform_name = "twitter"

    def score_post_visibility(self, post: Any, agent: Any, round_num: int) -> float:
        """Visibility from recency, engagement, and author follower count (0-1)."""
        created_round = self._value(post, "created_round", round_num)
        likes = self._value(post, "likes")
        reposts = self._value(post, "retweets", self._value(post, "reposts"))
        replies = self._value(post, "comments", self._value(post, "replies"))
        follower_count = self._value(
            agent,
            "followers",
            self._value(agent, "follower_count", self._value(agent, "influence_weight")),
        )

        # Fast age decay (12%/round), floored at 0.1
        age_penalty = max(0.1, 1.0 - max(0.0, round_num - created_round) * 0.12)
        engagement_boost = min(2.5, 1.0 + (likes + reposts + replies) * 0.015)
        follower_boost = min(2.0, 1.0 + follower_count * 0.0002)

        raw_score = age_penalty * engagement_boost * follower_boost
        return self._clamp(raw_score / 3.5)

    def compute_engagement_multiplier(self, post: Any, round_num: int) -> float:
        """Engagement scaled by emotional intensity and controversy (round-independent)."""
        del round_num
        sentiment = abs(self._value(post, "sentiment"))
        controversy = self._value(post, "controversy", sentiment)
        return max(0.75, 1.0 + (sentiment * 0.35) + (controversy * 0.25))


class RedditSimulator(PlatformSimulator):
    """Reddit-like behavior: slower decay, quality and community weighting."""

    platform_name = "reddit"

    def score_post_visibility(self, post: Any, agent: Any, round_num: int) -> float:
        """Visibility from post quality, discussion, and community size (0-1)."""
        del agent
        created_round = self._value(post, "created_round", round_num)
        upvotes = self._value(post, "upvotes", self._value(post, "likes"))
        comments = self._value(post, "comments")
        upvote_ratio = self._value(post, "upvote_ratio", 0.75)
        subreddit_size = self._value(post, "subreddit_subscribers", self._value(post, "community_size"))

        # Slow age decay (4%/round), floored at 0.2
        age_penalty = max(0.2, 1.0 - max(0.0, round_num - created_round) * 0.04)
        quality_boost = min(2.0, (max(upvote_ratio, 0.1) ** 1.25) + (upvotes * 0.002))
        discussion_boost = min(1.5, 1.0 + comments * 0.01)
        community_boost = min(1.5, 1.0 + subreddit_size / 1000000.0)

        raw_score = age_penalty * quality_boost * discussion_boost * community_boost
        return self._clamp(raw_score / 4.5)

    def compute_engagement_multiplier(self, post: Any, round_num: int) -> float:
        """Durable engagement: slow age decline plus discussion-depth bonus."""
        created_round = self._value(post, "created_round", round_num)
        age = max(0.0, round_num - created_round)
        discussion_depth = self._value(post, "comments")
        durability = max(0.7, 1.1 - age * 0.02)
        return max(0.65, durability + min(0.4, discussion_depth * 0.01))
+ Report Title: {report_title} Report Summary: {report_summary} Prediction Scenario (Simulation Requirement): {simulation_requirement} @@ -1534,6 +1544,95 @@ def _generate_section_react( return final_answer + def _validate_section_output(self, content: str, section_title: str) -> str: + """ + Validate and clean section output for encoding quality. + + - Strips null bytes and most control characters + - Normalizes line endings + - Detects excessive non-ASCII content ratio; if > 20%, logs a warning + (re-generation is not triggered here to avoid breaking the ReACT flow, + but the caller can inspect the return value and decide) + + Args: + content: Raw section content from LLM + section_title: Section title (used for log messages) + + Returns: + Cleaned section content string + """ + # Check non-ASCII ratio + non_ascii_ratio = sum(1 for c in content if ord(c) > 127) / max(len(content), 1) + + if non_ascii_ratio > 0.2: + logger.warning( + f"Section '{section_title}' has {non_ascii_ratio:.1%} non-ASCII content — " + "possible language corruption. Consider re-generation." + ) + + # Strip null bytes + content = content.replace('\x00', '') + + # Strip most control characters (keep tab \x09, LF \x0a, CR \x0d) + content = re.sub(r'[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]', '', content) + + # Normalize line endings to LF + content = content.replace('\r\n', '\n').replace('\r', '\n') + + return content + + def _run_report_sanity_check(self, report_path: str) -> Dict[str, Any]: + """ + Run a post-generation sanity check on the assembled full report. 
+ + Checks: + - Minimum section length (< 200 chars → warning) + - Unicode replacement characters (encoding corruption) + - High non-ASCII ratio (> 10% → language contamination warning) + + Args: + report_path: Path to full_report.md + + Returns: + dict with keys: issues (List[str]), section_count (int), total_chars (int) + """ + if not os.path.exists(report_path): + return {"issues": [f"Report file not found: {report_path}"], "section_count": 0, "total_chars": 0} + + with open(report_path, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + + issues: List[str] = [] + + # Check minimum section length + sections = content.split('## ') + for section in sections[1:]: + if len(section.strip()) < 200: + issues.append(f"Section too short (< 200 chars): {section[:50].strip()}...") + + # Check for encoding artifacts + if '\ufffd' in content: + issues.append("Unicode replacement characters (\\ufffd) found — encoding corruption likely") + + # Check for mixed language / non-ASCII ratio + total_chars = len(content) + non_ascii_ratio = sum(1 for c in content if ord(c) > 127) / max(total_chars, 1) + if non_ascii_ratio > 0.1: + issues.append(f"High non-ASCII ratio: {non_ascii_ratio:.1%} — possible language contamination") + + result = { + "issues": issues, + "section_count": len(sections) - 1, + "total_chars": total_chars, + } + + if issues: + logger.warning(f"Report sanity check found {len(issues)} issue(s): {issues}") + else: + logger.info(f"Report sanity check passed — {result['section_count']} sections, {total_chars} chars") + + return result + def generate_report( self, progress_callback: Optional[Callable[[str, int, str], None]] = None, @@ -1671,6 +1770,9 @@ def generate_report( section_index=section_num ) + # Validate and clean section output + section_content = self._validate_section_output(section_content, section.title) + section.content = section_content generated_sections.append(f"## {section.title}\n\n{section_content}") @@ -1712,6 +1814,12 @@ def 
generate_report( report.markdown_content = ReportManager.assemble_full_report(report_id, outline) report.status = ReportStatus.COMPLETED report.completed_at = datetime.now().isoformat() + + # Run post-generation sanity check + full_report_path = ReportManager._get_report_markdown_path(report_id) + sanity_result = self._run_report_sanity_check(full_report_path) + if sanity_result.get("issues"): + logger.warning(f"Sanity check issues for report {report_id}: {sanity_result['issues']}") # Calculate total elapsed time total_time_seconds = (datetime.now() - start_time).total_seconds() diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index f04b4be..a24ff2c 100644 --- a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -983,5 +983,326 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: "stance": "neutral", "influence_weight": 1.0 } - + + +# ============================================================================= +# Phase 2: MultiFrameInitializer — competing narrative frames at t=0 +# ============================================================================= + +import random as _random + +@dataclass +class NarrativeFrame: + """ + Represents one particular framing of the story at simulation start. + + Example frames for "AI startup raises $100M funding": + - positive: "Huge vote of confidence in AI; good for innovation" + - skeptical: "Another AI bubble; what will really change?" + - concerned: "Centralised AI power; bad for society" + - technical: "Interesting approach, but does it scale?" + """ + frame_id: str # Unique identifier, e.g. 
class MultiFrameInitializer:
    """
    Creates N competing narrative frames at t=0 and distributes agents to them.

    Each frame represents a distinct interpretive lens through which a sub-group
    of agents views the story. Frames are generated via LLM (when a client is
    provided) or via heuristic templates; agents are assigned based on their
    ``sentiment_bias`` field from ``AgentActivityConfig``.

    Usage::

        init = MultiFrameInitializer(story_content, num_frames=4, llm_client=client, model_name="...")
        frames = init.frames
        assignments = init.distribute_agents(agent_configs)

    Args:
        story_content: The raw document text or simulation-requirement string
            that describes the narrative being simulated.
        num_frames: Number of competing frames to generate (2-8 recommended).
        llm_client: Optional OpenAI-compatible client. When None, rule-based
            frame generation is used.
        model_name: LLM model name string.
    """

    # Sentiment thresholds for frame assignment.
    # NOTE(review): assignment currently uses minimum-distance matching
    # (_find_best_frame); these thresholds are kept for API compatibility.
    POSITIVE_THRESHOLD = 0.3
    NEGATIVE_THRESHOLD = -0.3

    def __init__(
        self,
        story_content: str,
        num_frames: int = 4,
        llm_client=None,
        model_name: Optional[str] = None,
    ):
        if num_frames < 2:
            raise ValueError("num_frames must be at least 2 to create competing narratives")

        self.story_content = story_content
        self.num_frames = num_frames
        self.llm_client = llm_client
        self.model_name = model_name

        # Frames are generated eagerly so callers can inspect them immediately.
        self.frames: List[NarrativeFrame] = self._generate_frames()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def distribute_agents(
        self, agent_configs: List["AgentActivityConfig"]
    ) -> Dict[str, List[int]]:
        """
        Assign each agent to the frame that best matches their ``sentiment_bias``.

        Positive-biased agents gravitate to the most positive frame and
        negative-biased agents to the most negative one, via minimum-distance
        matching on ``initial_sentiment``.

        Also populates ``frame.agent_ids`` on each NarrativeFrame for convenience
        (any prior assignments are cleared first, so the call is idempotent).

        Args:
            agent_configs: List of AgentActivityConfig produced by the generator.
                Agents missing a ``sentiment_bias`` attribute are treated as 0.0.

        Returns:
            Dict mapping frame_id -> list of agent_ids assigned to that frame.
        """
        # Sort frames by initial_sentiment ascending so index maps cleanly
        sorted_frames = sorted(self.frames, key=lambda f: f.initial_sentiment)
        frame_assignments: Dict[str, List[int]] = {f.frame_id: [] for f in sorted_frames}

        # Clear any prior assignments on frame objects so repeated calls
        # don't accumulate stale agent ids.
        for frame in sorted_frames:
            frame.agent_ids = []

        for agent in agent_configs:
            bias = getattr(agent, "sentiment_bias", 0.0)
            frame = self._find_best_frame(bias, sorted_frames)
            frame_assignments[frame.frame_id].append(agent.agent_id)
            frame.agent_ids.append(agent.agent_id)

        # Log distribution
        for frame in sorted_frames:
            logger.info(
                f"MultiFrameInitializer: frame '{frame.frame_id}' "
                f"({frame.initial_sentiment:+.2f}) → {len(frame.agent_ids)} agents"
            )

        return frame_assignments

    # ------------------------------------------------------------------
    # Frame generation
    # ------------------------------------------------------------------

    def _generate_frames(self) -> List[NarrativeFrame]:
        """Generate NarrativeFrame objects via LLM or rule-based fallback."""
        if self.llm_client and self.model_name:
            try:
                return self._generate_frames_llm()
            except Exception as e:
                # Deliberate best-effort: any LLM failure degrades to templates.
                logger.warning(f"MultiFrameInitializer: LLM frame generation failed ({e}), using rule-based fallback")
        return self._generate_frames_rule_based()

    def _generate_frames_llm(self) -> List[NarrativeFrame]:
        """
        Use LLM to produce N alternative narrative framings of the story.

        Raises:
            ValueError: if the LLM response contains no usable frames
                (caught by _generate_frames, which falls back to templates).
        """
        # Cap prompt size; frame generation only needs the gist of the story.
        story_snippet = self.story_content[:2000]

        prompt = (
            f"Given this story:\n\n{story_snippet}\n\n"
            f"Generate {self.num_frames} distinct ways different people could frame or interpret this story.\n"
            f"For each frame provide:\n"
            f"  - frame_id: short snake_case identifier (e.g. positive, skeptical, concerned)\n"
            f"  - label: concise human-readable label\n"
            f"  - description: 2-3 sentences describing this interpretive lens\n"
            f"  - initial_sentiment: float from -1.0 (very negative) to 1.0 (very positive)\n"
            f"  - talking_points: list of 3-5 representative talking points\n\n"
            f"Ensure frames cover a spectrum from positive to negative.\n"
            f"Return JSON: {{\"frames\": [ ... ]}}"
        )

        response = self.llm_client.chat.completions.create(
            model=self.model_name,
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a social narrative analyst. "
                        "Return pure JSON only. No markdown, no explanation."
                    ),
                },
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"},
            temperature=0.8,
        )

        raw = json.loads(response.choices[0].message.content)
        frames_data = raw.get("frames", [])

        frames: List[NarrativeFrame] = []
        seen_ids = set()
        for item in frames_data[: self.num_frames]:
            frame_id = item.get("frame_id", f"frame_{len(frames)}")
            # De-duplicate ids so distribute_agents' frame_id-keyed dict
            # cannot silently collapse two frames into one.
            if frame_id in seen_ids:
                frame_id = f"{frame_id}_{len(frames)}"
            seen_ids.add(frame_id)

            # Tolerate a single malformed sentiment instead of discarding the
            # whole LLM result, and clamp into the documented [-1.0, 1.0] range.
            try:
                sentiment = float(item.get("initial_sentiment", 0.0))
            except (TypeError, ValueError):
                sentiment = 0.0
            sentiment = max(-1.0, min(1.0, sentiment))

            talking_points = item.get("talking_points", [])
            if not isinstance(talking_points, list):
                talking_points = [str(talking_points)]

            frames.append(
                NarrativeFrame(
                    frame_id=frame_id,
                    label=item.get("label", frame_id),
                    description=item.get("description", ""),
                    initial_sentiment=sentiment,
                    talking_points=talking_points,
                )
            )

        if not frames:
            raise ValueError("LLM returned no valid frames")

        logger.info(f"MultiFrameInitializer: generated {len(frames)} frames via LLM")
        return frames

    def _generate_frames_rule_based(self) -> List[NarrativeFrame]:
        """
        Heuristic frame generation covering a sentiment spectrum.

        Uses a fixed set of archetypes and trims/extends to exactly num_frames:
        when fewer frames are requested than archetypes exist, indices are
        sampled evenly across the sentiment-ordered pool; when more are
        requested, archetypes are cycled with unique ids.
        """
        template_pool: List[Dict[str, Any]] = [
            {
                "frame_id": "positive",
                "label": "Positive / Optimistic",
                "description": (
                    "This group interprets the story as fundamentally good news. "
                    "They highlight benefits, progress, and opportunities, and "
                    "actively share upbeat takes."
                ),
                "initial_sentiment": 0.7,
                "talking_points": [
                    "This is a significant step forward.",
                    "The benefits far outweigh any concerns.",
                    "We should celebrate this progress.",
                ],
            },
            {
                "frame_id": "supportive",
                "label": "Supportive / Constructive",
                "description": (
                    "Broadly in favour but with nuanced caveats. "
                    "They want to see it succeed and offer constructive feedback."
                ),
                "initial_sentiment": 0.4,
                "talking_points": [
                    "Good direction overall, though execution matters.",
                    "Let's make sure the details are done right.",
                    "Cautious optimism — promising so far.",
                ],
            },
            {
                "frame_id": "neutral",
                "label": "Neutral / Wait-and-See",
                "description": (
                    "Observers who neither endorse nor oppose. "
                    "They track developments without strong sentiment."
                ),
                "initial_sentiment": 0.0,
                "talking_points": [
                    "Withholding judgment until more information is available.",
                    "Both sides have valid points worth considering.",
                    "Too early to draw conclusions.",
                ],
            },
            {
                "frame_id": "skeptical",
                "label": "Skeptical / Critical",
                "description": (
                    "This group questions the narrative and scrutinises the claims "
                    "being made. They raise concerns and highlight potential downsides."
                ),
                "initial_sentiment": -0.35,
                "talking_points": [
                    "The evidence doesn't fully support the optimism.",
                    "We've seen this before — be cautious.",
                    "Important questions remain unanswered.",
                ],
            },
            {
                "frame_id": "concerned",
                "label": "Concerned / Alarmed",
                "description": (
                    "Deeply worried about the implications. "
                    "They frame the story as a warning sign or threat and call for action."
                ),
                "initial_sentiment": -0.6,
                "talking_points": [
                    "This sets a dangerous precedent.",
                    "The risks to society are being ignored.",
                    "We need stronger oversight immediately.",
                ],
            },
            {
                "frame_id": "hostile",
                "label": "Hostile / Oppositional",
                "description": (
                    "Strongly against the narrative; view it as harmful or dishonest. "
                    "They actively try to counter or undermine it."
                ),
                "initial_sentiment": -0.85,
                "talking_points": [
                    "This is misleading and should be challenged.",
                    "Follow the money — whose interests are being served?",
                    "We must push back against this narrative.",
                ],
            },
        ]

        pool_size = len(template_pool)
        if self.num_frames >= pool_size:
            selected = list(template_pool)
            # Honor the documented "extends" contract: cycle archetypes with
            # suffixed unique ids when more frames are requested than exist.
            for i in range(self.num_frames - pool_size):
                base = template_pool[i % pool_size]
                variant = dict(base)
                variant["frame_id"] = f"{base['frame_id']}_{2 + i // pool_size}"
                selected.append(variant)
        else:
            # Evenly sample indices across the sentiment-ordered pool so the
            # extremes (positive, hostile) are always included; dict.fromkeys
            # dedupes defensively while preserving order.
            step = (pool_size - 1) / max(self.num_frames - 1, 1)
            indices = list(dict.fromkeys(round(i * step) for i in range(self.num_frames)))
            selected = [template_pool[idx] for idx in indices]

        frames = [
            NarrativeFrame(
                frame_id=t["frame_id"],
                label=t["label"],
                description=t["description"],
                initial_sentiment=t["initial_sentiment"],
                talking_points=t["talking_points"],
            )
            for t in selected
        ]

        logger.info(f"MultiFrameInitializer: generated {len(frames)} frames via rule-based fallback")
        return frames

    # ------------------------------------------------------------------
    # Agent assignment logic
    # ------------------------------------------------------------------

    def _find_best_frame(
        self, sentiment_bias: float, sorted_frames: List[NarrativeFrame]
    ) -> NarrativeFrame:
        """
        Return the frame whose initial_sentiment is closest to *sentiment_bias*.

        Uses simple minimum-distance matching so agents cluster naturally
        around the frame that matches their disposition.
        """
        best = min(
            sorted_frames,
            key=lambda f: abs(f.initial_sentiment - sentiment_bias),
        )
        return best
- # Or use pre-built image (when available): - # image: ghcr.io/nikmcfly/mirofish-offline:latest container_name: mirofish-offline env_file: - .env @@ -15,15 +13,13 @@ services: depends_on: neo4j: condition: service_healthy - ollama: - condition: service_started neo4j: image: neo4j:5.15-community container_name: mirofish-neo4j ports: - - "7474:7474" # Neo4j Browser - - "7687:7687" # Bolt protocol + - "7474:7474" + - "7687:7687" environment: - NEO4J_AUTH=${NEO4J_USER:-neo4j}/${NEO4J_PASSWORD:-mirofish} - NEO4J_PLUGINS=["apoc"] @@ -40,23 +36,6 @@ services: retries: 10 start_period: 30s - ollama: - image: ollama/ollama:latest - container_name: mirofish-ollama - ports: - - "11434:11434" - volumes: - - ollama_data:/root/.ollama - restart: unless-stopped - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - volumes: neo4j_data: neo4j_logs: - ollama_data: