Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,16 +1,47 @@
# LLM API配置(支持 OpenAI SDK 格式的任意 LLM API)
# 推荐使用阿里百炼平台qwen-plus模型:https://bailian.console.aliyun.com/
# 注意消耗较大,可先进行小于40轮的模拟尝试
# ===== LLM Provider =====
# Supported values: auto | openai | anthropic | github-copilot | ollama | claude
# 'auto' detects the provider from the model name and available env vars.
LLM_PROVIDER=auto

# ===== Option 1: Anthropic Claude (recommended for quality) =====
# Get your key at: https://console.anthropic.com/settings/keys
# LLM_API_KEY=sk-ant-api03-...
# LLM_MODEL_NAME=claude-sonnet-4-20250514
# Notes:
# - LLM_BASE_URL is not needed; the SDK uses the official endpoint automatically.
# - Model names starting with 'claude-' auto-select the anthropic provider.

# ===== Option 2: Alibaba Bailian / Qwen (recommended for cost) =====
# Get your key at: https://bailian.console.aliyun.com/
# Note: high simulation rounds consume significant tokens — start with <40 rounds.
LLM_API_KEY=your_api_key_here
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
LLM_MODEL_NAME=qwen-plus

# ===== ZEP记忆图谱配置 =====
# 每月免费额度即可支撑简单使用:https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here
# ===== Option 3: GitHub Copilot (no separate API key needed) =====
# Uses your existing Copilot subscription. Rate limits are lower than direct APIs.
# LLM_PROVIDER=github-copilot
# GITHUB_TOKEN=ghp_your_github_pat_here # or use GH_TOKEN / COPILOT_GITHUB_TOKEN
# LLM_MODEL_NAME=gpt-4o

# ===== Option 4: Ollama (fully local, no API key) =====
# Requires Ollama running locally: https://ollama.ai
# LLM_PROVIDER=ollama
# LLM_BASE_URL=http://localhost:11434/v1
# LLM_MODEL_NAME=llama3.2

# ===== 加速 LLM 配置(可选)=====
# 注意:如果不使用加速配置,env 文件中就不要出现下面的配置项
LLM_BOOST_API_KEY=your_api_key_here
LLM_BOOST_BASE_URL=your_base_url_here
LLM_BOOST_MODEL_NAME=your_model_name_here
# ===== Option 5: Claude CLI subprocess =====
# Uses the `claude` CLI — no API key needed beyond your Claude account login.
# LLM_PROVIDER=claude
# CLAUDE_CLI_PATH=claude
# LLM_MODEL_NAME=claude-sonnet-4-20250514

# ===== Optional: Boost LLM (faster/cheaper secondary model) =====
# If unset, the primary LLM is used for all calls.
# LLM_BOOST_API_KEY=your_api_key_here
# LLM_BOOST_BASE_URL=your_base_url_here
# LLM_BOOST_MODEL_NAME=your_model_name_here

# ===== Zep memory graph =====
# Free tier is sufficient for basic use: https://app.getzep.com/
ZEP_API_KEY=your_zep_api_key_here
66 changes: 32 additions & 34 deletions backend/app/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""
MiroFish Backend - Flask应用工厂
MiroFish Backend — Flask application factory.
"""

import os
import warnings

# 抑制 multiprocessing resource_tracker 的警告(来自第三方库如 transformers)
# 需要在所有其他导入之前设置
# Suppress multiprocessing resource_tracker warnings from third-party libs (e.g. transformers).
# Must be set before all other imports.
warnings.filterwarnings("ignore", message=".*resource_tracker.*")

from flask import Flask, request
Expand All @@ -17,64 +17,62 @@


def create_app(config_class=Config):
"""Flask应用工厂函数"""
"""Create and configure the Flask application."""
app = Flask(__name__)
app.config.from_object(config_class)
# 设置JSON编码:确保中文直接显示(而不是 \uXXXX 格式)
# Flask >= 2.3 使用 app.json.ensure_ascii,旧版本使用 JSON_AS_ASCII 配置

# Ensure non-ASCII characters (e.g. Chinese) render as-is in JSON responses
# Flask >= 2.3 uses app.json.ensure_ascii; older versions use JSON_AS_ASCII config key
if hasattr(app, 'json') and hasattr(app.json, 'ensure_ascii'):
app.json.ensure_ascii = False

# 设置日志

logger = setup_logger('mirofish')
# 只在 reloader 子进程中打印启动信息(避免 debug 模式下打印两次)

# Only log startup info once — avoid duplicate output in debug/reloader mode
is_reloader_process = os.environ.get('WERKZEUG_RUN_MAIN') == 'true'
debug_mode = app.config.get('DEBUG', False)
should_log_startup = not debug_mode or is_reloader_process

if should_log_startup:
logger.info("=" * 50)
logger.info("MiroFish Backend 启动中...")
logger.info("MiroFish Backend starting...")
logger.info("=" * 50)
# 启用CORS

# Enable CORS for all API routes
CORS(app, resources={r"/api/*": {"origins": "*"}})
# 注册模拟进程清理函数(确保服务器关闭时终止所有模拟进程)

# Register simulation process cleanup on server shutdown
from .services.simulation_runner import SimulationRunner
SimulationRunner.register_cleanup()
if should_log_startup:
logger.info("已注册模拟进程清理函数")
# 请求日志中间件
logger.info("Simulation process cleanup registered")

# Request/response logging middleware
@app.before_request
def log_request():
logger = get_logger('mirofish.request')
logger.debug(f"请求: {request.method} {request.path}")
req_logger = get_logger('mirofish.request')
req_logger.debug(f"Request: {request.method} {request.path}")
if request.content_type and 'json' in request.content_type:
logger.debug(f"请求体: {request.get_json(silent=True)}")
req_logger.debug(f"Body: {request.get_json(silent=True)}")

@app.after_request
def log_response(response):
logger = get_logger('mirofish.request')
logger.debug(f"响应: {response.status_code}")
req_logger = get_logger('mirofish.request')
req_logger.debug(f"Response: {response.status_code}")
return response
# 注册蓝图

# Register blueprints
from .api import graph_bp, simulation_bp, report_bp
app.register_blueprint(graph_bp, url_prefix='/api/graph')
app.register_blueprint(simulation_bp, url_prefix='/api/simulation')
app.register_blueprint(report_bp, url_prefix='/api/report')
# 健康检查

# Health check
@app.route('/health')
def health():
return {'status': 'ok', 'service': 'MiroFish Backend'}

if should_log_startup:
logger.info("MiroFish Backend 启动完成")

return app
logger.info("MiroFish Backend ready")

return app
2 changes: 1 addition & 1 deletion backend/app/api/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
API路由模块
API route modules.
"""

from flask import Blueprint
Expand Down
164 changes: 124 additions & 40 deletions backend/app/config.py
Original file line number Diff line number Diff line change
@@ -1,75 +1,159 @@
"""
配置管理
统一从项目根目录的 .env 文件加载配置
Configuration management.
Loads settings from the .env file at the project root (MiroFish/.env).
"""

import os
import shutil
from dotenv import load_dotenv

# 加载项目根目录的 .env 文件
# 路径: MiroFish/.env (相对于 backend/app/config.py)
project_root_env = os.path.join(os.path.dirname(__file__), '../../.env')
# Load .env from project root — path relative to backend/app/config.py
_project_root_env = os.path.join(os.path.dirname(__file__), '../../.env')

if os.path.exists(project_root_env):
load_dotenv(project_root_env, override=True)
if os.path.exists(_project_root_env):
load_dotenv(_project_root_env, override=True)
else:
# 如果根目录没有 .env,尝试加载环境变量(用于生产环境)
# No .env at root — fall back to environment variables (production mode)
load_dotenv(override=True)


class Config:
"""Flask配置类"""
# Flask配置
"""Flask configuration class."""

# ── Flask ────────────────────────────────────────────────────────────────
SECRET_KEY = os.environ.get('SECRET_KEY', 'mirofish-secret-key')
DEBUG = os.environ.get('FLASK_DEBUG', 'True').lower() == 'true'
# JSON配置 - 禁用ASCII转义,让中文直接显示(而不是 \uXXXX 格式)

# Disable ASCII escaping so non-ASCII characters display correctly in JSON
JSON_AS_ASCII = False

# LLM配置(统一使用OpenAI格式)

# ── LLM provider selection ────────────────────────────────────────────────
# Supported values:
# auto (default) — infer from LLM_MODEL_NAME and available env vars
# openai — OpenAI SDK (works with any OpenAI-compatible API)
# anthropic — Anthropic Claude SDK (direct API, requires LLM_API_KEY)
# github-copilot — GitHub Copilot via token exchange (requires GITHUB_TOKEN)
# ollama — Local Ollama via OpenAI-compatible endpoint
# claude — Claude CLI subprocess (requires `claude` CLI installed)
LLM_PROVIDER = os.environ.get('LLM_PROVIDER', 'auto')

# ── LLM credentials & model ──────────────────────────────────────────────
LLM_API_KEY = os.environ.get('LLM_API_KEY')
LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1')
LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini')

# Zep配置

# Optional: faster/cheaper secondary LLM for non-critical calls
LLM_BOOST_API_KEY = os.environ.get('LLM_BOOST_API_KEY')
LLM_BOOST_BASE_URL = os.environ.get('LLM_BOOST_BASE_URL')
LLM_BOOST_MODEL_NAME = os.environ.get('LLM_BOOST_MODEL_NAME')

# ── Provider-specific settings ───────────────────────────────────────────
# Claude CLI: path to the `claude` executable (used when LLM_PROVIDER=claude)
CLAUDE_CLI_PATH = os.environ.get('CLAUDE_CLI_PATH', 'claude')

# GitHub Copilot: token env vars (checked in priority order)
# COPILOT_GITHUB_TOKEN > GH_TOKEN > GITHUB_TOKEN
GITHUB_TOKEN = (
os.environ.get('COPILOT_GITHUB_TOKEN')
or os.environ.get('GH_TOKEN')
or os.environ.get('GITHUB_TOKEN')
)

@classmethod
def get_resolved_provider(cls) -> str:
"""
Resolve the effective LLM provider.

In 'auto' mode the provider is inferred:
1. Model name starts with 'claude-' → anthropic
2. GitHub token present, no API key → github-copilot
3. Otherwise → openai
"""
provider = (cls.LLM_PROVIDER or 'auto').lower()
if provider != 'auto':
return provider

model = (cls.LLM_MODEL_NAME or '').lower()
if model.startswith('claude-'):
return 'anthropic'
if cls.GITHUB_TOKEN and not cls.LLM_API_KEY:
return 'github-copilot'
return 'openai'

# ── Zep memory graph ─────────────────────────────────────────────────────
ZEP_API_KEY = os.environ.get('ZEP_API_KEY')
# 文件上传配置
MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB

# ── File uploads ─────────────────────────────────────────────────────────
MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50 MB
UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads')
ALLOWED_EXTENSIONS = {'pdf', 'md', 'txt', 'markdown'}
# 文本处理配置
DEFAULT_CHUNK_SIZE = 500 # 默认切块大小
DEFAULT_CHUNK_OVERLAP = 50 # 默认重叠大小
# OASIS模拟配置

# ── Text chunking ────────────────────────────────────────────────────────
DEFAULT_CHUNK_SIZE = 500
DEFAULT_CHUNK_OVERLAP = 50

# ── OASIS simulation ─────────────────────────────────────────────────────
OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10'))
OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations')

# OASIS平台可用动作配置
OASIS_SIMULATION_DATA_DIR = os.path.join(
os.path.dirname(__file__), '../uploads/simulations'
)

OASIS_TWITTER_ACTIONS = [
'CREATE_POST', 'LIKE_POST', 'REPOST', 'FOLLOW', 'DO_NOTHING', 'QUOTE_POST'
'CREATE_POST', 'LIKE_POST', 'REPOST', 'FOLLOW', 'DO_NOTHING', 'QUOTE_POST',
]
OASIS_REDDIT_ACTIONS = [
'LIKE_POST', 'DISLIKE_POST', 'CREATE_POST', 'CREATE_COMMENT',
'LIKE_COMMENT', 'DISLIKE_COMMENT', 'SEARCH_POSTS', 'SEARCH_USER',
'TREND', 'REFRESH', 'DO_NOTHING', 'FOLLOW', 'MUTE'
'TREND', 'REFRESH', 'DO_NOTHING', 'FOLLOW', 'MUTE',
]
# Report Agent配置

# ── Report Agent ─────────────────────────────────────────────────────────
REPORT_AGENT_MAX_TOOL_CALLS = int(os.environ.get('REPORT_AGENT_MAX_TOOL_CALLS', '5'))
REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2'))
REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(
os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2')
)
REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5'))

@classmethod
def validate(cls):
"""验证必要配置"""
"""Validate required configuration and return a list of error strings."""
import logging
log = logging.getLogger('mirofish.config')
errors = []
if not cls.LLM_API_KEY:
errors.append("LLM_API_KEY 未配置")
provider = cls.get_resolved_provider()

if provider == 'anthropic':
if not cls.LLM_API_KEY:
errors.append(
"LLM_API_KEY is required for the anthropic provider. "
"Get a key at https://console.anthropic.com/settings/keys"
)
elif provider == 'openai':
if not cls.LLM_API_KEY:
errors.append("LLM_API_KEY is not configured (required for openai provider)")
elif provider == 'github-copilot':
if not cls.GITHUB_TOKEN:
errors.append(
"github-copilot provider requires a GitHub token. "
"Set COPILOT_GITHUB_TOKEN, GH_TOKEN, or GITHUB_TOKEN in .env."
)
elif provider == 'ollama':
if cls.LLM_BASE_URL and '11434' not in cls.LLM_BASE_URL:
log.warning(
"LLM_BASE_URL (%s) doesn't include port 11434; "
"make sure Ollama is reachable at that address.",
cls.LLM_BASE_URL,
)
elif provider == 'claude':
if shutil.which(cls.CLAUDE_CLI_PATH) is None:
errors.append(
f"Claude CLI not found at '{cls.CLAUDE_CLI_PATH}'. "
"Install it (https://claude.ai/download) or set CLAUDE_CLI_PATH in .env."
)

if not cls.ZEP_API_KEY:
errors.append("ZEP_API_KEY 未配置")
errors.append(
"ZEP_API_KEY is not configured. "
"Get a free key at https://app.getzep.com/"
)
return errors

Loading