diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index 12ff1ba2d..3b9c17c4a 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -1,6 +1,6 @@ """ -图谱相关API路由 -采用项目上下文机制,服务端持久化状态 +Graph-related API routes +Uses project context mechanism for server-side state persistence """ import os @@ -18,33 +18,33 @@ from ..models.task import TaskManager, TaskStatus from ..models.project import ProjectManager, ProjectStatus -# 获取日志器 +# Get logger logger = get_logger('mirofish.api') def allowed_file(filename: str) -> bool: - """检查文件扩展名是否允许""" + """Check if file extension is allowed""" if not filename or '.' not in filename: return False ext = os.path.splitext(filename)[1].lower().lstrip('.') return ext in Config.ALLOWED_EXTENSIONS -# ============== 项目管理接口 ============== +# ============== Project Management Interfaces ============== @graph_bp.route('/project/', methods=['GET']) def get_project(project_id: str): """ - 获取项目详情 + Get project details """ project = ProjectManager.get_project(project_id) - + if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": f"Project does not exist: {project_id}" }), 404 - + return jsonify({ "success": True, "data": project.to_dict() @@ -54,11 +54,11 @@ def get_project(project_id: str): @graph_bp.route('/project/list', methods=['GET']) def list_projects(): """ - 列出所有项目 + List all projects """ limit = request.args.get('limit', 50, type=int) projects = ProjectManager.list_projects(limit=limit) - + return jsonify({ "success": True, "data": [p.to_dict() for p in projects], @@ -69,69 +69,69 @@ def list_projects(): @graph_bp.route('/project/', methods=['DELETE']) def delete_project(project_id: str): """ - 删除项目 + Delete project """ success = ProjectManager.delete_project(project_id) - + if not success: return jsonify({ "success": False, - "error": f"项目不存在或删除失败: {project_id}" + "error": f"Project does not exist or delete failed: {project_id}" }), 404 - + return jsonify({ "success": True, - "message": f"项目已删除: {project_id}" + "message": f"Project deleted: {project_id}" }) @graph_bp.route('/project//reset', methods=['POST']) def reset_project(project_id: str): """ - 重置项目状态(用于重新构建图谱) + Reset project status (for rebuilding graph) """ project = ProjectManager.get_project(project_id) - + if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": f"Project does not exist: {project_id}" }), 404 - - # 重置到本体已生成状态 + + # Reset to ontology generated status if project.ontology: project.status = ProjectStatus.ONTOLOGY_GENERATED else: project.status = ProjectStatus.CREATED - + project.graph_id = None project.graph_build_task_id = None project.error = None ProjectManager.save_project(project) - + return jsonify({ "success": True, - "message": f"项目已重置: {project_id}", + "message": f"Project reset: {project_id}", "data": project.to_dict() }) -# ============== 接口1:上传文件并生成本体 ============== +# ============== Interface 1: Upload file and generate ontology ============== @graph_bp.route('/ontology/generate', methods=['POST']) def generate_ontology(): """ - 接口1:上传文件,分析生成本体定义 - - 请求方式:multipart/form-data - - 参数: - files: 上传的文件(PDF/MD/TXT),可多个 - simulation_requirement: 模拟需求描述(必填) - project_name: 项目名称(可选) - additional_context: 额外说明(可选) - - 返回: + Interface 1: Upload file, analyze and generate ontology definition + + Request method: multipart/form-data + + Parameters: + files: Uploaded files (PDF/MD/TXT), multiple allowed + simulation_requirement: Simulation requirement description (required) + project_name: Project name (optional) + additional_context: Additional context (optional) + + Returns: { "success": true, "data": { @@ -147,84 +147,84 @@ def generate_ontology(): } """ try: - logger.info("=== 开始生成本体定义 ===") - - # 获取参数 + logger.info("=== Starting ontology generation ===") + + # Get parameters simulation_requirement = request.form.get('simulation_requirement', '') project_name = request.form.get('project_name', 'Unnamed Project') additional_context = request.form.get('additional_context', '') - - logger.debug(f"项目名称: {project_name}") - logger.debug(f"模拟需求: {simulation_requirement[:100]}...") - + + logger.debug(f"Project name: {project_name}") + logger.debug(f"Simulation requirement: {simulation_requirement[:100]}...") + if not simulation_requirement: return jsonify({ "success": False, - "error": "请提供模拟需求描述 (simulation_requirement)" + "error": "Please provide simulation requirement description (simulation_requirement)" }), 400 - - # 获取上传的文件 + + # Get uploaded files uploaded_files = request.files.getlist('files') if not uploaded_files or all(not f.filename for f in uploaded_files): return jsonify({ "success": False, - "error": "请至少上传一个文档文件" + "error": "Please upload at least one document file" }), 400 - - # 创建项目 + + # Create project project = ProjectManager.create_project(name=project_name) project.simulation_requirement = simulation_requirement - logger.info(f"创建项目: {project.project_id}") - - # 保存文件并提取文本 + logger.info(f"Created project: {project.project_id}") + + # Save files and extract text document_texts = [] all_text = "" - + for file in uploaded_files: if file and file.filename and allowed_file(file.filename): - # 保存文件到项目目录 + # Save file to project directory file_info = ProjectManager.save_file_to_project( - project.project_id, - file, + project.project_id, + file, file.filename ) project.files.append({ "filename": file_info["original_filename"], "size": file_info["size"] }) - - # 提取文本 + + # Extract text text = FileParser.extract_text(file_info["path"]) text = TextProcessor.preprocess_text(text) document_texts.append(text) all_text += f"\n\n=== {file_info['original_filename']} ===\n{text}" - + if not document_texts: ProjectManager.delete_project(project.project_id) return jsonify({ "success": False, - "error": "没有成功处理任何文档,请检查文件格式" + "error": "No documents were successfully processed, please check file formats" }), 400 - - # 保存提取的文本 + + # Save extracted text project.total_text_length = len(all_text) ProjectManager.save_extracted_text(project.project_id, all_text) - logger.info(f"文本提取完成,共 {len(all_text)} 字符") - - # 生成本体 - logger.info("调用 LLM 生成本体定义...") + logger.info(f"Text extraction complete, total {len(all_text)} characters") + + # Generate ontology + logger.info("Calling LLM to generate ontology definition...") generator = OntologyGenerator() ontology = generator.generate( document_texts=document_texts, simulation_requirement=simulation_requirement, additional_context=additional_context if additional_context else None ) - - # 保存本体到项目 + + # Save ontology to project entity_count = len(ontology.get("entity_types", [])) edge_count = len(ontology.get("edge_types", [])) - logger.info(f"本体生成完成: {entity_count} 个实体类型, {edge_count} 个关系类型") - + logger.info(f"Ontology generation complete: {entity_count} entity types, {edge_count} edge types") + project.ontology = { "entity_types": ontology.get("entity_types", []), "edge_types": ontology.get("edge_types", []) @@ -232,8 +232,8 @@ def generate_ontology(): project.analysis_summary = ontology.get("analysis_summary", "") project.status = ProjectStatus.ONTOLOGY_GENERATED ProjectManager.save_project(project) - logger.info(f"=== 本体生成完成 === 项目ID: {project.project_id}") - + logger.info(f"=== Ontology generation complete === Project ID: {project.project_id}") + return jsonify({ "success": True, "data": { @@ -245,7 +245,7 @@ def generate_ontology(): "total_text_length": project.total_text_length } }) - + except Exception as e: return jsonify({ "success": False, @@ -254,170 +254,170 @@ def generate_ontology(): }), 500 -# ============== 接口2:构建图谱 ============== +# ============== Interface 2: Build graph ============== @graph_bp.route('/build', methods=['POST']) def build_graph(): """ - 接口2:根据project_id构建图谱 - - 请求(JSON): + Interface 2: Build graph based on project_id + + Request (JSON): { - "project_id": "proj_xxxx", // 必填,来自接口1 - "graph_name": "图谱名称", // 可选 - "chunk_size": 500, // 可选,默认500 - "chunk_overlap": 50 // 可选,默认50 + "project_id": "proj_xxxx", // Required, from interface 1 + "graph_name": "Graph name", // Optional + "chunk_size": 500, // Optional, default 500 + "chunk_overlap": 50 // Optional, default 50 } - - 返回: + + Returns: { "success": true, "data": { "project_id": "proj_xxxx", "task_id": "task_xxxx", - "message": "图谱构建任务已启动" + "message": "Graph build task started" } } """ try: - logger.info("=== 开始构建图谱 ===") - - # 检查配置 + logger.info("=== Starting graph build ===") + + # Check configuration errors = [] if not Config.ZEP_API_KEY: - errors.append("ZEP_API_KEY未配置") + errors.append("ZEP_API_KEY not configured") if errors: - logger.error(f"配置错误: {errors}") + logger.error(f"Configuration error: {errors}") return jsonify({ "success": False, - "error": "配置错误: " + "; ".join(errors) + "error": "Configuration error: " + "; ".join(errors) }), 500 - - # 解析请求 + + # Parse request data = request.get_json() or {} project_id = data.get('project_id') - logger.debug(f"请求参数: project_id={project_id}") - + logger.debug(f"Request parameters: project_id={project_id}") + if not project_id: return jsonify({ "success": False, - "error": "请提供 project_id" + "error": "Please provide project_id" }), 400 - - # 获取项目 + + # Get project project = ProjectManager.get_project(project_id) if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": f"Project does not exist: {project_id}" }), 404 - - # 检查项目状态 - force = data.get('force', False) # 强制重新构建 - + + # Check project status + force = data.get('force', False) # Force rebuild + if project.status == ProjectStatus.CREATED: return jsonify({ "success": False, - "error": "项目尚未生成本体,请先调用 /ontology/generate" + "error": "Project has not generated ontology yet, please call /ontology/generate first" }), 400 - + if project.status == ProjectStatus.GRAPH_BUILDING and not force: return jsonify({ "success": False, - "error": "图谱正在构建中,请勿重复提交。如需强制重建,请添加 force: true", + "error": "Graph is being built, please do not resubmit. To force rebuild, add force: true", "task_id": project.graph_build_task_id }), 400 - - # 如果强制重建,重置状态 + + # If force rebuild, reset status if force and project.status in [ProjectStatus.GRAPH_BUILDING, ProjectStatus.FAILED, ProjectStatus.GRAPH_COMPLETED]: project.status = ProjectStatus.ONTOLOGY_GENERATED project.graph_id = None project.graph_build_task_id = None project.error = None - - # 获取配置 + + # Get configuration graph_name = data.get('graph_name', project.name or 'MiroFish Graph') chunk_size = data.get('chunk_size', project.chunk_size or Config.DEFAULT_CHUNK_SIZE) chunk_overlap = data.get('chunk_overlap', project.chunk_overlap or Config.DEFAULT_CHUNK_OVERLAP) - - # 更新项目配置 + + # Update project configuration project.chunk_size = chunk_size project.chunk_overlap = chunk_overlap - - # 获取提取的文本 + + # Get extracted text text = ProjectManager.get_extracted_text(project_id) if not text: return jsonify({ "success": False, - "error": "未找到提取的文本内容" + "error": "Extracted text content not found" }), 400 - - # 获取本体 + + # Get ontology ontology = project.ontology if not ontology: return jsonify({ "success": False, - "error": "未找到本体定义" + "error": "Ontology definition not found" }), 400 - - # 创建异步任务 + + # Create async task task_manager = TaskManager() - task_id = task_manager.create_task(f"构建图谱: {graph_name}") - logger.info(f"创建图谱构建任务: task_id={task_id}, project_id={project_id}") - - # 更新项目状态 + task_id = task_manager.create_task(f"Building graph: {graph_name}") + logger.info(f"Created graph build task: task_id={task_id}, project_id={project_id}") + + # Update project status project.status = ProjectStatus.GRAPH_BUILDING project.graph_build_task_id = task_id ProjectManager.save_project(project) - - # 启动后台任务 + + # Start background task def build_task(): build_logger = get_logger('mirofish.build') try: - build_logger.info(f"[{task_id}] 开始构建图谱...") + build_logger.info(f"[{task_id}] Starting graph build...") task_manager.update_task( - task_id, + task_id, status=TaskStatus.PROCESSING, - message="初始化图谱构建服务..." + message="Initializing graph build service..." ) - - # 创建图谱构建服务 + + # Create graph build service builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) - - # 分块 + + # Chunk text task_manager.update_task( task_id, - message="文本分块中...", + message="Splitting text into chunks...", progress=5 ) chunks = TextProcessor.split_text( - text, - chunk_size=chunk_size, + text, + chunk_size=chunk_size, overlap=chunk_overlap ) total_chunks = len(chunks) - - # 创建图谱 + + # Create graph task_manager.update_task( task_id, - message="创建Zep图谱...", + message="Creating Zep graph...", progress=10 ) graph_id = builder.create_graph(name=graph_name) - - # 更新项目的graph_id + + # Update project's graph_id project.graph_id = graph_id ProjectManager.save_project(project) - - # 设置本体 + + # Set ontology task_manager.update_task( task_id, - message="设置本体定义...", + message="Setting ontology definition...", progress=15 ) builder.set_ontology(graph_id, ontology) - - # 添加文本(progress_callback 签名是 (msg, progress_ratio)) + + # Add text (progress_callback signature is (msg, progress_ratio)) def add_progress_callback(msg, progress_ratio): progress = 15 + int(progress_ratio * 40) # 15% - 55% task_manager.update_task( @@ -425,27 +425,27 @@ def add_progress_callback(msg, progress_ratio): message=msg, progress=progress ) - + task_manager.update_task( task_id, - message=f"开始添加 {total_chunks} 个文本块...", + message=f"Starting to add {total_chunks} text chunks...", progress=15 ) - + episode_uuids = builder.add_text_batches( - graph_id, + graph_id, chunks, batch_size=3, progress_callback=add_progress_callback ) - - # 等待Zep处理完成(查询每个episode的processed状态) + + # Wait for Zep to process (query each episode's processed status) task_manager.update_task( task_id, - message="等待Zep处理数据...", + message="Waiting for Zep to process data...", progress=55 ) - + def wait_progress_callback(msg, progress_ratio): progress = 55 + int(progress_ratio * 35) # 55% - 90% task_manager.update_task( @@ -453,30 +453,30 @@ def wait_progress_callback(msg, progress_ratio): message=msg, progress=progress ) - + builder._wait_for_episodes(episode_uuids, wait_progress_callback) - - # 获取图谱数据 + + # Get graph data task_manager.update_task( task_id, - message="获取图谱数据...", + message="Getting graph data...", progress=95 ) graph_data = builder.get_graph_data(graph_id) - - # 更新项目状态 + + # Update project status project.status = ProjectStatus.GRAPH_COMPLETED ProjectManager.save_project(project) - + node_count = graph_data.get("node_count", 0) edge_count = graph_data.get("edge_count", 0) - build_logger.info(f"[{task_id}] 图谱构建完成: graph_id={graph_id}, 节点={node_count}, 边={edge_count}") - - # 完成 + build_logger.info(f"[{task_id}] Graph build complete: graph_id={graph_id}, nodes={node_count}, edges={edge_count}") + + # Complete task_manager.update_task( task_id, status=TaskStatus.COMPLETED, - message="图谱构建完成", + message="Graph build complete", progress=100, result={ "project_id": project_id, @@ -486,36 +486,36 @@ def wait_progress_callback(msg, progress_ratio): "chunk_count": total_chunks } ) - + except Exception as e: - # 更新项目状态为失败 - build_logger.error(f"[{task_id}] 图谱构建失败: {str(e)}") + # Update project status to failed + build_logger.error(f"[{task_id}] Graph build failed: {str(e)}") build_logger.debug(traceback.format_exc()) - + project.status = ProjectStatus.FAILED project.error = str(e) ProjectManager.save_project(project) - + task_manager.update_task( task_id, status=TaskStatus.FAILED, - message=f"构建失败: {str(e)}", + message=f"Build failed: {str(e)}", error=traceback.format_exc() ) - - # 启动后台线程 + + # Start background thread thread = threading.Thread(target=build_task, daemon=True) thread.start() - + return jsonify({ "success": True, "data": { "project_id": project_id, "task_id": task_id, - "message": "图谱构建任务已启动,请通过 /task/{task_id} 查询进度" + "message": "Graph build task started, query progress via /task/{task_id}" } }) - + except Exception as e: return jsonify({ "success": False, @@ -524,21 +524,21 @@ def wait_progress_callback(msg, progress_ratio): }), 500 -# ============== 任务查询接口 ============== +# ============== Task Query Interface ============== @graph_bp.route('/task/', methods=['GET']) def get_task(task_id: str): """ - 查询任务状态 + Query task status """ task = TaskManager().get_task(task_id) - + if not task: return jsonify({ "success": False, - "error": f"任务不存在: {task_id}" + "error": f"Task does not exist: {task_id}" }), 404 - + return jsonify({ "success": True, "data": task.to_dict() @@ -548,10 +548,10 @@ def get_task(task_id: str): @graph_bp.route('/tasks', methods=['GET']) def list_tasks(): """ - 列出所有任务 + List all tasks """ tasks = TaskManager().list_tasks() - + return jsonify({ "success": True, "data": [t.to_dict() for t in tasks], @@ -559,28 +559,28 @@ def list_tasks(): }) -# ============== 图谱数据接口 ============== +# ============== Graph Data Interface ============== @graph_bp.route('/data/', methods=['GET']) def get_graph_data(graph_id: str): """ - 获取图谱数据(节点和边) + Get graph data (nodes and edges) """ try: if not Config.ZEP_API_KEY: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "ZEP_API_KEY not configured" }), 500 - + builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) graph_data = builder.get_graph_data(graph_id) - + return jsonify({ "success": True, "data": graph_data }) - + except Exception as e: return jsonify({ "success": False, @@ -592,26 +592,26 @@ def get_graph_data(graph_id: str): @graph_bp.route('/delete/', methods=['DELETE']) def delete_graph(graph_id: str): """ - 删除Zep图谱 + Delete Zep graph """ try: if not Config.ZEP_API_KEY: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "ZEP_API_KEY not configured" }), 500 - + builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) builder.delete_graph(graph_id) - + return jsonify({ "success": True, - "message": f"图谱已删除: {graph_id}" + "message": f"Graph deleted: {graph_id}" }) - + except Exception as e: return jsonify({ "success": False, "error": str(e), "traceback": traceback.format_exc() - }), 500 + }), 500 \ No newline at end of file diff --git a/backend/app/api/report.py b/backend/app/api/report.py index e05c73c39..60b456a15 100644 --- a/backend/app/api/report.py +++ b/backend/app/api/report.py @@ -1,6 +1,6 @@ """ -Report API路由 -提供模拟报告生成、获取、对话等接口 +Report API routes +Provides interfaces for simulation report generation, retrieval, and chat """ import os @@ -19,56 +19,56 @@ logger = get_logger('mirofish.api.report') -# ============== 报告生成接口 ============== +# ============== Report Generation Interface ============== @report_bp.route('/generate', methods=['POST']) def generate_report(): """ - 生成模拟分析报告(异步任务) - - 这是一个耗时操作,接口会立即返回task_id, - 使用 GET /api/report/generate/status 查询进度 - - 请求(JSON): + Generate simulation analysis report (async task) + + This is a time-consuming operation, the interface will return task_id immediately, + use GET /api/report/generate/status to query progress + + Request (JSON): { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "force_regenerate": false // 可选,强制重新生成 + "simulation_id": "sim_xxxx", // Required, simulation ID + "force_regenerate": false // Optional, force regenerate } - - 返回: + + Returns: { "success": true, "data": { "simulation_id": "sim_xxxx", "task_id": "task_xxxx", "status": "generating", - "message": "报告生成任务已启动" + "message": "Report generation task started" } } """ try: data = request.get_json() or {} - + simulation_id = data.get('simulation_id') if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": "Please provide simulation_id" }), 400 - + force_regenerate = data.get('force_regenerate', False) - - # 获取模拟信息 + + # Get simulation info manager = SimulationManager() state = manager.get_simulation(simulation_id) - + if not state: return jsonify({ "success": False, - "error": f"模拟不存在: {simulation_id}" + "error": f"Simulation does not exist: {simulation_id}" }), 404 - - # 检查是否已有报告 + + # Check if report already exists if not force_regenerate: existing_report = ReportManager.get_report_by_simulation(simulation_id) if existing_report and existing_report.status == ReportStatus.COMPLETED: @@ -78,38 +78,38 @@ def generate_report(): "simulation_id": simulation_id, "report_id": existing_report.report_id, "status": "completed", - "message": "报告已存在", + "message": "Report already exists", "already_generated": True } }) - - # 获取项目信息 + + # Get project info project = ProjectManager.get_project(state.project_id) if not project: return jsonify({ "success": False, - "error": f"项目不存在: {state.project_id}" + "error": f"Project does not exist: {state.project_id}" }), 404 - + graph_id = state.graph_id or project.graph_id if not graph_id: return jsonify({ "success": False, - "error": "缺少图谱ID,请确保已构建图谱" + "error": "Missing graph ID, please ensure graph has been built" }), 400 - + simulation_requirement = project.simulation_requirement if not simulation_requirement: return jsonify({ "success": False, - "error": "缺少模拟需求描述" + "error": "Missing simulation requirement description" }), 400 - - # 提前生成 report_id,以便立即返回给前端 + + # Generate report_id in advance so it can be returned to frontend immediately import uuid report_id = f"report_{uuid.uuid4().hex[:12]}" - - # 创建异步任务 + + # Create async task task_manager = TaskManager() task_id = task_manager.create_task( task_type="report_generate", @@ -119,41 +119,41 @@ def generate_report(): "report_id": report_id } ) - - # 定义后台任务 + + # Define background task def run_generate(): try: task_manager.update_task( task_id, status=TaskStatus.PROCESSING, progress=0, - message="初始化Report Agent..." + message="Initializing Report Agent..." ) - - # 创建Report Agent + + # Create Report Agent agent = ReportAgent( graph_id=graph_id, simulation_id=simulation_id, simulation_requirement=simulation_requirement ) - - # 进度回调 + + # Progress callback def progress_callback(stage, progress, message): task_manager.update_task( task_id, progress=progress, message=f"[{stage}] {message}" ) - - # 生成报告(传入预先生成的 report_id) + + # Generate report (pass pre-generated report_id) report = agent.generate_report( progress_callback=progress_callback, report_id=report_id ) - - # 保存报告 + + # Save report ReportManager.save_report(report) - + if report.status == ReportStatus.COMPLETED: task_manager.complete_task( task_id, @@ -164,16 +164,16 @@ def progress_callback(stage, progress, message): } ) else: - task_manager.fail_task(task_id, report.error or "报告生成失败") - + task_manager.fail_task(task_id, report.error or "Report generation failed") + except Exception as e: - logger.error(f"报告生成失败: {str(e)}") + logger.error(f"Report generation failed: {str(e)}") task_manager.fail_task(task_id, str(e)) - - # 启动后台线程 + + # Start background thread thread = threading.Thread(target=run_generate, daemon=True) thread.start() - + return jsonify({ "success": True, "data": { @@ -181,13 +181,13 @@ def progress_callback(stage, progress, message): "report_id": report_id, "task_id": task_id, "status": "generating", - "message": "报告生成任务已启动,请通过 /api/report/generate/status 查询进度", + "message": "Report generation task started, query progress via /api/report/generate/status", "already_generated": False } }) - + except Exception as e: - logger.error(f"启动报告生成任务失败: {str(e)}") + logger.error(f"Failed to start report generation task: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -198,15 +198,15 @@ def progress_callback(stage, progress, message): @report_bp.route('/generate/status', methods=['POST']) def get_generate_status(): """ - 查询报告生成任务进度 - - 请求(JSON): + Query report generation task progress + + Request (JSON): { - "task_id": "task_xxxx", // 可选,generate返回的task_id - "simulation_id": "sim_xxxx" // 可选,模拟ID + "task_id": "task_xxxx", // Optional, task_id returned by generate + "simulation_id": "sim_xxxx" // Optional, simulation ID } - - 返回: + + Returns: { "success": true, "data": { @@ -219,11 +219,11 @@ def get_generate_status(): """ try: data = request.get_json() or {} - + task_id = data.get('task_id') simulation_id = data.get('simulation_id') - - # 如果提供了simulation_id,先检查是否已有完成的报告 + + # If simulation_id provided, first check if report already exists if simulation_id: existing_report = ReportManager.get_report_by_simulation(simulation_id) if existing_report and existing_report.status == ReportStatus.COMPLETED: @@ -234,47 +234,47 @@ def get_generate_status(): "report_id": existing_report.report_id, "status": "completed", "progress": 100, - "message": "报告已生成", + "message": "Report already generated", "already_completed": True } }) - + if not task_id: return jsonify({ "success": False, - "error": "请提供 task_id 或 simulation_id" + "error": "Please provide task_id or simulation_id" }), 400 - + task_manager = TaskManager() task = task_manager.get_task(task_id) - + if not task: return jsonify({ "success": False, - "error": f"任务不存在: {task_id}" + "error": f"Task does not exist: {task_id}" }), 404 - + return jsonify({ "success": True, "data": task.to_dict() }) - + except Exception as e: - logger.error(f"查询任务状态失败: {str(e)}") + logger.error(f"Failed to query task status: {str(e)}") return jsonify({ "success": False, "error": str(e) }), 500 -# ============== 报告获取接口 ============== +# ============== Report Retrieval Interface ============== @report_bp.route('/', methods=['GET']) def get_report(report_id: str): """ - 获取报告详情 - - 返回: + Get report details + + Returns: { "success": true, "data": { @@ -290,20 +290,20 @@ def get_report(report_id: str): """ try: report = ReportManager.get_report(report_id) - + if not report: return jsonify({ "success": False, - "error": f"报告不存在: {report_id}" + "error": f"Report does not exist: {report_id}" }), 404 - + return jsonify({ "success": True, "data": report.to_dict() }) - + except Exception as e: - logger.error(f"获取报告失败: {str(e)}") + logger.error(f"Failed to get report: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -314,9 +314,9 @@ def get_report(report_id: str): @report_bp.route('/by-simulation/', methods=['GET']) def get_report_by_simulation(simulation_id: str): """ - 根据模拟ID获取报告 - - 返回: + Get report by simulation ID + + Returns: { "success": true, "data": { @@ -327,22 +327,22 @@ def get_report_by_simulation(simulation_id: str): """ try: report = ReportManager.get_report_by_simulation(simulation_id) - + if not report: return jsonify({ "success": False, - "error": f"该模拟暂无报告: {simulation_id}", + "error": f"No report for this simulation: {simulation_id}", "has_report": False }), 404 - + return jsonify({ "success": True, "data": report.to_dict(), "has_report": True }) - + except Exception as e: - logger.error(f"获取报告失败: {str(e)}") + logger.error(f"Failed to get report: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -353,13 +353,13 @@ def get_report_by_simulation(simulation_id: str): @report_bp.route('/list', methods=['GET']) def list_reports(): """ - 列出所有报告 - - Query参数: - simulation_id: 按模拟ID过滤(可选) - limit: 返回数量限制(默认50) - - 返回: + List all reports + + Query parameters: + simulation_id: Filter by simulation ID (optional) + limit: Return count limit (default 50) + + Returns: { "success": true, "data": [...], @@ -369,20 +369,20 @@ def list_reports(): try: simulation_id = request.args.get('simulation_id') limit = request.args.get('limit', 50, type=int) - + reports = ReportManager.list_reports( simulation_id=simulation_id, limit=limit ) - + return jsonify({ "success": True, "data": [r.to_dict() for r in reports], "count": len(reports) }) - + except Exception as e: - logger.error(f"列出报告失败: {str(e)}") + logger.error(f"Failed to list reports: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -393,42 +393,42 @@ def list_reports(): @report_bp.route('//download', methods=['GET']) def download_report(report_id: str): """ - 下载报告(Markdown格式) - - 返回Markdown文件 + Download report (Markdown format) + + Returns Markdown file """ try: report = ReportManager.get_report(report_id) - + if not report: return jsonify({ "success": False, - "error": f"报告不存在: {report_id}" + "error": f"Report does not exist: {report_id}" }), 404 - + md_path = ReportManager._get_report_markdown_path(report_id) - + if not os.path.exists(md_path): - # 如果MD文件不存在,生成一个临时文件 + # If MD file doesn't exist, generate a temporary file import tempfile with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: f.write(report.markdown_content) temp_path = f.name - + return send_file( temp_path, as_attachment=True, download_name=f"{report_id}.md" ) - + return send_file( md_path, as_attachment=True, download_name=f"{report_id}.md" ) - + except Exception as e: - logger.error(f"下载报告失败: {str(e)}") + logger.error(f"Failed to download report: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -438,23 +438,23 @@ def download_report(report_id: str): @report_bp.route('/', methods=['DELETE']) def delete_report(report_id: str): - """删除报告""" + """Delete report""" try: success = ReportManager.delete_report(report_id) - + if not success: return jsonify({ "success": False, - "error": f"报告不存在: {report_id}" + "error": f"Report does not exist: {report_id}" }), 404 - + return jsonify({ "success": True, - "message": f"报告已删除: {report_id}" + "message": f"Report deleted: {report_id}" }) - + except Exception as e: - logger.error(f"删除报告失败: {str(e)}") + logger.error(f"Failed to delete report: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -462,96 +462,96 @@ def delete_report(report_id: str): }), 500 -# ============== Report Agent对话接口 ============== +# ============== Report Agent Chat Interface ============== @report_bp.route('/chat', methods=['POST']) def chat_with_report_agent(): """ - 与Report Agent对话 - - Report Agent可以在对话中自主调用检索工具来回答问题 - - 请求(JSON): + Chat with Report Agent + + Report Agent can autonomously call retrieval tools to answer questions during conversation + + Request (JSON): { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "message": "请解释一下舆情走向", // 必填,用户消息 - "chat_history": [ // 可选,对话历史 + "simulation_id": "sim_xxxx", // Required, simulation ID + "message": "Please explain the public opinion trend", // Required, user message + "chat_history": [ // Optional, conversation history {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."} ] } - - 返回: + + Returns: { "success": true, "data": { - "response": "Agent回复...", - "tool_calls": [调用的工具列表], - "sources": [信息来源] + "response": "Agent response...", + "tool_calls": [list of tools called], + "sources": [information sources] } } """ try: data = request.get_json() or {} - + simulation_id = data.get('simulation_id') message = data.get('message') chat_history = data.get('chat_history', []) - + if not simulation_id: return jsonify({ "success": False, - "error": "请提供 simulation_id" + "error": "Please provide simulation_id" }), 400 - + if not message: return jsonify({ "success": False, - "error": "请提供 message" + "error": "Please provide message" }), 400 - - # 获取模拟和项目信息 + + # Get simulation and project info manager = SimulationManager() state = manager.get_simulation(simulation_id) - + if not state: return jsonify({ "success": False, - "error": f"模拟不存在: {simulation_id}" + "error": f"Simulation does not exist: {simulation_id}" }), 404 - + project = ProjectManager.get_project(state.project_id) if not project: return jsonify({ "success": False, - "error": f"项目不存在: {state.project_id}" + "error": f"Project does not exist: {state.project_id}" }), 404 - + graph_id = state.graph_id or project.graph_id if not graph_id: return jsonify({ "success": False, - "error": "缺少图谱ID" + "error": "Missing graph ID" }), 400 - + simulation_requirement = project.simulation_requirement or "" - - # 创建Agent并进行对话 + + # Create Agent and chat agent = ReportAgent( graph_id=graph_id, simulation_id=simulation_id, simulation_requirement=simulation_requirement ) - + result = agent.chat(message=message, chat_history=chat_history) - + return jsonify({ "success": True, "data": result }) - + except Exception as e: - logger.error(f"对话失败: {str(e)}") + logger.error(f"Chat failed: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -559,42 +559,42 @@ def chat_with_report_agent(): }), 500 -# ============== 报告进度与分章节接口 ============== +# ============== Report Progress and Section Interfaces ============== @report_bp.route('//progress', methods=['GET']) def get_report_progress(report_id: str): """ - 获取报告生成进度(实时) - - 返回: + Get report generation progress (real-time) + + Returns: { "success": true, "data": { "status": "generating", "progress": 45, - "message": "正在生成章节: 关键发现", - "current_section": "关键发现", - "completed_sections": ["执行摘要", "模拟背景"], + "message": "Generating section: Key Findings", + "current_section": "Key Findings", + "completed_sections": ["Executive Summary", "Simulation Background"], "updated_at": "2025-12-09T..." } } """ try: progress = ReportManager.get_progress(report_id) - + if not progress: return jsonify({ "success": False, - "error": f"报告不存在或进度信息不可用: {report_id}" + "error": f"Report does not exist or progress info unavailable: {report_id}" }), 404 - + return jsonify({ "success": True, "data": progress }) - + except Exception as e: - logger.error(f"获取报告进度失败: {str(e)}") + logger.error(f"Failed to get report progress: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -605,11 +605,11 @@ def get_report_progress(report_id: str): @report_bp.route('//sections', methods=['GET']) def get_report_sections(report_id: str): """ - 获取已生成的章节列表(分章节输出) - - 前端可以轮询此接口获取已生成的章节内容,无需等待整个报告完成 - - 返回: + Get list of generated sections (section-by-section output) + + Frontend can poll this interface to get generated section content without waiting for entire report + + Returns: { "success": true, "data": { @@ -618,7 +618,7 @@ def get_report_sections(report_id: str): { "filename": "section_01.md", "section_index": 1, - "content": "## 执行摘要\\n\\n..." + "content": "## Executive Summary\\n\\n..." }, ... ], @@ -629,11 +629,11 @@ def get_report_sections(report_id: str): """ try: sections = ReportManager.get_generated_sections(report_id) - - # 获取报告状态 + + # Get report status report = ReportManager.get_report(report_id) is_complete = report is not None and report.status == ReportStatus.COMPLETED - + return jsonify({ "success": True, "data": { @@ -643,9 +643,9 @@ def get_report_sections(report_id: str): "is_complete": is_complete } }) - + except Exception as e: - logger.error(f"获取章节列表失败: {str(e)}") + logger.error(f"Failed to get section list: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -656,29 +656,29 @@ def get_report_sections(report_id: str): @report_bp.route('//section/', methods=['GET']) def get_single_section(report_id: str, section_index: int): """ - 获取单个章节内容 - - 返回: + Get single section content + + Returns: { "success": true, "data": { "filename": "section_01.md", - "content": "## 执行摘要\\n\\n..." + "content": "## Executive Summary\\n\\n..." } } """ try: section_path = ReportManager._get_section_path(report_id, section_index) - + if not os.path.exists(section_path): return jsonify({ "success": False, - "error": f"章节不存在: section_{section_index:02d}.md" + "error": f"Section does not exist: section_{section_index:02d}.md" }), 404 - + with open(section_path, 'r', encoding='utf-8') as f: content = f.read() - + return jsonify({ "success": True, "data": { @@ -687,9 +687,9 @@ def get_single_section(report_id: str, section_index: int): "content": content } }) - + except Exception as e: - logger.error(f"获取章节内容失败: {str(e)}") + logger.error(f"Failed to get section content: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -697,16 +697,16 @@ def get_single_section(report_id: str, section_index: int): }), 500 -# ============== 报告状态检查接口 ============== +# ============== Report Status Check Interface ============== @report_bp.route('/check/', methods=['GET']) def check_report_status(simulation_id: str): """ - 检查模拟是否有报告,以及报告状态 - - 用于前端判断是否解锁Interview功能 - - 返回: + Check if simulation has report and report status + + Used by frontend to determine whether to unlock Interview feature + + Returns: { "success": true, "data": { @@ -720,14 +720,14 @@ def check_report_status(simulation_id: str): """ try: report = ReportManager.get_report_by_simulation(simulation_id) - + has_report = report is not None report_status = report.status.value if report else None report_id = report.report_id if report else None - - # 只有报告完成后才解锁interview + + # Only unlock interview after report is completed interview_unlocked = has_report and report.status == ReportStatus.COMPLETED - + return jsonify({ "success": True, "data": { @@ -738,9 +738,9 @@ def check_report_status(simulation_id: str): "interview_unlocked": interview_unlocked } }) - + except Exception as e: - logger.error(f"检查报告状态失败: {str(e)}") + logger.error(f"Failed to check report status: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -748,22 +748,22 @@ def check_report_status(simulation_id: str): }), 500 -# ============== Agent 日志接口 ============== +# ============== Agent Log Interface ============== @report_bp.route('//agent-log', methods=['GET']) def get_agent_log(report_id: str): """ - 获取 Report Agent 的详细执行日志 - - 实时获取报告生成过程中的每一步动作,包括: - - 报告开始、规划开始/完成 - - 每个章节的开始、工具调用、LLM响应、完成 - - 报告完成或失败 - - Query参数: - from_line: 从第几行开始读取(可选,默认0,用于增量获取) - - 返回: + Get detailed execution log of Report Agent + + Real-time acquisition of every step during report generation: + - Report start, planning start/complete + - Each section start, tool call, LLM response, complete + - Report complete or failed + + Query parameters: + from_line: Start from which line (optional, default 0, for incremental fetch) + + Returns: { "success": true, "data": { @@ -774,7 +774,7 @@ def get_agent_log(report_id: str): "report_id": "report_xxxx", "action": "tool_call", "stage": "generating", - "section_title": "执行摘要", + "section_title": "Executive Summary", "section_index": 1, "details": { "tool_name": "insight_forge", @@ -792,16 +792,16 @@ def get_agent_log(report_id: str): """ try: from_line = request.args.get('from_line', 0, type=int) - + log_data = ReportManager.get_agent_log(report_id, from_line=from_line) - + return jsonify({ "success": True, "data": log_data }) - + except Exception as e: - logger.error(f"获取Agent日志失败: {str(e)}") + logger.error(f"Failed to get agent log: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -812,9 +812,9 @@ def get_agent_log(report_id: str): @report_bp.route('//agent-log/stream', methods=['GET']) def stream_agent_log(report_id: str): """ - 获取完整的 Agent 日志(一次性获取全部) - - 返回: + Get complete Agent log (get all at once) + + Returns: { "success": true, "data": { @@ -825,7 +825,7 @@ def stream_agent_log(report_id: str): """ try: logs = ReportManager.get_agent_log_stream(report_id) - + return jsonify({ "success": True, "data": { @@ -833,9 +833,9 @@ def stream_agent_log(report_id: str): "count": len(logs) } }) - + except Exception as e: - logger.error(f"获取Agent日志失败: {str(e)}") + logger.error(f"Failed to get agent log: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -843,27 +843,27 @@ def stream_agent_log(report_id: str): }), 500 -# ============== 控制台日志接口 ============== +# ============== Console Log Interface ============== @report_bp.route('//console-log', methods=['GET']) def get_console_log(report_id: str): """ - 获取 Report Agent 的控制台输出日志 - - 实时获取报告生成过程中的控制台输出(INFO、WARNING等), - 这与 agent-log 接口返回的结构化 JSON 日志不同, - 是纯文本格式的控制台风格日志。 - - Query参数: - from_line: 从第几行开始读取(可选,默认0,用于增量获取) - - 返回: + Get console output log of Report Agent + + Real-time acquisition of console output during report generation (INFO, WARNING, etc.), + This is different from the structured JSON log returned by agent-log interface, + it's pure text format console-style log. + + Query parameters: + from_line: Start from which line (optional, default 0, for incremental fetch) + + Returns: { "success": true, "data": { "logs": [ - "[19:46:14] INFO: 搜索完成: 找到 15 条相关事实", - "[19:46:14] INFO: 图谱搜索: graph_id=xxx, query=...", + "[19:46:14] INFO: Search complete: Found 15 relevant facts", + "[19:46:14] INFO: Graph search: graph_id=xxx, query=...", ... ], "total_lines": 100, @@ -874,16 +874,16 @@ def get_console_log(report_id: str): """ try: from_line = request.args.get('from_line', 0, type=int) - + log_data = ReportManager.get_console_log(report_id, from_line=from_line) - + return jsonify({ "success": True, "data": log_data }) - + except Exception as e: - logger.error(f"获取控制台日志失败: {str(e)}") + logger.error(f"Failed to get console log: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -894,9 +894,9 @@ def get_console_log(report_id: str): @report_bp.route('//console-log/stream', methods=['GET']) def stream_console_log(report_id: str): """ - 获取完整的控制台日志(一次性获取全部) - - 返回: + Get complete console log (get all at once) + + Returns: { "success": true, "data": { @@ -907,7 +907,7 @@ def stream_console_log(report_id: str): """ try: logs = ReportManager.get_console_log_stream(report_id) - + return jsonify({ "success": True, "data": { @@ -915,9 +915,9 @@ def stream_console_log(report_id: str): "count": len(logs) } }) - + except Exception as e: - logger.error(f"获取控制台日志失败: {str(e)}") + logger.error(f"Failed to get console log: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -925,49 +925,49 @@ def stream_console_log(report_id: str): }), 500 -# ============== 工具调用接口(供调试使用)============== +# ============== Tool Call Interface (for debugging) =============== @report_bp.route('/tools/search', methods=['POST']) def search_graph_tool(): """ - 图谱搜索工具接口(供调试使用) - - 请求(JSON): + Graph search tool interface (for debugging) + + Request (JSON): { "graph_id": "mirofish_xxxx", - "query": "搜索查询", + "query": "Search query", "limit": 10 } """ try: data = request.get_json() or {} - + graph_id = data.get('graph_id') query = data.get('query') limit = data.get('limit', 10) - + if not graph_id or not query: return jsonify({ "success": False, - "error": "请提供 graph_id 和 query" + "error": "Please provide graph_id and query" }), 400 - + from ..services.zep_tools import ZepToolsService - + tools = ZepToolsService() result = tools.search_graph( graph_id=graph_id, query=query, limit=limit ) - + return jsonify({ "success": True, "data": result.to_dict() }) - + except Exception as e: - logger.error(f"图谱搜索失败: {str(e)}") + logger.error(f"Graph search failed: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -978,38 +978,38 @@ def search_graph_tool(): @report_bp.route('/tools/statistics', methods=['POST']) def get_graph_statistics_tool(): """ - 图谱统计工具接口(供调试使用) - - 请求(JSON): + Graph statistics tool interface (for debugging) + + Request (JSON): { "graph_id": "mirofish_xxxx" } """ try: data = request.get_json() or {} - + graph_id = data.get('graph_id') - + if not graph_id: return jsonify({ "success": False, - "error": "请提供 graph_id" + "error": "Please provide graph_id" }), 400 - + from ..services.zep_tools import ZepToolsService - + tools = ZepToolsService() result = tools.get_graph_statistics(graph_id) - + return jsonify({ "success": True, "data": result }) - + except Exception as e: - logger.error(f"获取图谱统计失败: {str(e)}") + logger.error(f"Failed to get graph statistics: {str(e)}") return jsonify({ "success": False, "error": str(e), "traceback": traceback.format_exc() - }), 500 + }), 500 \ No newline at end of file diff --git a/backend/app/config.py b/backend/app/config.py index 953dfa50a..f56865ab8 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,54 +1,54 @@ """ -配置管理 -统一从项目根目录的 .env 文件加载配置 +Configuration Management +Loads configuration from .env file in project root """ import os from dotenv import load_dotenv -# 加载项目根目录的 .env 文件 -# 路径: MiroFish/.env (相对于 backend/app/config.py) +# Load .env file from project root +# Path: MiroFish/.env (relative to backend/app/config.py) project_root_env = os.path.join(os.path.dirname(__file__), '../../.env') if os.path.exists(project_root_env): load_dotenv(project_root_env, override=True) else: - # 如果根目录没有 .env,尝试加载环境变量(用于生产环境) + # If no .env in root, load environment variables (for production) load_dotenv(override=True) class Config: - """Flask配置类""" - - # Flask配置 + """Flask configuration class""" + + # Flask configuration SECRET_KEY = os.environ.get('SECRET_KEY', 'mirofish-secret-key') DEBUG = os.environ.get('FLASK_DEBUG', 'True').lower() == 'true' - - # JSON配置 - 禁用ASCII转义,让中文直接显示(而不是 \uXXXX 格式) + + # JSON configuration - disable ASCII escape to display Chinese directly (instead of \uXXXX format) JSON_AS_ASCII = False - - # LLM配置(统一使用OpenAI格式) + + # LLM configuration (unified OpenAI SDK format) LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') - - # Zep配置 + + # Zep configuration ZEP_API_KEY = os.environ.get('ZEP_API_KEY') - - # 文件上传配置 + + # File upload configuration MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads') ALLOWED_EXTENSIONS = {'pdf', 'md', 'txt', 'markdown'} - - # 文本处理配置 - DEFAULT_CHUNK_SIZE = 500 # 默认切块大小 - DEFAULT_CHUNK_OVERLAP = 50 # 默认重叠大小 - - # OASIS模拟配置 + + # Text processing configuration + DEFAULT_CHUNK_SIZE = 500 # Default chunk size + DEFAULT_CHUNK_OVERLAP = 50 # Default overlap size + + # OASIS simulation configuration OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10')) OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations') - - # OASIS平台可用动作配置 + + # OASIS platform available actions configuration OASIS_TWITTER_ACTIONS = [ 'CREATE_POST', 'LIKE_POST', 'REPOST', 'FOLLOW', 'DO_NOTHING', 'QUOTE_POST' ] @@ -57,19 +57,18 @@ class Config: 'LIKE_COMMENT', 'DISLIKE_COMMENT', 'SEARCH_POSTS', 'SEARCH_USER', 'TREND', 'REFRESH', 'DO_NOTHING', 'FOLLOW', 'MUTE' ] - - # Report Agent配置 + + # Report Agent configuration REPORT_AGENT_MAX_TOOL_CALLS = int(os.environ.get('REPORT_AGENT_MAX_TOOL_CALLS', '5')) REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2')) REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5')) - + @classmethod def validate(cls): - """验证必要配置""" + """Validate required configuration""" errors = [] if not cls.LLM_API_KEY: - errors.append("LLM_API_KEY 未配置") + errors.append("LLM_API_KEY not configured") if not cls.ZEP_API_KEY: - errors.append("ZEP_API_KEY 未配置") - return errors - + errors.append("ZEP_API_KEY not configured") + return errors \ No newline at end of file diff --git a/backend/app/models/project.py b/backend/app/models/project.py index 089789374..7cb1f22e1 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -1,6 +1,6 @@ """ -项目上下文管理 -用于在服务端持久化项目状态,避免前端在接口间传递大量数据 +Project context management +Used to persist project state on server side, avoiding passing large amounts of data between interfaces """ import os @@ -15,45 +15,45 @@ class ProjectStatus(str, Enum): - """项目状态""" - CREATED = "created" # 刚创建,文件已上传 - ONTOLOGY_GENERATED = "ontology_generated" # 本体已生成 - GRAPH_BUILDING = "graph_building" # 图谱构建中 - GRAPH_COMPLETED = "graph_completed" # 图谱构建完成 - FAILED = "failed" # 失败 + """Project status""" + CREATED = "created" # Just created, files uploaded + ONTOLOGY_GENERATED = "ontology_generated" # Ontology generated + GRAPH_BUILDING = "graph_building" # Graph building in progress + GRAPH_COMPLETED = "graph_completed" # Graph build completed + FAILED = "failed" # Failed @dataclass class Project: - """项目数据模型""" + """Project data model""" project_id: str name: str status: ProjectStatus created_at: str updated_at: str - - # 文件信息 + + # File information files: List[Dict[str, str]] = field(default_factory=list) # [{filename, path, size}] total_text_length: int = 0 - - # 本体信息(接口1生成后填充) + + # Ontology information (filled after interface 1) ontology: Optional[Dict[str, Any]] = None analysis_summary: Optional[str] = None - - # 图谱信息(接口2完成后填充) + + # Graph information (filled after interface 2) graph_id: Optional[str] = None graph_build_task_id: Optional[str] = None - - # 配置 + + # Configuration simulation_requirement: Optional[str] = None chunk_size: int = 500 chunk_overlap: int = 50 - - # 错误信息 + + # Error information error: Optional[str] = None - + def to_dict(self) -> Dict[str, Any]: - """转换为字典""" + """Convert to dictionary""" return { "project_id": self.project_id, "name": self.name, @@ -71,14 +71,14 @@ def to_dict(self) -> Dict[str, Any]: "chunk_overlap": self.chunk_overlap, "error": self.error } - + @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Project': - """从字典创建""" + """Create from dictionary""" status = data.get('status', 'created') if isinstance(status, str): status = ProjectStatus(status) - + return cls( project_id=data['project_id'], name=data.get('name', 'Unnamed Project'), @@ -99,52 +99,52 @@ def from_dict(cls, data: Dict[str, Any]) -> 'Project': class ProjectManager: - """项目管理器 - 负责项目的持久化存储和检索""" - - # 项目存储根目录 + """Project manager - responsible for project persistence storage and retrieval""" + + # Project storage root directory PROJECTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'projects') - + @classmethod def _ensure_projects_dir(cls): - """确保项目目录存在""" + """Ensure project directory exists""" os.makedirs(cls.PROJECTS_DIR, exist_ok=True) - + @classmethod def _get_project_dir(cls, project_id: str) -> str: - """获取项目目录路径""" + """Get project directory path""" return os.path.join(cls.PROJECTS_DIR, project_id) - + @classmethod def _get_project_meta_path(cls, project_id: str) -> str: - """获取项目元数据文件路径""" + """Get project metadata file path""" return os.path.join(cls._get_project_dir(project_id), 'project.json') - + @classmethod def _get_project_files_dir(cls, project_id: str) -> str: - """获取项目文件存储目录""" + """Get project file storage directory""" return os.path.join(cls._get_project_dir(project_id), 'files') - + @classmethod def _get_project_text_path(cls, project_id: str) -> str: - """获取项目提取文本存储路径""" + """Get project extracted text storage path""" return os.path.join(cls._get_project_dir(project_id), 'extracted_text.txt') - + @classmethod def create_project(cls, name: str = "Unnamed Project") -> Project: """ - 创建新项目 - + Create new project + Args: - name: 项目名称 - + name: Project name + Returns: - 新创建的Project对象 + Newly created Project object """ cls._ensure_projects_dir() - + project_id = f"proj_{uuid.uuid4().hex[:12]}" now = datetime.now().isoformat() - + project = Project( project_id=project_id, name=name, @@ -152,154 +152,153 @@ def create_project(cls, name: str = "Unnamed Project") -> Project: created_at=now, updated_at=now ) - - # 创建项目目录结构 + + # Create project directory structure project_dir = cls._get_project_dir(project_id) files_dir = cls._get_project_files_dir(project_id) os.makedirs(project_dir, exist_ok=True) os.makedirs(files_dir, exist_ok=True) - - # 保存项目元数据 + + # Save project metadata cls.save_project(project) - + return project - + @classmethod def save_project(cls, project: Project) -> None: - """保存项目元数据""" + """Save project metadata""" project.updated_at = datetime.now().isoformat() meta_path = cls._get_project_meta_path(project.project_id) - + with open(meta_path, 'w', encoding='utf-8') as f: json.dump(project.to_dict(), f, ensure_ascii=False, indent=2) - + @classmethod def get_project(cls, project_id: str) -> Optional[Project]: """ - 获取项目 - + Get project + Args: - project_id: 项目ID - + project_id: Project ID + Returns: - Project对象,如果不存在返回None + Project object, returns None if not exists """ meta_path = cls._get_project_meta_path(project_id) - + if not os.path.exists(meta_path): return None - + with open(meta_path, 'r', encoding='utf-8') as f: data = json.load(f) - + return Project.from_dict(data) - + @classmethod def list_projects(cls, limit: int = 50) -> List[Project]: """ - 列出所有项目 - + List all projects + Args: - limit: 返回数量限制 - + limit: Return count limit + Returns: - 项目列表,按创建时间倒序 + List of projects, sorted by creation time (descending) """ cls._ensure_projects_dir() - + projects = [] for project_id in os.listdir(cls.PROJECTS_DIR): project = cls.get_project(project_id) if project: projects.append(project) - - # 按创建时间倒序排序 + + # Sort by creation time (descending) projects.sort(key=lambda p: p.created_at, reverse=True) - + return projects[:limit] - + @classmethod def delete_project(cls, project_id: str) -> bool: """ - 删除项目及其所有文件 - + Delete project and all its files + Args: - project_id: 项目ID - + project_id: Project ID + Returns: - 是否删除成功 + Whether deletion was successful """ project_dir = cls._get_project_dir(project_id) - + if not os.path.exists(project_dir): return False - + shutil.rmtree(project_dir) return True - + @classmethod def save_file_to_project(cls, project_id: str, file_storage, original_filename: str) -> Dict[str, str]: """ - 保存上传的文件到项目目录 - + Save uploaded file to project directory + Args: - project_id: 项目ID - file_storage: Flask的FileStorage对象 - original_filename: 原始文件名 - + project_id: Project ID + file_storage: Flask's FileStorage object + original_filename: Original filename + Returns: - 文件信息字典 {filename, path, size} + File info dictionary {filename, path, size} """ files_dir = cls._get_project_files_dir(project_id) os.makedirs(files_dir, exist_ok=True) - - # 生成安全的文件名 + + # Generate safe filename ext = os.path.splitext(original_filename)[1].lower() safe_filename = f"{uuid.uuid4().hex[:8]}{ext}" file_path = os.path.join(files_dir, safe_filename) - - # 保存文件 + + # Save file file_storage.save(file_path) - - # 获取文件大小 + + # Get file size file_size = os.path.getsize(file_path) - + return { "original_filename": original_filename, "saved_filename": safe_filename, "path": file_path, "size": file_size } - + @classmethod def save_extracted_text(cls, project_id: str, text: str) -> None: - """保存提取的文本""" + """Save extracted text""" text_path = cls._get_project_text_path(project_id) with open(text_path, 'w', encoding='utf-8') as f: f.write(text) - + @classmethod def get_extracted_text(cls, project_id: str) -> Optional[str]: - """获取提取的文本""" + """Get extracted text""" text_path = cls._get_project_text_path(project_id) - + if not os.path.exists(text_path): return None - + with open(text_path, 'r', encoding='utf-8') as f: return f.read() - + @classmethod def get_project_files(cls, project_id: str) -> List[str]: - """获取项目的所有文件路径""" + """Get all file paths of the project""" files_dir = cls._get_project_files_dir(project_id) - + if not os.path.exists(files_dir): return [] - + return [ - os.path.join(files_dir, f) - for f in os.listdir(files_dir) + os.path.join(files_dir, f) + for f in os.listdir(files_dir) if os.path.isfile(os.path.join(files_dir, f)) - ] - + ] \ No newline at end of file diff --git a/backend/app/models/task.py b/backend/app/models/task.py index e15f35fbd..7b40e3a19 100644 --- a/backend/app/models/task.py +++ b/backend/app/models/task.py @@ -1,6 +1,6 @@ """ -任务状态管理 -用于跟踪长时间运行的任务(如图谱构建) +Task status management +Used to track long-running tasks (like graph building) """ import uuid @@ -12,30 +12,30 @@ class TaskStatus(str, Enum): - """任务状态枚举""" - PENDING = "pending" # 等待中 - PROCESSING = "processing" # 处理中 - COMPLETED = "completed" # 已完成 - FAILED = "failed" # 失败 + """Task status enum""" + PENDING = "pending" # Waiting + PROCESSING = "processing" # Processing + COMPLETED = "completed" # Completed + FAILED = "failed" # Failed @dataclass class Task: - """任务数据类""" + """Task data class""" task_id: str task_type: str status: TaskStatus created_at: datetime updated_at: datetime - progress: int = 0 # 总进度百分比 0-100 - message: str = "" # 状态消息 - result: Optional[Dict] = None # 任务结果 - error: Optional[str] = None # 错误信息 - metadata: Dict = field(default_factory=dict) # 额外元数据 - progress_detail: Dict = field(default_factory=dict) # 详细进度信息 - + progress: int = 0 # Overall progress percentage 0-100 + message: str = "" # Status message + result: Optional[Dict] = None # Task result + error: Optional[str] = None # Error message + metadata: Dict = field(default_factory=dict) # Extra metadata + progress_detail: Dict = field(default_factory=dict) # Detailed progress info + def to_dict(self) -> Dict[str, Any]: - """转换为字典""" + """Convert to dictionary""" return { "task_id": self.task_id, "task_type": self.task_type, @@ -53,15 +53,15 @@ def to_dict(self) -> Dict[str, Any]: class TaskManager: """ - 任务管理器 - 线程安全的任务状态管理 + Task manager + Thread-safe task status management """ - + _instance = None _lock = threading.Lock() - + def __new__(cls): - """单例模式""" + """Singleton pattern""" if cls._instance is None: with cls._lock: if cls._instance is None: @@ -69,21 +69,21 @@ def __new__(cls): cls._instance._tasks: Dict[str, Task] = {} cls._instance._task_lock = threading.Lock() return cls._instance - + def create_task(self, task_type: str, metadata: Optional[Dict] = None) -> str: """ - 创建新任务 - + Create new task + Args: - task_type: 任务类型 - metadata: 额外元数据 - + task_type: Task type + metadata: Extra metadata + Returns: - 任务ID + Task ID """ task_id = str(uuid.uuid4()) now = datetime.now() - + task = Task( task_id=task_id, task_type=task_type, @@ -92,17 +92,17 @@ def create_task(self, task_type: str, metadata: Optional[Dict] = None) -> str: updated_at=now, metadata=metadata or {} ) - + with self._task_lock: self._tasks[task_id] = task - + return task_id - + def get_task(self, task_id: str) -> Optional[Task]: - """获取任务""" + """Get task""" with self._task_lock: return self._tasks.get(task_id) - + def update_task( self, task_id: str, @@ -114,16 +114,16 @@ def update_task( progress_detail: Optional[Dict] = None ): """ - 更新任务状态 - + Update task status + Args: - task_id: 任务ID - status: 新状态 - progress: 进度 - message: 消息 - result: 结果 - error: 错误信息 - progress_detail: 详细进度信息 + task_id: Task ID + status: New status + progress: Progress + message: Message + result: Result + error: Error message + progress_detail: Detailed progress info """ with self._task_lock: task = self._tasks.get(task_id) @@ -141,44 +141,43 @@ def update_task( task.error = error if progress_detail is not None: task.progress_detail = progress_detail - + def complete_task(self, task_id: str, result: Dict): - """标记任务完成""" + """Mark task as completed""" self.update_task( task_id, status=TaskStatus.COMPLETED, progress=100, - message="任务完成", + message="Task completed", result=result ) - + def fail_task(self, task_id: str, error: str): - """标记任务失败""" + """Mark task as failed""" self.update_task( task_id, status=TaskStatus.FAILED, - message="任务失败", + message="Task failed", error=error ) - + def list_tasks(self, task_type: Optional[str] = None) -> list: - """列出任务""" + """List tasks""" with self._task_lock: tasks = list(self._tasks.values()) if task_type: tasks = [t for t in tasks if t.task_type == task_type] return [t.to_dict() for t in sorted(tasks, key=lambda x: x.created_at, reverse=True)] - + def cleanup_old_tasks(self, max_age_hours: int = 24): - """清理旧任务""" + """Clean up old tasks""" from datetime import timedelta cutoff = datetime.now() - timedelta(hours=max_age_hours) - + with self._task_lock: old_ids = [ tid for tid, task in self._tasks.items() if task.created_at < cutoff and task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED] ] for tid in old_ids: - del self._tasks[tid] - + del self._tasks[tid] \ No newline at end of file diff --git a/backend/app/services/simulation_ipc.py b/backend/app/services/simulation_ipc.py index 9d70d0bea..351c5b5f7 100644 --- a/backend/app/services/simulation_ipc.py +++ b/backend/app/services/simulation_ipc.py @@ -1,11 +1,11 @@ """ -模拟IPC通信模块 -用于Flask后端和模拟脚本之间的进程间通信 +Simulation IPC communication module +Inter-process communication between Flask backend and simulation scripts -通过文件系统实现简单的命令/响应模式: -1. Flask写入命令到 commands/ 目录 -2. 模拟脚本轮询命令目录,执行命令并写入响应到 responses/ 目录 -3. Flask轮询响应目录获取结果 +Implemented via simple file-based command/response pattern: +1. Flask writes commands to commands/ directory +2. Simulation script polls command directory, executes commands and writes responses to responses/ directory +3. Flask polls response directory to get results """ import os @@ -23,14 +23,14 @@ class CommandType(str, Enum): - """命令类型""" - INTERVIEW = "interview" # 单个Agent采访 - BATCH_INTERVIEW = "batch_interview" # 批量采访 - CLOSE_ENV = "close_env" # 关闭环境 + """Command type""" + INTERVIEW = "interview" # Single agent interview + BATCH_INTERVIEW = "batch_interview" # Batch interview + CLOSE_ENV = "close_env" # Close environment class CommandStatus(str, Enum): - """命令状态""" + """Command status""" PENDING = "pending" PROCESSING = "processing" COMPLETED = "completed" @@ -39,12 +39,12 @@ class CommandStatus(str, Enum): @dataclass class IPCCommand: - """IPC命令""" + """IPC command""" command_id: str command_type: CommandType args: Dict[str, Any] timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) - + def to_dict(self) -> Dict[str, Any]: return { "command_id": self.command_id, @@ -52,7 +52,7 @@ def to_dict(self) -> Dict[str, Any]: "args": self.args, "timestamp": self.timestamp } - + @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'IPCCommand': return cls( @@ -65,13 +65,13 @@ def from_dict(cls, data: Dict[str, Any]) -> 'IPCCommand': @dataclass class IPCResponse: - """IPC响应""" + """IPC response""" command_id: str status: CommandStatus result: Optional[Dict[str, Any]] = None error: Optional[str] = None timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) - + def to_dict(self) -> Dict[str, Any]: return { "command_id": self.command_id, @@ -80,7 +80,7 @@ def to_dict(self) -> Dict[str, Any]: "error": self.error, "timestamp": self.timestamp } - + @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'IPCResponse': return cls( @@ -94,26 +94,26 @@ def from_dict(cls, data: Dict[str, Any]) -> 'IPCResponse': class SimulationIPCClient: """ - 模拟IPC客户端(Flask端使用) - - 用于向模拟进程发送命令并等待响应 + Simulation IPC client (used by Flask side) + + Used to send commands to simulation process and wait for response """ - + def __init__(self, simulation_dir: str): """ - 初始化IPC客户端 - + Initialize IPC client + Args: - simulation_dir: 模拟数据目录 + simulation_dir: Simulation data directory """ self.simulation_dir = simulation_dir self.commands_dir = os.path.join(simulation_dir, "ipc_commands") self.responses_dir = os.path.join(simulation_dir, "ipc_responses") - - # 确保目录存在 + + # Ensure directories exist os.makedirs(self.commands_dir, exist_ok=True) os.makedirs(self.responses_dir, exist_ok=True) - + def send_command( self, command_type: CommandType, @@ -122,19 +122,19 @@ def send_command( poll_interval: float = 0.5 ) -> IPCResponse: """ - 发送命令并等待响应 - + Send command and wait for response + Args: - command_type: 命令类型 - args: 命令参数 - timeout: 超时时间(秒) - poll_interval: 轮询间隔(秒) - + command_type: Command type + args: Command parameters + timeout: Timeout (seconds) + poll_interval: Poll interval (seconds) + Returns: IPCResponse - + Raises: - TimeoutError: 等待响应超时 + TimeoutError: Wait response timeout """ command_id = str(uuid.uuid4()) command = IPCCommand( @@ -142,50 +142,50 @@ def send_command( command_type=command_type, args=args ) - - # 写入命令文件 + + # Write command file command_file = os.path.join(self.commands_dir, f"{command_id}.json") with open(command_file, 'w', encoding='utf-8') as f: json.dump(command.to_dict(), f, ensure_ascii=False, indent=2) - - logger.info(f"发送IPC命令: {command_type.value}, command_id={command_id}") - - # 等待响应 + + logger.info(f"Sending IPC command: {command_type.value}, command_id={command_id}") + + # Wait for response response_file = os.path.join(self.responses_dir, f"{command_id}.json") start_time = time.time() - + while time.time() - start_time < timeout: if os.path.exists(response_file): try: with open(response_file, 'r', encoding='utf-8') as f: response_data = json.load(f) response = IPCResponse.from_dict(response_data) - - # 清理命令和响应文件 + + # Clean up command and response files try: os.remove(command_file) os.remove(response_file) except OSError: pass - - logger.info(f"收到IPC响应: command_id={command_id}, status={response.status.value}") + + logger.info(f"Received IPC response: command_id={command_id}, status={response.status.value}") return response except (json.JSONDecodeError, KeyError) as e: - logger.warning(f"解析响应失败: {e}") - + logger.warning(f"Failed to parse response: {e}") + time.sleep(poll_interval) - - # 超时 - logger.error(f"等待IPC响应超时: command_id={command_id}") - - # 清理命令文件 + + # Timeout + logger.error(f"Waiting for IPC response timeout: command_id={command_id}") + + # Clean up command file try: os.remove(command_file) except OSError: pass - - raise TimeoutError(f"等待命令响应超时 ({timeout}秒)") - + + raise TimeoutError(f"Wait for command response timeout ({timeout}s)") + def send_interview( self, agent_id: int, @@ -194,19 +194,19 @@ def send_interview( timeout: float = 60.0 ) -> IPCResponse: """ - 发送单个Agent采访命令 - + Send single agent interview command + Args: agent_id: Agent ID - prompt: 采访问题 - platform: 指定平台(可选) - - "twitter": 只采访Twitter平台 - - "reddit": 只采访Reddit平台 - - None: 双平台模拟时同时采访两个平台,单平台模拟时采访该平台 - timeout: 超时时间 - + prompt: Interview question + platform: Specify platform (optional) + - "twitter": Only interview Twitter platform + - "reddit": Only interview Reddit platform + - None: Interview both platforms in dual-platform simulation, or the platform in single-platform simulation + timeout: Timeout + Returns: - IPCResponse,result字段包含采访结果 + IPCResponse, result field contains interview result """ args = { "agent_id": agent_id, @@ -214,13 +214,13 @@ def send_interview( } if platform: args["platform"] = platform - + return self.send_command( command_type=CommandType.INTERVIEW, args=args, timeout=timeout ) - + def send_batch_interview( self, interviews: List[Dict[str, Any]], @@ -228,36 +228,36 @@ def send_batch_interview( timeout: float = 120.0 ) -> IPCResponse: """ - 发送批量采访命令 - + Send batch interview command + Args: - interviews: 采访列表,每个元素包含 {"agent_id": int, "prompt": str, "platform": str(可选)} - platform: 默认平台(可选,会被每个采访项的platform覆盖) - - "twitter": 默认只采访Twitter平台 - - "reddit": 默认只采访Reddit平台 - - None: 双平台模拟时每个Agent同时采访两个平台 - timeout: 超时时间 - + interviews: Interview list, each element contains {"agent_id": int, "prompt": str, "platform": str(optional)} + platform: Default platform (optional, overridden by each interview item's platform) + - "twitter": Default only interview Twitter platform + - "reddit": Default only interview Reddit platform + - None: Each agent interviews both platforms in dual-platform simulation + timeout: Timeout + Returns: - IPCResponse,result字段包含所有采访结果 + IPCResponse, result field contains all interview results """ args = {"interviews": interviews} if platform: args["platform"] = platform - + return self.send_command( command_type=CommandType.BATCH_INTERVIEW, args=args, timeout=timeout ) - + def send_close_env(self, timeout: float = 30.0) -> IPCResponse: """ - 发送关闭环境命令 - + Send close environment command + Args: - timeout: 超时时间 - + timeout: Timeout + Returns: IPCResponse """ @@ -266,17 +266,17 @@ def send_close_env(self, timeout: float = 30.0) -> IPCResponse: args={}, timeout=timeout ) - + def check_env_alive(self) -> bool: """ - 检查模拟环境是否存活 - - 通过检查 env_status.json 文件来判断 + Check if simulation environment is alive + + Determined by checking env_status.json file """ status_file = os.path.join(self.simulation_dir, "env_status.json") if not os.path.exists(status_file): return False - + try: with open(status_file, 'r', encoding='utf-8') as f: status = json.load(f) @@ -287,108 +287,108 @@ def check_env_alive(self) -> bool: class SimulationIPCServer: """ - 模拟IPC服务器(模拟脚本端使用) - - 轮询命令目录,执行命令并返回响应 + Simulation IPC server (used by simulation script side) + + Polls command directory, executes commands and returns responses """ - + def __init__(self, simulation_dir: str): """ - 初始化IPC服务器 - + Initialize IPC server + Args: - simulation_dir: 模拟数据目录 + simulation_dir: Simulation data directory """ self.simulation_dir = simulation_dir self.commands_dir = os.path.join(simulation_dir, "ipc_commands") self.responses_dir = os.path.join(simulation_dir, "ipc_responses") - - # 确保目录存在 + + # Ensure directories exist os.makedirs(self.commands_dir, exist_ok=True) os.makedirs(self.responses_dir, exist_ok=True) - - # 环境状态 + + # Environment status self._running = False - + def start(self): - """标记服务器为运行状态""" + """Mark server as running""" self._running = True self._update_env_status("alive") - + def stop(self): - """标记服务器为停止状态""" + """Mark server as stopped""" self._running = False self._update_env_status("stopped") - + def _update_env_status(self, status: str): - """更新环境状态文件""" + """Update environment status file""" status_file = os.path.join(self.simulation_dir, "env_status.json") with open(status_file, 'w', encoding='utf-8') as f: json.dump({ "status": status, "timestamp": datetime.now().isoformat() }, f, ensure_ascii=False, indent=2) - + def poll_commands(self) -> Optional[IPCCommand]: """ - 轮询命令目录,返回第一个待处理的命令 - + Poll command directory, return first pending command + Returns: - IPCCommand 或 None + IPCCommand or None """ if not os.path.exists(self.commands_dir): return None - - # 按时间排序获取命令文件 + + # Get command files sorted by time command_files = [] for filename in os.listdir(self.commands_dir): if filename.endswith('.json'): filepath = os.path.join(self.commands_dir, filename) command_files.append((filepath, os.path.getmtime(filepath))) - + command_files.sort(key=lambda x: x[1]) - + for filepath, _ in command_files: try: with open(filepath, 'r', encoding='utf-8') as f: data = json.load(f) return IPCCommand.from_dict(data) except (json.JSONDecodeError, KeyError, OSError) as e: - logger.warning(f"读取命令文件失败: {filepath}, {e}") + logger.warning(f"Failed to read command file: {filepath}, {e}") continue - + return None - + def send_response(self, response: IPCResponse): """ - 发送响应 - + Send response + Args: - response: IPC响应 + response: IPC response """ response_file = os.path.join(self.responses_dir, f"{response.command_id}.json") with open(response_file, 'w', encoding='utf-8') as f: json.dump(response.to_dict(), f, ensure_ascii=False, indent=2) - - # 删除命令文件 + + # Delete command file command_file = os.path.join(self.commands_dir, f"{response.command_id}.json") try: os.remove(command_file) except OSError: pass - + def send_success(self, command_id: str, result: Dict[str, Any]): - """发送成功响应""" + """Send success response""" self.send_response(IPCResponse( command_id=command_id, status=CommandStatus.COMPLETED, result=result )) - + def send_error(self, command_id: str, error: str): - """发送错误响应""" + """Send error response""" self.send_response(IPCResponse( command_id=command_id, status=CommandStatus.FAILED, error=error - )) + )) \ No newline at end of file diff --git a/backend/app/services/text_processor.py b/backend/app/services/text_processor.py index 91e32acc5..c9b65b83d 100644 --- a/backend/app/services/text_processor.py +++ b/backend/app/services/text_processor.py @@ -1,5 +1,5 @@ """ -文本处理服务 +Text processing service """ from typing import List, Optional @@ -7,13 +7,13 @@ class TextProcessor: - """文本处理器""" - + """Text processor""" + @staticmethod def extract_from_files(file_paths: List[str]) -> str: - """从多个文件提取文本""" + """Extract text from multiple files""" return FileParser.extract_from_multiple(file_paths) - + @staticmethod def split_text( text: str, @@ -21,51 +21,50 @@ def split_text( overlap: int = 50 ) -> List[str]: """ - 分割文本 - + Split text + Args: - text: 原始文本 - chunk_size: 块大小 - overlap: 重叠大小 - + text: Raw text + chunk_size: Chunk size + overlap: Overlap size + Returns: - 文本块列表 + List of text chunks """ return split_text_into_chunks(text, chunk_size, overlap) - + @staticmethod def preprocess_text(text: str) -> str: """ - 预处理文本 - - 移除多余空白 - - 标准化换行 - + Preprocess text + - Remove extra whitespace + - Normalize line breaks + Args: - text: 原始文本 - + text: Raw text + Returns: - 处理后的文本 + Processed text """ import re - - # 标准化换行 + + # Normalize line breaks text = text.replace('\r\n', '\n').replace('\r', '\n') - - # 移除连续空行(保留最多两个换行) + + # Remove consecutive blank lines (keep max two line breaks) text = re.sub(r'\n{3,}', '\n\n', text) - - # 移除行首行尾空白 + + # Remove leading/trailing whitespace from each line lines = [line.strip() for line in text.split('\n')] text = '\n'.join(lines) - + return text.strip() - + @staticmethod def get_text_stats(text: str) -> dict: - """获取文本统计信息""" + """Get text statistics""" return { "total_chars": len(text), "total_lines": text.count('\n') + 1, "total_words": len(text.split()), - } - + } \ No newline at end of file diff --git a/backend/app/utils/file_parser.py b/backend/app/utils/file_parser.py index 3f1d8ed2e..77c866c7f 100644 --- a/backend/app/utils/file_parser.py +++ b/backend/app/utils/file_parser.py @@ -1,6 +1,6 @@ """ -文件解析工具 -支持PDF、Markdown、TXT文件的文本提取 +File parser utility +Supports text extraction from PDF, Markdown, TXT files """ import os @@ -10,29 +10,29 @@ def _read_text_with_fallback(file_path: str) -> str: """ - 读取文本文件,UTF-8失败时自动探测编码。 - - 采用多级回退策略: - 1. 首先尝试 UTF-8 解码 - 2. 使用 charset_normalizer 检测编码 - 3. 回退到 chardet 检测编码 - 4. 最终使用 UTF-8 + errors='replace' 兜底 - + Read text file, automatically detect encoding when UTF-8 fails. + + Multi-level fallback strategy: + 1. First try UTF-8 decode + 2. Use charset_normalizer to detect encoding + 3. Fall back to chardet for encoding detection + 4. Finally use UTF-8 + errors='replace' as fallback + Args: - file_path: 文件路径 - + file_path: File path + Returns: - 解码后的文本内容 + Decoded text content """ data = Path(file_path).read_bytes() - - # 首先尝试 UTF-8 + + # First try UTF-8 try: return data.decode('utf-8') except UnicodeDecodeError: pass - - # 尝试使用 charset_normalizer 检测编码 + + # Try charset_normalizer to detect encoding encoding = None try: from charset_normalizer import from_bytes @@ -41,8 +41,8 @@ def _read_text_with_fallback(file_path: str) -> str: encoding = best.encoding except Exception: pass - - # 回退到 chardet + + # Fall back to chardet if not encoding: try: import chardet @@ -50,140 +50,139 @@ def _read_text_with_fallback(file_path: str) -> str: encoding = result.get('encoding') if result else None except Exception: pass - - # 最终兜底:使用 UTF-8 + replace + + # Final fallback: use UTF-8 + replace if not encoding: encoding = 'utf-8' - + return data.decode(encoding, errors='replace') class FileParser: - """文件解析器""" - + """File parser""" + SUPPORTED_EXTENSIONS = {'.pdf', '.md', '.markdown', '.txt'} - + @classmethod def extract_text(cls, file_path: str) -> str: """ - 从文件中提取文本 - + Extract text from file + Args: - file_path: 文件路径 - + file_path: File path + Returns: - 提取的文本内容 + Extracted text content """ path = Path(file_path) - + if not path.exists(): - raise FileNotFoundError(f"文件不存在: {file_path}") - + raise FileNotFoundError(f"File does not exist: {file_path}") + suffix = path.suffix.lower() - + if suffix not in cls.SUPPORTED_EXTENSIONS: - raise ValueError(f"不支持的文件格式: {suffix}") - + raise ValueError(f"Unsupported file format: {suffix}") + if suffix == '.pdf': return cls._extract_from_pdf(file_path) elif suffix in {'.md', '.markdown'}: return cls._extract_from_md(file_path) elif suffix == '.txt': return cls._extract_from_txt(file_path) - - raise ValueError(f"无法处理的文件格式: {suffix}") - + + raise ValueError(f"Cannot process file format: {suffix}") + @staticmethod def _extract_from_pdf(file_path: str) -> str: - """从PDF提取文本""" + """Extract text from PDF""" try: import fitz # PyMuPDF except ImportError: - raise ImportError("需要安装PyMuPDF: pip install PyMuPDF") - + raise ImportError("PyMuPDF required: pip install PyMuPDF") + text_parts = [] with fitz.open(file_path) as doc: for page in doc: text = page.get_text() if text.strip(): text_parts.append(text) - + return "\n\n".join(text_parts) - + @staticmethod def _extract_from_md(file_path: str) -> str: - """从Markdown提取文本,支持自动编码检测""" + """Extract text from Markdown, supports automatic encoding detection""" return _read_text_with_fallback(file_path) - + @staticmethod def _extract_from_txt(file_path: str) -> str: - """从TXT提取文本,支持自动编码检测""" + """Extract text from TXT, supports automatic encoding detection""" return _read_text_with_fallback(file_path) - + @classmethod def extract_from_multiple(cls, file_paths: List[str]) -> str: """ - 从多个文件提取文本并合并 - + Extract text from multiple files and merge + Args: - file_paths: 文件路径列表 - + file_paths: List of file paths + Returns: - 合并后的文本 + Merged text """ all_texts = [] - + for i, file_path in enumerate(file_paths, 1): try: text = cls.extract_text(file_path) filename = Path(file_path).name - all_texts.append(f"=== 文档 {i}: {filename} ===\n{text}") + all_texts.append(f"=== Document {i}: {filename} ===\n{text}") except Exception as e: - all_texts.append(f"=== 文档 {i}: {file_path} (提取失败: {str(e)}) ===") - + all_texts.append(f"=== Document {i}: {file_path} (extraction failed: {str(e)}) ===") + return "\n\n".join(all_texts) def split_text_into_chunks( - text: str, - chunk_size: int = 500, + text: str, + chunk_size: int = 500, overlap: int = 50 ) -> List[str]: """ - 将文本分割成小块 - + Split text into chunks + Args: - text: 原始文本 - chunk_size: 每块的字符数 - overlap: 重叠字符数 - + text: Raw text + chunk_size: Character count per chunk + overlap: Overlap character count + Returns: - 文本块列表 + List of text chunks """ if len(text) <= chunk_size: return [text] if text.strip() else [] - + chunks = [] start = 0 - + while start < len(text): end = start + chunk_size - - # 尝试在句子边界处分割 + + # Try to split at sentence boundaries if end < len(text): - # 查找最近的句子结束符 + # Find nearest sentence separator for sep in ['。', '!', '?', '.\n', '!\n', '?\n', '\n\n', '. ', '! ', '? ']: last_sep = text[start:end].rfind(sep) if last_sep != -1 and last_sep > chunk_size * 0.3: end = start + last_sep + len(sep) break - + chunk = text[start:end].strip() if chunk: chunks.append(chunk) - - # 下一个块从重叠位置开始 + + # Next chunk starts from overlap position start = end - overlap if end < len(text) else len(text) - - return chunks + return chunks \ No newline at end of file diff --git a/backend/app/utils/logger.py b/backend/app/utils/logger.py index 1978c0b84..f57a9e1ad 100644 --- a/backend/app/utils/logger.py +++ b/backend/app/utils/logger.py @@ -1,6 +1,6 @@ """ -日志配置模块 -提供统一的日志管理,同时输出到控制台和文件 +Logging configuration module +Provides unified logging management, outputting to both console and file """ import os @@ -12,58 +12,58 @@ def _ensure_utf8_stdout(): """ - 确保 stdout/stderr 使用 UTF-8 编码 - 解决 Windows 控制台中文乱码问题 + Ensure stdout/stderr uses UTF-8 encoding + Fix Chinese character garbled issue on Windows console """ if sys.platform == 'win32': - # Windows 下重新配置标准输出为 UTF-8 + # Reconfigure stdout to UTF-8 on Windows if hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(encoding='utf-8', errors='replace') if hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(encoding='utf-8', errors='replace') -# 日志目录 +# Log directory LOG_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'logs') def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging.Logger: """ - 设置日志器 - + Setup logger + Args: - name: 日志器名称 - level: 日志级别 - + name: Logger name + level: Log level + Returns: - 配置好的日志器 + Configured logger """ - # 确保日志目录存在 + # Ensure log directory exists os.makedirs(LOG_DIR, exist_ok=True) - - # 创建日志器 + + # Create logger logger = logging.getLogger(name) logger.setLevel(level) - - # 阻止日志向上传播到根 logger,避免重复输出 + + # Prevent log propagation to root logger to avoid duplicate output logger.propagate = False - - # 如果已经有处理器,不重复添加 + + # If handlers already exist, don't add again if logger.handlers: return logger - - # 日志格式 + + # Log format detailed_formatter = logging.Formatter( '[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) - + simple_formatter = logging.Formatter( '[%(asctime)s] %(levelname)s: %(message)s', datefmt='%H:%M:%S' ) - - # 1. 文件处理器 - 详细日志(按日期命名,带轮转) + + # 1. File handler - detailed log (date-named, with rotation) log_filename = datetime.now().strftime('%Y-%m-%d') + '.log' file_handler = RotatingFileHandler( os.path.join(LOG_DIR, log_filename), @@ -73,30 +73,30 @@ def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging. ) file_handler.setLevel(logging.DEBUG) file_handler.setFormatter(detailed_formatter) - - # 2. 控制台处理器 - 简洁日志(INFO及以上) - # 确保 Windows 下使用 UTF-8 编码,避免中文乱码 + + # 2. Console handler - simple log (INFO and above) + # Ensure UTF-8 encoding on Windows to avoid Chinese character garbling _ensure_utf8_stdout() console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) console_handler.setFormatter(simple_formatter) - - # 添加处理器 + + # Add handlers logger.addHandler(file_handler) logger.addHandler(console_handler) - + return logger def get_logger(name: str = 'mirofish') -> logging.Logger: """ - 获取日志器(如果不存在则创建) - + Get logger (create if doesn't exist) + Args: - name: 日志器名称 - + name: Logger name + Returns: - 日志器实例 + Logger instance """ logger = logging.getLogger(name) if not logger.handlers: @@ -104,11 +104,11 @@ def get_logger(name: str = 'mirofish') -> logging.Logger: return logger -# 创建默认日志器 +# Create default logger logger = setup_logger() -# 便捷方法 +# Convenience methods def debug(msg, *args, **kwargs): logger.debug(msg, *args, **kwargs) @@ -122,5 +122,4 @@ def error(msg, *args, **kwargs): logger.error(msg, *args, **kwargs) def critical(msg, *args, **kwargs): - logger.critical(msg, *args, **kwargs) - + logger.critical(msg, *args, **kwargs) \ No newline at end of file diff --git a/frontend/src/App.vue b/frontend/src/App.vue index b7cd71ca6..b006247f2 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -3,11 +3,11 @@ + \ No newline at end of file diff --git a/frontend/src/views/InteractionView.vue b/frontend/src/views/InteractionView.vue index b153590d7..7bfd1085a 100644 --- a/frontend/src/views/InteractionView.vue +++ b/frontend/src/views/InteractionView.vue @@ -15,7 +15,7 @@ :class="{ active: viewMode === mode }" @click="viewMode = mode" > - {{ { graph: '图谱', split: '双栏', workbench: '工作台' }[mode] }} + {{ { graph: 'Graph', split: 'Split', workbench: 'Workbench' }[mode] }} @@ -23,7 +23,7 @@
Step 5/5 - 深度互动 + Deep Interaction
@@ -47,7 +47,7 @@ />
- +
{ // --- Data Logic --- const loadReportData = async () => { try { - addLog(`加载报告数据: ${currentReportId.value}`) - - // 获取 report 信息以获取 simulation_id + addLog(`Loading report data: ${currentReportId.value}`) + + // Get report info to get simulation_id const reportRes = await getReport(currentReportId.value) if (reportRes.success && reportRes.data) { const reportData = reportRes.data simulationId.value = reportData.simulation_id if (simulationId.value) { - // 获取 simulation 信息 + // Get simulation info const simRes = await getSimulation(simulationId.value) if (simRes.success && simRes.data) { const simData = simRes.data - - // 获取 project 信息 + + // Get project info if (simData.project_id) { const projRes = await getProject(simData.project_id) if (projRes.success && projRes.data) { projectData.value = projRes.data - addLog(`项目加载成功: ${projRes.data.project_id}`) - - // 获取 graph 数据 + addLog(`Project loaded successfully: ${projRes.data.project_id}`) + + // Get graph data if (projRes.data.graph_id) { await loadGraph(projRes.data.graph_id) } @@ -170,10 +170,10 @@ const loadReportData = async () => { } } } else { - addLog(`获取报告信息失败: ${reportRes.error || '未知错误'}`) + addLog(`Failed to get report info: ${reportRes.error || 'Unknown error'}`) } } catch (err) { - addLog(`加载异常: ${err.message}`) + addLog(`Load error: ${err.message}`) } } @@ -184,10 +184,10 @@ const loadGraph = async (graphId) => { const res = await getGraphData(graphId) if (res.success) { graphData.value = res.data - addLog('图谱数据加载成功') + addLog('Graph data loaded successfully') } } catch (err) { - addLog(`图谱加载失败: ${err.message}`) + addLog(`Graph load failed: ${err.message}`) } finally { graphLoading.value = false } @@ -208,7 +208,7 @@ watch(() => route.params.reportId, (newId) => { }, { immediate: true }) onMounted(() => { - addLog('InteractionView 初始化') + addLog('InteractionView initialized') loadReportData() }) diff --git a/frontend/src/views/MainView.vue b/frontend/src/views/MainView.vue index 6ff299112..f05933b0a 100644 --- a/frontend/src/views/MainView.vue +++ b/frontend/src/views/MainView.vue @@ -8,14 +8,14 @@
-
@@ -48,8 +48,8 @@
- - + - + { const handleNextStep = (params = {}) => { if (currentStep.value < 5) { currentStep.value++ - addLog(`进入 Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) - - // 如果是从 Step 2 进入 Step 3,记录模拟轮数配置 + addLog(`Entering Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) + + // If entering Step 3 from Step 2, record simulation rounds config if (currentStep.value === 3 && params.maxRounds) { - addLog(`自定义模拟轮数: ${params.maxRounds} 轮`) + addLog(`Custom simulation rounds: ${params.maxRounds} rounds`) } } } @@ -171,7 +171,7 @@ const handleNextStep = (params = {}) => { const handleGoBack = () => { if (currentStep.value > 1) { currentStep.value-- - addLog(`返回 Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) + addLog(`Returning to Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) } } diff --git a/frontend/src/views/Process.vue b/frontend/src/views/Process.vue index 2d2d3cc1a..b93f82bcc 100644 --- a/frontend/src/views/Process.vue +++ b/frontend/src/views/Process.vue @@ -1,13 +1,13 @@