From a301bb09c0cb515a03022120691f302f7cc9edc0 Mon Sep 17 00:00:00 2001 From: Anurag Date: Thu, 26 Mar 2026 05:00:38 +0530 Subject: [PATCH 01/13] feat: refresh UX copy and harden graph builds --- .env.example | 20 +- Dockerfile | 14 +- README-EN.md | 24 +- README.md | 152 ++++---- backend/app/api/graph.py | 94 ++--- backend/app/models/task.py | 5 +- backend/app/services/graph_builder.py | 15 +- backend/app/services/ontology_generator.py | 10 +- backend/app/services/simulation_manager.py | 36 +- backend/app/utils/ontology_normalizer.py | 119 ++++++ backend/pyproject.toml | 14 +- backend/requirements.txt | 20 +- backend/tests/test_ontology_normalizer.py | 38 ++ docker-compose.yml | 9 +- frontend/index.html | 6 +- frontend/package-lock.json | 4 - frontend/src/App.vue | 8 +- frontend/src/api/graph.js | 20 +- frontend/src/api/index.js | 16 +- frontend/src/api/report.js | 16 +- frontend/src/api/simulation.js | 59 ++- frontend/src/components/GraphPanel.vue | 14 +- frontend/src/components/HistoryDatabase.vue | 42 +-- frontend/src/components/Step1GraphBuild.vue | 44 +-- frontend/src/components/Step2EnvSetup.vue | 349 ++++++++---------- frontend/src/components/Step3Simulation.vue | 54 ++- frontend/src/components/Step4Report.vue | 202 +++++----- frontend/src/components/Step5Interaction.vue | 116 +++--- frontend/src/store/pendingUpload.js | 4 +- frontend/src/views/Home.vue | 70 ++-- frontend/src/views/InteractionView.vue | 30 +- frontend/src/views/MainView.vue | 18 +- frontend/src/views/Process.vue | 140 +++---- frontend/src/views/ReportView.vue | 30 +- frontend/src/views/SimulationRunView.vue | 87 +++-- frontend/src/views/SimulationView.vue | 85 +++-- package.json | 2 +- .../image/Screenshot/screenshot-1.png | Bin .../image/Screenshot/screenshot-2.png | Bin .../image/Screenshot/screenshot-3.png | Bin .../image/Screenshot/screenshot-4.png | Bin .../image/Screenshot/screenshot-5.png | Bin .../image/Screenshot/screenshot-6.png | Bin .../image/dream-of-red-chamber-cover.jpg | Bin 
.../image/qq-group.png | Bin .../image/wuhan-demo-cover.png | Bin 46 files changed, 1066 insertions(+), 920 deletions(-) create mode 100644 backend/app/utils/ontology_normalizer.py create mode 100644 backend/tests/test_ontology_normalizer.py rename "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2761.png" => static/image/Screenshot/screenshot-1.png (100%) rename "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2762.png" => static/image/Screenshot/screenshot-2.png (100%) rename "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2763.png" => static/image/Screenshot/screenshot-3.png (100%) rename "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2764.png" => static/image/Screenshot/screenshot-4.png (100%) rename "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2765.png" => static/image/Screenshot/screenshot-5.png (100%) rename "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2766.png" => static/image/Screenshot/screenshot-6.png (100%) rename "static/image/\347\272\242\346\245\274\346\242\246\346\250\241\346\213\237\346\216\250\346\274\224\345\260\201\351\235\242.jpg" => static/image/dream-of-red-chamber-cover.jpg (100%) rename "static/image/QQ\347\276\244.png" => static/image/qq-group.png (100%) rename "static/image/\346\255\246\345\244\247\346\250\241\346\213\237\346\274\224\347\244\272\345\260\201\351\235\242.png" => static/image/wuhan-demo-cover.png (100%) diff --git a/.env.example b/.env.example index 78a3b72c0..4e2a83405 100644 --- a/.env.example +++ b/.env.example @@ -1,16 +1,16 @@ -# LLM API配置(支持 OpenAI SDK 格式的任意 LLM API) -# 推荐使用阿里百炼平台qwen-plus模型:https://bailian.console.aliyun.com/ -# 注意消耗较大,可先进行小于40轮的模拟尝试 +# LLM API configuration (supports any LLM API compatible with the OpenAI SDK format) +# Recommended: use the qwen-plus model on Alibaba Bailian: https://bailian.console.aliyun.com/ +# Note: usage can be expensive, so try simulations 
with fewer than 40 rounds first LLM_API_KEY=your_api_key_here -LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 -LLM_MODEL_NAME=qwen-plus +LLM_BASE_URL=https://api.openai.com/v1 +LLM_MODEL_NAME=gpt-4o -# ===== ZEP记忆图谱配置 ===== -# 每月免费额度即可支撑简单使用:https://app.getzep.com/ +# ===== ZEP memory graph configuration ===== +# The free monthly quota is enough for basic usage: https://app.getzep.com/ ZEP_API_KEY=your_zep_api_key_here -# ===== 加速 LLM 配置(可选)===== -# 注意如果不使用加速配置,env文件中就不要出现下面的配置项 +# ===== Accelerated LLM configuration (optional) ===== +# If you are not using accelerated configuration, do not include the fields below in your env file LLM_BOOST_API_KEY=your_api_key_here LLM_BOOST_BASE_URL=your_base_url_here -LLM_BOOST_MODEL_NAME=your_model_name_here \ No newline at end of file +LLM_BOOST_MODEL_NAME=your_model_name_here diff --git a/Dockerfile b/Dockerfile index e65646860..6a73d0a4b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,29 @@ FROM python:3.11 -# 安装 Node.js (满足 >=18)及必要工具 +# Install Node.js (version 18 or later) and required tools RUN apt-get update \ && apt-get install -y --no-install-recommends nodejs npm \ && rm -rf /var/lib/apt/lists/* -# 从 uv 官方镜像复制 uv +# Copy `uv` from the official uv image COPY --from=ghcr.io/astral-sh/uv:0.9.26 /uv /uvx /bin/ WORKDIR /app -# 先复制依赖描述文件以利用缓存 +# Copy dependency manifests first to take advantage of layer caching COPY package.json package-lock.json ./ COPY frontend/package.json frontend/package-lock.json ./frontend/ COPY backend/pyproject.toml backend/uv.lock ./backend/ -# 安装依赖(Node + Python) +# Install dependencies (Node + Python) RUN npm ci \ && npm ci --prefix frontend \ && cd backend && uv sync --frozen -# 复制项目源码 +# Copy the project source COPY . . 
EXPOSE 3000 5001 -# 同时启动前后端(开发模式) -CMD ["npm", "run", "dev"] \ No newline at end of file +# Start both frontend and backend services (development mode) +CMD ["npm", "run", "dev"] diff --git a/README-EN.md b/README-EN.md index 4b003a63f..023f304a9 100644 --- a/README-EN.md +++ b/README-EN.md @@ -4,7 +4,7 @@ 666ghj%2FMiroFish | Trendshift -简洁通用的群体智能引擎,预测万物 +A simple, universal swarm intelligence engine for predicting anything
A Simple and Universal Swarm Intelligence Engine, Predicting Anything @@ -20,7 +20,7 @@ [![X](https://img.shields.io/badge/X-Follow-000000?style=flat-square&logo=x&logoColor=white)](https://x.com/mirofish_ai) [![Instagram](https://img.shields.io/badge/Instagram-Follow-E4405F?style=flat-square&logo=instagram&logoColor=white)](https://www.instagram.com/mirofish_ai/) -[English](./README-EN.md) | [中文文档](./README.md) +[README](./README.md) | [English Copy](./README-EN.md) @@ -49,16 +49,16 @@ Welcome to visit our online demo environment and experience a prediction simulat
- - + + - - + + - - + +
Screenshot 1Screenshot 2Screenshot 1Screenshot 2
Screenshot 3Screenshot 4Screenshot 3Screenshot 4
Screenshot 5Screenshot 6Screenshot 5Screenshot 6
@@ -68,7 +68,7 @@ Welcome to visit our online demo environment and experience a prediction simulat ### 1. Wuhan University Public Opinion Simulation + MiroFish Project Introduction
-MiroFish Demo Video +MiroFish Demo Video Click the image to watch the complete demo video for prediction using BettaFish-generated "Wuhan University Public Opinion Report"
@@ -76,7 +76,7 @@ Click the image to watch the complete demo video for prediction using BettaFish- ### 2. Dream of the Red Chamber Lost Ending Simulation
-MiroFish Demo Video +MiroFish Demo Video Click the image to watch MiroFish's deep prediction of the lost ending based on hundreds of thousands of words from the first 80 chapters of "Dream of the Red Chamber"
@@ -179,7 +179,7 @@ Reads `.env` from root directory by default, maps ports `3000 (frontend) / 5001 ## 📬 Join the Conversation
-QQ Group +QQ Group
  @@ -200,4 +200,4 @@ MiroFish's simulation engine is powered by **[OASIS (Open Agent Social Interacti Star History Chart - \ No newline at end of file + diff --git a/README.md b/README.md index 4f5cffe74..0b1763b75 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ 666ghj%2FMiroFish | Trendshift -简洁通用的群体智能引擎,预测万物 +A simple, universal swarm intelligence engine for predicting anything
A Simple and Universal Swarm Intelligence Engine, Predicting Anything @@ -20,179 +20,179 @@ [![X](https://img.shields.io/badge/X-Follow-000000?style=flat-square&logo=x&logoColor=white)](https://x.com/mirofish_ai) [![Instagram](https://img.shields.io/badge/Instagram-Follow-E4405F?style=flat-square&logo=instagram&logoColor=white)](https://www.instagram.com/mirofish_ai/) -[English](./README-EN.md) | [中文文档](./README.md) +[README](./README.md) | [English Copy](./README-EN.md) -## ⚡ 项目概述 +## ⚡ Overview -**MiroFish** 是一款基于多智能体技术的新一代 AI 预测引擎。通过提取现实世界的种子信息(如突发新闻、政策草案、金融信号),自动构建出高保真的平行数字世界。在此空间内,成千上万个具备独立人格、长期记忆与行为逻辑的智能体进行自由交互与社会演化。你可透过「上帝视角」动态注入变量,精准推演未来走向——**让未来在数字沙盘中预演,助决策在百战模拟后胜出**。 +**MiroFish** is a next-generation AI prediction engine powered by multi-agent technology. By extracting seed information from the real world (such as breaking news, policy drafts, or financial signals), it automatically constructs a high-fidelity parallel digital world. Within this space, thousands of intelligent agents with independent personalities, long-term memory, and behavioral logic freely interact and undergo social evolution. You can inject variables dynamically from a "God's-eye view" to precisely deduce future trajectories — **rehearse the future in a digital sandbox, and win decisions after countless simulations**. -> 你只需:上传种子材料(数据分析报告或者有趣的小说故事),并用自然语言描述预测需求
-> MiroFish 将返回:一份详尽的预测报告,以及一个可深度交互的高保真数字世界 +> You only need to: upload seed materials (data analysis reports or interesting novel stories) and describe your prediction requirements in natural language
+> MiroFish will return: a detailed prediction report and a deeply interactive high-fidelity digital world -### 我们的愿景 +### Our Vision -MiroFish 致力于打造映射现实的群体智能镜像,通过捕捉个体互动引发的群体涌现,突破传统预测的局限: +MiroFish is dedicated to creating a swarm intelligence mirror that maps reality. By capturing the collective emergence triggered by individual interactions, we break through the limitations of traditional prediction: -- **于宏观**:我们是决策者的预演实验室,让政策与公关在零风险中试错 -- **于微观**:我们是个人用户的创意沙盘,无论是推演小说结局还是探索脑洞,皆可有趣、好玩、触手可及 +- **At the Macro Level**: We are a rehearsal laboratory for decision-makers, allowing policies and public relations to be tested at zero risk +- **At the Micro Level**: We are a creative sandbox for individual users, whether deducing novel endings or exploring imaginative scenarios, everything can be fun, playful, and accessible -从严肃预测到趣味仿真,我们让每一个如果都能看见结果,让预测万物成为可能。 +From serious predictions to playful simulations, we let every "what if" see its outcome, making it possible to predict anything. -## 🌐 在线体验 +## 🌐 Live Demo -欢迎访问在线 Demo 演示环境,体验我们为你准备的一次关于热点舆情事件的推演预测:[mirofish-live-demo](https://666ghj.github.io/mirofish-demo/) +Visit our online demo environment and experience a prediction simulation around a trending public-opinion event: [mirofish-live-demo](https://666ghj.github.io/mirofish-demo/) -## 📸 系统截图 +## 📸 Screenshots
- - + + - - + + - - + +
截图1截图2Screenshot 1Screenshot 2
截图3截图4Screenshot 3Screenshot 4
截图5截图6Screenshot 5Screenshot 6
-## 🎬 演示视频 +## 🎬 Demo Videos -### 1. 武汉大学舆情推演预测 + MiroFish项目讲解 +### 1. Wuhan University Public Opinion Simulation + MiroFish Project Introduction
-MiroFish Demo Video +MiroFish Demo Video -点击图片查看使用微舆BettaFish生成的《武大舆情报告》进行预测的完整演示视频 +Click the image to watch the complete demo video for prediction using the BettaFish-generated "Wuhan University Public Opinion Report."
-### 2. 《红楼梦》失传结局推演预测 +### 2. Dream of the Red Chamber Lost Ending Simulation
-MiroFish Demo Video +MiroFish Demo Video -点击图片查看基于《红楼梦》前80回数十万字,MiroFish深度预测失传结局 +Click the image to watch MiroFish predict the lost ending based on the first 80 chapters of *Dream of the Red Chamber*.
-> **金融方向推演预测**、**时政要闻推演预测**等示例陆续更新中... +> **Financial prediction**, **current-events forecasting**, and more examples are coming soon. -## 🔄 工作流程 +## 🔄 Workflow -1. **图谱构建**:现实种子提取 & 个体与群体记忆注入 & GraphRAG构建 -2. **环境搭建**:实体关系抽取 & 人设生成 & 环境配置Agent注入仿真参数 -3. **开始模拟**:双平台并行模拟 & 自动解析预测需求 & 动态更新时序记忆 -4. **报告生成**:ReportAgent拥有丰富的工具集与模拟后环境进行深度交互 -5. **深度互动**:与模拟世界中的任意一位进行对话 & 与ReportAgent进行对话 +1. **Graph Building**: Seed extraction, individual and collective memory injection, and GraphRAG construction +2. **Environment Setup**: Entity relationship extraction, persona generation, and agent configuration injection +3. **Simulation**: Dual-platform parallel simulation, automatic prediction-requirement parsing, and dynamic temporal memory updates +4. **Report Generation**: ReportAgent uses a rich toolset to interact deeply with the post-simulation environment +5. **Deep Interaction**: Chat with any agent in the simulated world and continue the conversation with ReportAgent -## 🚀 快速开始 +## 🚀 Quick Start -### 一、源码部署(推荐) +### Option 1: Source Deployment (Recommended) -#### 前置要求 +#### Prerequisites -| 工具 | 版本要求 | 说明 | 安装检查 | -|------|---------|------|---------| -| **Node.js** | 18+ | 前端运行环境,包含 npm | `node -v` | -| **Python** | ≥3.11, ≤3.12 | 后端运行环境 | `python --version` | -| **uv** | 最新版 | Python 包管理器 | `uv --version` | +| Tool | Version | Description | Check Installation | +|------|---------|-------------|-------------------| +| **Node.js** | 18+ | Frontend runtime, includes npm | `node -v` | +| **Python** | ≥3.11, ≤3.12 | Backend runtime | `python --version` | +| **uv** | Latest | Python package manager | `uv --version` | -#### 1. 配置环境变量 +#### 1. 
Configure Environment Variables ```bash -# 复制示例配置文件 +# Copy the example configuration file cp .env.example .env -# 编辑 .env 文件,填入必要的 API 密钥 +# Edit the .env file and fill in the required API keys ``` -**必需的环境变量:** +**Required Environment Variables:** ```env -# LLM API配置(支持 OpenAI SDK 格式的任意 LLM API) -# 推荐使用阿里百炼平台qwen-plus模型:https://bailian.console.aliyun.com/ -# 注意消耗较大,可先进行小于40轮的模拟尝试 +# LLM API configuration (supports any LLM API compatible with the OpenAI SDK format) +# Recommended: use the qwen-plus model on Alibaba Bailian: https://bailian.console.aliyun.com/ +# Note: usage can be expensive, so try simulations with fewer than 40 rounds first LLM_API_KEY=your_api_key LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus -# Zep Cloud 配置 -# 每月免费额度即可支撑简单使用:https://app.getzep.com/ +# ZEP memory graph configuration +# The free monthly quota is enough for basic usage: https://app.getzep.com/ ZEP_API_KEY=your_zep_api_key ``` -#### 2. 安装依赖 +#### 2. Install Dependencies ```bash -# 一键安装所有依赖(根目录 + 前端 + 后端) +# One-click installation of all dependencies (root + frontend + backend) npm run setup:all ``` -或者分步安装: +Or install them step by step: ```bash -# 安装 Node 依赖(根目录 + 前端) +# Install Node dependencies (root + frontend) npm run setup -# 安装 Python 依赖(后端,自动创建虚拟环境) +# Install Python dependencies (backend, auto-creates virtual environment) npm run setup:backend ``` -#### 3. 启动服务 +#### 3. Start Services ```bash -# 同时启动前后端(在项目根目录执行) +# Start both frontend and backend (run from the project root) npm run dev ``` -**服务地址:** -- 前端:`http://localhost:3000` -- 后端 API:`http://localhost:5001` +**Service URLs:** +- Frontend: `http://localhost:3000` +- Backend API: `http://localhost:5001` -**单独启动:** +**Start Individually:** ```bash -npm run backend # 仅启动后端 -npm run frontend # 仅启动前端 +npm run backend # Start the backend only +npm run frontend # Start the frontend only ``` -### 二、Docker 部署 +### Option 2: Docker Deployment ```bash -# 1. 配置环境变量(同源码部署) +# 1. 
Configure environment variables (same as source deployment) cp .env.example .env -# 2. 拉取镜像并启动 +# 2. Pull the image and start docker compose up -d ``` -默认会读取根目录下的 `.env`,并映射端口 `3000(前端)/5001(后端)` +Docker reads `.env` from the project root by default and maps ports `3000 (frontend) / 5001 (backend)`. -> 在 `docker-compose.yml` 中已通过注释提供加速镜像地址,可按需替换 +> A mirror image URL is provided as a comment in `docker-compose.yml` if you need a faster pull source. -## 📬 更多交流 +## 📬 Join the Conversation
-QQ交流群 +QQ Group
  -MiroFish团队长期招募全职/实习,如果你对多Agent应用感兴趣,欢迎投递简历至:**mirofish@shanda.com** +The MiroFish team is recruiting for full-time and internship roles. If you are interested in multi-agent simulation and LLM applications, send your resume to: **mirofish@shanda.com** -## 📄 致谢 +## 📄 Acknowledgments -**MiroFish 得到了盛大集团的战略支持和孵化!** +**MiroFish has received strategic support and incubation from Shanda Group.** -MiroFish 的仿真引擎由 **[OASIS](https://github.com/camel-ai/oasis)** 驱动,我们衷心感谢 CAMEL-AI 团队的开源贡献! +MiroFish's simulation engine is powered by **[OASIS](https://github.com/camel-ai/oasis)**, and we sincerely thank the CAMEL-AI team for their open-source contributions. -## 📈 项目统计 +## 📈 Project Statistics diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index 12ff1ba2d..053fbbb60 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -42,7 +42,7 @@ def get_project(project_id: str): if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": f"Project not found: {project_id}" }), 404 return jsonify({ @@ -76,12 +76,12 @@ def delete_project(project_id: str): if not success: return jsonify({ "success": False, - "error": f"项目不存在或删除失败: {project_id}" + "error": f"Project not found or could not be deleted: {project_id}" }), 404 return jsonify({ "success": True, - "message": f"项目已删除: {project_id}" + "message": f"Project deleted: {project_id}" }) @@ -95,7 +95,7 @@ def reset_project(project_id: str): if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": f"Project not found: {project_id}" }), 404 # 重置到本体已生成状态 @@ -111,7 +111,7 @@ def reset_project(project_id: str): return jsonify({ "success": True, - "message": f"项目已重置: {project_id}", + "message": f"Project reset: {project_id}", "data": project.to_dict() }) @@ -147,20 +147,20 @@ def generate_ontology(): } """ try: - logger.info("=== 开始生成本体定义 ===") + logger.info("=== Starting ontology generation ===") # 获取参数 simulation_requirement = 
request.form.get('simulation_requirement', '') project_name = request.form.get('project_name', 'Unnamed Project') additional_context = request.form.get('additional_context', '') - logger.debug(f"项目名称: {project_name}") - logger.debug(f"模拟需求: {simulation_requirement[:100]}...") + logger.debug(f"Project name: {project_name}") + logger.debug(f"Simulation requirement: {simulation_requirement[:100]}...") if not simulation_requirement: return jsonify({ "success": False, - "error": "请提供模拟需求描述 (simulation_requirement)" + "error": "Please provide a simulation requirement description (simulation_requirement)." }), 400 # 获取上传的文件 @@ -168,13 +168,13 @@ def generate_ontology(): if not uploaded_files or all(not f.filename for f in uploaded_files): return jsonify({ "success": False, - "error": "请至少上传一个文档文件" + "error": "Please upload at least one document file." }), 400 # 创建项目 project = ProjectManager.create_project(name=project_name) project.simulation_requirement = simulation_requirement - logger.info(f"创建项目: {project.project_id}") + logger.info(f"Created project: {project.project_id}") # 保存文件并提取文本 document_texts = [] @@ -203,16 +203,16 @@ def generate_ontology(): ProjectManager.delete_project(project.project_id) return jsonify({ "success": False, - "error": "没有成功处理任何文档,请检查文件格式" + "error": "No documents were processed successfully. Please check the file format." 
}), 400 # 保存提取的文本 project.total_text_length = len(all_text) ProjectManager.save_extracted_text(project.project_id, all_text) - logger.info(f"文本提取完成,共 {len(all_text)} 字符") + logger.info(f"Text extraction completed: {len(all_text)} characters") # 生成本体 - logger.info("调用 LLM 生成本体定义...") + logger.info("Calling the LLM to generate the ontology...") generator = OntologyGenerator() ontology = generator.generate( document_texts=document_texts, @@ -223,7 +223,7 @@ def generate_ontology(): # 保存本体到项目 entity_count = len(ontology.get("entity_types", [])) edge_count = len(ontology.get("edge_types", [])) - logger.info(f"本体生成完成: {entity_count} 个实体类型, {edge_count} 个关系类型") + logger.info(f"Ontology generation completed: {entity_count} entity types, {edge_count} edge types") project.ontology = { "entity_types": ontology.get("entity_types", []), @@ -232,7 +232,7 @@ def generate_ontology(): project.analysis_summary = ontology.get("analysis_summary", "") project.status = ProjectStatus.ONTOLOGY_GENERATED ProjectManager.save_project(project) - logger.info(f"=== 本体生成完成 === 项目ID: {project.project_id}") + logger.info(f"=== Ontology generation completed === project_id: {project.project_id}") return jsonify({ "success": True, @@ -275,33 +275,33 @@ def build_graph(): "data": { "project_id": "proj_xxxx", "task_id": "task_xxxx", - "message": "图谱构建任务已启动" + "message": "Graph build task started" } } """ try: - logger.info("=== 开始构建图谱 ===") + logger.info("=== Starting graph build ===") # 检查配置 errors = [] if not Config.ZEP_API_KEY: - errors.append("ZEP_API_KEY未配置") + errors.append("ZEP_API_KEY is not configured") if errors: - logger.error(f"配置错误: {errors}") + logger.error(f"Configuration error: {errors}") return jsonify({ "success": False, - "error": "配置错误: " + "; ".join(errors) + "error": "Configuration error: " + "; ".join(errors) }), 500 # 解析请求 data = request.get_json() or {} project_id = data.get('project_id') - logger.debug(f"请求参数: project_id={project_id}") + logger.debug(f"Request parameters: 
project_id={project_id}") if not project_id: return jsonify({ "success": False, - "error": "请提供 project_id" + "error": "Please provide project_id." }), 400 # 获取项目 @@ -309,7 +309,7 @@ def build_graph(): if not project: return jsonify({ "success": False, - "error": f"项目不存在: {project_id}" + "error": f"Project not found: {project_id}" }), 404 # 检查项目状态 @@ -318,13 +318,13 @@ def build_graph(): if project.status == ProjectStatus.CREATED: return jsonify({ "success": False, - "error": "项目尚未生成本体,请先调用 /ontology/generate" + "error": "The project does not have an ontology yet. Call /ontology/generate first." }), 400 if project.status == ProjectStatus.GRAPH_BUILDING and not force: return jsonify({ "success": False, - "error": "图谱正在构建中,请勿重复提交。如需强制重建,请添加 force: true", + "error": "A graph build is already in progress. To force a rebuild, set force: true.", "task_id": project.graph_build_task_id }), 400 @@ -349,7 +349,7 @@ def build_graph(): if not text: return jsonify({ "success": False, - "error": "未找到提取的文本内容" + "error": "Extracted text content was not found." }), 400 # 获取本体 @@ -357,13 +357,13 @@ def build_graph(): if not ontology: return jsonify({ "success": False, - "error": "未找到本体定义" + "error": "Ontology definition was not found." }), 400 # 创建异步任务 task_manager = TaskManager() - task_id = task_manager.create_task(f"构建图谱: {graph_name}") - logger.info(f"创建图谱构建任务: task_id={task_id}, project_id={project_id}") + task_id = task_manager.create_task(f"Build graph: {graph_name}") + logger.info(f"Created graph build task: task_id={task_id}, project_id={project_id}") # 更新项目状态 project.status = ProjectStatus.GRAPH_BUILDING @@ -374,11 +374,11 @@ def build_graph(): def build_task(): build_logger = get_logger('mirofish.build') try: - build_logger.info(f"[{task_id}] 开始构建图谱...") + build_logger.info(f"[{task_id}] Starting graph build...") task_manager.update_task( task_id, status=TaskStatus.PROCESSING, - message="初始化图谱构建服务..." + message="Initializing the graph build service..." 
) # 创建图谱构建服务 @@ -387,7 +387,7 @@ def build_task(): # 分块 task_manager.update_task( task_id, - message="文本分块中...", + message="Chunking text...", progress=5 ) chunks = TextProcessor.split_text( @@ -400,7 +400,7 @@ def build_task(): # 创建图谱 task_manager.update_task( task_id, - message="创建Zep图谱...", + message="Creating the Zep graph...", progress=10 ) graph_id = builder.create_graph(name=graph_name) @@ -412,7 +412,7 @@ def build_task(): # 设置本体 task_manager.update_task( task_id, - message="设置本体定义...", + message="Applying the ontology definition...", progress=15 ) builder.set_ontology(graph_id, ontology) @@ -428,7 +428,7 @@ def add_progress_callback(msg, progress_ratio): task_manager.update_task( task_id, - message=f"开始添加 {total_chunks} 个文本块...", + message=f"Adding {total_chunks} text chunks...", progress=15 ) @@ -442,7 +442,7 @@ def add_progress_callback(msg, progress_ratio): # 等待Zep处理完成(查询每个episode的processed状态) task_manager.update_task( task_id, - message="等待Zep处理数据...", + message="Waiting for Zep to process the data...", progress=55 ) @@ -459,7 +459,7 @@ def wait_progress_callback(msg, progress_ratio): # 获取图谱数据 task_manager.update_task( task_id, - message="获取图谱数据...", + message="Fetching graph data...", progress=95 ) graph_data = builder.get_graph_data(graph_id) @@ -470,13 +470,13 @@ def wait_progress_callback(msg, progress_ratio): node_count = graph_data.get("node_count", 0) edge_count = graph_data.get("edge_count", 0) - build_logger.info(f"[{task_id}] 图谱构建完成: graph_id={graph_id}, 节点={node_count}, 边={edge_count}") + build_logger.info(f"[{task_id}] Graph build completed: graph_id={graph_id}, nodes={node_count}, edges={edge_count}") # 完成 task_manager.update_task( task_id, status=TaskStatus.COMPLETED, - message="图谱构建完成", + message="Graph build completed", progress=100, result={ "project_id": project_id, @@ -489,7 +489,7 @@ def wait_progress_callback(msg, progress_ratio): except Exception as e: # 更新项目状态为失败 - build_logger.error(f"[{task_id}] 图谱构建失败: {str(e)}") + 
build_logger.error(f"[{task_id}] Graph build failed: {str(e)}") build_logger.debug(traceback.format_exc()) project.status = ProjectStatus.FAILED @@ -499,7 +499,7 @@ def wait_progress_callback(msg, progress_ratio): task_manager.update_task( task_id, status=TaskStatus.FAILED, - message=f"构建失败: {str(e)}", + message=f"Build failed: {str(e)}", error=traceback.format_exc() ) @@ -512,7 +512,7 @@ def wait_progress_callback(msg, progress_ratio): "data": { "project_id": project_id, "task_id": task_id, - "message": "图谱构建任务已启动,请通过 /task/{task_id} 查询进度" + "message": "Graph build task started. Check progress via /task/{task_id}." } }) @@ -536,7 +536,7 @@ def get_task(task_id: str): if not task: return jsonify({ "success": False, - "error": f"任务不存在: {task_id}" + "error": f"Task not found: {task_id}" }), 404 return jsonify({ @@ -570,7 +570,7 @@ def get_graph_data(graph_id: str): if not Config.ZEP_API_KEY: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "ZEP_API_KEY is not configured" }), 500 builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) @@ -598,7 +598,7 @@ def delete_graph(graph_id: str): if not Config.ZEP_API_KEY: return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "ZEP_API_KEY is not configured" }), 500 builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) @@ -606,7 +606,7 @@ def delete_graph(graph_id: str): return jsonify({ "success": True, - "message": f"图谱已删除: {graph_id}" + "message": f"Graph deleted: {graph_id}" }) except Exception as e: diff --git a/backend/app/models/task.py b/backend/app/models/task.py index e15f35fbd..f1fabd586 100644 --- a/backend/app/models/task.py +++ b/backend/app/models/task.py @@ -148,7 +148,7 @@ def complete_task(self, task_id: str, result: Dict): task_id, status=TaskStatus.COMPLETED, progress=100, - message="任务完成", + message="Task completed", result=result ) @@ -157,7 +157,7 @@ def fail_task(self, task_id: str, error: str): self.update_task( task_id, status=TaskStatus.FAILED, - 
message="任务失败", + message="Task failed", error=error ) @@ -181,4 +181,3 @@ def cleanup_old_tasks(self, max_age_hours: int = 24): ] for tid in old_ids: del self._tasks[tid] - diff --git a/backend/app/services/graph_builder.py b/backend/app/services/graph_builder.py index 0e0444bf3..e75f9700d 100644 --- a/backend/app/services/graph_builder.py +++ b/backend/app/services/graph_builder.py @@ -7,6 +7,7 @@ import uuid import time import threading +import logging from typing import Dict, Any, List, Optional, Callable from dataclasses import dataclass @@ -16,9 +17,13 @@ from ..config import Config from ..models.task import TaskManager, TaskStatus from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges +from ..utils.ontology_normalizer import normalize_ontology_for_zep from .text_processor import TextProcessor +logger = logging.getLogger(__name__) + + @dataclass class GraphInfo: """图谱信息""" @@ -206,6 +211,15 @@ def set_ontology(self, graph_id: str, ontology: Dict[str, Any]): # 抑制 Pydantic v2 关于 Field(default=None) 的警告 # 这是 Zep SDK 要求的用法,警告来自动态类创建,可以安全忽略 warnings.filterwarnings('ignore', category=UserWarning, module='pydantic') + + ontology, entity_name_mapping = normalize_ontology_for_zep(ontology) + renamed_entities = { + original: normalized + for original, normalized in entity_name_mapping.items() + if original != normalized + } + if renamed_entities: + logger.info("Normalized ontology entity names for Zep compatibility: %s", renamed_entities) # Zep 保留名称,不能作为属性名 RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'} @@ -497,4 +511,3 @@ def get_graph_data(self, graph_id: str) -> Dict[str, Any]: def delete_graph(self, graph_id: str): """删除图谱""" self.client.graph.delete(graph_id=graph_id) - diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py index 2d3e39bd8..0e8d79369 100644 --- a/backend/app/services/ontology_generator.py +++ b/backend/app/services/ontology_generator.py @@ -6,6 +6,7 @@ 
import json from typing import Dict, Any, List, Optional from ..utils.llm_client import LLMClient +from ..utils.ontology_normalizer import normalize_ontology_for_zep # 本体生成的系统提示词 @@ -61,7 +62,7 @@ "name": "关系类型名称(英文,UPPER_SNAKE_CASE)", "description": "简短描述(英文,不超过100字符)", "source_targets": [ - {"source": "源实体类型", "target": "目标实体类型"} + {"source": "源实体类型(必须与实体类型名称完全一致)", "target": "目标实体类型(必须与实体类型名称完全一致)"} ], "attributes": [] } @@ -250,6 +251,7 @@ def _build_user_message( 3. 前8个是根据文本内容设计的具体类型 4. 所有实体类型必须是现实中可以发声的主体,不能是抽象概念 5. 属性名不能使用 name、uuid、group_id 等保留字,用 full_name、org_name 等替代 +6. 实体类型名称必须只包含字母和数字,不能包含下划线、空格、连字符,例如 `StudentLeader` 是合法的,`Student_Leader` 不合法 """ return message @@ -341,8 +343,9 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: if len(result["edge_types"]) > MAX_EDGE_TYPES: result["edge_types"] = result["edge_types"][:MAX_EDGE_TYPES] - - return result + + normalized_result, _ = normalize_ontology_for_zep(result) + return normalized_result def generate_python_code(self, ontology: Dict[str, Any]) -> str: """ @@ -450,4 +453,3 @@ def generate_python_code(self, ontology: Dict[str, Any]) -> str: code_lines.append('}') return '\n'.join(code_lines) - diff --git a/backend/app/services/simulation_manager.py b/backend/app/services/simulation_manager.py index 96c496fd4..15f4d7877 100644 --- a/backend/app/services/simulation_manager.py +++ b/backend/app/services/simulation_manager.py @@ -260,7 +260,7 @@ def prepare_simulation( """ state = self._load_simulation_state(simulation_id) if not state: - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulation not found: {simulation_id}") try: state.status = SimulationStatus.PREPARING @@ -270,12 +270,12 @@ def prepare_simulation( # ========== 阶段1: 读取并过滤实体 ========== if progress_callback: - progress_callback("reading", 0, "正在连接Zep图谱...") + progress_callback("reading", 0, "Connecting to the Zep graph...") reader = ZepEntityReader() if progress_callback: - 
progress_callback("reading", 30, "正在读取节点数据...") + progress_callback("reading", 30, "Reading node data...") filtered = reader.filter_defined_entities( graph_id=state.graph_id, @@ -289,14 +289,14 @@ def prepare_simulation( if progress_callback: progress_callback( "reading", 100, - f"完成,共 {filtered.filtered_count} 个实体", + f"Completed. {filtered.filtered_count} entities found.", current=filtered.filtered_count, total=filtered.filtered_count ) if filtered.filtered_count == 0: state.status = SimulationStatus.FAILED - state.error = "没有找到符合条件的实体,请检查图谱是否正确构建" + state.error = "No matching entities were found. Please verify that the graph was built correctly." self._save_simulation_state(state) return state @@ -306,7 +306,7 @@ def prepare_simulation( if progress_callback: progress_callback( "generating_profiles", 0, - "开始生成...", + "Starting profile generation...", current=0, total=total_entities ) @@ -352,7 +352,7 @@ def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_profiles", 95, - "保存Profile文件...", + "Saving profile files...", current=total_entities, total=total_entities ) @@ -375,7 +375,7 @@ def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_profiles", 100, - f"完成,共 {len(profiles)} 个Profile", + f"Completed. 
{len(profiles)} profiles generated.", current=len(profiles), total=len(profiles) ) @@ -384,7 +384,7 @@ def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_config", 0, - "正在分析模拟需求...", + "Analyzing the simulation requirement...", current=0, total=3 ) @@ -394,7 +394,7 @@ def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_config", 30, - "正在调用LLM生成配置...", + "Generating configuration with the LLM...", current=1, total=3 ) @@ -413,7 +413,7 @@ def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_config", 70, - "正在保存配置文件...", + "Saving the configuration file...", current=2, total=3 ) @@ -429,7 +429,7 @@ def profile_progress(current, total, msg): if progress_callback: progress_callback( "generating_config", 100, - "配置生成完成", + "Configuration generation completed.", current=3, total=3 ) @@ -481,7 +481,7 @@ def get_profiles(self, simulation_id: str, platform: str = "reddit") -> List[Dic """获取模拟的Agent Profile""" state = self._load_simulation_state(simulation_id) if not state: - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulation not found: {simulation_id}") sim_dir = self._get_simulation_dir(simulation_id) profile_path = os.path.join(sim_dir, f"{platform}_profiles.json") @@ -519,10 +519,10 @@ def get_run_instructions(self, simulation_id: str) -> Dict[str, str]: "parallel": f"python {scripts_dir}/run_parallel_simulation.py --config {config_path}", }, "instructions": ( - f"1. 激活conda环境: conda activate MiroFish\n" - f"2. 运行模拟 (脚本位于 {scripts_dir}):\n" - f" - 单独运行Twitter: python {scripts_dir}/run_twitter_simulation.py --config {config_path}\n" - f" - 单独运行Reddit: python {scripts_dir}/run_reddit_simulation.py --config {config_path}\n" - f" - 并行运行双平台: python {scripts_dir}/run_parallel_simulation.py --config {config_path}" + f"1. Activate the conda environment: conda activate MiroFish\n" + f"2. 
Run the simulation (scripts are in {scripts_dir}):\n" + f" - Twitter only: python {scripts_dir}/run_twitter_simulation.py --config {config_path}\n" + f" - Reddit only: python {scripts_dir}/run_reddit_simulation.py --config {config_path}\n" + f" - Run both platforms in parallel: python {scripts_dir}/run_parallel_simulation.py --config {config_path}" ) } diff --git a/backend/app/utils/ontology_normalizer.py b/backend/app/utils/ontology_normalizer.py new file mode 100644 index 000000000..eae0c8b20 --- /dev/null +++ b/backend/app/utils/ontology_normalizer.py @@ -0,0 +1,119 @@ +""" +Utilities for normalizing ontology names before sending them to Zep. +""" + +from __future__ import annotations + +import copy +import re +from typing import Any, Dict, Tuple + + +PASCAL_CASE_PATTERN = re.compile(r"^[A-Z][A-Za-z0-9]*$") + + +def _split_name_parts(raw_name: str) -> list[str]: + text = str(raw_name or "").strip() + if not text: + return [] + + text = re.sub(r"[^A-Za-z0-9]+", " ", text) + text = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", text) + text = re.sub(r"(?<=[A-Z])(?=[A-Z][a-z])", " ", text) + text = re.sub(r"(?<=[A-Za-z])(?=[0-9])", " ", text) + text = re.sub(r"(?<=[0-9])(?=[A-Za-z])", " ", text) + return [part for part in text.split() if part] + + +def normalize_pascal_case_name(raw_name: str, default_prefix: str = "Entity") -> str: + """ + Convert an arbitrary label into Zep-safe PascalCase. 
+ """ + text = str(raw_name or "").strip() + if text and PASCAL_CASE_PATTERN.match(text): + return text + + parts = _split_name_parts(text) + if not parts: + return default_prefix + + normalized_parts = [] + for part in parts: + if part.isdigit(): + normalized_parts.append(part) + elif part.isupper() and len(part) > 1: + normalized_parts.append(part) + else: + normalized_parts.append(part[0].upper() + part[1:].lower()) + + normalized = "".join(normalized_parts) + + if not normalized: + normalized = default_prefix + elif not normalized[0].isalpha(): + normalized = f"{default_prefix}{normalized}" + + return normalized + + +def _ensure_unique_name(base_name: str, used_names: set[str]) -> str: + candidate = base_name + suffix = 2 + + while candidate in used_names: + candidate = f"{base_name}{suffix}" + suffix += 1 + + used_names.add(candidate) + return candidate + + +def normalize_ontology_for_zep(ontology: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, str]]: + """ + Normalize ontology entity names and source/target references for Zep validation. 
+ + Returns: + A tuple of (normalized_ontology, entity_name_mapping) + """ + normalized = copy.deepcopy(ontology or {}) + entity_types = normalized.setdefault("entity_types", []) + edge_types = normalized.setdefault("edge_types", []) + + used_entity_names: set[str] = set() + entity_name_mapping: Dict[str, str] = {} + + for entity in entity_types: + raw_name = str(entity.get("name", "")).strip() + safe_name = normalize_pascal_case_name(raw_name, default_prefix="Entity") + safe_name = _ensure_unique_name(safe_name, used_entity_names) + + entity["name"] = safe_name + + if raw_name: + entity_name_mapping[raw_name] = safe_name + entity_name_mapping[raw_name.strip()] = safe_name + entity_name_mapping[safe_name] = safe_name + + for edge in edge_types: + source_targets = edge.setdefault("source_targets", []) + for source_target in source_targets: + raw_source = str(source_target.get("source", "")).strip() + raw_target = str(source_target.get("target", "")).strip() + + if raw_source: + source_target["source"] = entity_name_mapping.get( + raw_source, + normalize_pascal_case_name(raw_source, default_prefix="Entity"), + ) + else: + source_target["source"] = "Entity" + + if raw_target: + source_target["target"] = entity_name_mapping.get( + raw_target, + normalize_pascal_case_name(raw_target, default_prefix="Entity"), + ) + else: + source_target["target"] = "Entity" + + return normalized, entity_name_mapping diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 4f5361d53..ec46a6ab1 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "mirofish-backend" version = "0.1.0" -description = "MiroFish - 简洁通用的群体智能引擎,预测万物" +description = "MiroFish - A simple, universal swarm intelligence engine for predicting anything" requires-python = ">=3.11" license = { text = "AGPL-3.0" } authors = [ @@ -9,27 +9,27 @@ authors = [ ] dependencies = [ - # 核心框架 + # Core framework "flask>=3.0.0", "flask-cors>=6.0.0", - # LLM 相关 + # LLM support 
"openai>=1.0.0", # Zep Cloud "zep-cloud==3.13.0", - # OASIS 社交媒体模拟 + # OASIS social media simulation "camel-oasis==0.2.5", "camel-ai==0.2.78", - # 文件处理 + # File processing "PyMuPDF>=1.24.0", - # 编码检测(支持非UTF-8编码的文本文件) + # Encoding detection (supports text files that are not UTF-8) "charset-normalizer>=3.0.0", "chardet>=5.0.0", - # 工具库 + # Utilities "python-dotenv>=1.0.0", "pydantic>=2.0.0", ] diff --git a/backend/requirements.txt b/backend/requirements.txt index 4f146296b..93a3b5bf7 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -5,31 +5,31 @@ # Install: pip install -r requirements.txt # =========================================== -# ============= 核心框架 ============= +# ============= Core Framework ============= flask>=3.0.0 flask-cors>=6.0.0 -# ============= LLM 相关 ============= -# OpenAI SDK(统一使用 OpenAI 格式调用 LLM) +# ============= LLM Support ============= +# OpenAI SDK (all LLM calls use the OpenAI-compatible format) openai>=1.0.0 # ============= Zep Cloud ============= zep-cloud==3.13.0 -# ============= OASIS 社交媒体模拟 ============= -# OASIS 社交模拟框架 +# ============= OASIS Social Media Simulation ============= +# OASIS social simulation framework camel-oasis==0.2.5 camel-ai==0.2.78 -# ============= 文件处理 ============= +# ============= File Processing ============= PyMuPDF>=1.24.0 -# 编码检测(支持非UTF-8编码的文本文件) +# Encoding detection (supports text files that are not UTF-8) charset-normalizer>=3.0.0 chardet>=5.0.0 -# ============= 工具库 ============= -# 环境变量加载 +# ============= Utilities ============= +# Environment variable loading python-dotenv>=1.0.0 -# 数据验证 +# Data validation pydantic>=2.0.0 diff --git a/backend/tests/test_ontology_normalizer.py b/backend/tests/test_ontology_normalizer.py new file mode 100644 index 000000000..6e6402e74 --- /dev/null +++ b/backend/tests/test_ontology_normalizer.py @@ -0,0 +1,38 @@ +from app.utils.ontology_normalizer import normalize_ontology_for_zep + + +def test_normalize_ontology_entity_names_and_source_targets(): + 
ontology = { + "entity_types": [ + { + "name": "IH_Team", + "description": "Escalation team", + "attributes": [], + }, + { + "name": "billing department", + "description": "Billing org", + "attributes": [], + }, + ], + "edge_types": [ + { + "name": "LEADS", + "description": "Leadership relation", + "source_targets": [ + {"source": "IH_Team", "target": "billing department"}, + ], + "attributes": [], + } + ], + } + + normalized, entity_name_mapping = normalize_ontology_for_zep(ontology) + + assert entity_name_mapping["IH_Team"] == "IHTeam" + assert entity_name_mapping["billing department"] == "BillingDepartment" + assert normalized["entity_types"][0]["name"] == "IHTeam" + assert normalized["entity_types"][1]["name"] == "BillingDepartment" + assert normalized["edge_types"][0]["source_targets"] == [ + {"source": "IHTeam", "target": "BillingDepartment"} + ] diff --git a/docker-compose.yml b/docker-compose.yml index 637f1dfae..b7ea34507 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,10 @@ services: mirofish: - image: ghcr.io/666ghj/mirofish:latest - # 加速镜像(如拉取缓慢可替换上方地址) + image: mirofish-local + build: + context: . + platform: linux/arm64 + # Mirror image for faster pulls if needed # image: ghcr.nju.edu.cn/666ghj/mirofish:latest container_name: mirofish env_file: @@ -11,4 +14,4 @@ services: - "5001:5001" restart: unless-stopped volumes: - - ./backend/uploads:/app/backend/uploads \ No newline at end of file + - ./backend/uploads:/app/backend/uploads diff --git a/frontend/index.html b/frontend/index.html index 009c924a4..72f28baec 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -1,5 +1,5 @@ - + @@ -7,8 +7,8 @@ - - MiroFish - 预测万物 + + MiroFish - Predict Anything
diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 8c4fa710d..fee02cad8 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1331,7 +1331,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } @@ -1809,7 +1808,6 @@ "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -1943,7 +1941,6 @@ "integrity": "sha512-ITcnkFeR3+fI8P1wMgItjGrR10170d8auB4EpMLPqmx6uxElH3a/hHGQabSHKdqd4FXWO1nFIp9rRn7JQ34ACQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -2018,7 +2015,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.25.tgz", "integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.25", "@vue/compiler-sfc": "3.5.25", diff --git a/frontend/src/App.vue b/frontend/src/App.vue index b7cd71ca6..a76fb0f97 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -3,11 +3,11 @@ \ No newline at end of file + diff --git a/frontend/src/components/Step4Report.vue b/frontend/src/components/Step4Report.vue index 22f2bdcfd..a50cd286e 100644 --- a/frontend/src/components/Step4Report.vue +++ b/frontend/src/components/Step4Report.vue @@ -58,7 +58,7 @@ - 正在生成{{ section.title }}... + Generating {{ section.title }}... @@ -129,7 +129,7 @@
@@ -141,7 +141,7 @@ - 发送问卷调查到世界中 + Send A Survey Into The World @@ -155,7 +155,7 @@
R
Report Agent - Chat
-
报告生成智能体的快速对话版本,可调用 4 种专业工具,拥有MiroFish的完整记忆
+
A fast conversational version of Report Agent with access to four specialized tools and the full MiroFish memory context.
+ | - +
- 问卷问题 + Survey Question
@@ -369,15 +369,15 @@ @click="submitSurvey" > - 发送问卷 + Send Survey
- 调查结果 - {{ surveyResults.length }} 条回复 + Survey Results + {{ surveyResults.length }} responses
{{ (result.agent_name || 'A')[0] }}
{{ result.agent_name }} - {{ result.profession || '未知职业' }} + {{ result.profession || 'Unknown profession' }}
@@ -535,7 +535,7 @@ const selectAgent = (agent, idx) => { // 恢复该 Agent 的对话记录 chatHistory.value = chatHistoryCache.value[`agent_${idx}`] || [] - addLog(`选择对话对象: ${agent.username}`) + addLog(`Selected conversation target: ${agent.username}`) } const formatTime = (timestamp) => { @@ -662,10 +662,10 @@ const sendMessage = async () => { await sendToAgent(message) } } catch (err) { - addLog(`发送失败: ${err.message}`) + addLog(`Send failed: ${err.message}`) chatHistory.value.push({ role: 'assistant', - content: `抱歉,发生了错误: ${err.message}`, + content: `Sorry, something went wrong: ${err.message}`, timestamp: new Date().toISOString() }) } finally { @@ -677,7 +677,7 @@ const sendMessage = async () => { } const sendToReportAgent = async (message) => { - addLog(`向 Report Agent 发送: ${message.substring(0, 50)}...`) + addLog(`Sending to Report Agent: ${message.substring(0, 50)}...`) // Build chat history for API const historyForApi = chatHistory.value @@ -697,21 +697,21 @@ const sendToReportAgent = async (message) => { if (res.success && res.data) { chatHistory.value.push({ role: 'assistant', - content: res.data.response || res.data.answer || '无响应', + content: res.data.response || res.data.answer || 'No response', timestamp: new Date().toISOString() }) - addLog('Report Agent 已回复') + addLog('Report Agent replied') } else { - throw new Error(res.error || '请求失败') + throw new Error(res.error || 'Request failed') } } const sendToAgent = async (message) => { if (!selectedAgent.value || selectedAgentIndex.value === null) { - throw new Error('请先选择一个模拟个体') + throw new Error('Please choose a simulated individual first') } - addLog(`向 ${selectedAgent.value.username} 发送: ${message.substring(0, 50)}...`) + addLog(`Sending to ${selectedAgent.value.username}: ${message.substring(0, 50)}...`) // Build prompt with chat history let prompt = message @@ -719,9 +719,9 @@ const sendToAgent = async (message) => { const historyContext = chatHistory.value .filter(msg => msg.content !== message) .slice(-6) - 
.map(msg => `${msg.role === 'user' ? '提问者' : '你'}:${msg.content}`) + .map(msg => `${msg.role === 'user' ? 'Interviewer' : 'You'}: ${msg.content}`) .join('\n') - prompt = `以下是我们之前的对话:\n${historyContext}\n\n现在我的新问题是:${message}` + prompt = `Here is our previous conversation:\n${historyContext}\n\nMy new question is: ${message}` } const res = await interviewAgents({ @@ -761,12 +761,12 @@ const sendToAgent = async (message) => { content: responseContent, timestamp: new Date().toISOString() }) - addLog(`${selectedAgent.value.username} 已回复`) + addLog(`${selectedAgent.value.username} replied`) } else { - throw new Error('无响应数据') + throw new Error('No response data') } } else { - throw new Error(res.error || '请求失败') + throw new Error(res.error || 'Request failed') } } @@ -803,7 +803,7 @@ const submitSurvey = async () => { if (selectedAgents.value.size === 0 || !surveyQuestion.value.trim()) return isSurveying.value = true - addLog(`发送问卷给 ${selectedAgents.value.size} 个对象...`) + addLog(`Sending a survey to ${selectedAgents.value.size} targets...`) try { const interviews = Array.from(selectedAgents.value).map(idx => ({ @@ -830,20 +830,20 @@ const submitSurvey = async () => { const agent = profiles.value[agentIdx] // 优先使用 reddit 平台回复,其次 twitter - let responseContent = '无响应' + let responseContent = 'No response' if (typeof resultsDict === 'object' && !Array.isArray(resultsDict)) { const redditKey = `reddit_${agentIdx}` const twitterKey = `twitter_${agentIdx}` const agentResult = resultsDict[redditKey] || resultsDict[twitterKey] if (agentResult) { - responseContent = agentResult.response || agentResult.answer || '无响应' + responseContent = agentResult.response || agentResult.answer || 'No response' } } else if (Array.isArray(resultsDict)) { // 兼容数组格式 const matchedResult = resultsDict.find(r => r.agent_id === agentIdx) if (matchedResult) { - responseContent = matchedResult.response || matchedResult.answer || '无响应' + responseContent = matchedResult.response || matchedResult.answer || 
'No response' } } @@ -857,12 +857,12 @@ const submitSurvey = async () => { } surveyResults.value = surveyResultsList - addLog(`收到 ${surveyResults.value.length} 条回复`) + addLog(`Received ${surveyResults.value.length} responses`) } else { - throw new Error(res.error || '请求失败') + throw new Error(res.error || 'Request failed') } } catch (err) { - addLog(`问卷发送失败: ${err.message}`) + addLog(`Survey failed: ${err.message}`) } finally { isSurveying.value = false } @@ -873,7 +873,7 @@ const loadReportData = async () => { if (!props.reportId) return try { - addLog(`加载报告数据: ${props.reportId}`) + addLog(`Loading report data: ${props.reportId}`) // Get report info const reportRes = await getReport(props.reportId) @@ -882,7 +882,7 @@ const loadReportData = async () => { await loadAgentLogs() } } catch (err) { - addLog(`加载报告失败: ${err.message}`) + addLog(`Failed to load report: ${err.message}`) } } @@ -904,10 +904,10 @@ const loadAgentLogs = async () => { } }) - addLog('报告数据加载完成') + addLog('Report data loaded') } } catch (err) { - addLog(`加载报告日志失败: ${err.message}`) + addLog(`Failed to load report logs: ${err.message}`) } } @@ -918,10 +918,10 @@ const loadProfiles = async () => { const res = await getSimulationProfilesRealtime(props.simulationId, 'reddit') if (res.success && res.data) { profiles.value = res.data.profiles || [] - addLog(`加载了 ${profiles.value.length} 个模拟个体`) + addLog(`Loaded ${profiles.value.length} simulated individuals`) } } catch (err) { - addLog(`加载模拟个体失败: ${err.message}`) + addLog(`Failed to load simulated individuals: ${err.message}`) } } @@ -935,7 +935,7 @@ const handleClickOutside = (e) => { // Lifecycle onMounted(() => { - addLog('Step5 深度互动初始化') + addLog('Step5 deep interaction initialized') loadReportData() loadProfiles() document.addEventListener('click', handleClickOutside) diff --git a/frontend/src/store/pendingUpload.js b/frontend/src/store/pendingUpload.js index 958c3d0a6..bdac77cad 100644 --- a/frontend/src/store/pendingUpload.js +++ 
b/frontend/src/store/pendingUpload.js @@ -1,6 +1,6 @@ /** - * 临时存储待上传的文件和需求 - * 用于首页点击启动引擎后立即跳转,在Process页面再进行API调用 + * Temporarily store files and requirements before upload + * Used when the home page immediately navigates to Process and performs the API call there */ import { reactive } from 'vue' diff --git a/frontend/src/views/Home.vue b/frontend/src/views/Home.vue index afe01a0c4..5fa1cc94e 100644 --- a/frontend/src/views/Home.vue +++ b/frontend/src/views/Home.vue @@ -5,7 +5,7 @@ @@ -15,21 +15,21 @@
- 简洁通用的群体智能引擎 - / v0.1-预览版 + A lean, general-purpose collective intelligence engine + / v0.1-preview

- 上传任意报告
- 即刻推演未来 + Upload Any Report
+ Simulate the Future Instantly

- 即使只有一段文字,MiroFish 也能基于其中的现实种子,全自动生成与之对应的至多百万级Agent构成的平行世界。通过上帝视角注入变量,在复杂的群体交互中寻找动态环境下的“局部最优解” + Even a single paragraph is enough for MiroFish to extract real-world seeds and automatically build a parallel world with up to millions of agents. Inject variables from a god's-eye view and search for a dynamic "local optimum" across complex collective interactions.

- 让未来在 Agent 群中预演,让决策在百战后胜出_ + Let the future play out across agents, and let decisions win after many trials_

@@ -53,65 +53,65 @@
- 系统状态 + System Status
-

准备就绪

+

Ready

- 预测引擎待命中,可上传多份非结构化数据以初始化模拟序列 + The prediction engine is standing by. Upload multiple unstructured files to initialize a simulation run.

-
低成本
-
常规模拟平均5$/次
+
Low Cost
+
Typical runs average about $5 each
-
高可用
-
最多百万级Agent模拟
+
High Scale
+
Simulate up to millions of agents
- 工作流序列 + Workflow Sequence
01
-
图谱构建
-
现实种子提取 & 个体与群体记忆注入 & GraphRAG构建
+
Graph Build
+
Seed extraction, individual and collective memory injection, and GraphRAG construction
02
-
环境搭建
-
实体关系抽取 & 人设生成 & 环境配置Agent注入仿真参数
+
Environment Setup
+
Entity and relationship extraction, persona generation, and runtime parameter injection
03
-
开始模拟
-
双平台并行模拟 & 自动解析预测需求 & 动态更新时序记忆
+
Run Simulation
+
Parallel dual-platform simulation, automatic prediction parsing, and live temporal memory updates
04
-
报告生成
-
ReportAgent拥有丰富的工具集与模拟后环境进行深度交互
+
Report Generation
+
Report Agent uses a rich toolset to investigate the post-simulation world
05
-
深度互动
-
与模拟世界中的任意一位进行对话 & 与ReportAgent进行对话
+
Deep Interaction
+
Talk with any simulated individual or continue with Report Agent
@@ -124,8 +124,8 @@
- 01 / 现实种子 - 支持格式: PDF, MD, TXT + 01 / Seed Data + Supported formats: PDF, MD, TXT
-
拖拽文件上传
-
或点击浏览文件系统
+
Drag And Drop Files
+
or click to browse your files
@@ -164,23 +164,23 @@
- 输入参数 + Input Parameters
- >_ 02 / 模拟提示词 + >_ 02 / Simulation Prompt
-
引擎: MiroFish-V1.0
+
Engine: MiroFish-V1.0
@@ -191,8 +191,8 @@ @click="startSimulation" :disabled="!canSubmit || loading" > - 启动引擎 - 初始化中... + Start Engine + Initializing...
diff --git a/frontend/src/views/InteractionView.vue b/frontend/src/views/InteractionView.vue index b153590d7..c9761c0e0 100644 --- a/frontend/src/views/InteractionView.vue +++ b/frontend/src/views/InteractionView.vue @@ -15,7 +15,7 @@ :class="{ active: viewMode === mode }" @click="viewMode = mode" > - {{ { graph: '图谱', split: '双栏', workbench: '工作台' }[mode] }} + {{ { graph: 'Graph', split: 'Split', workbench: 'Workbench' }[mode] }}
@@ -23,7 +23,7 @@
Step 5/5 - 深度互动 + Deep Interaction
@@ -47,7 +47,7 @@ />
- +
{ // --- Data Logic --- const loadReportData = async () => { try { - addLog(`加载报告数据: ${currentReportId.value}`) + addLog(`Loading report data: ${currentReportId.value}`) - // 获取 report 信息以获取 simulation_id + // Fetch report info to get the simulation_id const reportRes = await getReport(currentReportId.value) if (reportRes.success && reportRes.data) { const reportData = reportRes.data simulationId.value = reportData.simulation_id if (simulationId.value) { - // 获取 simulation 信息 + // Fetch simulation info const simRes = await getSimulation(simulationId.value) if (simRes.success && simRes.data) { const simData = simRes.data - // 获取 project 信息 + // Fetch project info if (simData.project_id) { const projRes = await getProject(simData.project_id) if (projRes.success && projRes.data) { projectData.value = projRes.data - addLog(`项目加载成功: ${projRes.data.project_id}`) + addLog(`Project loaded successfully: ${projRes.data.project_id}`) - // 获取 graph 数据 + // Fetch graph data if (projRes.data.graph_id) { await loadGraph(projRes.data.graph_id) } @@ -170,10 +170,10 @@ const loadReportData = async () => { } } } else { - addLog(`获取报告信息失败: ${reportRes.error || '未知错误'}`) + addLog(`Failed to fetch report info: ${reportRes.error || 'Unknown error'}`) } } catch (err) { - addLog(`加载异常: ${err.message}`) + addLog(`Load error: ${err.message}`) } } @@ -184,10 +184,10 @@ const loadGraph = async (graphId) => { const res = await getGraphData(graphId) if (res.success) { graphData.value = res.data - addLog('图谱数据加载成功') + addLog('Graph data loaded successfully') } } catch (err) { - addLog(`图谱加载失败: ${err.message}`) + addLog(`Failed to load graph data: ${err.message}`) } finally { graphLoading.value = false } @@ -208,7 +208,7 @@ watch(() => route.params.reportId, (newId) => { }, { immediate: true }) onMounted(() => { - addLog('InteractionView 初始化') + addLog('InteractionView initialized') loadReportData() }) diff --git a/frontend/src/views/MainView.vue b/frontend/src/views/MainView.vue index 
6ff299112..ff515621e 100644 --- a/frontend/src/views/MainView.vue +++ b/frontend/src/views/MainView.vue @@ -15,7 +15,7 @@ :class="{ active: viewMode === mode }" @click="viewMode = mode" > - {{ { graph: '图谱', split: '双栏', workbench: '工作台' }[mode] }} + {{ { graph: 'Graph', split: 'Split', workbench: 'Workbench' }[mode] }}
@@ -48,7 +48,7 @@
- + - + { const handleNextStep = (params = {}) => { if (currentStep.value < 5) { currentStep.value++ - addLog(`进入 Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) + addLog(`Entering Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) - // 如果是从 Step 2 进入 Step 3,记录模拟轮数配置 + // When moving from Step 2 to Step 3, log the round configuration if (currentStep.value === 3 && params.maxRounds) { - addLog(`自定义模拟轮数: ${params.maxRounds} 轮`) + addLog(`Custom simulation rounds: ${params.maxRounds}`) } } } @@ -171,7 +171,7 @@ const handleNextStep = (params = {}) => { const handleGoBack = () => { if (currentStep.value > 1) { currentStep.value-- - addLog(`返回 Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) + addLog(`Returning to Step ${currentStep.value}: ${stepNames[currentStep.value - 1]}`) } } diff --git a/frontend/src/views/Process.vue b/frontend/src/views/Process.vue index 2d2d3cc1a..74eeff9cb 100644 --- a/frontend/src/views/Process.vue +++ b/frontend/src/views/Process.vue @@ -7,7 +7,7 @@ -

等待本体生成

-

生成完成后将自动开始构建图谱

+

Waiting for ontology generation

+

Graph building will start automatically once ontology generation finishes

@@ -200,8 +200,8 @@
-

图谱构建中

-

数据即将显示...

+

Building graph

+

Data will appear shortly...

@@ -225,7 +225,7 @@
- 构建流程 + Build Workflow
@@ -234,7 +234,7 @@
01
-
本体生成
+
Ontology Generation
/api/graph/ontology/generate
@@ -244,15 +244,15 @@
-
接口说明
+
Endpoint
- 上传文档后,LLM分析文档内容,自动生成适合舆论模拟的本体结构(实体类型 + 关系类型) + After the documents are uploaded, the LLM analyzes the content and generates an ontology tailored for public-opinion simulation with entity types and relation types.
-
生成进度
+
Generation Progress
{{ ontologyProgress.message }} @@ -261,7 +261,7 @@
-
生成的实体类型 ({{ projectData.ontology.entity_types?.length || 0 }})
+
Generated Entity Types ({{ projectData.ontology.entity_types?.length || 0 }})
-
生成的关系类型 ({{ projectData.ontology.relation_types?.length || 0 }})
+
Generated Relation Types ({{ projectData.ontology.relation_types?.length || 0 }})
{{ rel.target_type }}
- +{{ projectData.ontology.relation_types.length - 5 }} 更多关系... + +{{ projectData.ontology.relation_types.length - 5 }} more relations...
-
等待本体生成...
+
Waiting for ontology generation...
@@ -305,7 +305,7 @@
02
-
图谱构建
+
Graph Build
/api/graph/build
@@ -315,20 +315,20 @@
-
接口说明
+
Endpoint
- 基于生成的本体,将文档分块后调用 Zep API 构建知识图谱,提取实体和关系 + Using the generated ontology, the documents are chunked and sent to the Zep API to build the knowledge graph and extract entities and relationships.
-
等待本体生成完成...
+
Waiting for ontology generation to finish...
-
构建进度
+
Build Progress
@@ -339,19 +339,19 @@
-
构建结果
+
Build Results
{{ graphData.node_count }} - 实体节点 + Entity Nodes
{{ graphData.edge_count }} - 关系边 + Relationship Edges
{{ entityTypes.length }} - 实体类型 + Entity Types
@@ -363,8 +363,8 @@
03
-
构建完成
-
准备进入下一步骤
+
Build Complete
+
Ready for the next step
{{ getPhaseStatusText(2) }} @@ -375,7 +375,7 @@
@@ -385,23 +385,23 @@
- 项目信息 + Project Info
- 项目名称 + Project Name {{ projectData.name }}
- 项目ID + Project ID {{ projectData.project_id }}
- 图谱ID + Graph ID {{ projectData.graph_id }}
- 模拟需求 + Simulation Requirement {{ projectData.simulation_requirement || '-' }}
@@ -451,11 +451,11 @@ const statusClass = computed(() => { }) const statusText = computed(() => { - if (error.value) return '构建失败' - if (currentPhase.value >= 2) return '构建完成' - if (currentPhase.value === 1) return '图谱构建中' - if (currentPhase.value === 0) return '本体生成中' - return '初始化中' + if (error.value) return 'Build failed' + if (currentPhase.value >= 2) return 'Build complete' + if (currentPhase.value === 1) return 'Building graph' + if (currentPhase.value === 0) return 'Generating ontology' + return 'Initializing' }) const entityTypes = computed(() => { @@ -482,7 +482,7 @@ const goHome = () => { const goToNextStep = () => { // TODO: 进入环境搭建步骤 - alert('环境搭建功能开发中...') + alert('Environment setup is still in development...') } const toggleFullScreen = () => { @@ -503,7 +503,7 @@ const formatDate = (dateStr) => { if (!dateStr) return '-' try { const date = new Date(dateStr) - return date.toLocaleString('zh-CN', { + return date.toLocaleString('en-US', { year: 'numeric', month: 'short', day: 'numeric', @@ -540,14 +540,14 @@ const getPhaseStatusClass = (phase) => { } const getPhaseStatusText = (phase) => { - if (currentPhase.value > phase) return '已完成' + if (currentPhase.value > phase) return 'Completed' if (currentPhase.value === phase) { if (phase === 1 && buildProgress.value) { return `${buildProgress.value.progress}%` } - return '进行中' + return 'In Progress' } - return '等待中' + return 'Pending' } // 初始化 - 处理新建项目或加载已有项目 @@ -569,7 +569,7 @@ const handleNewProject = async () => { const pending = getPendingUpload() if (!pending.isPending || pending.files.length === 0) { - error.value = '没有待上传的文件,请返回首页重新操作' + error.value = 'No pending files were found. Please return to the home page and try again.' loading.value = false return } @@ -577,7 +577,7 @@ const handleNewProject = async () => { try { loading.value = true currentPhase.value = 0 // 本体生成阶段 - ontologyProgress.value = { message: '正在上传文件并分析文档...' 
} + ontologyProgress.value = { message: 'Uploading files and analyzing documents...' } // 构建 FormData const formDataObj = new FormData() @@ -608,11 +608,11 @@ const handleNewProject = async () => { // 自动开始图谱构建 await startBuildGraph() } else { - error.value = response.error || '本体生成失败' + error.value = response.error || 'Failed to generate ontology' } } catch (err) { console.error('Handle new project error:', err) - error.value = '项目初始化失败: ' + (err.message || '未知错误') + error.value = 'Project initialization failed: ' + (err.message || 'Unknown error') } finally { loading.value = false } @@ -645,11 +645,11 @@ const loadProject = async () => { await loadGraph(response.data.graph_id) } } else { - error.value = response.error || '加载项目失败' + error.value = response.error || 'Failed to load project' } } catch (err) { console.error('Load project error:', err) - error.value = '加载项目失败: ' + (err.message || '未知错误') + error.value = 'Failed to load project: ' + (err.message || 'Unknown error') } finally { loading.value = false } @@ -668,7 +668,7 @@ const updatePhaseByStatus = (status) => { currentPhase.value = 2 break case 'failed': - error.value = projectData.value?.error || '处理失败' + error.value = projectData.value?.error || 'Processing failed' break } } @@ -680,13 +680,13 @@ const startBuildGraph = async () => { // 设置初始进度 buildProgress.value = { progress: 0, - message: '正在启动图谱构建...' + message: 'Starting graph build...' } const response = await buildGraph({ project_id: currentProjectId.value }) if (response.success) { - buildProgress.value.message = '图谱构建任务已启动...' + buildProgress.value.message = 'Graph build task started...' 
// 保存 task_id 用于轮询 const taskId = response.data.task_id @@ -697,12 +697,12 @@ const startBuildGraph = async () => { // 启动任务状态轮询 startPollingTask(taskId) } else { - error.value = response.error || '启动图谱构建失败' + error.value = response.error || 'Failed to start graph build' buildProgress.value = null } } catch (err) { console.error('Build graph error:', err) - error.value = '启动图谱构建失败: ' + (err.message || '未知错误') + error.value = 'Failed to start graph build: ' + (err.message || 'Unknown error') buildProgress.value = null } } @@ -791,13 +791,13 @@ const pollTaskStatus = async (taskId) => { // 更新进度显示 buildProgress.value = { progress: task.progress || 0, - message: task.message || '处理中...' + message: task.message || 'Processing...' } console.log('Task status:', task.status, 'Progress:', task.progress) if (task.status === 'completed') { - console.log('✅ 图谱构建完成,正在加载完整数据...') + console.log('✅ Graph build complete, loading full data...') stopPolling() stopGraphPolling() @@ -806,7 +806,7 @@ const pollTaskStatus = async (taskId) => { // 更新进度显示为完成状态 buildProgress.value = { progress: 100, - message: '构建完成,正在加载图谱...' + message: 'Build complete, loading graph...' 
} // 重新加载项目数据获取 graph_id @@ -816,9 +816,9 @@ const pollTaskStatus = async (taskId) => { // 最终加载完整图谱数据 if (projectResponse.data.graph_id) { - console.log('📊 加载完整图谱:', projectResponse.data.graph_id) + console.log('📊 Loading full graph:', projectResponse.data.graph_id) await loadGraph(projectResponse.data.graph_id) - console.log('✅ 图谱加载完成') + console.log('✅ Graph loading complete') } } @@ -827,7 +827,7 @@ const pollTaskStatus = async (taskId) => { } else if (task.status === 'failed') { stopPolling() stopGraphPolling() - error.value = '图谱构建失败: ' + (task.error || '未知错误') + error.value = 'Graph build failed: ' + (task.error || 'Unknown error') buildProgress.value = null } } @@ -905,7 +905,7 @@ const renderGraph = () => { .attr('y', height / 2) .attr('text-anchor', 'middle') .attr('fill', '#999') - .text('等待图谱数据...') + .text('Waiting for graph data...') return } @@ -917,7 +917,7 @@ const renderGraph = () => { const nodes = nodesData.map(n => ({ id: n.uuid, - name: n.name || '未命名', + name: n.name || 'Untitled', type: n.labels?.find(l => l !== 'Entity' && l !== 'Node') || 'Entity', rawData: n // 保存原始数据 })) @@ -933,8 +933,8 @@ const renderGraph = () => { type: e.fact_type || e.name || 'RELATED_TO', rawData: { ...e, - source_name: nodeMap[e.source_node_uuid]?.name || '未知', - target_name: nodeMap[e.target_node_uuid]?.name || '未知' + source_name: nodeMap[e.source_node_uuid]?.name || 'Unknown', + target_name: nodeMap[e.target_node_uuid]?.name || 'Unknown' } })) @@ -2065,4 +2065,4 @@ onUnmounted(() => { display: none; } } - \ No newline at end of file + diff --git a/frontend/src/views/ReportView.vue b/frontend/src/views/ReportView.vue index 84a3e2a3f..576702a62 100644 --- a/frontend/src/views/ReportView.vue +++ b/frontend/src/views/ReportView.vue @@ -15,7 +15,7 @@ :class="{ active: viewMode === mode }" @click="viewMode = mode" > - {{ { graph: '图谱', split: '双栏', workbench: '工作台' }[mode] }} + {{ { graph: 'Graph', split: 'Split', workbench: 'Workbench' }[mode] }}
@@ -23,7 +23,7 @@
Step 4/5 - 报告生成 + Report Generation
@@ -47,7 +47,7 @@ />
- +
{ // --- Data Logic --- const loadReportData = async () => { try { - addLog(`加载报告数据: ${currentReportId.value}`) + addLog(`Loading report data: ${currentReportId.value}`) - // 获取 report 信息以获取 simulation_id + // Fetch report info to get the simulation_id const reportRes = await getReport(currentReportId.value) if (reportRes.success && reportRes.data) { const reportData = reportRes.data simulationId.value = reportData.simulation_id if (simulationId.value) { - // 获取 simulation 信息 + // Fetch simulation info const simRes = await getSimulation(simulationId.value) if (simRes.success && simRes.data) { const simData = simRes.data - // 获取 project 信息 + // Fetch project info if (simData.project_id) { const projRes = await getProject(simData.project_id) if (projRes.success && projRes.data) { projectData.value = projRes.data - addLog(`项目加载成功: ${projRes.data.project_id}`) + addLog(`Project loaded successfully: ${projRes.data.project_id}`) - // 获取 graph 数据 + // Fetch graph data if (projRes.data.graph_id) { await loadGraph(projRes.data.graph_id) } @@ -169,10 +169,10 @@ const loadReportData = async () => { } } } else { - addLog(`获取报告信息失败: ${reportRes.error || '未知错误'}`) + addLog(`Failed to fetch report info: ${reportRes.error || 'Unknown error'}`) } } catch (err) { - addLog(`加载异常: ${err.message}`) + addLog(`Load error: ${err.message}`) } } @@ -183,10 +183,10 @@ const loadGraph = async (graphId) => { const res = await getGraphData(graphId) if (res.success) { graphData.value = res.data - addLog('图谱数据加载成功') + addLog('Graph data loaded successfully') } } catch (err) { - addLog(`图谱加载失败: ${err.message}`) + addLog(`Failed to load graph data: ${err.message}`) } finally { graphLoading.value = false } @@ -207,7 +207,7 @@ watch(() => route.params.reportId, (newId) => { }, { immediate: true }) onMounted(() => { - addLog('ReportView 初始化') + addLog('ReportView initialized') loadReportData() }) diff --git a/frontend/src/views/SimulationRunView.vue b/frontend/src/views/SimulationRunView.vue index 
14ebc5f9d..b67538a50 100644 --- a/frontend/src/views/SimulationRunView.vue +++ b/frontend/src/views/SimulationRunView.vue @@ -15,7 +15,7 @@ :class="{ active: viewMode === mode }" @click="viewMode = mode" > - {{ { graph: '图谱', split: '双栏', workbench: '工作台' }[mode] }} + {{ { graph: 'Graph', split: 'Split', workbench: 'Workbench' }[mode] }}
@@ -23,7 +23,7 @@
Step 3/5 - 开始模拟 + Start Simulation
@@ -47,7 +47,7 @@ />
- +
{ } const handleGoBack = async () => { - // 在返回 Step 2 之前,先关闭正在运行的模拟 - addLog('准备返回 Step 2,正在关闭模拟...') + // Close any running simulation before returning to Step 2 + addLog('Preparing to return to Step 2, shutting down the simulation...') - // 停止轮询 + // Stop polling stopGraphRefresh() try { - // 先尝试优雅关闭模拟环境 + // Try graceful environment shutdown first const envStatusRes = await getEnvStatus({ simulation_id: currentSimulationId.value }) if (envStatusRes.success && envStatusRes.data?.env_alive) { - addLog('正在关闭模拟环境...') + addLog('Closing the simulation environment...') try { await closeSimulationEnv({ simulation_id: currentSimulationId.value, timeout: 10 }) - addLog('✓ 模拟环境已关闭') + addLog('✓ Simulation environment closed') } catch (closeErr) { - addLog(`关闭模拟环境失败,尝试强制停止...`) + addLog('Failed to close the simulation environment, attempting a force-stop...') try { await stopSimulation({ simulation_id: currentSimulationId.value }) - addLog('✓ 模拟已强制停止') + addLog('✓ Simulation force-stopped') } catch (stopErr) { - addLog(`强制停止失败: ${stopErr.message}`) + addLog(`Force-stop failed: ${stopErr.message}`) } } } else { - // 环境未运行,检查是否需要停止进程 + // The environment is not running, but the process may still need to be stopped if (isSimulating.value) { - addLog('正在停止模拟进程...') + addLog('Stopping the simulation process...') try { await stopSimulation({ simulation_id: currentSimulationId.value }) - addLog('✓ 模拟已停止') + addLog('✓ Simulation stopped') } catch (err) { - addLog(`停止模拟失败: ${err.message}`) + addLog(`Failed to stop the simulation: ${err.message}`) } } } } catch (err) { - addLog(`检查模拟状态失败: ${err.message}`) + addLog(`Failed to check simulation status: ${err.message}`) } - // 返回到 Step 2 (环境搭建) + // Return to Step 2 (Environment Setup) router.push({ name: 'Simulation', params: { simulationId: currentSimulationId.value } }) } const handleNextStep = () => { - // Step3Simulation 组件会直接处理报告生成和路由跳转 - // 这个方法仅作为备用 - addLog('进入 Step 4: 报告生成') + // Step3Simulation handles report generation and 
routing directly + // This method only serves as a fallback + addLog('Entering Step 4: Report Generation') } // --- Data Logic --- const loadSimulationData = async () => { try { - addLog(`加载模拟数据: ${currentSimulationId.value}`) + addLog(`Loading simulation data: ${currentSimulationId.value}`) - // 获取 simulation 信息 + // Fetch simulation info const simRes = await getSimulation(currentSimulationId.value) if (simRes.success && simRes.data) { const simData = simRes.data - // 获取 simulation config 以获取 minutes_per_round + // Fetch the simulation config to get minutes_per_round try { const configRes = await getSimulationConfig(currentSimulationId.value) if (configRes.success && configRes.data?.time_config?.minutes_per_round) { minutesPerRound.value = configRes.data.time_config.minutes_per_round - addLog(`时间配置: 每轮 ${minutesPerRound.value} 分钟`) + addLog(`Time configuration: ${minutesPerRound.value} minutes per round`) } } catch (configErr) { - addLog(`获取时间配置失败,使用默认值: ${minutesPerRound.value}分钟/轮`) + addLog(`Failed to fetch time configuration, using default: ${minutesPerRound.value} minutes/round`) } - // 获取 project 信息 + // Fetch project info if (simData.project_id) { const projRes = await getProject(simData.project_id) if (projRes.success && projRes.data) { projectData.value = projRes.data - addLog(`项目加载成功: ${projRes.data.project_id}`) + addLog(`Project loaded successfully: ${projRes.data.project_id}`) - // 获取 graph 数据 + // Fetch graph data if (projRes.data.graph_id) { await loadGraph(projRes.data.graph_id) } } } } else { - addLog(`加载模拟数据失败: ${simRes.error || '未知错误'}`) + addLog(`Failed to load simulation data: ${simRes.error || 'Unknown error'}`) } } catch (err) { - addLog(`加载异常: ${err.message}`) + addLog(`Load error: ${err.message}`) } } const loadGraph = async (graphId) => { - // 当正在模拟时,自动刷新不显示全屏 loading,以免闪烁 - // 手动刷新或初始加载时显示 loading + // Avoid showing a fullscreen loader during auto-refresh while simulating + // Show loading only for manual refreshes or the initial load if 
(!isSimulating.value) { graphLoading.value = true } @@ -252,11 +252,11 @@ const loadGraph = async (graphId) => { if (res.success) { graphData.value = res.data if (!isSimulating.value) { - addLog('图谱数据加载成功') + addLog('Graph data loaded successfully') } } } catch (err) { - addLog(`图谱加载失败: ${err.message}`) + addLog(`Failed to load graph data: ${err.message}`) } finally { graphLoading.value = false } @@ -273,8 +273,8 @@ let graphRefreshTimer = null const startGraphRefresh = () => { if (graphRefreshTimer) return - addLog('开启图谱实时刷新 (30s)') - // 立即刷新一次,然后每30秒刷新 + addLog('Starting live graph refresh (30s)') + // Refresh immediately, then every 30 seconds graphRefreshTimer = setInterval(refreshGraph, 30000) } @@ -282,7 +282,7 @@ const stopGraphRefresh = () => { if (graphRefreshTimer) { clearInterval(graphRefreshTimer) graphRefreshTimer = null - addLog('停止图谱实时刷新') + addLog('Stopping live graph refresh') } } @@ -295,11 +295,11 @@ watch(isSimulating, (newValue) => { }, { immediate: true }) onMounted(() => { - addLog('SimulationRunView 初始化') + addLog('SimulationRunView initialized') - // 记录 maxRounds 配置(值已在初始化时从 query 参数获取) + // Log the maxRounds configuration retrieved from the query string if (maxRounds.value) { - addLog(`自定义模拟轮数: ${maxRounds.value}`) + addLog(`Custom simulation rounds: ${maxRounds.value}`) } loadSimulationData() @@ -444,4 +444,3 @@ onUnmounted(() => { border-right: 1px solid #EAEAEA; } - diff --git a/frontend/src/views/SimulationView.vue b/frontend/src/views/SimulationView.vue index 4b44b3972..c3692884d 100644 --- a/frontend/src/views/SimulationView.vue +++ b/frontend/src/views/SimulationView.vue @@ -15,7 +15,7 @@ :class="{ active: viewMode === mode }" @click="viewMode = mode" > - {{ { graph: '图谱', split: '双栏', workbench: '工作台' }[mode] }} + {{ { graph: 'Graph', split: 'Split', workbench: 'Workbench' }[mode] }}
@@ -23,7 +23,7 @@
Step 2/5 - 环境搭建 + Environment Setup
@@ -46,7 +46,7 @@ />
- +
{ } const handleGoBack = () => { - // 返回到 process 页面 + // Return to the process page if (projectData.value?.project_id) { router.push({ name: 'Process', params: { projectId: projectData.value.project_id } }) } else { @@ -146,122 +146,122 @@ const handleGoBack = () => { } const handleNextStep = (params = {}) => { - addLog('进入 Step 3: 开始模拟') + addLog('Entering Step 3: Start Simulation') - // 记录模拟轮数配置 + // Log the simulation round configuration if (params.maxRounds) { - addLog(`自定义模拟轮数: ${params.maxRounds} 轮`) + addLog(`Custom simulation rounds: ${params.maxRounds}`) } else { - addLog('使用自动配置的模拟轮数') + addLog('Using the automatically configured number of rounds') } - // 构建路由参数 + // Build route parameters const routeParams = { name: 'SimulationRun', params: { simulationId: currentSimulationId.value } } - // 如果有自定义轮数,通过 query 参数传递 + // Pass custom rounds through the query string when present if (params.maxRounds) { routeParams.query = { maxRounds: params.maxRounds } } - // 跳转到 Step 3 页面 + // Navigate to Step 3 router.push(routeParams) } // --- Data Logic --- /** - * 检查并关闭正在运行的模拟 - * 当用户从 Step 3 返回到 Step 2 时,默认用户要退出模拟 + * Check for a running simulation and stop it + * When the user returns from Step 3 to Step 2, assume they want to exit the simulation */ const checkAndStopRunningSimulation = async () => { if (!currentSimulationId.value) return try { - // 先检查模拟环境是否存活 + // Check whether the simulation environment is still alive first const envStatusRes = await getEnvStatus({ simulation_id: currentSimulationId.value }) if (envStatusRes.success && envStatusRes.data?.env_alive) { - addLog('检测到模拟环境正在运行,正在关闭...') + addLog('Detected a running simulation environment, closing it...') - // 尝试优雅关闭模拟环境 + // Try a graceful environment shutdown first try { const closeRes = await closeSimulationEnv({ simulation_id: currentSimulationId.value, - timeout: 10 // 10秒超时 + timeout: 10 // 10-second timeout }) if (closeRes.success) { - addLog('✓ 模拟环境已关闭') + addLog('✓ Simulation environment 
closed') } else { - addLog(`关闭模拟环境失败: ${closeRes.error || '未知错误'}`) - // 如果优雅关闭失败,尝试强制停止 + addLog(`Failed to close the simulation environment: ${closeRes.error || 'Unknown error'}`) + // Fall back to a forced stop if graceful shutdown fails await forceStopSimulation() } } catch (closeErr) { - addLog(`关闭模拟环境异常: ${closeErr.message}`) - // 如果优雅关闭异常,尝试强制停止 + addLog(`Simulation environment shutdown error: ${closeErr.message}`) + // Fall back to a forced stop if graceful shutdown errors out await forceStopSimulation() } } else { - // 环境未运行,但可能进程还在,检查模拟状态 + // The environment is not running, but the process may still exist const simRes = await getSimulation(currentSimulationId.value) if (simRes.success && simRes.data?.status === 'running') { - addLog('检测到模拟状态为运行中,正在停止...') + addLog('Simulation status is still running, stopping it...') await forceStopSimulation() } } } catch (err) { - // 检查环境状态失败不影响后续流程 - console.warn('检查模拟状态失败:', err) + // Failure to read environment status should not block the rest of the flow + console.warn('Failed to check simulation status:', err) } } /** - * 强制停止模拟 + * Force-stop the simulation */ const forceStopSimulation = async () => { try { const stopRes = await stopSimulation({ simulation_id: currentSimulationId.value }) if (stopRes.success) { - addLog('✓ 模拟已强制停止') + addLog('✓ Simulation force-stopped') } else { - addLog(`强制停止模拟失败: ${stopRes.error || '未知错误'}`) + addLog(`Failed to force-stop the simulation: ${stopRes.error || 'Unknown error'}`) } } catch (err) { - addLog(`强制停止模拟异常: ${err.message}`) + addLog(`Force-stop error: ${err.message}`) } } const loadSimulationData = async () => { try { - addLog(`加载模拟数据: ${currentSimulationId.value}`) + addLog(`Loading simulation data: ${currentSimulationId.value}`) - // 获取 simulation 信息 + // Fetch simulation info const simRes = await getSimulation(currentSimulationId.value) if (simRes.success && simRes.data) { const simData = simRes.data - // 获取 project 信息 + // Fetch project info if (simData.project_id) { 
const projRes = await getProject(simData.project_id) if (projRes.success && projRes.data) { projectData.value = projRes.data - addLog(`项目加载成功: ${projRes.data.project_id}`) + addLog(`Project loaded successfully: ${projRes.data.project_id}`) - // 获取 graph 数据 + // Fetch graph data if (projRes.data.graph_id) { await loadGraph(projRes.data.graph_id) } } } } else { - addLog(`加载模拟数据失败: ${simRes.error || '未知错误'}`) + addLog(`Failed to load simulation data: ${simRes.error || 'Unknown error'}`) } } catch (err) { - addLog(`加载异常: ${err.message}`) + addLog(`Load error: ${err.message}`) } } @@ -271,10 +271,10 @@ const loadGraph = async (graphId) => { const res = await getGraphData(graphId) if (res.success) { graphData.value = res.data - addLog('图谱数据加载成功') + addLog('Graph data loaded successfully') } } catch (err) { - addLog(`图谱加载失败: ${err.message}`) + addLog(`Failed to load graph data: ${err.message}`) } finally { graphLoading.value = false } @@ -287,12 +287,12 @@ const refreshGraph = () => { } onMounted(async () => { - addLog('SimulationView 初始化') + addLog('SimulationView initialized') - // 检查并关闭正在运行的模拟(用户从 Step 3 返回时) + // Check and stop any running simulation when returning from Step 3 await checkAndStopRunningSimulation() - // 加载模拟数据 + // Load simulation data loadSimulationData() }) @@ -431,4 +431,3 @@ onMounted(async () => { border-right: 1px solid #EAEAEA; } - diff --git a/package.json b/package.json index 63ace21a9..a22f2300c 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "mirofish", "version": "0.1.0", - "description": "MiroFish - 简洁通用的群体智能引擎,预测万物", + "description": "MiroFish - A simple, universal swarm intelligence engine for predicting anything", "scripts": { "setup": "npm install && cd frontend && npm install", "setup:backend": "cd backend && uv sync", diff --git "a/static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2761.png" b/static/image/Screenshot/screenshot-1.png similarity index 100% rename from 
"static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2761.png" rename to static/image/Screenshot/screenshot-1.png diff --git "a/static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2762.png" b/static/image/Screenshot/screenshot-2.png similarity index 100% rename from "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2762.png" rename to static/image/Screenshot/screenshot-2.png diff --git "a/static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2763.png" b/static/image/Screenshot/screenshot-3.png similarity index 100% rename from "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2763.png" rename to static/image/Screenshot/screenshot-3.png diff --git "a/static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2764.png" b/static/image/Screenshot/screenshot-4.png similarity index 100% rename from "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2764.png" rename to static/image/Screenshot/screenshot-4.png diff --git "a/static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2765.png" b/static/image/Screenshot/screenshot-5.png similarity index 100% rename from "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2765.png" rename to static/image/Screenshot/screenshot-5.png diff --git "a/static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2766.png" b/static/image/Screenshot/screenshot-6.png similarity index 100% rename from "static/image/Screenshot/\350\277\220\350\241\214\346\210\252\345\233\2766.png" rename to static/image/Screenshot/screenshot-6.png diff --git "a/static/image/\347\272\242\346\245\274\346\242\246\346\250\241\346\213\237\346\216\250\346\274\224\345\260\201\351\235\242.jpg" b/static/image/dream-of-red-chamber-cover.jpg similarity index 100% rename from "static/image/\347\272\242\346\245\274\346\242\246\346\250\241\346\213\237\346\216\250\346\274\224\345\260\201\351\235\242.jpg" rename to 
static/image/dream-of-red-chamber-cover.jpg diff --git "a/static/image/QQ\347\276\244.png" b/static/image/qq-group.png similarity index 100% rename from "static/image/QQ\347\276\244.png" rename to static/image/qq-group.png diff --git "a/static/image/\346\255\246\345\244\247\346\250\241\346\213\237\346\274\224\347\244\272\345\260\201\351\235\242.png" b/static/image/wuhan-demo-cover.png similarity index 100% rename from "static/image/\346\255\246\345\244\247\346\250\241\346\213\237\346\274\224\347\244\272\345\260\201\351\235\242.png" rename to static/image/wuhan-demo-cover.png From 7b8ca26fa686348126ba3996e8d99c5ad3ef2f40 Mon Sep 17 00:00:00 2001 From: Anurag Date: Thu, 26 Mar 2026 05:01:26 +0530 Subject: [PATCH 02/13] chore: translate Zep tool text output --- backend/app/services/zep_tools.py | 98 +++++++++++++++---------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py index 384cf540f..144a22f15 100644 --- a/backend/app/services/zep_tools.py +++ b/backend/app/services/zep_tools.py @@ -43,10 +43,10 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: """转换为文本格式,供LLM理解""" - text_parts = [f"搜索查询: {self.query}", f"找到 {self.total_count} 条相关信息"] + text_parts = [f"Search Query: {self.query}", f"Found {self.total_count} relevant items"] if self.facts: - text_parts.append("\n### 相关事实:") + text_parts.append("\n### Related Facts:") for i, fact in enumerate(self.facts, 1): text_parts.append(f"{i}. 
{fact}") @@ -73,8 +73,8 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: """转换为文本格式""" - entity_type = next((l for l in self.labels if l not in ["Entity", "Node"]), "未知类型") - return f"实体: {self.name} (类型: {entity_type})\n摘要: {self.summary}" + entity_type = next((l for l in self.labels if l not in ["Entity", "Node"]), "Unknown Type") + return f"Entity: {self.name} (Type: {entity_type})\nSummary: {self.summary}" @dataclass @@ -112,14 +112,14 @@ def to_text(self, include_temporal: bool = False) -> str: """转换为文本格式""" source = self.source_node_name or self.source_node_uuid[:8] target = self.target_node_name or self.target_node_uuid[:8] - base_text = f"关系: {source} --[{self.name}]--> {target}\n事实: {self.fact}" + base_text = f"Relation: {source} --[{self.name}]--> {target}\nFact: {self.fact}" if include_temporal: - valid_at = self.valid_at or "未知" - invalid_at = self.invalid_at or "至今" - base_text += f"\n时效: {valid_at} - {invalid_at}" + valid_at = self.valid_at or "Unknown" + invalid_at = self.invalid_at or "Present" + base_text += f"\nValidity: {valid_at} - {invalid_at}" if self.expired_at: - base_text += f" (已过期: {self.expired_at})" + base_text += f" (Expired: {self.expired_at})" return base_text @@ -170,40 +170,40 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: """转换为详细的文本格式,供LLM理解""" text_parts = [ - f"## 未来预测深度分析", - f"分析问题: {self.query}", - f"预测场景: {self.simulation_requirement}", - f"\n### 预测数据统计", - f"- 相关预测事实: {self.total_facts}条", - f"- 涉及实体: {self.total_entities}个", - f"- 关系链: {self.total_relationships}条" + "## Deep Forecast Analysis", + f"Analysis Question: {self.query}", + f"Prediction Scenario: {self.simulation_requirement}", + "\n### Forecast Statistics", + f"- Relevant Forecast Facts: {self.total_facts}", + f"- Entities Involved: {self.total_entities}", + f"- Relationship Chains: {self.total_relationships}" ] # 子问题 if self.sub_queries: - text_parts.append(f"\n### 分析的子问题") + text_parts.append("\n### Analysis Sub-questions") 
for i, sq in enumerate(self.sub_queries, 1): text_parts.append(f"{i}. {sq}") # 语义搜索结果 if self.semantic_facts: - text_parts.append(f"\n### 【关键事实】(请在报告中引用这些原文)") + text_parts.append("\n### Key Facts") for i, fact in enumerate(self.semantic_facts, 1): text_parts.append(f"{i}. \"{fact}\"") # 实体洞察 if self.entity_insights: - text_parts.append(f"\n### 【核心实体】") + text_parts.append("\n### Core Entities") for entity in self.entity_insights: - text_parts.append(f"- **{entity.get('name', '未知')}** ({entity.get('type', '实体')})") + text_parts.append(f"- **{entity.get('name', 'Unknown')}** ({entity.get('type', 'Entity')})") if entity.get('summary'): - text_parts.append(f" 摘要: \"{entity.get('summary')}\"") + text_parts.append(f" Summary: \"{entity.get('summary')}\"") if entity.get('related_facts'): - text_parts.append(f" 相关事实: {len(entity.get('related_facts', []))}条") + text_parts.append(f" Related Facts: {len(entity.get('related_facts', []))}") # 关系链 if self.relationship_chains: - text_parts.append(f"\n### 【关系链】") + text_parts.append("\n### Relationship Chains") for chain in self.relationship_chains: text_parts.append(f"- {chain}") @@ -249,32 +249,32 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: """转换为文本格式(完整版本,不截断)""" text_parts = [ - f"## 广度搜索结果(未来全景视图)", - f"查询: {self.query}", - f"\n### 统计信息", - f"- 总节点数: {self.total_nodes}", - f"- 总边数: {self.total_edges}", - f"- 当前有效事实: {self.active_count}条", - f"- 历史/过期事实: {self.historical_count}条" + "## Panorama Search Results", + f"Query: {self.query}", + "\n### Statistics", + f"- Total Nodes: {self.total_nodes}", + f"- Total Edges: {self.total_edges}", + f"- Active Facts: {self.active_count}", + f"- Historical / Expired Facts: {self.historical_count}" ] # 当前有效的事实(完整输出,不截断) if self.active_facts: - text_parts.append(f"\n### 【当前有效事实】(模拟结果原文)") + text_parts.append("\n### Active Facts") for i, fact in enumerate(self.active_facts, 1): text_parts.append(f"{i}. 
\"{fact}\"") # 历史/过期事实(完整输出,不截断) if self.historical_facts: - text_parts.append(f"\n### 【历史/过期事实】(演变过程记录)") + text_parts.append("\n### Historical / Expired Facts") for i, fact in enumerate(self.historical_facts, 1): text_parts.append(f"{i}. \"{fact}\"") # 关键实体(完整输出,不截断) if self.all_nodes: - text_parts.append(f"\n### 【涉及实体】") + text_parts.append("\n### Entities Involved") for node in self.all_nodes: - entity_type = next((l for l in node.labels if l not in ["Entity", "Node"]), "实体") + entity_type = next((l for l in node.labels if l not in ["Entity", "Node"]), "Entity") text_parts.append(f"- **{node.name}** ({entity_type})") return "\n".join(text_parts) @@ -303,11 +303,11 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: text = f"**{self.agent_name}** ({self.agent_role})\n" # 显示完整的agent_bio,不截断 - text += f"_简介: {self.agent_bio}_\n\n" + text += f"_Bio: {self.agent_bio}_\n\n" text += f"**Q:** {self.question}\n\n" text += f"**A:** {self.response}\n" if self.key_quotes: - text += "\n**关键引言:**\n" + text += "\n**Key Quotes:**\n" for quote in self.key_quotes: # 清理各种引号 clean_quote = quote.replace('\u201c', '').replace('\u201d', '').replace('"', '') @@ -319,7 +319,7 @@ def to_text(self) -> str: # 过滤包含问题编号的垃圾内容(问题1-9) skip = False for d in '123456789': - if f'\u95ee\u9898{d}' in clean_quote: + if f'\u95ee\u9898{d}' in clean_quote or f'Question {d}' in clean_quote: skip = True break if skip: @@ -374,25 +374,25 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: """转换为详细的文本格式,供LLM理解和报告引用""" text_parts = [ - "## 深度采访报告", - f"**采访主题:** {self.interview_topic}", - f"**采访人数:** {self.interviewed_count} / {self.total_agents} 位模拟Agent", - "\n### 采访对象选择理由", - self.selection_reasoning or "(自动选择)", + "## In-Depth Interview Report", + f"**Interview Topic:** {self.interview_topic}", + f"**Interview Count:** {self.interviewed_count} / {self.total_agents} simulated agents", + "\n### Why These Agents Were Selected", + self.selection_reasoning or "(Selected 
automatically)", "\n---", - "\n### 采访实录", + "\n### Interview Transcript", ] if self.interviews: for i, interview in enumerate(self.interviews, 1): - text_parts.append(f"\n#### 采访 #{i}: {interview.agent_name}") + text_parts.append(f"\n#### Interview #{i}: {interview.agent_name}") text_parts.append(interview.to_text()) text_parts.append("\n---") else: - text_parts.append("(无采访记录)\n\n---") + text_parts.append("(No interview records)\n\n---") - text_parts.append("\n### 采访摘要与核心观点") - text_parts.append(self.summary or "(无摘要)") + text_parts.append("\n### Interview Summary & Key Takeaways") + text_parts.append(self.summary or "(No summary)") return "\n".join(text_parts) @@ -424,12 +424,12 @@ class ZepToolsService: def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None): self.api_key = api_key or Config.ZEP_API_KEY if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") + raise ValueError("ZEP_API_KEY is not configured") self.client = Zep(api_key=self.api_key) # LLM客户端用于InsightForge生成子问题 self._llm_client = llm_client - logger.info("ZepToolsService 初始化完成") + logger.info("ZepToolsService initialized") @property def llm(self) -> LLMClient: From 19663ad54672275daf3b0df4ec79b5445a73c887 Mon Sep 17 00:00:00 2001 From: Anurag Date: Sat, 28 Mar 2026 18:46:20 +0530 Subject: [PATCH 03/13] refactor backend to use pluggable graph providers --- backend/app/__init__.py | 13 +- backend/app/api/graph.py | 24 +- backend/app/api/simulation.py | 12 +- backend/app/config.py | 56 +- backend/app/services/graph_builder.py | 284 +------- .../app/services/graph_provider/__init__.py | 16 + backend/app/services/graph_provider/base.py | 89 +++ .../app/services/graph_provider/factory.py | 33 + .../graph_provider/graphiti_local_provider.py | 638 ++++++++++++++++++ backend/app/services/graph_provider/models.py | 41 ++ .../graph_provider/zep_cloud_provider.py | 297 ++++++++ .../app/services/oasis_profile_generator.py | 29 +- backend/app/services/zep_entity_reader.py | 
31 +- .../app/services/zep_graph_memory_updater.py | 17 +- backend/app/services/zep_tools.py | 96 ++- 15 files changed, 1299 insertions(+), 377 deletions(-) create mode 100644 backend/app/services/graph_provider/__init__.py create mode 100644 backend/app/services/graph_provider/base.py create mode 100644 backend/app/services/graph_provider/factory.py create mode 100644 backend/app/services/graph_provider/graphiti_local_provider.py create mode 100644 backend/app/services/graph_provider/models.py create mode 100644 backend/app/services/graph_provider/zep_cloud_provider.py diff --git a/backend/app/__init__.py b/backend/app/__init__.py index aba624bba..b352f0f85 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -13,6 +13,7 @@ from flask_cors import CORS from .config import Config +from .services.graph_provider import initialize_selected_graph_backend from .utils.logger import setup_logger, get_logger @@ -41,6 +42,11 @@ def create_app(config_class=Config): # 启用CORS CORS(app, resources={r"/api/*": {"origins": "*"}}) + + # 初始化选中的图谱后端 + initialize_selected_graph_backend() + if should_log_startup: + logger.info(f"图谱后端已初始化: {Config.GRAPH_BACKEND}") # 注册模拟进程清理函数(确保服务器关闭时终止所有模拟进程) from .services.simulation_runner import SimulationRunner @@ -71,10 +77,13 @@ def log_response(response): # 健康检查 @app.route('/health') def health(): - return {'status': 'ok', 'service': 'MiroFish Backend'} + return { + 'status': 'ok', + 'service': 'MiroFish Backend', + 'graph_backend': Config.GRAPH_BACKEND, + } if should_log_startup: logger.info("MiroFish Backend 启动完成") return app - diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index 053fbbb60..cec8c58a5 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -283,9 +283,7 @@ def build_graph(): logger.info("=== Starting graph build ===") # 检查配置 - errors = [] - if not Config.ZEP_API_KEY: - errors.append("ZEP_API_KEY is not configured") + errors = Config.validate_graph_backend() if errors: 
logger.error(f"Configuration error: {errors}") return jsonify({ @@ -382,7 +380,7 @@ def build_task(): ) # 创建图谱构建服务 - builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) + builder = GraphBuilderService() # 分块 task_manager.update_task( @@ -400,7 +398,7 @@ def build_task(): # 创建图谱 task_manager.update_task( task_id, - message="Creating the Zep graph...", + message="Creating graph namespace...", progress=10 ) graph_id = builder.create_graph(name=graph_name) @@ -442,7 +440,7 @@ def add_progress_callback(msg, progress_ratio): # 等待Zep处理完成(查询每个episode的processed状态) task_manager.update_task( task_id, - message="Waiting for Zep to process the data...", + message="Waiting for graph ingestion to complete...", progress=55 ) @@ -454,7 +452,7 @@ def wait_progress_callback(msg, progress_ratio): progress=progress ) - builder._wait_for_episodes(episode_uuids, wait_progress_callback) + builder._wait_for_episodes(graph_id, episode_uuids, wait_progress_callback) # 获取图谱数据 task_manager.update_task( @@ -567,13 +565,13 @@ def get_graph_data(graph_id: str): 获取图谱数据(节点和边) """ try: - if not Config.ZEP_API_KEY: + if Config.validate_graph_backend(): return jsonify({ "success": False, - "error": "ZEP_API_KEY is not configured" + "error": "Graph backend is not configured correctly" }), 500 - builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) + builder = GraphBuilderService() graph_data = builder.get_graph_data(graph_id) return jsonify({ @@ -595,13 +593,13 @@ def delete_graph(graph_id: str): 删除Zep图谱 """ try: - if not Config.ZEP_API_KEY: + if Config.validate_graph_backend(): return jsonify({ "success": False, - "error": "ZEP_API_KEY is not configured" + "error": "Graph backend is not configured correctly" }), 500 - builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) + builder = GraphBuilderService() builder.delete_graph(graph_id) return jsonify({ diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 3a0f68168..b3c8a84d3 100644 --- 
a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -56,10 +56,10 @@ def get_graph_entities(graph_id: str): enrich: 是否获取相关边信息(默认true) """ try: - if not Config.ZEP_API_KEY: + if Config.validate_graph_backend(): return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "图谱后端未正确配置" }), 500 entity_types_str = request.args.get('entity_types', '') @@ -93,10 +93,10 @@ def get_graph_entities(graph_id: str): def get_entity_detail(graph_id: str, entity_uuid: str): """获取单个实体的详细信息""" try: - if not Config.ZEP_API_KEY: + if Config.validate_graph_backend(): return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "图谱后端未正确配置" }), 500 reader = ZepEntityReader() @@ -126,10 +126,10 @@ def get_entity_detail(graph_id: str, entity_uuid: str): def get_entities_by_type(graph_id: str, entity_type: str): """获取指定类型的所有实体""" try: - if not Config.ZEP_API_KEY: + if Config.validate_graph_backend(): return jsonify({ "success": False, - "error": "ZEP_API_KEY未配置" + "error": "图谱后端未正确配置" }), 500 enrich = request.args.get('enrich', 'true').lower() == 'true' diff --git a/backend/app/config.py b/backend/app/config.py index 953dfa50a..ecbe27a33 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -32,8 +32,34 @@ class Config: LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') - # Zep配置 + # 图谱后端配置 + GRAPH_BACKEND = os.environ.get('GRAPH_BACKEND', 'zep_cloud').strip().lower() + + # Zep Cloud配置 ZEP_API_KEY = os.environ.get('ZEP_API_KEY') + + # Local Graphiti + Neo4j 配置 + NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687') + NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j') + NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD') + NEO4J_DATABASE = os.environ.get('NEO4J_DATABASE', 'neo4j') + + GRAPHITI_AUTO_INIT = os.environ.get('GRAPHITI_AUTO_INIT', 'True').lower() == 'true' + GRAPHITI_TELEMETRY_ENABLED = 
os.environ.get('GRAPHITI_TELEMETRY_ENABLED', 'False').lower() == 'true' + GRAPHITI_MAX_COROUTINES = int(os.environ.get('GRAPHITI_MAX_COROUTINES', '10')) + GRAPHITI_SEARCH_RERANKER = os.environ.get('GRAPHITI_SEARCH_RERANKER', 'rrf').strip().lower() + + GRAPHITI_LLM_API_KEY = os.environ.get('GRAPHITI_LLM_API_KEY') or LLM_API_KEY + GRAPHITI_LLM_BASE_URL = os.environ.get('GRAPHITI_LLM_BASE_URL') or LLM_BASE_URL + GRAPHITI_LLM_MODEL = os.environ.get('GRAPHITI_LLM_MODEL') or LLM_MODEL_NAME + + GRAPHITI_EMBEDDER_API_KEY = os.environ.get('GRAPHITI_EMBEDDER_API_KEY') or LLM_API_KEY + GRAPHITI_EMBEDDER_BASE_URL = os.environ.get('GRAPHITI_EMBEDDER_BASE_URL') or LLM_BASE_URL + GRAPHITI_EMBEDDER_MODEL = os.environ.get('GRAPHITI_EMBEDDER_MODEL', 'text-embedding-3-small') + + GRAPHITI_RERANKER_API_KEY = os.environ.get('GRAPHITI_RERANKER_API_KEY') or LLM_API_KEY + GRAPHITI_RERANKER_BASE_URL = os.environ.get('GRAPHITI_RERANKER_BASE_URL') or LLM_BASE_URL + GRAPHITI_RERANKER_MODEL = os.environ.get('GRAPHITI_RERANKER_MODEL') or LLM_MODEL_NAME # 文件上传配置 MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB @@ -63,13 +89,35 @@ class Config: REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2')) REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5')) + @classmethod + def validate_graph_backend(cls): + """验证图谱后端配置""" + errors = [] + + if cls.GRAPH_BACKEND == 'zep_cloud': + if not cls.ZEP_API_KEY: + errors.append("ZEP_API_KEY 未配置") + elif cls.GRAPH_BACKEND == 'graphiti_local': + if not cls.NEO4J_URI: + errors.append("NEO4J_URI 未配置") + if not cls.NEO4J_USER: + errors.append("NEO4J_USER 未配置") + if not cls.NEO4J_PASSWORD: + errors.append("NEO4J_PASSWORD 未配置") + if not cls.GRAPHITI_LLM_API_KEY: + errors.append("GRAPHITI_LLM_API_KEY/LLM_API_KEY 未配置") + if not cls.GRAPHITI_EMBEDDER_API_KEY: + errors.append("GRAPHITI_EMBEDDER_API_KEY/LLM_API_KEY 未配置") + else: + errors.append(f"不支持的 GRAPH_BACKEND: {cls.GRAPH_BACKEND}") + + return 
errors + @classmethod def validate(cls): """验证必要配置""" errors = [] if not cls.LLM_API_KEY: errors.append("LLM_API_KEY 未配置") - if not cls.ZEP_API_KEY: - errors.append("ZEP_API_KEY 未配置") + errors.extend(cls.validate_graph_backend()) return errors - diff --git a/backend/app/services/graph_builder.py b/backend/app/services/graph_builder.py index e75f9700d..9ef90ee9f 100644 --- a/backend/app/services/graph_builder.py +++ b/backend/app/services/graph_builder.py @@ -5,19 +5,14 @@ import os import uuid -import time import threading import logging from typing import Dict, Any, List, Optional, Callable from dataclasses import dataclass -from zep_cloud.client import Zep -from zep_cloud import EpisodeData, EntityEdgeSourceTarget - from ..config import Config from ..models.task import TaskManager, TaskStatus -from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges -from ..utils.ontology_normalizer import normalize_ontology_for_zep +from .graph_provider import create_graph_provider from .text_processor import TextProcessor @@ -49,10 +44,7 @@ class GraphBuilderService: def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or Config.ZEP_API_KEY - if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") - - self.client = Zep(api_key=self.api_key) + self.provider = create_graph_provider() self.task_manager = TaskManager() def build_graph_async( @@ -160,6 +152,7 @@ def _build_graph_worker( ) self._wait_for_episodes( + graph_id, episode_uuids, lambda msg, prog: self.task_manager.update_task( task_id, @@ -190,114 +183,12 @@ def _build_graph_worker( self.task_manager.fail_task(task_id, error_msg) def create_graph(self, name: str) -> str: - """创建Zep图谱(公开方法)""" - graph_id = f"mirofish_{uuid.uuid4().hex[:16]}" - - self.client.graph.create( - graph_id=graph_id, - name=name, - description="MiroFish Social Simulation Graph" - ) - - return graph_id + """创建图谱(公开方法)""" + return self.provider.create_graph(name) def set_ontology(self, graph_id: str, ontology: Dict[str, 
Any]): """设置图谱本体(公开方法)""" - import warnings - from typing import Optional - from pydantic import Field - from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel - - # 抑制 Pydantic v2 关于 Field(default=None) 的警告 - # 这是 Zep SDK 要求的用法,警告来自动态类创建,可以安全忽略 - warnings.filterwarnings('ignore', category=UserWarning, module='pydantic') - - ontology, entity_name_mapping = normalize_ontology_for_zep(ontology) - renamed_entities = { - original: normalized - for original, normalized in entity_name_mapping.items() - if original != normalized - } - if renamed_entities: - logger.info("Normalized ontology entity names for Zep compatibility: %s", renamed_entities) - - # Zep 保留名称,不能作为属性名 - RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'} - - def safe_attr_name(attr_name: str) -> str: - """将保留名称转换为安全名称""" - if attr_name.lower() in RESERVED_NAMES: - return f"entity_{attr_name}" - return attr_name - - # 动态创建实体类型 - entity_types = {} - for entity_def in ontology.get("entity_types", []): - name = entity_def["name"] - description = entity_def.get("description", f"A {name} entity.") - - # 创建属性字典和类型注解(Pydantic v2 需要) - attrs = {"__doc__": description} - annotations = {} - - for attr_def in entity_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 - attr_desc = attr_def.get("description", attr_name) - # Zep API 需要 Field 的 description,这是必需的 - attrs[attr_name] = Field(description=attr_desc, default=None) - annotations[attr_name] = Optional[EntityText] # 类型注解 - - attrs["__annotations__"] = annotations - - # 动态创建类 - entity_class = type(name, (EntityModel,), attrs) - entity_class.__doc__ = description - entity_types[name] = entity_class - - # 动态创建边类型 - edge_definitions = {} - for edge_def in ontology.get("edge_types", []): - name = edge_def["name"] - description = edge_def.get("description", f"A {name} relationship.") - - # 创建属性字典和类型注解 - attrs = {"__doc__": description} - annotations = {} - - for attr_def 
in edge_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 - attr_desc = attr_def.get("description", attr_name) - # Zep API 需要 Field 的 description,这是必需的 - attrs[attr_name] = Field(description=attr_desc, default=None) - annotations[attr_name] = Optional[str] # 边属性用str类型 - - attrs["__annotations__"] = annotations - - # 动态创建类 - class_name = ''.join(word.capitalize() for word in name.split('_')) - edge_class = type(class_name, (EdgeModel,), attrs) - edge_class.__doc__ = description - - # 构建source_targets - source_targets = [] - for st in edge_def.get("source_targets", []): - source_targets.append( - EntityEdgeSourceTarget( - source=st.get("source", "Entity"), - target=st.get("target", "Entity") - ) - ) - - if source_targets: - edge_definitions[name] = (edge_class, source_targets) - - # 调用Zep API设置本体 - if entity_types or edge_definitions: - self.client.graph.set_ontology( - graph_ids=[graph_id], - entities=entity_types if entity_types else None, - edges=edge_definitions if edge_definitions else None, - ) + self.provider.set_ontology(graph_id, ontology) def add_text_batches( self, @@ -307,114 +198,32 @@ def add_text_batches( progress_callback: Optional[Callable] = None ) -> List[str]: """分批添加文本到图谱,返回所有 episode 的 uuid 列表""" - episode_uuids = [] - total_chunks = len(chunks) - - for i in range(0, total_chunks, batch_size): - batch_chunks = chunks[i:i + batch_size] - batch_num = i // batch_size + 1 - total_batches = (total_chunks + batch_size - 1) // batch_size - - if progress_callback: - progress = (i + len(batch_chunks)) / total_chunks - progress_callback( - f"发送第 {batch_num}/{total_batches} 批数据 ({len(batch_chunks)} 块)...", - progress - ) - - # 构建episode数据 - episodes = [ - EpisodeData(data=chunk, type="text") - for chunk in batch_chunks - ] - - # 发送到Zep - try: - batch_result = self.client.graph.add_batch( - graph_id=graph_id, - episodes=episodes - ) - - # 收集返回的 episode uuid - if batch_result and isinstance(batch_result, list): - for ep in 
batch_result: - ep_uuid = getattr(ep, 'uuid_', None) or getattr(ep, 'uuid', None) - if ep_uuid: - episode_uuids.append(ep_uuid) - - # 避免请求过快 - time.sleep(1) - - except Exception as e: - if progress_callback: - progress_callback(f"批次 {batch_num} 发送失败: {str(e)}", 0) - raise - - return episode_uuids + return self.provider.add_text_batches( + graph_id=graph_id, + chunks=chunks, + batch_size=batch_size, + progress_callback=progress_callback, + ) def _wait_for_episodes( self, + graph_id: str, episode_uuids: List[str], progress_callback: Optional[Callable] = None, timeout: int = 600 ): - """等待所有 episode 处理完成(通过查询每个 episode 的 processed 状态)""" - if not episode_uuids: - if progress_callback: - progress_callback("无需等待(没有 episode)", 1.0) - return - - start_time = time.time() - pending_episodes = set(episode_uuids) - completed_count = 0 - total_episodes = len(episode_uuids) - - if progress_callback: - progress_callback(f"开始等待 {total_episodes} 个文本块处理...", 0) - - while pending_episodes: - if time.time() - start_time > timeout: - if progress_callback: - progress_callback( - f"部分文本块超时,已完成 {completed_count}/{total_episodes}", - completed_count / total_episodes - ) - break - - # 检查每个 episode 的处理状态 - for ep_uuid in list(pending_episodes): - try: - episode = self.client.graph.episode.get(uuid_=ep_uuid) - is_processed = getattr(episode, 'processed', False) - - if is_processed: - pending_episodes.remove(ep_uuid) - completed_count += 1 - - except Exception as e: - # 忽略单个查询错误,继续 - pass - - elapsed = int(time.time() - start_time) - if progress_callback: - progress_callback( - f"Zep处理中... 
{completed_count}/{total_episodes} 完成, {len(pending_episodes)} 待处理 ({elapsed}秒)", - completed_count / total_episodes if total_episodes > 0 else 0 - ) - - if pending_episodes: - time.sleep(3) # 每3秒检查一次 - - if progress_callback: - progress_callback(f"处理完成: {completed_count}/{total_episodes}", 1.0) + """等待图谱文本处理完成""" + self.provider.wait_for_episodes( + graph_id=graph_id, + episode_uuids=episode_uuids, + progress_callback=progress_callback, + timeout=timeout, + ) def _get_graph_info(self, graph_id: str) -> GraphInfo: """获取图谱信息""" - # 获取节点(分页) - nodes = fetch_all_nodes(self.client, graph_id) - - # 获取边(分页) - edges = fetch_all_edges(self.client, graph_id) + nodes = self.provider.get_all_nodes(graph_id) + edges = self.provider.get_all_edges(graph_id) # 统计实体类型 entity_types = set() @@ -441,63 +250,40 @@ def get_graph_data(self, graph_id: str) -> Dict[str, Any]: Returns: 包含nodes和edges的字典,包括时间信息、属性等详细数据 """ - nodes = fetch_all_nodes(self.client, graph_id) - edges = fetch_all_edges(self.client, graph_id) + nodes = self.provider.get_all_nodes(graph_id) + edges = self.provider.get_all_edges(graph_id) # 创建节点映射用于获取节点名称 - node_map = {} - for node in nodes: - node_map[node.uuid_] = node.name or "" + node_map = {node.uuid: node.name or "" for node in nodes} nodes_data = [] for node in nodes: - # 获取创建时间 - created_at = getattr(node, 'created_at', None) - if created_at: - created_at = str(created_at) - nodes_data.append({ - "uuid": node.uuid_, + "uuid": node.uuid, "name": node.name, "labels": node.labels or [], "summary": node.summary or "", "attributes": node.attributes or {}, - "created_at": created_at, + "created_at": node.created_at, }) edges_data = [] for edge in edges: - # 获取时间信息 - created_at = getattr(edge, 'created_at', None) - valid_at = getattr(edge, 'valid_at', None) - invalid_at = getattr(edge, 'invalid_at', None) - expired_at = getattr(edge, 'expired_at', None) - - # 获取 episodes - episodes = getattr(edge, 'episodes', None) or getattr(edge, 'episode_ids', None) - if episodes 
and not isinstance(episodes, list): - episodes = [str(episodes)] - elif episodes: - episodes = [str(e) for e in episodes] - - # 获取 fact_type - fact_type = getattr(edge, 'fact_type', None) or edge.name or "" - edges_data.append({ - "uuid": edge.uuid_, + "uuid": edge.uuid, "name": edge.name or "", "fact": edge.fact or "", - "fact_type": fact_type, + "fact_type": edge.name or "", "source_node_uuid": edge.source_node_uuid, "target_node_uuid": edge.target_node_uuid, "source_node_name": node_map.get(edge.source_node_uuid, ""), "target_node_name": node_map.get(edge.target_node_uuid, ""), "attributes": edge.attributes or {}, - "created_at": str(created_at) if created_at else None, - "valid_at": str(valid_at) if valid_at else None, - "invalid_at": str(invalid_at) if invalid_at else None, - "expired_at": str(expired_at) if expired_at else None, - "episodes": episodes or [], + "created_at": edge.created_at, + "valid_at": edge.valid_at, + "invalid_at": edge.invalid_at, + "expired_at": edge.expired_at, + "episodes": edge.episodes or [], }) return { @@ -510,4 +296,4 @@ def get_graph_data(self, graph_id: str) -> Dict[str, Any]: def delete_graph(self, graph_id: str): """删除图谱""" - self.client.graph.delete(graph_id=graph_id) + self.provider.delete_graph(graph_id) diff --git a/backend/app/services/graph_provider/__init__.py b/backend/app/services/graph_provider/__init__.py new file mode 100644 index 000000000..673e8fe1b --- /dev/null +++ b/backend/app/services/graph_provider/__init__.py @@ -0,0 +1,16 @@ +""" +Graph provider exports. 
+""" + +from .base import BaseGraphProvider +from .factory import create_graph_provider, initialize_selected_graph_backend +from .models import GraphEdgeRecord, GraphNodeRecord, GraphSearchResult + +__all__ = [ + 'BaseGraphProvider', + 'GraphEdgeRecord', + 'GraphNodeRecord', + 'GraphSearchResult', + 'create_graph_provider', + 'initialize_selected_graph_backend', +] diff --git a/backend/app/services/graph_provider/base.py b/backend/app/services/graph_provider/base.py new file mode 100644 index 000000000..e12b5ae09 --- /dev/null +++ b/backend/app/services/graph_provider/base.py @@ -0,0 +1,89 @@ +""" +Abstract graph provider interface. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from collections.abc import Callable +from typing import Any, Optional + +from .models import GraphEdgeRecord, GraphNodeRecord, GraphSearchResult + +ProgressCallback = Callable[[str, float], None] + + +class BaseGraphProvider(ABC): + """Provider-neutral graph backend interface.""" + + def ensure_initialized(self) -> None: + """Perform one-time backend initialization when needed.""" + + @abstractmethod + def create_graph(self, name: str) -> str: + raise NotImplementedError + + @abstractmethod + def set_ontology(self, graph_id: str, ontology: dict[str, Any]) -> None: + raise NotImplementedError + + @abstractmethod + def add_text_batches( + self, + graph_id: str, + chunks: list[str], + batch_size: int = 3, + progress_callback: Optional[ProgressCallback] = None, + ) -> list[str]: + raise NotImplementedError + + @abstractmethod + def wait_for_episodes( + self, + graph_id: str, + episode_uuids: list[str], + progress_callback: Optional[ProgressCallback] = None, + timeout: int = 600, + ) -> None: + raise NotImplementedError + + @abstractmethod + def get_all_nodes(self, graph_id: str) -> list[GraphNodeRecord]: + raise NotImplementedError + + @abstractmethod + def get_all_edges(self, graph_id: str) -> list[GraphEdgeRecord]: + raise NotImplementedError + + 
@abstractmethod + def get_node(self, graph_id: str, node_uuid: str) -> GraphNodeRecord | None: + raise NotImplementedError + + @abstractmethod + def get_node_edges(self, graph_id: str, node_uuid: str) -> list[GraphEdgeRecord]: + raise NotImplementedError + + @abstractmethod + def search( + self, + graph_id: str, + query: str, + limit: int = 10, + scope: str = "edges", + reranker: str = "cross_encoder", + ) -> GraphSearchResult: + raise NotImplementedError + + @abstractmethod + def add_text( + self, + graph_id: str, + data: str, + source_description: str = "MiroFish", + ) -> str | None: + raise NotImplementedError + + @abstractmethod + def delete_graph(self, graph_id: str) -> None: + raise NotImplementedError + diff --git a/backend/app/services/graph_provider/factory.py b/backend/app/services/graph_provider/factory.py new file mode 100644 index 000000000..3b2a5cc1f --- /dev/null +++ b/backend/app/services/graph_provider/factory.py @@ -0,0 +1,33 @@ +""" +Graph provider factory and backend bootstrap helpers. 
+""" + +from __future__ import annotations + +from functools import lru_cache + +from ...config import Config + + +@lru_cache(maxsize=2) +def _create_graph_provider_for_backend(backend: str): + if backend == "zep_cloud": + from .zep_cloud_provider import ZepCloudGraphProvider + + return ZepCloudGraphProvider() + + if backend == "graphiti_local": + from .graphiti_local_provider import GraphitiLocalGraphProvider + + return GraphitiLocalGraphProvider() + + raise ValueError(f"Unsupported GRAPH_BACKEND: {backend}") + + +def create_graph_provider(): + return _create_graph_provider_for_backend(Config.GRAPH_BACKEND) + + +def initialize_selected_graph_backend() -> None: + provider = create_graph_provider() + provider.ensure_initialized() diff --git a/backend/app/services/graph_provider/graphiti_local_provider.py b/backend/app/services/graph_provider/graphiti_local_provider.py new file mode 100644 index 000000000..14c13d991 --- /dev/null +++ b/backend/app/services/graph_provider/graphiti_local_provider.py @@ -0,0 +1,638 @@ +""" +Local Graphiti + Neo4j graph provider implementation. 
+""" + +from __future__ import annotations + +import atexit +import asyncio +import json +import os +import re +import threading +import uuid +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Any, Optional + +from pydantic import BaseModel, Field + +from ...config import Config +from ...utils.logger import get_logger +from ...utils.ontology_normalizer import normalize_ontology_for_zep +from .base import BaseGraphProvider, ProgressCallback +from .models import GraphEdgeRecord, GraphNodeRecord, GraphSearchResult + +logger = get_logger('mirofish.graph_provider.graphiti_local') + + +class _AsyncRunner: + """Run all Graphiti/Neo4j async work on one dedicated event loop thread.""" + + def __init__(self): + self._loop = asyncio.new_event_loop() + self._thread = threading.Thread(target=self._run_loop, name="graphiti-local-loop", daemon=True) + self._started = threading.Event() + self._closed = False + self._thread.start() + self._started.wait() + + def _run_loop(self) -> None: + asyncio.set_event_loop(self._loop) + self._started.set() + self._loop.run_forever() + + def run(self, coro): + if self._closed: + raise RuntimeError("Async runner is already closed") + + future = asyncio.run_coroutine_threadsafe(coro, self._loop) + return future.result() + + def close(self) -> None: + if self._closed: + return + + self._closed = True + self._loop.call_soon_threadsafe(self._loop.stop) + self._thread.join(timeout=5) + self._loop.close() + + +_ASYNC_RUNNER = _AsyncRunner() +atexit.register(_ASYNC_RUNNER.close) + + +def _run_async(coro): + return _ASYNC_RUNNER.run(coro) + + +@dataclass +class _OntologyBundle: + entity_types: dict[str, type[BaseModel]] + edge_types: dict[str, type[BaseModel]] + edge_type_map: dict[tuple[str, str], list[str]] + attribute_free_entity_types: dict[str, type[BaseModel]] + attribute_free_edge_types: dict[str, type[BaseModel]] + + +class GraphitiLocalGraphProvider(BaseGraphProvider): + """Graphiti + 
Neo4j backed graph provider.""" + + _initialized = False + _init_lock = threading.Lock() + + def __init__(self): + try: + from graphiti_core import Graphiti + from graphiti_core.cross_encoder.openai_reranker_client import OpenAIRerankerClient + from graphiti_core.driver.neo4j_driver import Neo4jDriver + from graphiti_core.embedder.openai import OpenAIEmbedder, OpenAIEmbedderConfig + from graphiti_core.errors import GroupsEdgesNotFoundError, GroupsNodesNotFoundError, NodeNotFoundError + from graphiti_core.llm_client.config import LLMConfig + from graphiti_core.llm_client.openai_generic_client import OpenAIGenericClient + from neo4j.exceptions import ClientError + except ImportError as exc: # pragma: no cover - depends on installed extras + raise ImportError( + "graphiti-core and neo4j must be installed to use GRAPH_BACKEND=graphiti_local" + ) from exc + + self._Graphiti = Graphiti + self._Neo4jDriver = Neo4jDriver + self._OpenAIEmbedder = OpenAIEmbedder + self._OpenAIEmbedderConfig = OpenAIEmbedderConfig + self._OpenAIRerankerClient = OpenAIRerankerClient + self._OpenAIGenericClient = OpenAIGenericClient + self._LLMConfig = LLMConfig + self._GroupsEdgesNotFoundError = GroupsEdgesNotFoundError + self._GroupsNodesNotFoundError = GroupsNodesNotFoundError + self._NodeNotFoundError = NodeNotFoundError + self._ClientError = ClientError + + # Graphiti reads this env var directly. 
+ os.environ.setdefault('GRAPHITI_TELEMETRY_ENABLED', str(Config.GRAPHITI_TELEMETRY_ENABLED).lower()) + + self._llm_config = self._LLMConfig( + api_key=Config.GRAPHITI_LLM_API_KEY, + base_url=Config.GRAPHITI_LLM_BASE_URL, + model=Config.GRAPHITI_LLM_MODEL, + ) + self._reranker_config = self._LLMConfig( + api_key=Config.GRAPHITI_RERANKER_API_KEY, + base_url=Config.GRAPHITI_RERANKER_BASE_URL, + model=Config.GRAPHITI_RERANKER_MODEL, + ) + self._embedder_config = self._OpenAIEmbedderConfig( + api_key=Config.GRAPHITI_EMBEDDER_API_KEY, + base_url=Config.GRAPHITI_EMBEDDER_BASE_URL, + embedding_model=Config.GRAPHITI_EMBEDDER_MODEL, + ) + + self.driver = self._Neo4jDriver( + uri=Config.NEO4J_URI, + user=Config.NEO4J_USER, + password=Config.NEO4J_PASSWORD, + database=Config.NEO4J_DATABASE, + ) + self.client = self._make_graphiti_client(self.driver) + self._ontology_cache: dict[str, _OntologyBundle] = {} + self._client_ready = False + + def ensure_initialized(self) -> None: + if GraphitiLocalGraphProvider._initialized or not Config.GRAPHITI_AUTO_INIT: + return + + with GraphitiLocalGraphProvider._init_lock: + if GraphitiLocalGraphProvider._initialized: + return + self._ensure_client_ready() + GraphitiLocalGraphProvider._initialized = True + + def _ensure_client_ready(self) -> None: + if self._client_ready: + return + + with GraphitiLocalGraphProvider._init_lock: + if self._client_ready: + return + logger.info("Checking local Neo4j connectivity...") + _run_async(self.driver.health_check()) + logger.info("Local Neo4j connectivity confirmed") + logger.info("Initializing local Graphiti indices and constraints...") + _run_async(self.client.build_indices_and_constraints()) + self._client_ready = True + logger.info("Local Graphiti initialization completed") + + def create_graph(self, name: str) -> str: + self._ensure_client_ready() + graph_id = f"mirofish_{uuid.uuid4().hex[:16]}" + logger.info("Created local Graphiti graph namespace %s (%s)", graph_id, name) + return graph_id + + 
def set_ontology(self, graph_id: str, ontology: dict[str, Any]) -> None: + self._ontology_cache[graph_id] = self._build_ontology_bundle(ontology) + + def add_text_batches( + self, + graph_id: str, + chunks: list[str], + batch_size: int = 3, + progress_callback: Optional[ProgressCallback] = None, + ) -> list[str]: + self._ensure_client_ready() + client = self._get_graphiti_client(graph_id) + bundle = self._ontology_cache.get(graph_id) + episode_uuids: list[str] = [] + total_chunks = len(chunks) + + from graphiti_core.nodes import EpisodeType + + base_time = datetime.now(timezone.utc) + + for i in range(0, total_chunks, batch_size): + batch_chunks = chunks[i:i + batch_size] + batch_num = i // batch_size + 1 + total_batches = (total_chunks + batch_size - 1) // batch_size + + if progress_callback: + progress_callback( + f"Sending local batch {batch_num}/{total_batches} ({len(batch_chunks)} chunks)...", + (i + len(batch_chunks)) / total_chunks if total_chunks else 1.0, + ) + + for index, chunk in enumerate(batch_chunks): + result = _run_async( + self._add_episode( + client=client, + graph_id=graph_id, + name=f"{graph_id}_chunk_{i + index + 1}", + episode_body=chunk, + source_description="MiroFish document chunk", + reference_time=base_time + timedelta(seconds=i + index), + source=EpisodeType.text, + bundle=bundle, + ) + ) + self._persist_graph_result(client, result) + + episode = getattr(result, 'episode', None) + episode_uuid = getattr(episode, 'uuid', None) or getattr(episode, 'uuid_', None) + if episode_uuid: + episode_uuids.append(str(episode_uuid)) + + return episode_uuids + + def wait_for_episodes( + self, + graph_id: str, + episode_uuids: list[str], + progress_callback: Optional[ProgressCallback] = None, + timeout: int = 600, + ) -> None: + if progress_callback: + progress_callback( + "Local Graphiti ingestion completed", + 1.0, + ) + + def get_all_nodes(self, graph_id: str) -> list[GraphNodeRecord]: + self._ensure_client_ready() + from graphiti_core.nodes import 
EntityNode + + return [ + self._normalize_node(node) + for node in self._fetch_group_records(EntityNode.get_by_group_ids, graph_id) + ] + + def get_all_edges(self, graph_id: str) -> list[GraphEdgeRecord]: + self._ensure_client_ready() + from graphiti_core.edges import EntityEdge + + return [ + self._normalize_edge(edge) + for edge in self._fetch_group_records(EntityEdge.get_by_group_ids, graph_id) + ] + + def get_node(self, graph_id: str, node_uuid: str) -> GraphNodeRecord | None: + self._ensure_client_ready() + from graphiti_core.nodes import EntityNode + + graph_driver = self._get_graph_driver(graph_id) + try: + node = _run_async(EntityNode.get_by_uuid(graph_driver, node_uuid)) + except self._NodeNotFoundError: + return None + + if graph_id and getattr(node, 'group_id', None) not in (None, *self._graph_namespaces(graph_id)): + return None + return self._normalize_node(node) + + def get_node_edges(self, graph_id: str, node_uuid: str) -> list[GraphEdgeRecord]: + self._ensure_client_ready() + from graphiti_core.edges import EntityEdge + + graph_driver = self._get_graph_driver(graph_id) + edges = _run_async(EntityEdge.get_by_node_uuid(graph_driver, node_uuid)) + return [ + self._normalize_edge(edge) + for edge in edges + if not graph_id or getattr(edge, 'group_id', None) in (None, *self._graph_namespaces(graph_id)) + ] + + def search( + self, + graph_id: str, + query: str, + limit: int = 10, + scope: str = "edges", + reranker: str = "cross_encoder", + ) -> GraphSearchResult: + self._ensure_client_ready() + client = self._get_graphiti_client(graph_id) + from graphiti_core.search.search_config_recipes import ( + EDGE_HYBRID_SEARCH_CROSS_ENCODER, + EDGE_HYBRID_SEARCH_RRF, + NODE_HYBRID_SEARCH_CROSS_ENCODER, + NODE_HYBRID_SEARCH_RRF, + ) + + effective_reranker = Config.GRAPHITI_SEARCH_RERANKER or reranker or "rrf" + + if scope == "nodes": + config = ( + NODE_HYBRID_SEARCH_CROSS_ENCODER.model_copy(deep=True) + if effective_reranker == "cross_encoder" + else 
NODE_HYBRID_SEARCH_RRF.model_copy(deep=True) + ) + else: + config = ( + EDGE_HYBRID_SEARCH_CROSS_ENCODER.model_copy(deep=True) + if effective_reranker == "cross_encoder" + else EDGE_HYBRID_SEARCH_RRF.model_copy(deep=True) + ) + config.limit = limit + + results = _run_async( + client.search_( + query=query, + config=config, + group_ids=self._graph_namespaces(graph_id), + ) + ) + + edges = [self._normalize_edge(edge) for edge in results.edges] + nodes = [self._normalize_node(node) for node in results.nodes] + + facts = [edge.fact for edge in edges if edge.fact] + if scope == "nodes": + facts.extend(f"[{node.name}]: {node.summary}" for node in nodes if node.summary) + + return GraphSearchResult(facts=facts, edges=edges, nodes=nodes) + + def add_text( + self, + graph_id: str, + data: str, + source_description: str = "MiroFish", + ) -> str | None: + self._ensure_client_ready() + client = self._get_graphiti_client(graph_id) + from graphiti_core.nodes import EpisodeType + + result = _run_async( + self._add_episode( + client=client, + graph_id=graph_id, + name=f"{graph_id}_activity_{uuid.uuid4().hex[:8]}", + episode_body=data, + source_description=source_description, + reference_time=datetime.now(timezone.utc), + source=EpisodeType.text, + ) + ) + self._persist_graph_result(client, result) + episode = getattr(result, 'episode', None) + episode_uuid = getattr(episode, 'uuid', None) if episode else None + return str(episode_uuid) if episode_uuid else None + + def delete_graph(self, graph_id: str) -> None: + self._ensure_client_ready() + from graphiti_core.edges import EntityEdge, EpisodicEdge + from graphiti_core.nodes import EntityNode, EpisodicNode + + graph_driver = self._get_graph_driver(graph_id) + entity_edges = self._fetch_group_records(EntityEdge.get_by_group_ids, graph_id) + episodic_edges = self._fetch_group_records(EpisodicEdge.get_by_group_ids, graph_id) + episodic_nodes = self._fetch_group_records(EpisodicNode.get_by_group_ids, graph_id) + entity_nodes = 
self._fetch_group_records(EntityNode.get_by_group_ids, graph_id) + + if episodic_edges: + _run_async(EpisodicEdge.delete_by_uuids(graph_driver, [edge.uuid for edge in episodic_edges])) + if entity_edges: + _run_async(EntityEdge.delete_by_uuids(graph_driver, [edge.uuid for edge in entity_edges])) + if episodic_nodes: + _run_async(EpisodicNode.delete_by_uuids(graph_driver, [node.uuid for node in episodic_nodes])) + if entity_nodes: + _run_async(EntityNode.delete_by_uuids(graph_driver, [node.uuid for node in entity_nodes])) + + self._ontology_cache.pop(graph_id, None) + + def _fetch_group_records(self, fetcher, graph_id: str, page_size: int = 100) -> list[Any]: + graph_driver = self._get_graph_driver(graph_id) + graph_namespaces = self._graph_namespaces(graph_id) + records: list[Any] = [] + cursor: str | None = None + + while True: + try: + batch = _run_async( + fetcher( + graph_driver, + graph_namespaces, + limit=page_size, + uuid_cursor=cursor, + ) + ) + except (self._GroupsEdgesNotFoundError, self._GroupsNodesNotFoundError): + break + if not batch: + break + + records.extend(batch) + if len(batch) < page_size: + break + + cursor = getattr(batch[-1], 'uuid', None) or getattr(batch[-1], 'uuid_', None) + if cursor is None: + break + + return records + + def _graph_namespace(self, graph_id: str) -> str: + if not graph_id or not re.fullmatch(r'[A-Za-z0-9_-]+', graph_id): + raise ValueError(f"Invalid graph_id for local Graphiti backend: {graph_id}") + return graph_id + + def _graph_namespaces(self, graph_id: str) -> list[str]: + primary = self._graph_namespace(graph_id) + namespaces = [primary] + legacy = primary.replace('_', '-') + if legacy != primary: + namespaces.append(legacy) + return namespaces + + def _make_graphiti_client(self, graph_driver) -> Any: + return self._Graphiti( + graph_driver=graph_driver, + llm_client=self._OpenAIGenericClient(config=self._llm_config), + embedder=self._OpenAIEmbedder(config=self._embedder_config), + 
cross_encoder=self._OpenAIRerankerClient(config=self._reranker_config), + max_coroutines=Config.GRAPHITI_MAX_COROUTINES, + ) + + def _get_graphiti_client(self, graph_id: str): + self._graph_namespace(graph_id) + self._ensure_client_ready() + return self.client + + def _get_graph_driver(self, graph_id: str): + return self._get_graphiti_client(graph_id).driver + + async def _add_episode( + self, + client, + graph_id: str, + name: str, + episode_body: str, + source_description: str, + reference_time: datetime, + source, + bundle: _OntologyBundle | None = None, + ): + episode_kwargs = { + "name": name, + "episode_body": episode_body, + "source_description": source_description, + "reference_time": reference_time, + "source": source, + "group_id": self._graph_namespace(graph_id), + "entity_types": bundle.entity_types if bundle else None, + "edge_types": bundle.edge_types if bundle else None, + "edge_type_map": bundle.edge_type_map if bundle else None, + } + + try: + return await client.add_episode(**episode_kwargs) + except Exception as exc: + if not bundle or not self._is_non_primitive_property_error(exc): + raise + + logger.warning( + "Local Graphiti ontology extraction returned non-primitive Neo4j properties for %s; retrying without ontology attributes. 
Error: %s", + graph_id, + exc, + ) + fallback_kwargs = dict(episode_kwargs) + fallback_kwargs.update( + entity_types=bundle.attribute_free_entity_types, + edge_types=bundle.attribute_free_edge_types, + edge_type_map=bundle.edge_type_map, + ) + return await client.add_episode(**fallback_kwargs) + + def _persist_graph_result(self, client, result: Any) -> None: + for node in getattr(result, 'nodes', []) or []: + node.attributes = self._sanitize_attributes(getattr(node, 'attributes', {}) or {}) + if getattr(node, 'name_embedding', None) is None: + _run_async(node.generate_name_embedding(client.embedder)) + _run_async(node.save(client.driver)) + + for edge in getattr(result, 'edges', []) or []: + edge.attributes = self._sanitize_attributes(getattr(edge, 'attributes', {}) or {}) + if getattr(edge, 'fact_embedding', None) is None: + _run_async(edge.generate_embedding(client.embedder)) + _run_async(edge.save(client.driver)) + + @staticmethod + def _is_non_primitive_property_error(exc: Exception) -> bool: + return 'Property values can only be of primitive types or arrays thereof' in str(exc) + + def _sanitize_attributes(self, attributes: dict[str, Any]) -> dict[str, Any]: + sanitized: dict[str, Any] = {} + for key, value in attributes.items(): + sanitized[key] = self._sanitize_property_value(key, value) + return sanitized + + def _sanitize_property_value(self, key: str, value: Any) -> Any: + if value is None or isinstance(value, (str, int, float, bool)): + return value + + if isinstance(value, (list, tuple)): + return [ + item + if isinstance(item, (str, int, float, bool)) or item is None + else json.dumps(item, ensure_ascii=False, default=str) + for item in value + ] + + if isinstance(value, dict): + if key in value: + return self._sanitize_property_value(key, value[key]) + if len(value) == 1: + return self._sanitize_property_value(key, next(iter(value.values()))) + return json.dumps(value, ensure_ascii=False, default=str) + + return str(value) + + def 
_build_ontology_bundle(self, ontology: dict[str, Any]) -> _OntologyBundle: + ontology, _ = normalize_ontology_for_zep(ontology) + reserved_names = { + 'uuid', + 'name', + 'group_id', + 'labels', + 'created_at', + 'summary', + 'attributes', + 'name_embedding', + } + + def safe_attr_name(attr_name: str) -> str: + if attr_name.lower() in reserved_names: + return f"entity_{attr_name}" + return attr_name + + entity_types: dict[str, type[BaseModel]] = {} + for entity_def in ontology.get("entity_types", []): + name = entity_def["name"] + description = entity_def.get("description", f"A {name} entity.") + attrs: dict[str, Any] = {"__doc__": description} + annotations: dict[str, Any] = {} + for attr_def in entity_def.get("attributes", []): + attr_name = safe_attr_name(attr_def["name"]) + attr_desc = attr_def.get("description", attr_name) + attrs[attr_name] = Field(default=None, description=attr_desc) + annotations[attr_name] = Optional[str] + attrs["__annotations__"] = annotations + entity_class = type(name, (BaseModel,), attrs) + entity_class.__doc__ = description + entity_types[name] = entity_class + + edge_types: dict[str, type[BaseModel]] = {} + edge_type_map: dict[tuple[str, str], list[str]] = {} + for edge_def in ontology.get("edge_types", []): + name = edge_def["name"] + description = edge_def.get("description", f"A {name} relationship.") + attrs = {"__doc__": description} + annotations = {} + for attr_def in edge_def.get("attributes", []): + attr_name = safe_attr_name(attr_def["name"]) + attr_desc = attr_def.get("description", attr_name) + attrs[attr_name] = Field(default=None, description=attr_desc) + annotations[attr_name] = Optional[str] + attrs["__annotations__"] = annotations + edge_class = type(name, (BaseModel,), attrs) + edge_class.__doc__ = description + edge_types[name] = edge_class + + source_targets = edge_def.get("source_targets", []) or [{"source": "Entity", "target": "Entity"}] + for source_target in source_targets: + signature = ( + 
source_target.get("source", "Entity"), + source_target.get("target", "Entity"), + ) + edge_type_map.setdefault(signature, []).append(name) + + return _OntologyBundle( + entity_types=entity_types, + edge_types=edge_types, + edge_type_map=edge_type_map, + attribute_free_entity_types=self._build_attribute_free_models(entity_types), + attribute_free_edge_types=self._build_attribute_free_models(edge_types), + ) + + @staticmethod + def _build_attribute_free_models( + typed_models: dict[str, type[BaseModel]] + ) -> dict[str, type[BaseModel]]: + stripped_models: dict[str, type[BaseModel]] = {} + for model_name, model_type in typed_models.items(): + attrs: dict[str, Any] = { + "__doc__": model_type.__doc__ or f"A {model_name} type.", + "__annotations__": {}, + } + stripped_model = type(model_name, (BaseModel,), attrs) + stripped_model.__doc__ = model_type.__doc__ + stripped_models[model_name] = stripped_model + return stripped_models + + @staticmethod + def _normalize_node(node: Any) -> GraphNodeRecord: + created_at = getattr(node, 'created_at', None) + return GraphNodeRecord( + uuid=str(getattr(node, 'uuid', None) or getattr(node, 'uuid_', None) or ""), + name=getattr(node, 'name', '') or "", + labels=getattr(node, 'labels', []) or [], + summary=getattr(node, 'summary', '') or "", + attributes=getattr(node, 'attributes', {}) or {}, + created_at=str(created_at) if created_at else None, + ) + + @staticmethod + def _normalize_edge(edge: Any) -> GraphEdgeRecord: + episodes = getattr(edge, 'episodes', None) or [] + if not isinstance(episodes, list): + episodes = [str(episodes)] + return GraphEdgeRecord( + uuid=str(getattr(edge, 'uuid', None) or getattr(edge, 'uuid_', None) or ""), + name=getattr(edge, 'name', '') or "", + fact=getattr(edge, 'fact', '') or "", + source_node_uuid=getattr(edge, 'source_node_uuid', '') or "", + target_node_uuid=getattr(edge, 'target_node_uuid', '') or "", + attributes=getattr(edge, 'attributes', {}) or {}, + created_at=str(getattr(edge, 
'created_at', None)) if getattr(edge, 'created_at', None) else None, + valid_at=str(getattr(edge, 'valid_at', None)) if getattr(edge, 'valid_at', None) else None, + invalid_at=str(getattr(edge, 'invalid_at', None)) if getattr(edge, 'invalid_at', None) else None, + expired_at=str(getattr(edge, 'expired_at', None)) if getattr(edge, 'expired_at', None) else None, + episodes=[str(episode) for episode in episodes], + ) diff --git a/backend/app/services/graph_provider/models.py b/backend/app/services/graph_provider/models.py new file mode 100644 index 000000000..7532f3ee8 --- /dev/null +++ b/backend/app/services/graph_provider/models.py @@ -0,0 +1,41 @@ +""" +Provider-neutral graph data models. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class GraphNodeRecord: + uuid: str + name: str + labels: List[str] = field(default_factory=list) + summary: str = "" + attributes: Dict[str, Any] = field(default_factory=dict) + created_at: Optional[str] = None + + +@dataclass +class GraphEdgeRecord: + uuid: str + name: str + fact: str + source_node_uuid: str + target_node_uuid: str + attributes: Dict[str, Any] = field(default_factory=dict) + created_at: Optional[str] = None + valid_at: Optional[str] = None + invalid_at: Optional[str] = None + expired_at: Optional[str] = None + episodes: List[str] = field(default_factory=list) + + +@dataclass +class GraphSearchResult: + facts: List[str] = field(default_factory=list) + edges: List[GraphEdgeRecord] = field(default_factory=list) + nodes: List[GraphNodeRecord] = field(default_factory=list) + diff --git a/backend/app/services/graph_provider/zep_cloud_provider.py b/backend/app/services/graph_provider/zep_cloud_provider.py new file mode 100644 index 000000000..9eba11bdd --- /dev/null +++ b/backend/app/services/graph_provider/zep_cloud_provider.py @@ -0,0 +1,297 @@ +""" +Zep Cloud graph provider implementation. 
+""" + +from __future__ import annotations + +import time +import uuid +from typing import Any, Optional + +from zep_cloud import EpisodeData, EntityEdgeSourceTarget +from zep_cloud.client import Zep + +from ...config import Config +from ...utils.logger import get_logger +from ...utils.ontology_normalizer import normalize_ontology_for_zep +from ...utils.zep_paging import fetch_all_edges, fetch_all_nodes +from .base import BaseGraphProvider, ProgressCallback +from .models import GraphEdgeRecord, GraphNodeRecord, GraphSearchResult + +logger = get_logger('mirofish.graph_provider.zep_cloud') + + +class ZepCloudGraphProvider(BaseGraphProvider): + """Zep Cloud backed graph provider.""" + + def __init__(self, api_key: Optional[str] = None): + self.api_key = api_key or Config.ZEP_API_KEY + if not self.api_key: + raise ValueError("ZEP_API_KEY 未配置") + + self.client = Zep(api_key=self.api_key) + + def create_graph(self, name: str) -> str: + graph_id = f"mirofish_{uuid.uuid4().hex[:16]}" + self.client.graph.create( + graph_id=graph_id, + name=name, + description="MiroFish Social Simulation Graph", + ) + return graph_id + + def set_ontology(self, graph_id: str, ontology: dict[str, Any]) -> None: + import warnings + from pydantic import Field + from zep_cloud.external_clients.ontology import EdgeModel, EntityModel, EntityText + + warnings.filterwarnings('ignore', category=UserWarning, module='pydantic') + + ontology, entity_name_mapping = normalize_ontology_for_zep(ontology) + renamed_entities = { + original: normalized + for original, normalized in entity_name_mapping.items() + if original != normalized + } + if renamed_entities: + logger.info("Normalized ontology entity names for Zep compatibility: %s", renamed_entities) + + reserved_names = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'} + + def safe_attr_name(attr_name: str) -> str: + if attr_name.lower() in reserved_names: + return f"entity_{attr_name}" + return attr_name + + entity_types: dict[str, 
type[EntityModel]] = {} + for entity_def in ontology.get("entity_types", []): + name = entity_def["name"] + description = entity_def.get("description", f"A {name} entity.") + attrs: dict[str, Any] = {"__doc__": description} + annotations: dict[str, Any] = {} + + for attr_def in entity_def.get("attributes", []): + attr_name = safe_attr_name(attr_def["name"]) + attr_desc = attr_def.get("description", attr_name) + attrs[attr_name] = Field(description=attr_desc, default=None) + annotations[attr_name] = Optional[EntityText] + + attrs["__annotations__"] = annotations + entity_class = type(name, (EntityModel,), attrs) + entity_class.__doc__ = description + entity_types[name] = entity_class + + edge_definitions = {} + for edge_def in ontology.get("edge_types", []): + name = edge_def["name"] + description = edge_def.get("description", f"A {name} relationship.") + attrs = {"__doc__": description} + annotations = {} + + for attr_def in edge_def.get("attributes", []): + attr_name = safe_attr_name(attr_def["name"]) + attr_desc = attr_def.get("description", attr_name) + attrs[attr_name] = Field(description=attr_desc, default=None) + annotations[attr_name] = Optional[str] + + attrs["__annotations__"] = annotations + class_name = ''.join(word.capitalize() for word in name.split('_')) + edge_class = type(class_name, (EdgeModel,), attrs) + edge_class.__doc__ = description + + source_targets = [] + for st in edge_def.get("source_targets", []): + source_targets.append( + EntityEdgeSourceTarget( + source=st.get("source", "Entity"), + target=st.get("target", "Entity"), + ) + ) + + if source_targets: + edge_definitions[name] = (edge_class, source_targets) + + if entity_types or edge_definitions: + self.client.graph.set_ontology( + graph_ids=[graph_id], + entities=entity_types if entity_types else None, + edges=edge_definitions if edge_definitions else None, + ) + + def add_text_batches( + self, + graph_id: str, + chunks: list[str], + batch_size: int = 3, + progress_callback: 
Optional[ProgressCallback] = None, + ) -> list[str]: + episode_uuids: list[str] = [] + total_chunks = len(chunks) + + for i in range(0, total_chunks, batch_size): + batch_chunks = chunks[i:i + batch_size] + batch_num = i // batch_size + 1 + total_batches = (total_chunks + batch_size - 1) // batch_size + + if progress_callback: + progress_callback( + f"发送第 {batch_num}/{total_batches} 批数据 ({len(batch_chunks)} 块)...", + (i + len(batch_chunks)) / total_chunks if total_chunks else 1.0, + ) + + episodes = [EpisodeData(data=chunk, type="text") for chunk in batch_chunks] + batch_result = self.client.graph.add_batch(graph_id=graph_id, episodes=episodes) + + if batch_result and isinstance(batch_result, list): + for episode in batch_result: + episode_uuid = getattr(episode, 'uuid_', None) or getattr(episode, 'uuid', None) + if episode_uuid: + episode_uuids.append(str(episode_uuid)) + + time.sleep(1) + + return episode_uuids + + def wait_for_episodes( + self, + graph_id: str, + episode_uuids: list[str], + progress_callback: Optional[ProgressCallback] = None, + timeout: int = 600, + ) -> None: + if not episode_uuids: + if progress_callback: + progress_callback("无需等待(没有 episode)", 1.0) + return + + start_time = time.time() + pending_episodes = set(episode_uuids) + completed_count = 0 + total_episodes = len(episode_uuids) + + if progress_callback: + progress_callback(f"开始等待 {total_episodes} 个文本块处理...", 0) + + while pending_episodes: + if time.time() - start_time > timeout: + if progress_callback: + progress_callback( + f"部分文本块超时,已完成 {completed_count}/{total_episodes}", + completed_count / total_episodes if total_episodes else 1.0, + ) + break + + for episode_uuid in list(pending_episodes): + try: + episode = self.client.graph.episode.get(uuid_=episode_uuid) + except Exception: + continue + + if getattr(episode, 'processed', False): + pending_episodes.remove(episode_uuid) + completed_count += 1 + + if progress_callback: + elapsed = int(time.time() - start_time) + 
progress_callback( + f"Zep处理中... {completed_count}/{total_episodes} 完成, {len(pending_episodes)} 待处理 ({elapsed}秒)", + completed_count / total_episodes if total_episodes else 1.0, + ) + + if pending_episodes: + time.sleep(3) + + if progress_callback: + progress_callback(f"处理完成: {completed_count}/{total_episodes}", 1.0) + + def get_all_nodes(self, graph_id: str) -> list[GraphNodeRecord]: + return [self._normalize_node(node) for node in fetch_all_nodes(self.client, graph_id)] + + def get_all_edges(self, graph_id: str) -> list[GraphEdgeRecord]: + return [self._normalize_edge(edge) for edge in fetch_all_edges(self.client, graph_id)] + + def get_node(self, graph_id: str, node_uuid: str) -> GraphNodeRecord | None: + node = self.client.graph.node.get(uuid_=node_uuid) + return self._normalize_node(node) if node else None + + def get_node_edges(self, graph_id: str, node_uuid: str) -> list[GraphEdgeRecord]: + edges = self.client.graph.node.get_entity_edges(node_uuid=node_uuid) + return [self._normalize_edge(edge) for edge in edges] + + def search( + self, + graph_id: str, + query: str, + limit: int = 10, + scope: str = "edges", + reranker: str = "cross_encoder", + ) -> GraphSearchResult: + search_results = self.client.graph.search( + graph_id=graph_id, + query=query, + limit=limit, + scope=scope, + reranker=reranker, + ) + + edges = [ + self._normalize_edge(edge) + for edge in getattr(search_results, 'edges', []) or [] + ] + nodes = [ + self._normalize_node(node) + for node in getattr(search_results, 'nodes', []) or [] + ] + + facts = [edge.fact for edge in edges if edge.fact] + if scope == "nodes": + facts.extend(f"[{node.name}]: {node.summary}" for node in nodes if node.summary) + + return GraphSearchResult(facts=facts, edges=edges, nodes=nodes) + + def add_text( + self, + graph_id: str, + data: str, + source_description: str = "MiroFish", + ) -> str | None: + result = self.client.graph.add(graph_id=graph_id, type="text", data=data) + episode_uuid = getattr(result, 'uuid_', 
None) or getattr(result, 'uuid', None) + return str(episode_uuid) if episode_uuid else None + + def delete_graph(self, graph_id: str) -> None: + self.client.graph.delete(graph_id=graph_id) + + @staticmethod + def _normalize_node(node: Any) -> GraphNodeRecord: + node_uuid = getattr(node, 'uuid_', None) or getattr(node, 'uuid', None) or "" + created_at = getattr(node, 'created_at', None) + return GraphNodeRecord( + uuid=str(node_uuid), + name=getattr(node, 'name', '') or "", + labels=getattr(node, 'labels', []) or [], + summary=getattr(node, 'summary', '') or "", + attributes=getattr(node, 'attributes', {}) or {}, + created_at=str(created_at) if created_at else None, + ) + + @staticmethod + def _normalize_edge(edge: Any) -> GraphEdgeRecord: + edge_uuid = getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', None) or "" + episodes = getattr(edge, 'episodes', None) or getattr(edge, 'episode_ids', None) or [] + if not isinstance(episodes, list): + episodes = [str(episodes)] + return GraphEdgeRecord( + uuid=str(edge_uuid), + name=getattr(edge, 'name', '') or "", + fact=getattr(edge, 'fact', '') or "", + source_node_uuid=getattr(edge, 'source_node_uuid', '') or "", + target_node_uuid=getattr(edge, 'target_node_uuid', '') or "", + attributes=getattr(edge, 'attributes', {}) or {}, + created_at=str(getattr(edge, 'created_at', None)) if getattr(edge, 'created_at', None) else None, + valid_at=str(getattr(edge, 'valid_at', None)) if getattr(edge, 'valid_at', None) else None, + invalid_at=str(getattr(edge, 'invalid_at', None)) if getattr(edge, 'invalid_at', None) else None, + expired_at=str(getattr(edge, 'expired_at', None)) if getattr(edge, 'expired_at', None) else None, + episodes=[str(episode) for episode in episodes], + ) + diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 57836c539..0b2616754 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ 
-16,10 +16,10 @@ from datetime import datetime from openai import OpenAI -from zep_cloud.client import Zep from ..config import Config from ..utils.logger import get_logger +from .graph_provider import create_graph_provider from .zep_entity_reader import EntityNode, ZepEntityReader logger = get_logger('mirofish.oasis_profile') @@ -197,16 +197,9 @@ def __init__( base_url=self.base_url ) - # Zep客户端用于检索丰富上下文 self.zep_api_key = zep_api_key or Config.ZEP_API_KEY - self.zep_client = None + self.graph_provider = create_graph_provider() self.graph_id = graph_id - - if self.zep_api_key: - try: - self.zep_client = Zep(api_key=self.zep_api_key) - except Exception as e: - logger.warning(f"Zep客户端初始化失败: {e}") def generate_profile_from_entity( self, @@ -297,9 +290,6 @@ def _search_zep_for_entity(self, entity: EntityNode) -> Dict[str, Any]: """ import concurrent.futures - if not self.zep_client: - return {"facts": [], "node_summaries": [], "context": ""} - entity_name = entity.name results = { @@ -323,7 +313,7 @@ def search_edges(): for attempt in range(max_retries): try: - return self.zep_client.graph.search( + return self.graph_provider.search( query=comprehensive_query, graph_id=self.graph_id, limit=30, @@ -348,7 +338,7 @@ def search_nodes(): for attempt in range(max_retries): try: - return self.zep_client.graph.search( + return self.graph_provider.search( query=comprehensive_query, graph_id=self.graph_id, limit=20, @@ -377,19 +367,19 @@ def search_nodes(): # 处理边搜索结果 all_facts = set() - if edge_result and hasattr(edge_result, 'edges') and edge_result.edges: + if edge_result and edge_result.edges: for edge in edge_result.edges: - if hasattr(edge, 'fact') and edge.fact: + if edge.fact: all_facts.add(edge.fact) results["facts"] = list(all_facts) # 处理节点搜索结果 all_summaries = set() - if node_result and hasattr(node_result, 'nodes') and node_result.nodes: + if node_result and node_result.nodes: for node in node_result.nodes: - if hasattr(node, 'summary') and node.summary: + if 
node.summary: all_summaries.add(node.summary) - if hasattr(node, 'name') and node.name and node.name != entity_name: + if node.name and node.name != entity_name: all_summaries.add(f"相关实体: {node.name}") results["node_summaries"] = list(all_summaries) @@ -1197,4 +1187,3 @@ def save_profiles_to_json( """[已废弃] 请使用 save_profiles() 方法""" logger.warning("save_profiles_to_json已废弃,请使用save_profiles方法") self.save_profiles(profiles, file_path, platform) - diff --git a/backend/app/services/zep_entity_reader.py b/backend/app/services/zep_entity_reader.py index 71661be49..6592b6ed0 100644 --- a/backend/app/services/zep_entity_reader.py +++ b/backend/app/services/zep_entity_reader.py @@ -7,11 +7,9 @@ from typing import Dict, Any, List, Optional, Set, Callable, TypeVar from dataclasses import dataclass, field -from zep_cloud.client import Zep - from ..config import Config +from .graph_provider import create_graph_provider from ..utils.logger import get_logger -from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges logger = get_logger('mirofish.zep_entity_reader') @@ -80,10 +78,7 @@ class ZepEntityReader: def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or Config.ZEP_API_KEY - if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") - - self.client = Zep(api_key=self.api_key) + self.provider = create_graph_provider() def _call_with_retry( self, @@ -136,12 +131,12 @@ def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]: """ logger.info(f"获取图谱 {graph_id} 的所有节点...") - nodes = fetch_all_nodes(self.client, graph_id) + nodes = self.provider.get_all_nodes(graph_id) nodes_data = [] for node in nodes: nodes_data.append({ - "uuid": getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''), + "uuid": node.uuid, "name": node.name or "", "labels": node.labels or [], "summary": node.summary or "", @@ -163,12 +158,12 @@ def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]: """ logger.info(f"获取图谱 {graph_id} 的所有边...") - edges = 
fetch_all_edges(self.client, graph_id) + edges = self.provider.get_all_edges(graph_id) edges_data = [] for edge in edges: edges_data.append({ - "uuid": getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', ''), + "uuid": edge.uuid, "name": edge.name or "", "fact": edge.fact or "", "source_node_uuid": edge.source_node_uuid, @@ -179,7 +174,7 @@ def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]: logger.info(f"共获取 {len(edges_data)} 条边") return edges_data - def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]: + def get_node_edges(self, graph_id: str, node_uuid: str) -> List[Dict[str, Any]]: """ 获取指定节点的所有相关边(带重试机制) @@ -192,14 +187,14 @@ def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]: try: # 使用重试机制调用Zep API edges = self._call_with_retry( - func=lambda: self.client.graph.node.get_entity_edges(node_uuid=node_uuid), + func=lambda: self.provider.get_node_edges(graph_id, node_uuid), operation_name=f"获取节点边(node={node_uuid[:8]}...)" ) edges_data = [] for edge in edges: edges_data.append({ - "uuid": getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', ''), + "uuid": edge.uuid, "name": edge.name or "", "fact": edge.fact or "", "source_node_uuid": edge.source_node_uuid, @@ -348,7 +343,7 @@ def get_entity_with_context( try: # 使用重试机制获取节点 node = self._call_with_retry( - func=lambda: self.client.graph.node.get(uuid_=entity_uuid), + func=lambda: self.provider.get_node(graph_id, entity_uuid), operation_name=f"获取节点详情(uuid={entity_uuid[:8]}...)" ) @@ -356,7 +351,7 @@ def get_entity_with_context( return None # 获取节点的边 - edges = self.get_node_edges(entity_uuid) + edges = self.get_node_edges(graph_id, entity_uuid) # 获取所有节点用于关联查找 all_nodes = self.get_all_nodes(graph_id) @@ -397,7 +392,7 @@ def get_entity_with_context( }) return EntityNode( - uuid=getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''), + uuid=node.uuid, name=node.name or "", labels=node.labels or [], summary=node.summary or "", @@ -433,5 +428,3 @@ def get_entities_by_type( 
enrich_with_edges=enrich_with_edges ) return result.entities - - diff --git a/backend/app/services/zep_graph_memory_updater.py b/backend/app/services/zep_graph_memory_updater.py index a8f3cecd9..0d814727d 100644 --- a/backend/app/services/zep_graph_memory_updater.py +++ b/backend/app/services/zep_graph_memory_updater.py @@ -12,9 +12,8 @@ from datetime import datetime from queue import Queue, Empty -from zep_cloud.client import Zep - from ..config import Config +from .graph_provider import create_graph_provider from ..utils.logger import get_logger logger = get_logger('mirofish.zep_graph_memory_updater') @@ -238,11 +237,7 @@ def __init__(self, graph_id: str, api_key: Optional[str] = None): """ self.graph_id = graph_id self.api_key = api_key or Config.ZEP_API_KEY - - if not self.api_key: - raise ValueError("ZEP_API_KEY未配置") - - self.client = Zep(api_key=self.api_key) + self.provider = create_graph_provider() # 活动队列 self._activity_queue: Queue = Queue() @@ -405,15 +400,15 @@ def _send_batch_activities(self, activities: List[AgentActivity], platform: str) # 带重试的发送 for attempt in range(self.MAX_RETRIES): try: - self.client.graph.add( + display_name = self._get_platform_display_name(platform) + self.provider.add_text( graph_id=self.graph_id, - type="text", - data=combined_text + data=combined_text, + source_description=f"MiroFish {display_name} activity", ) self._total_sent += 1 self._total_items_sent += len(activities) - display_name = self._get_platform_display_name(platform) logger.info(f"成功批量发送 {len(activities)} 条{display_name}活动到图谱 {self.graph_id}") logger.debug(f"批量内容预览: {combined_text[:200]}...") return diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py index 144a22f15..ca861a5f1 100644 --- a/backend/app/services/zep_tools.py +++ b/backend/app/services/zep_tools.py @@ -13,12 +13,10 @@ from typing import Dict, Any, List, Optional from dataclasses import dataclass, field -from zep_cloud.client import Zep - from ..config import Config 
+from .graph_provider import create_graph_provider from ..utils.logger import get_logger from ..utils.llm_client import LLMClient -from ..utils.zep_paging import fetch_all_nodes, fetch_all_edges logger = get_logger('mirofish.zep_tools') @@ -423,10 +421,7 @@ class ZepToolsService: def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None): self.api_key = api_key or Config.ZEP_API_KEY - if not self.api_key: - raise ValueError("ZEP_API_KEY is not configured") - - self.client = Zep(api_key=self.api_key) + self.provider = create_graph_provider() # LLM客户端用于InsightForge生成子问题 self._llm_client = llm_client logger.info("ZepToolsService initialized") @@ -485,48 +480,38 @@ def search_graph( """ logger.info(f"图谱搜索: graph_id={graph_id}, query={query[:50]}...") - # 尝试使用Zep Cloud Search API try: search_results = self._call_with_retry( - func=lambda: self.client.graph.search( + func=lambda: self.provider.search( graph_id=graph_id, query=query, limit=limit, scope=scope, - reranker="cross_encoder" + reranker="cross_encoder", ), operation_name=f"图谱搜索(graph={graph_id})" ) - facts = [] - edges = [] - nodes = [] - - # 解析边搜索结果 - if hasattr(search_results, 'edges') and search_results.edges: - for edge in search_results.edges: - if hasattr(edge, 'fact') and edge.fact: - facts.append(edge.fact) - edges.append({ - "uuid": getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', ''), - "name": getattr(edge, 'name', ''), - "fact": getattr(edge, 'fact', ''), - "source_node_uuid": getattr(edge, 'source_node_uuid', ''), - "target_node_uuid": getattr(edge, 'target_node_uuid', ''), - }) - - # 解析节点搜索结果 - if hasattr(search_results, 'nodes') and search_results.nodes: - for node in search_results.nodes: - nodes.append({ - "uuid": getattr(node, 'uuid_', None) or getattr(node, 'uuid', ''), - "name": getattr(node, 'name', ''), - "labels": getattr(node, 'labels', []), - "summary": getattr(node, 'summary', ''), - }) - # 节点摘要也算作事实 - if hasattr(node, 'summary') and node.summary: - 
facts.append(f"[{node.name}]: {node.summary}") + facts = list(search_results.facts) + edges = [ + { + "uuid": edge.uuid, + "name": edge.name, + "fact": edge.fact, + "source_node_uuid": edge.source_node_uuid, + "target_node_uuid": edge.target_node_uuid, + } + for edge in search_results.edges + ] + nodes = [ + { + "uuid": node.uuid, + "name": node.name, + "labels": node.labels, + "summary": node.summary, + } + for node in search_results.nodes + ] logger.info(f"搜索完成: 找到 {len(facts)} 条相关事实") @@ -659,13 +644,12 @@ def get_all_nodes(self, graph_id: str) -> List[NodeInfo]: """ logger.info(f"获取图谱 {graph_id} 的所有节点...") - nodes = fetch_all_nodes(self.client, graph_id) + nodes = self.provider.get_all_nodes(graph_id) result = [] for node in nodes: - node_uuid = getattr(node, 'uuid_', None) or getattr(node, 'uuid', None) or "" result.append(NodeInfo( - uuid=str(node_uuid) if node_uuid else "", + uuid=node.uuid, name=node.name or "", labels=node.labels or [], summary=node.summary or "", @@ -688,13 +672,12 @@ def get_all_edges(self, graph_id: str, include_temporal: bool = True) -> List[Ed """ logger.info(f"获取图谱 {graph_id} 的所有边...") - edges = fetch_all_edges(self.client, graph_id) + edges = self.provider.get_all_edges(graph_id) result = [] for edge in edges: - edge_uuid = getattr(edge, 'uuid_', None) or getattr(edge, 'uuid', None) or "" edge_info = EdgeInfo( - uuid=str(edge_uuid) if edge_uuid else "", + uuid=edge.uuid, name=edge.name or "", fact=edge.fact or "", source_node_uuid=edge.source_node_uuid or "", @@ -727,7 +710,7 @@ def get_node_detail(self, node_uuid: str) -> Optional[NodeInfo]: try: node = self._call_with_retry( - func=lambda: self.client.graph.node.get(uuid_=node_uuid), + func=lambda: self.provider.get_node("", node_uuid), operation_name=f"获取节点详情(uuid={node_uuid[:8]}...)" ) @@ -761,14 +744,21 @@ def get_node_edges(self, graph_id: str, node_uuid: str) -> List[EdgeInfo]: logger.info(f"获取节点 {node_uuid[:8]}... 
的相关边") try: - # 获取图谱所有边,然后过滤 - all_edges = self.get_all_edges(graph_id) - + provider_edges = self.provider.get_node_edges(graph_id, node_uuid) result = [] - for edge in all_edges: - # 检查边是否与指定节点相关(作为源或目标) - if edge.source_node_uuid == node_uuid or edge.target_node_uuid == node_uuid: - result.append(edge) + for edge in provider_edges: + edge_info = EdgeInfo( + uuid=edge.uuid, + name=edge.name or "", + fact=edge.fact or "", + source_node_uuid=edge.source_node_uuid or "", + target_node_uuid=edge.target_node_uuid or "", + created_at=edge.created_at, + valid_at=edge.valid_at, + invalid_at=edge.invalid_at, + expired_at=edge.expired_at, + ) + result.append(edge_info) logger.info(f"找到 {len(result)} 条与节点相关的边") return result From 5bf5eb46767f82889abe54aef6e00f70e0663d6a Mon Sep 17 00:00:00 2001 From: Anurag Date: Sat, 28 Mar 2026 18:46:29 +0530 Subject: [PATCH 04/13] build add local graphiti runtime and neo4j setup --- .env.example | 33 +++++++++++++++++++++++++++++++-- Dockerfile | 3 ++- backend/pyproject.toml | 7 +++++-- backend/requirements.txt | 10 ++++++++-- backend/uv.lock | 34 +++++++++++++++++++++++++++++++++- docker-compose.yml | 19 +++++++++++++++++++ package.json | 2 +- 7 files changed, 99 insertions(+), 9 deletions(-) diff --git a/.env.example b/.env.example index 4e2a83405..8300e4254 100644 --- a/.env.example +++ b/.env.example @@ -5,10 +5,39 @@ LLM_API_KEY=your_api_key_here LLM_BASE_URL=https://api.openai.com/v1 LLM_MODEL_NAME=gpt-4o -# ===== ZEP memory graph configuration ===== -# The free monthly quota is enough for basic usage: https://app.getzep.com/ +# ===== Graph backend selection ===== +# Use zep_cloud for hosted Zep, or graphiti_local for local Neo4j + Graphiti +GRAPH_BACKEND=zep_cloud + +# ===== Zep Cloud configuration ===== +# Required only when GRAPH_BACKEND=zep_cloud ZEP_API_KEY=your_zep_api_key_here +# ===== Local Graphiti + Neo4j configuration ===== +# Required only when GRAPH_BACKEND=graphiti_local +# Note: the local Graphiti backend stores all 
graphs in one Neo4j database +# and isolates each MiroFish graph by Graphiti `group_id`. +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your_neo4j_password_here +NEO4J_DATABASE=neo4j +GRAPHITI_AUTO_INIT=true +GRAPHITI_TELEMETRY_ENABLED=false +GRAPHITI_MAX_COROUTINES=10 +GRAPHITI_SEARCH_RERANKER=rrf + +# Optional: override Graphiti model settings +# If omitted, Graphiti falls back to the main LLM settings above +GRAPHITI_LLM_API_KEY= +GRAPHITI_LLM_BASE_URL= +GRAPHITI_LLM_MODEL= +GRAPHITI_EMBEDDER_API_KEY= +GRAPHITI_EMBEDDER_BASE_URL= +GRAPHITI_EMBEDDER_MODEL=text-embedding-3-small +GRAPHITI_RERANKER_API_KEY= +GRAPHITI_RERANKER_BASE_URL= +GRAPHITI_RERANKER_MODEL= + # ===== Accelerated LLM configuration (optional) ===== # If you are not using accelerated configuration, do not include the fields below in your env file LLM_BOOST_API_KEY=your_api_key_here diff --git a/Dockerfile b/Dockerfile index 6a73d0a4b..b635d4795 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,7 +18,8 @@ COPY backend/pyproject.toml backend/uv.lock ./backend/ # Install dependencies (Node + Python) RUN npm ci \ && npm ci --prefix frontend \ - && cd backend && uv sync --frozen + && cd backend && uv sync --frozen \ + && uv pip install --python .venv/bin/python --no-deps graphiti-core==0.28.2 # Copy the project source COPY . . 
diff --git a/backend/pyproject.toml b/backend/pyproject.toml index ec46a6ab1..9dc96ac56 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -14,10 +14,13 @@ dependencies = [ "flask-cors>=6.0.0", # LLM support - "openai>=1.0.0", + "openai>=1.91.0", - # Zep Cloud + # Graph backends "zep-cloud==3.13.0", + "numpy>=1.0.0", + "posthog>=3.0.0", + "tenacity>=9.0.0", # OASIS social media simulation "camel-oasis==0.2.5", diff --git a/backend/requirements.txt b/backend/requirements.txt index 93a3b5bf7..a5a053c42 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -11,10 +11,16 @@ flask-cors>=6.0.0 # ============= LLM Support ============= # OpenAI SDK (all LLM calls use the OpenAI-compatible format) -openai>=1.0.0 +openai>=1.91.0 -# ============= Zep Cloud ============= +# ============= Graph Backends ============= zep-cloud==3.13.0 +# Local Graphiti support is installed separately with: +# uv pip install --python .venv/bin/python --no-deps graphiti-core==0.28.2 +# This avoids conflicting Neo4j driver pins between Graphiti and camel-oasis. 
+numpy>=1.0.0 +posthog>=3.0.0 +tenacity>=9.0.0 # ============= OASIS Social Media Simulation ============= # OASIS social simulation framework diff --git a/backend/uv.lock b/backend/uv.lock index f1ce4b60e..303dbb21c 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -1248,10 +1248,13 @@ dependencies = [ { name = "charset-normalizer" }, { name = "flask" }, { name = "flask-cors" }, + { name = "numpy" }, { name = "openai" }, + { name = "posthog" }, { name = "pydantic" }, { name = "pymupdf" }, { name = "python-dotenv" }, + { name = "tenacity" }, { name = "zep-cloud" }, ] @@ -1276,13 +1279,16 @@ requires-dist = [ { name = "charset-normalizer", specifier = ">=3.0.0" }, { name = "flask", specifier = ">=3.0.0" }, { name = "flask-cors", specifier = ">=6.0.0" }, - { name = "openai", specifier = ">=1.0.0" }, + { name = "numpy", specifier = ">=1.0.0" }, + { name = "openai", specifier = ">=1.91.0" }, { name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" }, + { name = "posthog", specifier = ">=3.0.0" }, { name = "pydantic", specifier = ">=2.0.0" }, { name = "pymupdf", specifier = ">=1.24.0" }, { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" }, { name = "python-dotenv", specifier = ">=1.0.0" }, + { name = "tenacity", specifier = ">=9.0.0" }, { name = "zep-cloud", specifier = "==3.13.0" }, ] provides-extras = ["dev"] @@ -1840,6 +1846,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "posthog" +version = "7.9.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "distro" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name 
= "six" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/a7/2865487853061fbd62383492237b546d2d8f7c1846272350d2b9e14138cd/posthog-7.9.12.tar.gz", hash = "sha256:ebabf2eb2e1c1fbf22b0759df4644623fa43cc6c9dcbe9fd429b7937d14251ec", size = 176828, upload-time = "2026-03-12T09:01:15.184Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/a9/7a803aed5a5649cf78ea7b31e90d0080181ba21f739243e1741a1e607f1f/posthog-7.9.12-py3-none-any.whl", hash = "sha256:7175bd1698a566bfea98a016c64e3456399f8046aeeca8f1d04ae5bf6c5a38d0", size = 202469, upload-time = "2026-03-12T09:01:13.38Z" }, +] + [[package]] name = "prance" version = "23.6.21.0" @@ -2987,6 +3010,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] +[[package]] +name = "tenacity" +version = "9.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a", size = 49413, upload-time = "2026-02-07T10:45:33.841Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55", size = 28926, upload-time = "2026-02-07T10:45:32.24Z" }, +] + [[package]] name = "texttable" version = "1.7.0" diff --git a/docker-compose.yml b/docker-compose.yml index b7ea34507..e8035489d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,25 @@ services: ports: - "3000:3000" - "5001:5001" + depends_on: + - neo4j 
restart: unless-stopped volumes: - ./backend/uploads:/app/backend/uploads + neo4j: + image: neo4j:5.26.22-enterprise + container_name: mirofish-neo4j + environment: + NEO4J_AUTH: ${NEO4J_USER:-neo4j}/${NEO4J_PASSWORD:-mirofish-local-password} + NEO4J_ACCEPT_LICENSE_AGREEMENT: "yes" + ports: + - "7474:7474" + - "7687:7687" + restart: unless-stopped + volumes: + - neo4j_data:/data + - neo4j_logs:/logs + +volumes: + neo4j_data: + neo4j_logs: diff --git a/package.json b/package.json index a22f2300c..3429d39a8 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "description": "MiroFish - A simple, universal swarm intelligence engine for predicting anything", "scripts": { "setup": "npm install && cd frontend && npm install", - "setup:backend": "cd backend && uv sync", + "setup:backend": "cd backend && uv sync && uv pip install --python .venv/bin/python --no-deps graphiti-core==0.28.2", "setup:all": "npm run setup && npm run setup:backend", "dev": "concurrently --kill-others -n \"backend,frontend\" -c \"green,cyan\" \"npm run backend\" \"npm run frontend\"", "backend": "cd backend && uv run python run.py", From ee6d99d4c5ba0cc7bfa68b3d640273f20b3a7013 Mon Sep 17 00:00:00 2001 From: Anurag Date: Sat, 28 Mar 2026 18:46:33 +0530 Subject: [PATCH 05/13] docs describe zep cloud to local graphiti migration --- README-EN.md | 28 +- README.md | 28 +- docs/zep-cloud-to-local-migration-plan.md | 663 ++++++++++++++++++++++ 3 files changed, 715 insertions(+), 4 deletions(-) create mode 100644 docs/zep-cloud-to-local-migration-plan.md diff --git a/README-EN.md b/README-EN.md index 023f304a9..fc58b26ef 100644 --- a/README-EN.md +++ b/README-EN.md @@ -122,9 +122,21 @@ LLM_API_KEY=your_api_key LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus -# Zep Cloud Configuration -# Free monthly quota is sufficient for simple usage: https://app.getzep.com/ +# Graph backend selection +# Use zep_cloud for hosted Zep, or graphiti_local for local Neo4j + 
Graphiti +GRAPH_BACKEND=zep_cloud + +# Zep Cloud configuration +# Required only when GRAPH_BACKEND=zep_cloud ZEP_API_KEY=your_zep_api_key + +# Local Graphiti + Neo4j configuration +# Required only when GRAPH_BACKEND=graphiti_local +# Note: the local Graphiti backend stores all graphs in one Neo4j database +# and isolates each MiroFish graph by Graphiti `group_id`. +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your_neo4j_password ``` #### 2. Install Dependencies @@ -151,6 +163,17 @@ npm run setup:backend npm run dev ``` +If you use `GRAPH_BACKEND=graphiti_local`, start Neo4j too: + +```bash +docker compose up -d neo4j +``` + +The bundled `docker-compose.yml` uses `neo4j:5.26.22-enterprise` with +`NEO4J_ACCEPT_LICENSE_AGREEMENT=yes` as the safe local default. +The current local backend still keeps all graphs in the default Neo4j database +and maps each MiroFish `graph_id` directly to a Graphiti `group_id`. + **Service URLs:** - Frontend: `http://localhost:3000` - Backend API: `http://localhost:5001` @@ -175,6 +198,7 @@ docker compose up -d Reads `.env` from root directory by default, maps ports `3000 (frontend) / 5001 (backend)` > Mirror address for faster pulling is provided as comments in `docker-compose.yml`, replace if needed. +> When `GRAPH_BACKEND=graphiti_local`, the bundled compose stack starts a local Neo4j instance for Graphiti storage. The repo keeps the enterprise image as the default compose target because existing local stores may use the block format. 
## 📬 Join the Conversation diff --git a/README.md b/README.md index 0b1763b75..7013265eb 100644 --- a/README.md +++ b/README.md @@ -122,9 +122,21 @@ LLM_API_KEY=your_api_key LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 LLM_MODEL_NAME=qwen-plus -# ZEP memory graph configuration -# The free monthly quota is enough for basic usage: https://app.getzep.com/ +# Graph backend selection +# Use zep_cloud for hosted Zep, or graphiti_local for local Neo4j + Graphiti +GRAPH_BACKEND=zep_cloud + +# Zep Cloud configuration +# Required only when GRAPH_BACKEND=zep_cloud ZEP_API_KEY=your_zep_api_key + +# Local Graphiti + Neo4j configuration +# Required only when GRAPH_BACKEND=graphiti_local +# Note: the local Graphiti backend stores all graphs in one Neo4j database +# and isolates each MiroFish graph by Graphiti `group_id`. +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=your_neo4j_password ``` #### 2. Install Dependencies @@ -151,6 +163,17 @@ npm run setup:backend npm run dev ``` +If you use `GRAPH_BACKEND=graphiti_local`, start Neo4j too: + +```bash +docker compose up -d neo4j +``` + +The bundled `docker-compose.yml` uses `neo4j:5.26.22-enterprise` with +`NEO4J_ACCEPT_LICENSE_AGREEMENT=yes` as the safe default for local compatibility. +The current local backend still keeps all graphs in the default Neo4j database +and maps each MiroFish `graph_id` directly to a Graphiti `group_id`. + **Service URLs:** - Frontend: `http://localhost:3000` - Backend API: `http://localhost:5001` @@ -175,6 +198,7 @@ docker compose up -d Docker reads `.env` from the project root by default and maps ports `3000 (frontend) / 5001 (backend)`. > A mirror image URL is provided as a comment in `docker-compose.yml` if you need a faster pull source. +> When `GRAPH_BACKEND=graphiti_local`, the bundled compose stack starts a local Neo4j instance for Graphiti storage. 
The repo keeps the enterprise image as the default compose target because existing local stores may use the block format. ## 📬 Join the Conversation diff --git a/docs/zep-cloud-to-local-migration-plan.md b/docs/zep-cloud-to-local-migration-plan.md new file mode 100644 index 000000000..51cbf08d5 --- /dev/null +++ b/docs/zep-cloud-to-local-migration-plan.md @@ -0,0 +1,663 @@ +# Zep Cloud to Local Migration Plan + +Date: 2026-03-28 + +Status: Planning document only. No runtime changes are included in this document. + +## Executive summary + +MiroFish is not using Zep Cloud in just one place. It depends on Zep Cloud for: + +- Graph creation and deletion +- Ontology registration +- Text ingestion and episode processing +- Full graph reads for visualization and simulation prep +- Semantic and hybrid search for the report agent +- Live simulation memory updates back into the graph + +Because of that, migrating from Zep Cloud to a local setup is not a simple API-key swap. + +As of 2026-03-28, Zep’s current docs say that Zep Community Edition is deprecated and no longer supported, and point self-hosted users to Graphiti or BYOC instead. That means the safest “Zep local” target for this repo is: + +- `graphiti-core` running inside the backend +- A local Neo4j instance for graph storage +- A provider abstraction layer so MiroFish can run `zep_cloud` and `graphiti_local` side by side during rollout + +This plan assumes that target. 
+ +## Official references + +- Zep FAQ: https://help.getzep.com/faq +- Zep open-source direction announcement, published April 2, 2025: https://blog.getzep.com/announcing-a-new-direction-for-zeps-open-source-strategy/ +- Graphiti quick start: https://help.getzep.com/graphiti/getting-started/quick-start +- Graphiti Neo4j configuration: https://help.getzep.com/graphiti/configuration/neo-4-j-configuration +- Graphiti custom entity and edge types: https://help.getzep.com/graphiti/core-concepts/custom-entity-and-edge-types +- Graphiti graph namespacing: https://help.getzep.com/graphiti/core-concepts/graph-namespacing +- Graphiti search: https://help.getzep.com/graphiti/working-with-data/searching +- Graphiti CRUD operations: https://help.getzep.com/graphiti/working-with-data/crud-operations +- Graphiti fact triples: https://help.getzep.com/graphiti/working-with-data/adding-fact-triples + +## Current MiroFish dependency map + +These files are the main Zep Cloud touch points in the repo today: + +- `backend/app/config.py` + - Requires `ZEP_API_KEY` +- `backend/requirements.txt` + - Pins `zep-cloud==3.13.0` +- `backend/pyproject.toml` + - Pins `zep-cloud==3.13.0` +- `backend/app/services/graph_builder.py` + - Creates graphs, sets ontology, ingests batches, waits for episode processing, reads graph data, deletes graphs +- `backend/app/services/zep_entity_reader.py` + - Reads all nodes and edges, enriches entities with edge context +- `backend/app/services/zep_tools.py` + - Runs graph search and powers report-agent retrieval +- `backend/app/services/zep_graph_memory_updater.py` + - Pushes live simulation activity back into the graph +- `backend/app/services/oasis_profile_generator.py` + - Uses graph search to enrich agent profiles +- `backend/app/utils/zep_paging.py` + - Implements graph-wide pagination on Zep node and edge list APIs +- `backend/app/api/graph.py` + - Exposes graph build, read, and delete flows +- `backend/app/api/simulation.py` + - Exposes graph entity read 
flows used by simulation setup +- `backend/app/api/report.py` + - Exposes report search/statistics endpoints +- `.env.example`, `README.md`, `README-EN.md` + - Document Zep Cloud setup +- `docker-compose.yml` + - Does not currently start any local graph database + +## What must keep working after migration + +The migration is successful only if these user-visible flows still work: + +- Build a graph from uploaded source material +- Store and reuse a project-level `graph_id` +- Load graph nodes and edges in the UI +- Read entities from the graph to prepare simulation agents +- Search the graph for report generation +- Keep live simulation memory updates enabled +- Delete graph data when a project is removed or rebuilt + +## Recommended target architecture + +### 1. Keep the existing product-level `graph_id` + +Do not change the frontend contract if we can avoid it. + +- Keep storing `graph_id` in project and simulation JSON +- In the new local provider, keep `graph_id` as the app-level identifier +- Reuse `graph_id` as the primary Graphiti `group_id` +- This preserves almost all frontend behavior and avoids a database migration in the UI layer + +Reason: + +- Graphiti uses `group_id` for isolated graph namespaces +- In the currently targeted `graphiti-core` release, MiroFish can stay on a single Neo4j database and isolate graphs by `group_id` +- MiroFish already thinks in terms of one graph per project +- `graph_id -> group_id` is the cleanest compatibility bridge for this repo + +### 2. 
Add a graph provider abstraction in the backend + +Create a small internal interface, for example: + +- `create_graph(name) -> graph_id` +- `delete_graph(graph_id)` +- `set_ontology(graph_id, ontology)` +- `add_text_batch(graph_id, chunks)` +- `wait_for_ingestion(graph_id, job_ref)` +- `get_all_nodes(graph_id)` +- `get_all_edges(graph_id)` +- `get_node(graph_id, node_uuid)` +- `get_node_edges(graph_id, node_uuid)` +- `search_graph(graph_id, query, scope, limit)` +- `append_activity(graph_id, text)` +- `get_graph_statistics(graph_id)` + +Implement two providers: + +- `ZepCloudGraphProvider` +- `GraphitiLocalGraphProvider` + +Add a factory selected by an env var such as: + +- `GRAPH_BACKEND=zep_cloud` +- `GRAPH_BACKEND=graphiti_local` + +### 3. Embed Graphiti in the backend first + +For the first migration, do not introduce a second application service unless it becomes necessary. + +Recommended first version: + +- Flask backend remains the API server +- Graphiti runs as a Python library inside the backend process +- Neo4j runs as a local service in Docker Compose + +Why this is the safest first move: + +- Fewer moving parts +- Easier local debugging +- Lower operational complexity +- Faster path to dual-run and rollback + +### 4. Treat async Graphiti calls as a real design task + +This is an important implementation detail. + +Current MiroFish backend code is mostly synchronous Flask code. Graphiti’s documented API is async-first. 
That means we need one of these approaches: + +- Wrap Graphiti calls in a sync adapter using a controlled event-loop helper +- Move graph operations into a worker or async service + +Recommendation: + +- Phase 1 and Phase 2 should use a thin sync adapter around Graphiti +- Only move to a separate async service if latency or concurrency becomes a real problem + +## API and data-model mapping + +### Current Zep Cloud behavior to replace + +| Current behavior | Current MiroFish usage | Local replacement strategy | +| --- | --- | --- | +| `graph.create` | Creates one graph per project | No separate graph object in Graphiti; create and reserve a `group_id` namespace | +| `graph.set_ontology` | Registers project ontology before ingestion | Pass custom entity types, edge types, and edge maps during episode ingestion | +| `graph.add_batch` | Sends document chunks for extraction | Loop over `graphiti.add_episode(...)` or bulk helper if adopted | +| `episode.get(...processed)` | Polls until ingestion finishes | Track ingestion at app level; treat Graphiti call completion as local completion, or store background job state | +| `graph.node.get_by_graph_id` | Reads all nodes | Query by `group_id`, using Graphiti CRUD utilities or direct Neo4j Cypher | +| `graph.edge.get_by_graph_id` | Reads all edges | Query by `group_id`, using Graphiti CRUD utilities or direct Neo4j Cypher | +| `graph.search(scope="edges" / "nodes")` | Report and simulation retrieval | Use `graphiti.search()` or `graphiti._search()` recipes for edge-only and node-only search | +| `graph.add(type="text", data=...)` | Live simulation memory writeback | Convert activity batches into Graphiti text episodes | +| `graph.delete` | Removes a graph | Delete all nodes, edges, and episodes for the namespace | + +### Ontology compatibility notes + +Graphiti supports custom entity and edge types using Pydantic models, which matches MiroFish’s current ontology-generation approach well. 
It also has similar protected attribute names, including: + +- `uuid` +- `name` +- `group_id` +- `labels` +- `created_at` +- `summary` +- `attributes` +- `name_embedding` + +This is good news for MiroFish because `backend/app/services/graph_builder.py` already normalizes ontology names and protected attributes for Zep-style constraints. That logic should be reused, not rewritten from scratch. + +### Search compatibility notes + +Graphiti supports: + +- Hybrid search +- Node distance reranking +- Configurable `_search()` recipes for node-only and edge-only search + +The main migration work is not “can Graphiti search,” but: + +- matching the result shape expected by `zep_tools.py` +- preserving edge facts, node summaries, and score ordering closely enough for report quality + +### Full graph reads + +The current UI and simulation setup rely on full-graph reads, not just search. + +That means the local provider must expose: + +- list all nodes for a namespace +- list all edges for a namespace +- get one node by UUID +- get related edges for a node + +Recommendation: + +- Use direct Neo4j reads by `group_id` for graph-wide list endpoints +- Keep Graphiti itself focused on ingestion and search + +This is simpler than trying to force every UI read through search APIs. 
+ +## Migration phases + +## Phase 0: Lock the target and scope + +Goal: + +- Avoid starting implementation against the wrong “local Zep” target + +Tasks: + +- Confirm the target is `Graphiti + Neo4j`, not deprecated Zep Community Edition +- Decide whether the migration must cover: + - existing historical graphs + - only newly created graphs + - both +- Decide whether local graph storage is the only goal, or whether local LLM/embedding providers are also required +- Define feature parity as: + - required for launch + - acceptable with minor quality drift + - allowed to defer + +Exit criteria: + +- One agreed target architecture +- One agreed feature-parity list +- One agreed data-migration scope + +## Phase 1: Prepare infrastructure + +Goal: + +- Make the repo able to run a local graph backend + +Tasks: + +- Add Neo4j service to `docker-compose.yml` +- Add persistent Neo4j volume +- Add backend env vars: + - `GRAPH_BACKEND` + - `NEO4J_URI` + - `NEO4J_USER` + - `NEO4J_PASSWORD` + - optional `GRAPHITI_TELEMETRY_ENABLED=false` + - optional `SEMAPHORE_LIMIT` +- Add Graphiti-compatible runtime deps to `backend/requirements.txt` and `backend/pyproject.toml` +- Install `graphiti-core` separately with `uv pip install --no-deps ...` to avoid the `neo4j` version conflict with `camel-oasis` +- Keep `zep-cloud` installed during dual-run +- Add backend startup initialization: + - connect to Graphiti + - call `build_indices_and_constraints()` once +- Update `.env.example`, `README.md`, and `README-EN.md` + +Recommended first Docker addition: + +- Neo4j 5.26+ image +- For this repo, keeping the enterprise variant in `docker-compose.yml` is the safer default if you may reuse volumes that were created with Neo4j block format +- Ports `7474` and `7687` +- Persistent `/data` and `/logs` volumes +- One shared Neo4j database with Graphiti `group_id` isolation per MiroFish graph + +Exit criteria: + +- Local `docker compose up` starts Neo4j and MiroFish +- Backend can connect to Neo4j +- 
Graphiti indices are created successfully + +## Phase 2: Add the provider abstraction without changing behavior + +Goal: + +- Isolate Zep-specific code before swapping implementations + +Tasks: + +- Create `graph_provider` interface and factory +- Move Zep client construction behind the provider +- Keep existing Zep behavior as the default implementation +- Refactor these services to depend on the provider instead of importing `zep_cloud` directly: + - `graph_builder.py` + - `zep_entity_reader.py` + - `zep_tools.py` + - `zep_graph_memory_updater.py` + - `oasis_profile_generator.py` +- Refactor `zep_paging.py` into provider-neutral graph read helpers or retire it in favor of provider methods + +Important rule: + +- Do not change API response shapes in this phase + +Exit criteria: + +- App still works exactly as before with `GRAPH_BACKEND=zep_cloud` +- Zep-specific imports are limited to the Zep provider module + +## Phase 3: Implement the local Graphiti provider + +Goal: + +- Support the same MiroFish workflows on a local graph backend + +Tasks: + +- Map `graph_id` to Graphiti `group_id` +- Implement `create_graph` as namespace bootstrap +- Reuse ontology normalization logic from current graph builder +- Convert MiroFish ontology into: + - Graphiti custom entity types + - Graphiti custom edge types + - Graphiti edge type map +- Implement chunk ingestion with `add_episode` +- Implement live simulation memory writes with `add_episode` +- Implement search using: + - edge-focused `_search()` recipe for fact retrieval + - node-focused `_search()` recipe for entity retrieval +- Implement graph-wide reads by `group_id` +- Implement delete-by-namespace logic + +Important local-behavior differences to handle: + +- Graphiti is namespace-based, not graph-object-based +- Ingestion lifecycle is different from Zep Cloud polling +- Search result objects will not be identical to Zep Cloud result objects + +Exit criteria: + +- A new graph can be built locally +- The UI can render 
graph nodes and edges +- Simulation prep can read filtered entities +- Report agent search returns usable facts + +## Phase 4: Data migration and backfill + +Goal: + +- Move old project graphs, not just new ones + +Recommended order of preference: + +### Option A: Re-ingest from original source documents + +This is the best option when the original uploaded material still exists. + +Why it is preferred: + +- It preserves the intended extraction pipeline +- It preserves ontology-guided classification +- It avoids lossy conversion from already-extracted node and edge summaries back into raw text + +Use this when: + +- the original uploaded text or PDF is still available +- the project ontology is still stored + +### Option B: Rebuild from exported facts and nodes + +Use this only for projects where the original source text is missing. + +Approach: + +- Export Zep Cloud nodes and edges with existing MiroFish read code +- Convert important edges into fact triples or synthetic episodes +- Rebuild the namespace in Graphiti + +Tradeoff: + +- Faster for stranded data +- Lower fidelity than re-ingesting original source material + +### Required migration script + +Create a script such as: + +- `backend/scripts/migrate_zep_cloud_to_graphiti.py` + +Suggested responsibilities: + +- list existing projects with `graph_id` +- detect whether original source text is available +- choose migration mode per project +- create local namespace +- ingest data +- validate node and edge counts +- write migration report JSON + +Suggested validation fields per project: + +- old graph id +- new group id +- migration mode used +- old node count +- new node count +- old edge count +- new edge count +- top 10 search comparison queries +- status +- error details if failed + +Exit criteria: + +- A representative sample of old projects has been migrated and validated + +## Phase 5: Dual-run and comparison + +Goal: + +- Prove that local results are good enough before cutover + +Tasks: + +- Add a 
temporary comparison mode +- For selected projects: + - build or migrate graph in both backends + - run the same search queries against both + - compare: + - returned facts + - node summaries + - graph statistics + - report quality +- Log mismatches for manual review + +Recommended comparison set: + +- graph build from a medium-size document +- entity list for simulation setup +- 10 report-agent queries from real historical runs +- live memory update during a short simulation + +Exit criteria: + +- No blocker regressions in core flows +- Search quality is acceptable for report generation + +## Phase 6: Cutover + +Goal: + +- Move production behavior to the local backend with low risk + +Tasks: + +- Keep both backends available behind `GRAPH_BACKEND` +- Start with local backend in dev only +- Then test on a small set of staging projects +- Then switch default backend for new graphs only +- After confidence is high, migrate old projects and switch all reads to local + +Safe cutover order: + +1. New graph builds go to local +2. New simulation live updates go to local +3. Report/search reads go to local +4. Historical projects are backfilled +5. 
Zep Cloud becomes fallback only + +Rollback: + +- Flip `GRAPH_BACKEND` back to `zep_cloud` +- Leave dual-write or dual-read disabled unless specifically needed + +## Phase 7: Cleanup + +Goal: + +- Remove cloud-only assumptions after the local backend is stable + +Tasks: + +- Remove `ZEP_API_KEY` from required config if no longer needed +- Remove `zep-cloud` dependency +- Remove Zep-specific code paths and helpers +- Rename files and classes so they are provider-neutral + - example: `zep_tools.py` -> `graph_tools.py` + - example: `ZepEntityReader` -> `GraphEntityReader` +- Update docs to describe local-first setup + +Exit criteria: + +- No runtime path depends on Zep Cloud +- Local setup is the documented default + +## File-level implementation plan + +### Config and infra + +- `backend/app/config.py` + - add `GRAPH_BACKEND` + - add `NEO4J_URI` + - add `NEO4J_USER` + - add `NEO4J_PASSWORD` + - stop hard-failing on missing `ZEP_API_KEY` when local backend is selected +- `.env.example` + - document both cloud and local modes +- `docker-compose.yml` + - add Neo4j service and volume +- `backend/requirements.txt` + - add Graphiti-compatible runtime dependencies (`graphiti-core` itself is installed separately with `--no-deps`, as described in Phase 1) +- `backend/pyproject.toml` + - add Graphiti-compatible runtime dependencies (`graphiti-core` itself is installed separately with `--no-deps`, as described in Phase 1) + +### New provider layer + +Recommended new files: + +- `backend/app/services/graph_provider/base.py` +- `backend/app/services/graph_provider/factory.py` +- `backend/app/services/graph_provider/zep_cloud_provider.py` +- `backend/app/services/graph_provider/graphiti_local_provider.py` +- `backend/app/services/graph_provider/models.py` + +### Existing service refactors + +- `backend/app/services/graph_builder.py` + - use provider for graph lifecycle and ingestion +- `backend/app/services/zep_entity_reader.py` + - make provider-neutral +- `backend/app/services/zep_tools.py` + - make provider-neutral +- `backend/app/services/zep_graph_memory_updater.py` + - write activities through provider +- `backend/app/services/oasis_profile_generator.py` + - search via provider +- 
`backend/app/api/graph.py` + - leave API shape stable +- `backend/app/api/simulation.py` + - leave API shape stable +- `backend/app/api/report.py` + - leave API shape stable + +### Frontend impact + +Frontend changes should be minimal if backend response shapes stay stable. + +Likely no required frontend changes beyond wording updates in docs or setup screens. + +## Testing plan + +### Unit tests + +- provider factory selection +- ontology normalization compatibility +- graph-id to group-id mapping +- node and edge shape normalization +- search result shape normalization + +### Integration tests + +- create graph locally +- ingest text chunks +- read all nodes and edges +- retrieve filtered entities +- run report search +- push live simulation activity and confirm graph updates +- delete namespace and confirm cleanup + +### Regression tests + +Use at least one real project fixture and compare: + +- node count difference stays within an agreed threshold +- edge count difference stays within an agreed threshold +- top search results are semantically comparable +- report output remains acceptable to a human reviewer + +## Risks and mitigation + +### Risk 1: Search quality differs from Zep Cloud + +Mitigation: + +- dual-run search comparisons +- tune Graphiti search recipes +- add fallback local keyword search only as backup, not primary behavior + +### Risk 2: Full graph reads are harder than search + +Mitigation: + +- use direct Neo4j namespace queries for UI graph rendering and entity listing + +### Risk 3: Async Graphiti calls complicate Flask integration + +Mitigation: + +- start with a sync adapter +- isolate async logic inside the provider + +### Risk 4: Old graphs cannot be migrated losslessly from summaries alone + +Mitigation: + +- prefer original source-document re-ingestion +- use fact-triple fallback only where necessary + +### Risk 5: Config sprawl during rollout + +Mitigation: + +- one `GRAPH_BACKEND` switch +- one local env block +- keep cloud env vars 
optional once local mode is supported + +## Acceptance criteria + +The migration can be called complete when all of the following are true: + +- MiroFish can build a project graph locally without Zep Cloud +- The graph viewer loads local nodes and edges correctly +- Simulation setup reads local graph entities correctly +- Report generation can retrieve relevant facts from the local graph +- Live simulation memory updates work against the local graph +- Existing important projects are migrated or rebuildable +- Zep Cloud is no longer required for normal operation + +## Recommended execution order + +If this work is implemented as an engineering project, the lowest-risk order is: + +1. Add infra and config +2. Add provider abstraction +3. Keep Zep Cloud as the default provider +4. Implement local Graphiti provider +5. Validate new graph creation locally +6. Validate report search locally +7. Validate simulation entity loading and live updates locally +8. Add migration/backfill script +9. Dual-run and compare +10. Cut over new graphs +11. Migrate old graphs +12. Remove Zep Cloud dependency + +## Bottom line + +This migration is very feasible, but it should be treated as a backend provider replacement project, not a config tweak. 
+ +The key decisions that make it safe are: + +- use Graphiti plus local Neo4j as the supported local target +- keep `graph_id` as the app-level identifier and reuse it as Graphiti `group_id` +- add a provider abstraction before changing behavior +- prefer re-ingesting original source documents for old projects +- dual-run before cutover From 7bc4963f85ee61e59bab8e27a38e64e0fa33154b Mon Sep 17 00:00:00 2001 From: Anurag Date: Sat, 28 Mar 2026 19:30:15 +0530 Subject: [PATCH 06/13] feat: update graphiti local provider and add setup readme --- README-SETUP.md | 266 ++++++++++++++++++ .../graph_provider/graphiti_local_provider.py | 4 +- 2 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 README-SETUP.md diff --git a/README-SETUP.md b/README-SETUP.md new file mode 100644 index 000000000..fc7d55cfd --- /dev/null +++ b/README-SETUP.md @@ -0,0 +1,266 @@ +# MiroFish Setup Guide + +This file is a practical setup guide for the current state of this fork. +It is based on the main README, but focuses on the startup paths that are +working in this repository today. + +## What Changed + +MiroFish now supports two graph backends: + +- `zep_cloud`: hosted Zep Cloud +- `graphiti_local`: local Graphiti + Neo4j + +The local backend keeps all project graphs inside one Neo4j database and +isolates them with Graphiti `group_id`. + +## Recommended Paths + +Choose one of these: + +- Docker: run frontend, backend, and Neo4j with `docker compose` +- Local development: run frontend/backend locally and Neo4j in Docker + +## Prerequisites + +For Docker: + +- Docker Desktop or Docker Engine with Compose support + +For local development: + +- Node.js 18+ +- Python 3.11 or 3.12 +- `uv` +- Docker, if you want the local Neo4j service + +## Environment File + +Create the env file from the example: + +```bash +cp .env.example .env +``` + +## Option 1: Docker Startup + +This is the easiest way to run the full stack. + +### 1. 
Configure `.env` + +For the local Graphiti backend, a minimal working config looks like this: + +```env +GRAPH_BACKEND=graphiti_local + +LLM_API_KEY=your_llm_api_key +LLM_BASE_URL=https://api.openai.com/v1 +LLM_MODEL_NAME=gpt-4o-mini + +NEO4J_URI=bolt://neo4j:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=mirofish-local-password +NEO4J_DATABASE=neo4j +``` + +Notes: + +- `GRAPHITI_LLM_*`, `GRAPHITI_EMBEDDER_*`, and `GRAPHITI_RERANKER_*` are optional +- if they are omitted, the backend falls back to the main `LLM_*` settings + +If you want to keep using hosted Zep Cloud instead, use: + +```env +GRAPH_BACKEND=zep_cloud + +LLM_API_KEY=your_llm_api_key +LLM_BASE_URL=https://api.openai.com/v1 +LLM_MODEL_NAME=gpt-4o-mini + +ZEP_API_KEY=your_zep_api_key +``` + +### 2. Build and start + +```bash +docker compose up -d --build +``` + +### 3. Check status + +```bash +docker compose ps +docker compose logs -f +curl http://localhost:5001/health +``` + +When healthy, the backend should answer with a payload that includes: + +```json +{ + "status": "ok", + "service": "MiroFish Backend", + "graph_backend": "graphiti_local" +} +``` + +### 4. Open the app + +- Frontend: `http://localhost:3000` +- Backend: `http://localhost:5001` +- Neo4j Browser: `http://localhost:7474` + +### Useful Docker commands + +Stop the stack: + +```bash +docker compose down +``` + +Stop and remove volumes too (warning: this permanently deletes data kept in the stack's Docker volumes, which may include your local Neo4j graphs): + +```bash +docker compose down -v +``` + +Rebuild after dependency or Dockerfile changes: + +```bash +docker compose up -d --build +``` + +Start (or recreate) only Neo4j: + +```bash +docker compose up -d neo4j +``` + +## Option 2: Local Development Startup + +Use this when you want hot reload or easier debugging. + +### 1. 
Configure `.env` + +For local Graphiti, use: + +```env +GRAPH_BACKEND=graphiti_local + +LLM_API_KEY=your_llm_api_key +LLM_BASE_URL=https://api.openai.com/v1 +LLM_MODEL_NAME=gpt-4o-mini + +NEO4J_URI=bolt://localhost:7687 +NEO4J_USER=neo4j +NEO4J_PASSWORD=mirofish-local-password +NEO4J_DATABASE=neo4j +``` + +### 2. Install dependencies + +```bash +npm run setup:all +``` + +This does all of the following: + +- installs root Node dependencies +- installs frontend dependencies +- creates and syncs the backend `uv` environment +- installs `graphiti-core==0.28.2` separately into the backend venv + +### 3. Start Neo4j + +```bash +docker compose up -d neo4j +``` + +### 4. Start frontend and backend + +```bash +npm run dev +``` + +Or individually: + +```bash +npm run backend +npm run frontend +``` + +## Current Neo4j Note + +The local compose stack uses: + +- `neo4j:5.26.22-enterprise` + +This repo keeps the enterprise image as the default compose target because +existing local volumes may already use Neo4j block format. The application +logic itself is using a single Neo4j database plus Graphiti `group_id` +isolation, not one database per project. + +## Troubleshooting + +### Backend health is failing + +Check: + +- `LLM_API_KEY` is set +- `GRAPH_BACKEND` is correct +- if `GRAPH_BACKEND=graphiti_local`, `NEO4J_PASSWORD` is set +- Neo4j is running + +### Docker app builds but does not start correctly + +Watch logs: + +```bash +docker compose logs -f mirofish neo4j +``` + +### Neo4j starts but the backend cannot connect + +For Docker: + +- use `NEO4J_URI=bolt://neo4j:7687` + +For local development: + +- use `NEO4J_URI=bolt://localhost:7687` + +### You are on x86_64 and Docker build fails + +The app service currently pins: + +- `platform: linux/arm64` + +in `docker-compose.yml`. + +If your machine is not ARM64, remove or change that line before building. 
+ +## Fast Start + +If you just want the shortest path for local Graphiti in Docker: + +```bash +cp .env.example .env +``` + +Put this in `.env`: + +```env +GRAPH_BACKEND=graphiti_local +LLM_API_KEY=your_llm_api_key +NEO4J_PASSWORD=mirofish-local-password +NEO4J_URI=bolt://neo4j:7687 +NEO4J_USER=neo4j +NEO4J_DATABASE=neo4j +``` + +Then run: + +```bash +docker compose up -d --build +curl http://localhost:5001/health +``` diff --git a/backend/app/services/graph_provider/graphiti_local_provider.py b/backend/app/services/graph_provider/graphiti_local_provider.py index 14c13d991..a8f1ef4e8 100644 --- a/backend/app/services/graph_provider/graphiti_local_provider.py +++ b/backend/app/services/graph_provider/graphiti_local_provider.py @@ -80,7 +80,9 @@ class GraphitiLocalGraphProvider(BaseGraphProvider): """Graphiti + Neo4j backed graph provider.""" _initialized = False - _init_lock = threading.Lock() + # Startup can flow through ensure_initialized() -> _ensure_client_ready(), so this + # lock must be re-entrant to avoid self-deadlocking during app bootstrap. 
+ _init_lock = threading.RLock() def __init__(self): try: From 6f29db53bbc66985f2be30e2095003a4e120f55a Mon Sep 17 00:00:00 2001 From: Anurag Date: Sat, 28 Mar 2026 20:28:56 +0530 Subject: [PATCH 07/13] chore(frontend): translate remaining Chinese UI text to English --- frontend/src/components/GraphPanel.vue | 168 +++++----- frontend/src/components/HistoryDatabase.vue | 246 +++++++-------- frontend/src/components/Step1GraphBuild.vue | 4 +- frontend/src/components/Step2EnvSetup.vue | 62 ++-- frontend/src/components/Step3Simulation.vue | 94 +++--- frontend/src/components/Step4Report.vue | 308 ++++++++----------- frontend/src/components/Step5Interaction.vue | 64 ++-- frontend/src/views/Home.vue | 87 +++--- frontend/src/views/Process.vue | 246 +++++++-------- 9 files changed, 624 insertions(+), 655 deletions(-) diff --git a/frontend/src/components/GraphPanel.vue b/frontend/src/components/GraphPanel.vue index 1e53c838c..9d2f5edc5 100644 --- a/frontend/src/components/GraphPanel.vue +++ b/frontend/src/components/GraphPanel.vue @@ -2,7 +2,7 @@
Graph Relationship Visualization - +
- +
- +
@@ -30,7 +30,7 @@ {{ isSimulating ? 'GraphRAG long-term and short-term memory updating live' : 'Live updating...' }}
- +
@@ -48,7 +48,7 @@
- +
{{ selectedItem.type === 'node' ? 'Node Details' : 'Relationship' }} @@ -58,7 +58,7 @@
- +
Name: @@ -101,9 +101,9 @@
- +
- + - +