diff --git a/.gitignore b/.gitignore index 5d1e9ac..cac7987 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ .DS_Store Thumbs.db -# 环境变量(保护敏感信息) +# Environment variables (protect sensitive information) .env .env.local .env.*.local @@ -36,7 +36,7 @@ yarn-error.log* *.swp *.swo -# 测试 +# Test artifacts .pytest_cache/ .coverage htmlcov/ @@ -45,17 +45,17 @@ htmlcov/ .cursor/ .claude/ -# 文档与测试程序 +# Local docs and test programs mydoc/ mytest/ -# 日志文件 +# Log files backend/logs/ *.log -# 上传文件 +# Uploaded files backend/uploads/ -# Docker 数据 +# Docker data data/backend/venv311/ backend/venv311/ diff --git a/Dockerfile b/Dockerfile index df28213..4947493 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,29 @@ FROM python:3.11 -# 安装 Node.js (满足 >=18)及必要工具 +# Install Node.js (version 18+) and required tools RUN apt-get update \ && apt-get install -y --no-install-recommends nodejs npm \ && rm -rf /var/lib/apt/lists/* -# 从 uv 官方镜像复制 uv +# Copy `uv` from the official image COPY --from=ghcr.io/astral-sh/uv:0.9.26 /uv /uvx /bin/ WORKDIR /app -# 先复制依赖描述文件以利用缓存 +# Copy dependency manifests first to maximize layer caching COPY package.json package-lock.json ./ COPY frontend/package.json frontend/package-lock.json ./frontend/ COPY backend/pyproject.toml backend/uv.lock ./backend/ -# 安装依赖(Node + Python) +# Install dependencies (Node + Python) RUN npm ci \ && npm ci --prefix frontend \ && cd backend && uv sync -# 复制项目源码 +# Copy the project source COPY . . EXPOSE 3000 5001 -# 同时启动前后端(开发模式) +# Start the frontend and backend together in development mode CMD ["npm", "run", "dev"] diff --git a/backend/app/services/report_agent.py b/backend/app/services/report_agent.py index 0f8a4d1..4fd0544 100644 --- a/backend/app/services/report_agent.py +++ b/backend/app/services/report_agent.py @@ -651,11 +651,11 @@ def to_dict(self) -> Dict[str, Any]: > "Certain groups will state: original content..." - These quotes are core evidence of simulation predictions -3. 
[Language Consistency - Quoted Content Must Be Translated to Report Language] - - Tool returned content may contain English or mixed Chinese-English expressions - - If the simulation requirement and source material are in Chinese, the report must be entirely in Chinese - - When you quote English or mixed Chinese-English content from tools, you must translate it to fluent Chinese before including it in the report - - When translating, preserve the original meaning and ensure natural expression +3. [Language Consistency - Match the User's Input Language] + - Tool returned content may contain language inconsistencies or mixed-language expressions + - Write the report in the primary language used by the simulation requirement and source material + - When quoting tool output that is in a different language, translate it into the report language before including it + - Preserve the original meaning when translating and keep the phrasing natural - This rule applies to both regular text and quoted blocks (> format) 4. [Faithfully Present Prediction Results] @@ -1456,7 +1456,7 @@ def _generate_section_react( unused_tools = all_tools - used_tools unused_hint = "" if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION: - unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list="、".join(unused_tools)) + unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list=", ".join(unused_tools)) messages.append({"role": "assistant", "content": response}) messages.append({ @@ -1961,13 +1961,14 @@ def _get_console_log_path(cls, report_id: str) -> str: @classmethod def get_console_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]: """ - Getconsolelogcontent - - This isReportgenerateduring processconsoleoutputlog(INFO、WARNINGetc), - and agent_log.jsonl structured logsdifferent。 + Get console log content. + + This contains console output produced during report generation + (INFO, WARNING, etc.) and differs from the structured logs in + `agent_log.jsonl`. 
Args: - report_id: ReportID + report_id: Report ID from_line: the line number to start reading from (for incremental fetching; 0 means read from the beginning) Returns: diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index f04b4be..b4b5c99 100644 --- a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -24,8 +24,8 @@ logger = get_logger('mirofish.simulation_config') -# Time zone configuration for Chinese work schedules (Beijing Time) -CHINA_TIMEZONE_CONFIG = { +# Default social activity rhythm configuration +DEFAULT_ACTIVITY_RHYTHM_CONFIG = { # Dead hours (almost no activity) "dead_hours": [0, 1, 2, 3, 4, 5], # Morning hours (gradually waking up) @@ -81,7 +81,7 @@ class AgentActivityConfig: @dataclass class TimeSimulationConfig: - """Time simulation configuration (based on Chinese work schedule habits)""" + """Time simulation configuration based on a generic social activity rhythm""" # Total simulation time (simulation hours) total_simulation_hours: int = 72 # Default 72 hours (3 days) @@ -92,7 +92,7 @@ class TimeSimulationConfig: agents_per_hour_min: int = 5 agents_per_hour_max: int = 20 - # Peak hours (evening 19-22, most active time for Chinese people) + # Peak hours (evening 19-22 by default) peak_hours: List[int] = field(default_factory=lambda: [19, 20, 21, 22]) peak_activity_multiplier: float = 1.5 @@ -546,17 +546,18 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An ## Task Please generate time configuration JSON. 
-### Basic principles (for reference only, adjust flexibly based on event nature and participant characteristics): -- User base is Chinese people, must follow Beijing Time work schedule habits -- 0-5am almost no activity (activity coefficient 0.05) -- 6-8am gradually active (activity coefficient 0.4) -- 9-18 work time moderately active (activity coefficient 0.7) -- 19-22 evening is peak period (activity coefficient 1.5) -- After 23 activity decreases (activity coefficient 0.5) -- General rule: low activity early morning, gradually increasing morning, moderate work time, evening peak -- **Important**: Example values below are for reference only, adjust specific time periods based on event nature and participant characteristics - - Example: student peak may be 21-23; media active all day; official institutions only during work hours - - Example: breaking news may cause late night discussions, off_peak_hours can be shortened appropriately +### Basic principles (for reference only, adjust flexibly based on the event nature, participant characteristics, and implied locale in the source material): +- Do not assume any specific country, timezone, or culture unless the simulation requirement or source material clearly indicates one +- Use a realistic daily rhythm for the population being simulated +- 0-5am is often low activity (activity coefficient around 0.05), but adjust if the scenario suggests otherwise +- 6-8am is often a ramp-up period (activity coefficient around 0.4) +- 9-18 is often moderately active for workday-oriented populations (activity coefficient around 0.7) +- 19-22 is often an evening peak period (activity coefficient around 1.5) +- After 23 activity often decreases (activity coefficient around 0.5) +- General rule: low activity early morning, gradually increasing in the morning, moderate daytime activity, evening peak +- **Important**: Example values below are only defaults. 
Adjust specific time periods based on the event nature, participant characteristics, geography, and platform behavior implied by the inputs + - Example: students may peak later in the evening; media may stay active most of the day; official institutions may be concentrated in work hours + - Example: breaking news may cause late-night discussion, so off_peak_hours can be shortened appropriately ### Return JSON format (no markdown) @@ -584,7 +585,7 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An - work_hours (int array): Work hours - reasoning (string): Brief explanation for this configuration""" - system_prompt = "You are a social media simulation expert. Return pure JSON format, time configuration must follow Chinese work schedule habits." + system_prompt = "You are a social media simulation expert. Return pure JSON format. Infer a realistic activity rhythm from the provided context, and do not assume any specific country or timezone unless the input clearly implies one." 
try: return self._call_llm_with_retry(prompt, system_prompt) @@ -593,7 +594,7 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An return self._get_default_time_config(num_entities) def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]: - """Get default time configuration (Chinese work schedule)""" + """Get the default time configuration""" return { "total_simulation_hours": 72, "minutes_per_round": 60, # 1 hour per round, speed up time @@ -603,7 +604,7 @@ def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]: "off_peak_hours": [0, 1, 2, 3, 4, 5], "morning_hours": [6, 7, 8], "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - "reasoning": "Using default Chinese work schedule configuration (1 hour per round)" + "reasoning": "Using the default general-purpose social activity rhythm configuration (1 hour per round)" } def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> TimeSimulationConfig: @@ -838,7 +839,7 @@ def _generate_agent_configs_batch( ## Task Generate activity configuration for each entity, noting: -- **Time follows Chinese work schedule**: Almost no activity 0-5am, most active 19-22 +- **Time should follow the likely audience rhythm implied by the inputs**: use the provided context to estimate low-activity and peak-activity hours instead of assuming a specific country - **Official institutions** (University/GovernmentAgency): Low activity (0.1-0.3), active during work hours (9-17), slow response (60-240 min), high influence (2.5-3.0) - **Media** (MediaOutlet): Medium activity (0.4-0.6), active all day (8-23), fast response (5-30 min), high influence (2.0-2.5) - **Individuals** (Student/Person/Alumni): High activity (0.6-0.9), mainly evening activity (18-23), fast response (1-15 min), low influence (0.8-1.2) @@ -852,7 +853,7 @@ def _generate_agent_configs_batch( "activity_level": <0.0-1.0>, "posts_per_hour": , "comments_per_hour": , - "active_hours": [], + 
"active_hours": [], "response_delay_min": , "response_delay_max": , "sentiment_bias": <-1.0 to 1.0>, @@ -863,7 +864,7 @@ def _generate_agent_configs_batch( ] }}""" - system_prompt = "You are a social media behavior analysis expert. Return pure JSON, configuration must follow Chinese work schedule habits." + system_prompt = "You are a social media behavior analysis expert. Return pure JSON. Infer activity schedules from the entities, event type, and provided context, and do not assume any specific country or timezone unless the input clearly implies one." try: result = self._call_llm_with_retry(prompt, system_prompt) @@ -902,7 +903,7 @@ def _generate_agent_configs_batch( return configs def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: - """Generate single agent configuration based on rules (Chinese work schedule)""" + """Generate a single agent configuration based on generic role-driven activity rules""" entity_type = (entity.get_entity_type() or "Unknown").lower() if entity_type in ["university", "governmentagency", "ngo"]: @@ -984,4 +985,3 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: "influence_weight": 1.0 } - diff --git a/backend/app/utils/file_parser.py b/backend/app/utils/file_parser.py index 250a41e..a54aa39 100644 --- a/backend/app/utils/file_parser.py +++ b/backend/app/utils/file_parser.py @@ -172,7 +172,7 @@ def split_text_into_chunks( # Try to split at sentence boundaries if end < len(text): # Find nearest sentence ending - for sep in ['。', '!', '?', '.\n', '!\n', '?\n', '\n\n', '. ', '! ', '? ']: + for sep in ['\u3002', '\uFF01', '\uFF1F', '.\n', '!\n', '?\n', '\n\n', '. ', '! ', '? 
']: last_sep = text[start:end].rfind(sep) if last_sep != -1 and last_sep > chunk_size * 0.3: end = start + last_sep + len(sep) @@ -186,4 +186,3 @@ def split_text_into_chunks( start = end - overlap if end < len(text) else len(text) return chunks - diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 233ef23..5f382ef 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -9,27 +9,27 @@ authors = [ ] dependencies = [ - # 核心框架 + # Core framework "flask>=3.0.0", "flask-cors>=6.0.0", - # LLM 相关 + # LLM integrations "openai>=1.0.0", # Neo4j graph database driver "neo4j>=5.15.0", - # OASIS 社交媒体模拟 + # OASIS social media simulation "camel-oasis==0.2.5", "camel-ai==0.2.78", - # 文件处理 + # File processing "PyMuPDF>=1.24.0", - # 编码检测(支持非UTF-8编码的文本文件) + # Encoding detection (supports non-UTF-8 text files) "charset-normalizer>=3.0.0", "chardet>=5.0.0", - # 工具库 + # Utility libraries "python-dotenv>=1.0.0", "pydantic>=2.0.0", ] diff --git a/backend/requirements.txt b/backend/requirements.txt index 5cffdbf..d342c92 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -5,12 +5,12 @@ # Install: pip install -r requirements.txt # =========================================== -# ============= 核心框架 ============= +# ============= Core Framework ============= flask>=3.0.0 flask-cors>=6.0.0 -# ============= LLM 相关 ============= -# OpenAI SDK(统一使用 OpenAI 格式调用 LLM / Ollama) +# ============= LLM Integrations ============= +# OpenAI SDK (used with the unified OpenAI-style interface for LLM / Ollama) openai>=1.0.0 # HTTP client for Ollama embedding API requests>=2.28.0 @@ -18,20 +18,20 @@ requests>=2.28.0 # ============= Neo4j Graph Database ============= neo4j>=5.15.0 -# ============= OASIS 社交媒体模拟 ============= -# OASIS 社交模拟框架 +# ============= OASIS Social Media Simulation ============= +# OASIS social simulation framework camel-oasis==0.2.5 camel-ai==0.2.78 -# ============= 文件处理 ============= +# ============= File Processing ============= 
PyMuPDF>=1.24.0 -# 编码检测(支持非UTF-8编码的文本文件) +# Encoding detection (supports non-UTF-8 text files) charset-normalizer>=3.0.0 chardet>=5.0.0 -# ============= 工具库 ============= -# 环境变量加载 +# ============= Utility Libraries ============= +# Environment variable loading python-dotenv>=1.0.0 -# 数据验证 +# Data validation pydantic>=2.0.0 diff --git a/frontend/src/components/Step4Report.vue b/frontend/src/components/Step4Report.vue index 3557d68..22f91db 100644 --- a/frontend/src/components/Step4Report.vue +++ b/frontend/src/components/Step4Report.vue @@ -1286,8 +1286,8 @@ const InterviewDisplay = { // Clean quote text - remove leading list numbers to avoid double numbering const cleanQuoteText = (text) => { if (!text) return '' - // Remove leading patterns like "1. ", "2. ", "1、", "(1)", "(1)" etc. - return text.replace(/^\s*\d+[\.\、\))]\s*/, '').trim() + // Remove leading patterns like "1. ", "2. ", "1、" (U+3001), or full-width numbered variants. + return text.replace(/^\s*\d+[\.\u3001\)\uFF09]\s*/, '').trim() } const activeIndex = ref(0) diff --git a/frontend/src/views/Process.vue b/frontend/src/views/Process.vue index 6bb930d..18b6a73 100644 --- a/frontend/src/views/Process.vue +++ b/frontend/src/views/Process.vue @@ -503,7 +503,7 @@ const formatDate = (dateStr) => { if (!dateStr) return '-' try { const date = new Date(dateStr) - return date.toLocaleString('zh-CN', { + return date.toLocaleString('en-US', { year: 'numeric', month: 'short', day: 'numeric', @@ -2057,4 +2057,4 @@ onUnmounted(() => { display: none; } } - \ No newline at end of file +