Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
.DS_Store
Thumbs.db

# 环境变量(保护敏感信息)
# Environment variables (protect sensitive information)
.env
.env.local
.env.*.local
Expand Down Expand Up @@ -36,7 +36,7 @@ yarn-error.log*
*.swp
*.swo

# 测试
# Test artifacts
.pytest_cache/
.coverage
htmlcov/
Expand All @@ -45,17 +45,17 @@ htmlcov/
.cursor/
.claude/

# 文档与测试程序
# Local docs and test programs
mydoc/
mytest/

# 日志文件
# Log files
backend/logs/
*.log

# 上传文件
# Uploaded files
backend/uploads/

# Docker 数据
# Docker data
data/backend/venv311/
backend/venv311/
12 changes: 6 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
FROM python:3.11

# 安装 Node.js (满足 >=18)及必要工具
# Install Node.js (version 18+) and required tools
RUN apt-get update \
&& apt-get install -y --no-install-recommends nodejs npm \
&& rm -rf /var/lib/apt/lists/*

# 从 uv 官方镜像复制 uv
# Copy `uv` from the official image
COPY --from=ghcr.io/astral-sh/uv:0.9.26 /uv /uvx /bin/

WORKDIR /app

# 先复制依赖描述文件以利用缓存
# Copy dependency manifests first to maximize layer caching
COPY package.json package-lock.json ./
COPY frontend/package.json frontend/package-lock.json ./frontend/
COPY backend/pyproject.toml backend/uv.lock ./backend/

# 安装依赖(Node + Python
# Install dependencies (Node + Python)
RUN npm ci \
&& npm ci --prefix frontend \
&& cd backend && uv sync

# 复制项目源码
# Copy the project source
COPY . .

EXPOSE 3000 5001

# 同时启动前后端(开发模式)
# Start the frontend and backend together in development mode
CMD ["npm", "run", "dev"]
23 changes: 12 additions & 11 deletions backend/app/services/report_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -651,11 +651,11 @@ def to_dict(self) -> Dict[str, Any]:
> "Certain groups will state: original content..."
- These quotes are core evidence of simulation predictions

3. [Language Consistency - Quoted Content Must Be Translated to Report Language]
- Tool returned content may contain English or mixed Chinese-English expressions
- If the simulation requirement and source material are in Chinese, the report must be entirely in Chinese
- When you quote English or mixed Chinese-English content from tools, you must translate it to fluent Chinese before including it in the report
- When translating, preserve the original meaning and ensure natural expression
3. [Language Consistency - Match the User's Input Language]
- Tool returned content may contain language inconsistencies or mixed-language expressions
- Write the report in the primary language used by the simulation requirement and source material
- When quoting tool output that is in a different language, translate it into the report language before including it
- Preserve the original meaning when translating and keep the phrasing natural
- This rule applies to both regular text and quoted blocks (> format)

4. [Faithfully Present Prediction Results]
Expand Down Expand Up @@ -1456,7 +1456,7 @@ def _generate_section_react(
unused_tools = all_tools - used_tools
unused_hint = ""
if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION:
unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list="".join(unused_tools))
unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list=", ".join(unused_tools))

messages.append({"role": "assistant", "content": response})
messages.append({
Expand Down Expand Up @@ -1961,13 +1961,14 @@ def _get_console_log_path(cls, report_id: str) -> str:
@classmethod
def get_console_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]:
"""
Getconsolelogcontent

This isReportgenerateduring processconsoleoutputlog(INFO、WARNINGetc),
and agent_log.jsonl structured logsdifferent。
Get console log content.

This contains console output produced during report generation
(INFO, WARNING, etc.) and differs from the structured logs in
`agent_log.jsonl`.

Args:
report_id: ReportID
report_id: Report ID
        from_line: Line number to start reading from (for incremental fetching; 0 means read from the beginning)

Returns:
Expand Down
46 changes: 23 additions & 23 deletions backend/app/services/simulation_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@

logger = get_logger('mirofish.simulation_config')

# Time zone configuration for Chinese work schedules (Beijing Time)
CHINA_TIMEZONE_CONFIG = {
# Default social activity rhythm configuration
DEFAULT_ACTIVITY_RHYTHM_CONFIG = {
# Dead hours (almost no activity)
"dead_hours": [0, 1, 2, 3, 4, 5],
# Morning hours (gradually waking up)
Expand Down Expand Up @@ -81,7 +81,7 @@ class AgentActivityConfig:

@dataclass
class TimeSimulationConfig:
"""Time simulation configuration (based on Chinese work schedule habits)"""
"""Time simulation configuration based on a generic social activity rhythm"""
# Total simulation time (simulation hours)
total_simulation_hours: int = 72 # Default 72 hours (3 days)

Expand All @@ -92,7 +92,7 @@ class TimeSimulationConfig:
agents_per_hour_min: int = 5
agents_per_hour_max: int = 20

# Peak hours (evening 19-22, most active time for Chinese people)
# Peak hours (evening 19-22 by default)
peak_hours: List[int] = field(default_factory=lambda: [19, 20, 21, 22])
peak_activity_multiplier: float = 1.5

Expand Down Expand Up @@ -546,17 +546,18 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An
## Task
Please generate time configuration JSON.

### Basic principles (for reference only, adjust flexibly based on event nature and participant characteristics):
- User base is Chinese people, must follow Beijing Time work schedule habits
- 0-5am almost no activity (activity coefficient 0.05)
- 6-8am gradually active (activity coefficient 0.4)
- 9-18 work time moderately active (activity coefficient 0.7)
- 19-22 evening is peak period (activity coefficient 1.5)
- After 23 activity decreases (activity coefficient 0.5)
- General rule: low activity early morning, gradually increasing morning, moderate work time, evening peak
- **Important**: Example values below are for reference only, adjust specific time periods based on event nature and participant characteristics
- Example: student peak may be 21-23; media active all day; official institutions only during work hours
- Example: breaking news may cause late night discussions, off_peak_hours can be shortened appropriately
### Basic principles (for reference only, adjust flexibly based on the event nature, participant characteristics, and implied locale in the source material):
- Do not assume any specific country, timezone, or culture unless the simulation requirement or source material clearly indicates one
- Use a realistic daily rhythm for the population being simulated
- 0-5am is often low activity (activity coefficient around 0.05), but adjust if the scenario suggests otherwise
- 6-8am is often a ramp-up period (activity coefficient around 0.4)
- 9-18 is often moderately active for workday-oriented populations (activity coefficient around 0.7)
- 19-22 is often an evening peak period (activity coefficient around 1.5)
- After 23 activity often decreases (activity coefficient around 0.5)
- General rule: low activity early morning, gradually increasing in the morning, moderate daytime activity, evening peak
- **Important**: Example values below are only defaults. Adjust specific time periods based on the event nature, participant characteristics, geography, and platform behavior implied by the inputs
- Example: students may peak later in the evening; media may stay active most of the day; official institutions may be concentrated in work hours
- Example: breaking news may cause late-night discussion, so off_peak_hours can be shortened appropriately

### Return JSON format (no markdown)

Expand Down Expand Up @@ -584,7 +585,7 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An
- work_hours (int array): Work hours
- reasoning (string): Brief explanation for this configuration"""

system_prompt = "You are a social media simulation expert. Return pure JSON format, time configuration must follow Chinese work schedule habits."
system_prompt = "You are a social media simulation expert. Return pure JSON format. Infer a realistic activity rhythm from the provided context, and do not assume any specific country or timezone unless the input clearly implies one."

try:
return self._call_llm_with_retry(prompt, system_prompt)
Expand All @@ -593,7 +594,7 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An
return self._get_default_time_config(num_entities)

def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
"""Get default time configuration (Chinese work schedule)"""
"""Get the default time configuration"""
return {
"total_simulation_hours": 72,
"minutes_per_round": 60, # 1 hour per round, speed up time
Expand All @@ -603,7 +604,7 @@ def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]:
"off_peak_hours": [0, 1, 2, 3, 4, 5],
"morning_hours": [6, 7, 8],
"work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
"reasoning": "Using default Chinese work schedule configuration (1 hour per round)"
"reasoning": "Using the default general-purpose social activity rhythm configuration (1 hour per round)"
}

def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> TimeSimulationConfig:
Expand Down Expand Up @@ -838,7 +839,7 @@ def _generate_agent_configs_batch(

## Task
Generate activity configuration for each entity, noting:
- **Time follows Chinese work schedule**: Almost no activity 0-5am, most active 19-22
- **Time should follow the likely audience rhythm implied by the inputs**: use the provided context to estimate low-activity and peak-activity hours instead of assuming a specific country
- **Official institutions** (University/GovernmentAgency): Low activity (0.1-0.3), active during work hours (9-17), slow response (60-240 min), high influence (2.5-3.0)
- **Media** (MediaOutlet): Medium activity (0.4-0.6), active all day (8-23), fast response (5-30 min), high influence (2.0-2.5)
- **Individuals** (Student/Person/Alumni): High activity (0.6-0.9), mainly evening activity (18-23), fast response (1-15 min), low influence (0.8-1.2)
Expand All @@ -852,7 +853,7 @@ def _generate_agent_configs_batch(
"activity_level": <0.0-1.0>,
"posts_per_hour": <posting frequency>,
"comments_per_hour": <comment frequency>,
"active_hours": [<active hours list, consider Chinese work schedule>],
"active_hours": [<active hours list, inferred from role, event type, and likely audience rhythm>],
"response_delay_min": <minimum response delay minutes>,
"response_delay_max": <maximum response delay minutes>,
"sentiment_bias": <-1.0 to 1.0>,
Expand All @@ -863,7 +864,7 @@ def _generate_agent_configs_batch(
]
}}"""

system_prompt = "You are a social media behavior analysis expert. Return pure JSON, configuration must follow Chinese work schedule habits."
system_prompt = "You are a social media behavior analysis expert. Return pure JSON. Infer activity schedules from the entities, event type, and provided context, and do not assume any specific country or timezone unless the input clearly implies one."

try:
result = self._call_llm_with_retry(prompt, system_prompt)
Expand Down Expand Up @@ -902,7 +903,7 @@ def _generate_agent_configs_batch(
return configs

def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]:
"""Generate single agent configuration based on rules (Chinese work schedule)"""
"""Generate a single agent configuration based on generic role-driven activity rules"""
entity_type = (entity.get_entity_type() or "Unknown").lower()

if entity_type in ["university", "governmentagency", "ngo"]:
Expand Down Expand Up @@ -984,4 +985,3 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]:
"influence_weight": 1.0
}


3 changes: 1 addition & 2 deletions backend/app/utils/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def split_text_into_chunks(
# Try to split at sentence boundaries
if end < len(text):
# Find nearest sentence ending
for sep in ['', '', '', '.\n', '!\n', '?\n', '\n\n', '. ', '! ', '? ']:
for sep in ['\u3002', '\uFF01', '\uFF1F', '.\n', '!\n', '?\n', '\n\n', '. ', '! ', '? ']:
last_sep = text[start:end].rfind(sep)
if last_sep != -1 and last_sep > chunk_size * 0.3:
end = start + last_sep + len(sep)
Expand All @@ -186,4 +186,3 @@ def split_text_into_chunks(
start = end - overlap if end < len(text) else len(text)

return chunks

12 changes: 6 additions & 6 deletions backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,27 @@ authors = [
]

dependencies = [
# 核心框架
# Core framework
"flask>=3.0.0",
"flask-cors>=6.0.0",

# LLM 相关
# LLM integrations
"openai>=1.0.0",

# Neo4j graph database driver
"neo4j>=5.15.0",

# OASIS 社交媒体模拟
# OASIS social media simulation
"camel-oasis==0.2.5",
"camel-ai==0.2.78",

# 文件处理
# File processing
"PyMuPDF>=1.24.0",
# 编码检测(支持非UTF-8编码的文本文件)
# Encoding detection (supports non-UTF-8 text files)
"charset-normalizer>=3.0.0",
"chardet>=5.0.0",

# 工具库
# Utility libraries
"python-dotenv>=1.0.0",
"pydantic>=2.0.0",
]
Expand Down
20 changes: 10 additions & 10 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,33 +5,33 @@
# Install: pip install -r requirements.txt
# ===========================================

# ============= 核心框架 =============
# ============= Core Framework =============
flask>=3.0.0
flask-cors>=6.0.0

# ============= LLM 相关 =============
# OpenAI SDK(统一使用 OpenAI 格式调用 LLM / Ollama
# ============= LLM Integrations =============
# OpenAI SDK (used with the unified OpenAI-style interface for LLM / Ollama)
openai>=1.0.0
# HTTP client for Ollama embedding API
requests>=2.28.0

# ============= Neo4j Graph Database =============
neo4j>=5.15.0

# ============= OASIS 社交媒体模拟 =============
# OASIS 社交模拟框架
# ============= OASIS Social Media Simulation =============
# OASIS social simulation framework
camel-oasis==0.2.5
camel-ai==0.2.78

# ============= 文件处理 =============
# ============= File Processing =============
PyMuPDF>=1.24.0
# 编码检测(支持非UTF-8编码的文本文件)
# Encoding detection (supports non-UTF-8 text files)
charset-normalizer>=3.0.0
chardet>=5.0.0

# ============= 工具库 =============
# 环境变量加载
# ============= Utility Libraries =============
# Environment variable loading
python-dotenv>=1.0.0

# 数据验证
# Data validation
pydantic>=2.0.0
4 changes: 2 additions & 2 deletions frontend/src/components/Step4Report.vue
Original file line number Diff line number Diff line change
Expand Up @@ -1286,8 +1286,8 @@ const InterviewDisplay = {
// Clean quote text - remove leading list numbers to avoid double numbering
const cleanQuoteText = (text) => {
if (!text) return ''
// Remove leading patterns like "1. ", "2. ", "1、", "(1)", "(1)" etc.
return text.replace(/^\s*\d+[\.\、\))]\s*/, '').trim()
// Remove leading patterns like "1. ", "2. ", "1,", "(1)", or full-width numbered variants.
return text.replace(/^\s*\d+[\.\u3001\)\uFF09]\s*/, '').trim()
}

const activeIndex = ref(0)
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/views/Process.vue
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ const formatDate = (dateStr) => {
if (!dateStr) return '-'
try {
const date = new Date(dateStr)
return date.toLocaleString('zh-CN', {
return date.toLocaleString('en-US', {
year: 'numeric',
month: 'short',
day: 'numeric',
Expand Down Expand Up @@ -2057,4 +2057,4 @@ onUnmounted(() => {
display: none;
}
}
</style>
</style>