From 834c0245ce35dec8d01ce57576d93eff5d48f734 Mon Sep 17 00:00:00 2001
From: bytecraftii
Date: Wed, 21 Jan 2026 17:27:46 -0800
Subject: [PATCH 01/18] fix task not stopping when model key is expired

---
 .env.development                           |   14 +-
 CONTRIBUTING.md                            |    4 +-
 backend/.pre-commit-config.yaml            |   87 ++
 backend/app/controller/chat_controller.py  |  243 +++--
 backend/app/service/chat_service.py        | 1114 ++++++++++++++-------
 backend/app/service/error_handler.py       |   50 +
 backend/app/utils/agent.py                 |  600 +++++------
 utils/traceroot_wrapper.py                 |    3 +-
 8 files changed, 1349 insertions(+), 766 deletions(-)
 create mode 100644 backend/.pre-commit-config.yaml
 create mode 100644 backend/app/service/error_handler.py

diff --git a/.env.development b/.env.development
index 9d26a2f4f..f9436b1bf 100644
--- a/.env.development
+++ b/.env.development
@@ -1,11 +1,10 @@
 VITE_BASE_URL=/api
-VITE_PROXY_URL=https://dev.eigent.ai
+# VITE_PROXY_URL=https://dev.eigent.ai
+# VITE_USE_LOCAL_PROXY=false
 
-VITE_USE_LOCAL_PROXY=false
-
-# VITE_PROXY_URL=http://localhost:3001
-# VITE_USE_LOCAL_PROXY=true
+VITE_PROXY_URL=http://localhost:3001
+VITE_USE_LOCAL_PROXY=true
 
 TRACEROOT_TOKEN=your_traceroot_token_here
@@ -27,4 +26,7 @@ TRACEROOT_ENABLE_LOG_CONSOLE_EXPORT=false
 
 TRACEROOT_TRACER_VERBOSE=false
 
-TRACEROOT_LOGGER_VERBOSE=false
\ No newline at end of file
+TRACEROOT_LOGGER_VERBOSE=false
+
+# Disable OpenTelemetry SDK completely
+OTEL_SDK_DISABLED=true
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8af8f38bc..50b0dd068 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -235,7 +235,9 @@ npm run dev
 
 # In a separate terminal, start the backend server
 cd eigent/server
-docker compose up
+docker compose up -d
+# Stream the logs if needed
+docker compose logs -f
 ```
 
 To run the application locally in developer mode:
diff --git a/backend/.pre-commit-config.yaml b/backend/.pre-commit-config.yaml
new file mode 100644
index 000000000..ff741a850
--- /dev/null
+++ b/backend/.pre-commit-config.yaml
@@ -0,0 +1,87 @@
+ci:
+  # https://pre-commit.ci/#configuration
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: monthly
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v6.0.0
+    hooks:
+      - id: no-commit-to-branch
+        name: No commits to master
+      - id: end-of-file-fixer
+        name: End-of-file fixer
+      - id: mixed-line-ending
+        name: Mixed line ending
+        args: [--fix, lf]
+      - id: trailing-whitespace
+        name: Remove trailing whitespaces
+      - id: check-toml
+        name: Check toml
+      - id: check-yaml
+        name: Check yaml
+
+  - repo: https://github.com/adrienverge/yamllint.git
+    rev: v1.37.1
+    hooks:
+      - id: yamllint
+        name: Lint yaml
+        args: [-d, '{extends: default, rules: {line-length: disable, document-start: disable, truthy: {level: error}, braces: {max-spaces-inside: 1}}}']
+
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.21.0
+    hooks:
+      - id: pyupgrade
+        name: Upgrade Python syntax
+        args: [--py38-plus]
+
+  - repo: https://github.com/PyCQA/autoflake
+    rev: v2.3.1
+    hooks:
+      - id: autoflake
+        name: Remove unused imports and variables
+        args: [
+          --remove-all-unused-imports,
+          --remove-unused-variables,
+          --remove-duplicate-keys,
+          --ignore-init-module-imports,
+          --in-place,
+        ]
+
+  - repo: https://github.com/google/yapf
+    rev: v0.43.0
+    hooks:
+      - id: yapf
+        name: Format code
+        additional_dependencies: [toml]
+
+  - repo: https://github.com/pycqa/isort
+    rev: 7.0.0
+    hooks:
+      - id: isort
+        name: Sort imports
+
+  - repo: 
https://github.com/PyCQA/flake8 + rev: 7.3.0 + hooks: + - id: flake8 + name: Check PEP8 + additional_dependencies: [Flake8-pyproject] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.3 + hooks: + - id: ruff + name: Ruff formatting + args: [--fix, --exit-non-zero-on-fix] + + - repo: https://github.com/executablebooks/mdformat + rev: 0.7.22 + hooks: + - id: mdformat + name: Format Markdown + additional_dependencies: + - mdformat-gfm + - mdformat_frontmatter + - mdformat_footnote diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index b3b8db9aa..83756df81 100644 --- a/backend/app/controller/chat_controller.py +++ b/backend/app/controller/chat_controller.py @@ -3,33 +3,24 @@ import re import time from pathlib import Path -from dotenv import load_dotenv -from fastapi import APIRouter, HTTPException, Request, Response -from fastapi.responses import StreamingResponse -from utils import traceroot_wrapper as traceroot + from app.component import code +from app.component.environment import set_user_env_path from app.exception.exception import UserException -from app.model.chat import Chat, HumanReply, McpServers, Status, SupplementChat, AddTaskRequest, sse_json +from app.model.chat import (AddTaskRequest, Chat, HumanReply, McpServers, + Status, SupplementChat, sse_json) from app.service.chat_service import step_solve -from app.service.task import ( - Action, - ActionImproveData, - ActionInstallMcpData, - ActionStopData, - ActionSupplementData, - ActionAddTaskData, - ActionRemoveTaskData, - ActionSkipTaskData, - get_or_create_task_lock, - get_task_lock, - set_current_task_id, - delete_task_lock, - task_locks, -) -from app.component.environment import set_user_env_path -from app.utils.workforce import Workforce -from camel.tasks.task import Task +from app.service.task import (Action, ActionAddTaskData, ActionImproveData, + ActionInstallMcpData, ActionRemoveTaskData, + ActionSkipTaskData, ActionStopData, + ActionSupplementData, delete_task_lock, + get_or_create_task_lock, get_task_lock, + set_current_task_id, task_locks) +from dotenv import load_dotenv +from fastapi import APIRouter, Request, Response +from fastapi.responses import StreamingResponse +from utils import traceroot_wrapper as traceroot router = APIRouter() @@ -55,23 +46,30 @@ async def _cleanup_task_lock_safe(task_lock, reason: str) -> bool: # Check if task_lock still exists before attempting cleanup if task_lock.id not in task_locks: - chat_logger.debug(f"[{reason}] Task lock already removed, skipping cleanup", - extra={"task_id": task_lock.id}) + chat_logger.debug( + f"[{reason}] Task lock already removed, skipping cleanup", + extra={"task_id": task_lock.id}) return False try: task_lock.status = Status.done await delete_task_lock(task_lock.id) chat_logger.info(f"[{reason}] Task lock cleanup completed", - extra={"task_id": task_lock.id}) + extra={"task_id": task_lock.id}) return True except Exception as e: chat_logger.error(f"[{reason}] Failed to cleanup task lock", - extra={"task_id": task_lock.id, "error": str(e)}, exc_info=True) + extra={ + "task_id": task_lock.id, + "error": str(e) + }, + exc_info=True) return False -async def timeout_stream_wrapper(stream_generator, timeout_seconds: int = SSE_TIMEOUT_SECONDS, task_lock=None): +async def timeout_stream_wrapper(stream_generator, + timeout_seconds: int = SSE_TIMEOUT_SECONDS, + task_lock=None): """Wraps a stream generator with timeout handling. Closes the SSE connection if no data is received within the timeout period. 
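
The idle-timeout pattern that timeout_stream_wrapper implements in the hunks below can be sketched on its own, independent of this app's task-lock and SSE types. A minimal version, assuming only the standard library — the wrapper name and the error payload are illustrative, not part of the patch:

import asyncio

async def with_idle_timeout(stream, timeout_s: float):
    # Re-yield items from `stream`, ending the stream if no item
    # arrives within timeout_s seconds.
    it = stream.__aiter__()
    while True:
        try:
            item = await asyncio.wait_for(it.__anext__(), timeout=timeout_s)
        except StopAsyncIteration:
            break
        except asyncio.TimeoutError:
            # Emit one final SSE-style error frame, then close.
            yield 'event: error\ndata: {"message": "idle timeout"}\n\n'
            break
        else:
            yield item

The real wrapper layers cleanup on top of this (deleting the task lock on timeout, cancellation, or error), but the control flow is the same loop around asyncio.wait_for.
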
@@ -87,26 +85,35 @@ async def timeout_stream_wrapper(stream_generator, timeout_seconds: int = SSE_TI
             remaining_timeout = timeout_seconds - elapsed
 
             try:
-                data = await asyncio.wait_for(generator.__anext__(), timeout=remaining_timeout)
+                data = await asyncio.wait_for(generator.__anext__(),
+                                              timeout=remaining_timeout)
                 last_data_time = time.time()
                 yield data
             except asyncio.TimeoutError:
-                chat_logger.warning("SSE timeout: No data received, closing connection",
-                                    extra={"timeout_seconds": timeout_seconds})
-                yield sse_json("error", {"message": f"Connection timeout: No data received for {timeout_seconds // 60} minutes"})
-                cleanup_triggered = await _cleanup_task_lock_safe(task_lock, "TIMEOUT")
+                chat_logger.warning(
+                    "SSE timeout: No data received, closing connection",
+                    extra={"timeout_seconds": timeout_seconds})
+                yield sse_json(
+                    "error", {
+                        "message":
+                        f"Connection timeout: No data received for {timeout_seconds // 60} minutes"
+                    })
+                cleanup_triggered = await _cleanup_task_lock_safe(
+                    task_lock, "TIMEOUT")
                 break
             except StopAsyncIteration:
                 break
             except asyncio.CancelledError:
-                chat_logger.info("[STREAM-CANCELLED] Stream cancelled, triggering cleanup")
+                chat_logger.info(
+                    "[STREAM-CANCELLED] Stream cancelled, triggering cleanup")
                 if not cleanup_triggered:
                     await _cleanup_task_lock_safe(task_lock, "CANCELLED")
                 raise
             except Exception as e:
                 chat_logger.error("[STREAM-ERROR] Unexpected error in stream wrapper",
-                                  extra={"error": str(e)}, exc_info=True)
+                                  extra={"error": str(e)},
+                                  exc_info=True)
                 if not cleanup_triggered:
                     await _cleanup_task_lock_safe(task_lock, "ERROR")
                 raise
@@ -115,10 +122,12 @@ async def timeout_stream_wrapper(stream_generator, timeout_seconds: int = SSE_TI
 @router.post("/chat", name="start chat")
 @traceroot.trace()
 async def post(data: Chat, request: Request):
-    chat_logger.info(
-        "Starting new chat session",
-        extra={"project_id": data.project_id, "task_id": data.task_id, "user": data.email}
-    )
+    chat_logger.info("Starting new chat session",
+                     extra={
+                         "project_id": data.project_id,
+                         "task_id": data.task_id,
+                         "user": data.email
+                     })
 
     task_lock = get_or_create_task_lock(data.project_id)
 
@@ -128,8 +137,9 @@ async def post(data: Chat, request: Request):
 
     os.environ["file_save_path"] = data.file_save_path()
     os.environ["browser_port"] = str(data.browser_port)
     os.environ["OPENAI_API_KEY"] = data.api_key
-    os.environ["OPENAI_API_BASE_URL"] = data.api_url or "https://api.openai.com/v1"
+    os.environ[
+        "OPENAI_API_BASE_URL"] = data.api_url or "https://api.openai.com/v1"
     os.environ["CAMEL_MODEL_LOG_ENABLED"] = "true"
 
     # Set user-specific search engine configuration if provided
@@ -137,17 +151,14 @@ async def post(data: Chat, request: Request):
         for key, value in data.search_config.items():
             if value:
                 os.environ[key] = value
-                chat_logger.debug(f"Set search config: {key}", extra={"project_id": data.project_id})
-
-    email_sanitized = re.sub(r'[\\/*?:"<>|\s]', "_", data.email.split("@")[0]).strip(".")
-    camel_log = (
-        Path.home()
-        / ".eigent"
-        / email_sanitized
-        / ("project_" + data.project_id)
-        / ("task_" + data.task_id)
-        / "camel_logs"
-    )
+                chat_logger.debug(f"Set search config: {key}",
+                                  extra={"project_id": data.project_id})
+
+    email_sanitized = re.sub(r'[\\/*?:"<>|\s]', "_",
+                             data.email.split("@")[0]).strip(".")
+ camel_log = (Path.home() / ".eigent" / email_sanitized / + ("project_" + data.project_id) / ("task_" + data.task_id) / + "camel_logs") camel_log.mkdir(parents=True, exist_ok=True) os.environ["CAMEL_LOG_DIR"] = str(camel_log) @@ -159,21 +170,31 @@ async def post(data: Chat, request: Request): set_current_task_id(data.project_id, data.task_id) # Put initial action in queue to start processing - await task_lock.put_queue(ActionImproveData(data=data.question, new_task_id=data.task_id)) + await task_lock.put_queue( + ActionImproveData(data=data.question, new_task_id=data.task_id)) chat_logger.info( "Chat session initialized", - extra={"project_id": data.project_id, "task_id": data.task_id, "log_dir": str(camel_log)}, - ) - return StreamingResponse( - timeout_stream_wrapper(step_solve(data, request, task_lock), task_lock=task_lock), media_type="text/event-stream" + extra={ + "project_id": data.project_id, + "task_id": data.task_id, + "log_dir": str(camel_log) + }, ) + return StreamingResponse(timeout_stream_wrapper(step_solve( + data, request, task_lock), + task_lock=task_lock), + media_type="text/event-stream") @router.post("/chat/{id}", name="improve chat") @traceroot.trace() def improve(id: str, data: SupplementChat): - chat_logger.info("Chat improvement requested", extra={"task_id": id, "question_length": len(data.question)}) + chat_logger.info("Chat improvement requested", + extra={ + "task_id": id, + "question_length": len(data.question) + }) task_lock = get_task_lock(id) # Allow continuing conversation even after task is done @@ -188,9 +209,13 @@ def improve(id: str, data: SupplementChat): # Log context preservation if hasattr(task_lock, "conversation_history"): - chat_logger.info(f"[CONTEXT] Preserved {len(task_lock.conversation_history)} conversation entries") + chat_logger.info( + f"[CONTEXT] Preserved {len(task_lock.conversation_history)} conversation entries" + ) if hasattr(task_lock, "last_task_result"): - chat_logger.info(f"[CONTEXT] Preserved task result: {len(task_lock.last_task_result)} chars") + chat_logger.info( + f"[CONTEXT] Preserved task result: {len(task_lock.last_task_result)} chars" + ) # If task_id is provided, optimistically update file_save_path (will be destroyed if task is not complex) # this is because a NEW workforce instance may be created for this task @@ -212,21 +237,30 @@ def improve(id: str, data: SupplementChat): # If we have the necessary information, update the file_save_path if current_email and id: # Create new path using the existing pattern: email/project_{project_id}/task_{task_id} - new_folder_path = Path.home() / "eigent" / current_email / f"project_{id}" / f"task_{data.task_id}" + new_folder_path = Path.home( + ) / "eigent" / current_email / f"project_{id}" / f"task_{data.task_id}" new_folder_path.mkdir(parents=True, exist_ok=True) os.environ["file_save_path"] = str(new_folder_path) - chat_logger.info(f"Updated file_save_path to: {new_folder_path}") + chat_logger.info( + f"Updated file_save_path to: {new_folder_path}") # Store the new folder path in task_lock for potential cleanup and persistence task_lock.new_folder_path = new_folder_path else: - chat_logger.warning(f"Could not update file_save_path - email: {current_email}, project_id: {id}") + chat_logger.warning( + f"Could not update file_save_path - email: {current_email}, project_id: {id}" + ) except Exception as e: - chat_logger.error(f"Error updating file path for project_id: {id}, task_id: {data.task_id}: {e}") - - asyncio.run(task_lock.put_queue(ActionImproveData(data=data.question, 
new_task_id=data.task_id))) - chat_logger.info("Improvement request queued with preserved context", extra={"project_id": id}) + chat_logger.error( + f"Error updating file path for project_id: {id}, task_id: {data.task_id}: {e}" + ) + + asyncio.run( + task_lock.put_queue( + ActionImproveData(data=data.question, new_task_id=data.task_id))) + chat_logger.info("Improvement request queued with preserved context", + extra={"project_id": id}) return Response(status_code=201) @@ -247,25 +281,38 @@ def supplement(id: str, data: SupplementChat): def stop(id: str): """stop the task""" chat_logger.info("=" * 80) - chat_logger.info("🛑 [STOP-BUTTON] DELETE /chat/{id} request received from frontend") + chat_logger.info( + "🛑 [STOP-BUTTON] DELETE /chat/{id} request received from frontend") chat_logger.info(f"[STOP-BUTTON] project_id/task_id: {id}") chat_logger.info("=" * 80) try: task_lock = get_task_lock(id) - chat_logger.info(f"[STOP-BUTTON] Task lock retrieved, task_lock.id: {task_lock.id}, task_lock.status: {task_lock.status}") - chat_logger.info(f"[STOP-BUTTON] Queueing ActionStopData(Action.stop) to task_lock queue") + chat_logger.info( + f"[STOP-BUTTON] Task lock retrieved, task_lock.id: {task_lock.id}, task_lock.status: {task_lock.status}" + ) + chat_logger.info( + "[STOP-BUTTON] Queueing ActionStopData(Action.stop) to task_lock queue" + ) asyncio.run(task_lock.put_queue(ActionStopData(action=Action.stop))) - chat_logger.info(f"[STOP-BUTTON] ✅ ActionStopData queued successfully, this will trigger workforce.stop_gracefully()") + chat_logger.info( + "[STOP-BUTTON] ✅ ActionStopData queued successfully, this will trigger workforce.stop_gracefully()" + ) except Exception as e: # Task lock may not exist if task is already finished or never started - chat_logger.warning(f"[STOP-BUTTON] ⚠️ Task lock not found or already stopped, task_id: {id}, error: {str(e)}") + chat_logger.warning( + f"[STOP-BUTTON] ⚠️ Task lock not found or already stopped, task_id: {id}, error: {str(e)}" + ) return Response(status_code=204) @router.post("/chat/{id}/human-reply") @traceroot.trace() def human_reply(id: str, data: HumanReply): - chat_logger.info("Human reply received", extra={"task_id": id, "reply_length": len(data.reply)}) + chat_logger.info("Human reply received", + extra={ + "task_id": id, + "reply_length": len(data.reply) + }) task_lock = get_task_lock(id) asyncio.run(task_lock.put_human_input(data.agent, data.reply)) chat_logger.debug("Human reply processed", extra={"task_id": id}) @@ -275,9 +322,15 @@ def human_reply(id: str, data: HumanReply): @router.post("/chat/{id}/install-mcp") @traceroot.trace() def install_mcp(id: str, data: McpServers): - chat_logger.info("Installing MCP servers", extra={"task_id": id, "servers_count": len(data.get("mcpServers", {}))}) + chat_logger.info("Installing MCP servers", + extra={ + "task_id": id, + "servers_count": len(data.get("mcpServers", {})) + }) task_lock = get_task_lock(id) - asyncio.run(task_lock.put_queue(ActionInstallMcpData(action=Action.install_mcp, data=data))) + asyncio.run( + task_lock.put_queue( + ActionInstallMcpData(action=Action.install_mcp, data=data))) chat_logger.info("MCP installation queued", extra={"task_id": id}) return Response(status_code=201) @@ -286,7 +339,9 @@ def install_mcp(id: str, data: McpServers): @traceroot.trace() def add_task(id: str, data: AddTaskRequest): """Add a new task to the workforce""" - chat_logger.info(f"Adding task to workforce for task_id: {id}, content: {data.content[:100]}...") + chat_logger.info( + f"Adding task to workforce 
for task_id: {id}, content: {data.content[:100]}..." + ) task_lock = get_task_lock(id) try: @@ -306,23 +361,29 @@ def add_task(id: str, data: AddTaskRequest): raise UserException(code.error, f"Failed to add task: {str(e)}") -@router.delete("/chat/{project_id}/remove-task/{task_id}", name="remove task from workforce") +@router.delete("/chat/{project_id}/remove-task/{task_id}", + name="remove task from workforce") @traceroot.trace() def remove_task(project_id: str, task_id: str): """Remove a task from the workforce""" - chat_logger.info(f"Removing task {task_id} from workforce for project_id: {project_id}") + chat_logger.info( + f"Removing task {task_id} from workforce for project_id: {project_id}") task_lock = get_task_lock(project_id) try: # Queue the remove task action - remove_task_action = ActionRemoveTaskData(task_id=task_id, project_id=project_id) + remove_task_action = ActionRemoveTaskData(task_id=task_id, + project_id=project_id) asyncio.run(task_lock.put_queue(remove_task_action)) - chat_logger.info(f"Task removal request queued for project_id: {project_id}, removing task: {task_id}") + chat_logger.info( + f"Task removal request queued for project_id: {project_id}, removing task: {task_id}" + ) return Response(status_code=204) except Exception as e: - chat_logger.error(f"Error removing task {task_id} for project_id: {project_id}: {e}") + chat_logger.error( + f"Error removing task {task_id} for project_id: {project_id}: {e}") raise UserException(code.error, f"Failed to remove task: {str(e)}") @@ -341,21 +402,31 @@ def skip_task(project_id: str): - Keeps SSE connection alive for multi-turn conversation """ chat_logger.info("=" * 80) - chat_logger.info(f"🛑 [STOP-BUTTON] SKIP-TASK request received from frontend (User clicked Stop)") + chat_logger.info( + "🛑 [STOP-BUTTON] SKIP-TASK request received from frontend (User clicked Stop)" + ) chat_logger.info(f"[STOP-BUTTON] project_id: {project_id}") chat_logger.info("=" * 80) task_lock = get_task_lock(project_id) - chat_logger.info(f"[STOP-BUTTON] Task lock retrieved, task_lock.id: {task_lock.id}, task_lock.status: {task_lock.status}") + chat_logger.info( + f"[STOP-BUTTON] Task lock retrieved, task_lock.id: {task_lock.id}, task_lock.status: {task_lock.status}" + ) try: # Queue the skip task action - this will preserve context for multi-turn skip_task_action = ActionSkipTaskData(project_id=project_id) - chat_logger.info(f"[STOP-BUTTON] Queueing ActionSkipTaskData (preserves context, marks as done)") + chat_logger.info( + "[STOP-BUTTON] Queueing ActionSkipTaskData (preserves context, marks as done)" + ) asyncio.run(task_lock.put_queue(skip_task_action)) - chat_logger.info(f"[STOP-BUTTON] ✅ Skip request queued - task will stop gracefully and preserve context") + chat_logger.info( + "[STOP-BUTTON] ✅ Skip request queued - task will stop gracefully and preserve context" + ) return Response(status_code=201) except Exception as e: - chat_logger.error(f"[STOP-BUTTON] Error skipping task for project_id: {project_id}: {e}") + chat_logger.error( + f"[STOP-BUTTON] Error skipping task for project_id: {project_id}: {e}" + ) raise UserException(code.error, f"Failed to skip task: {str(e)}") diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 04accf8bf..146052621 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -1,58 +1,42 @@ import asyncio import datetime -import json -from pathlib import Path +import os import platform -from typing import Any, Literal -from fastapi import 
Request -from inflection import titleize -from pydash import chain -from app.component.debug import dump_class -from app.component.environment import env +from pathlib import Path +from typing import Any + +from app.model.chat import Chat, NewAgent, Status, TaskContent, sse_json +from app.service.error_handler import prepare_model_error_response +from app.service.task import (Action, ActionDecomposeProgressData, + ActionDecomposeTextData, ActionImproveData, + ActionInstallMcpData, ActionNewAgent, Agents, + TaskLock, delete_task_lock, set_current_task_id) +from app.utils.agent import (ListenChatAgent, agent_model, browser_agent, + developer_agent, document_agent, get_mcp_tools, + get_toolkits, mcp_agent, multi_modal_agent, + question_confirm_agent, set_main_event_loop, + task_summary_agent) from app.utils.file_utils import get_working_directory -from app.service.task import ( - ActionImproveData, - ActionInstallMcpData, - ActionNewAgent, - ActionTimeoutData, - TaskLock, - delete_task_lock, - set_current_task_id, - ActionDecomposeProgressData, - ActionDecomposeTextData, -) -from camel.toolkits import AgentCommunicationToolkit, ToolkitMessageIntegration +from app.utils.server.sync_step import sync_step from app.utils.toolkit.human_toolkit import HumanToolkit from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit from app.utils.workforce import Workforce -from app.model.chat import Chat, NewAgent, Status, sse_json, TaskContent +from camel.models import ModelProcessingError from camel.tasks import Task -from app.utils.agent import ( - ListenChatAgent, - agent_model, - get_mcp_tools, - get_toolkits, - mcp_agent, - developer_agent, - document_agent, - multi_modal_agent, - browser_agent, - social_medium_agent, - task_summary_agent, - question_confirm_agent, - set_main_event_loop, -) -from app.service.task import Action, Agents -from app.utils.server.sync_step import sync_step +from camel.toolkits import ToolkitMessageIntegration from camel.types import ModelPlatformType -from camel.models import ModelProcessingError +from fastapi import Request +from inflection import titleize +from pydash import chain + from utils import traceroot_wrapper as traceroot -import os logger = traceroot.get_logger("chat_service") -def format_task_context(task_data: dict, seen_files: set | None = None, skip_files: bool = False) -> str: +def format_task_context(task_data: dict, + seen_files: set | None = None, + skip_files: bool = False) -> str: """Format structured task data into a readable context string. 
Args: @@ -66,7 +50,8 @@ def format_task_context(task_data: dict, seen_files: set | None = None, skip_fil context_parts.append(f"Previous Task: {task_data['task_content']}") if task_data.get('task_result'): - context_parts.append(f"Previous Task Result: {task_data['task_result']}") + context_parts.append( + f"Previous Task Result: {task_data['task_result']}") # Skip file listing if requested if not skip_files: @@ -76,9 +61,13 @@ def format_task_context(task_data: dict, seen_files: set | None = None, skip_fil if os.path.exists(working_directory): generated_files = [] for root, dirs, files in os.walk(working_directory): - dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', '__pycache__', 'venv']] + dirs[:] = [ + d for d in dirs if not d.startswith('.') and d + not in ['node_modules', '__pycache__', 'venv'] + ] for file in files: - if not file.startswith('.') and not file.endswith(('.pyc', '.tmp')): + if not file.startswith('.') and not file.endswith( + ('.pyc', '.tmp')): file_path = os.path.join(root, file) absolute_path = os.path.abspath(file_path) @@ -89,7 +78,8 @@ def format_task_context(task_data: dict, seen_files: set | None = None, skip_fil seen_files.add(absolute_path) if generated_files: - context_parts.append("Generated Files from Previous Task:") + context_parts.append( + "Generated Files from Previous Task:") for file_path in sorted(generated_files): context_parts.append(f" - {file_path}") except Exception as e: @@ -98,7 +88,10 @@ def format_task_context(task_data: dict, seen_files: set | None = None, skip_fil return "\n".join(context_parts) -def collect_previous_task_context(working_directory: str, previous_task_content: str, previous_task_result: str, previous_summary: str = "") -> str: +def collect_previous_task_context(working_directory: str, + previous_task_content: str, + previous_task_result: str, + previous_summary: str = "") -> str: """ Collect context from previous task including content, result, summary, and generated files. 
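
The context builders in this file all reduce to the same filtered directory walk over the working directory. A self-contained sketch of that walk, assuming only the standard library — the function name is illustrative, not part of the patch:

import os

def list_generated_files(root_dir: str) -> list[str]:
    # Walk root_dir, pruning hidden and dependency/cache directories in
    # place so os.walk never descends into them, then collect absolute
    # paths of files that are not hidden, compiled, or temporary.
    skip_dirs = {"node_modules", "__pycache__", "venv"}
    found = []
    for root, dirs, files in os.walk(root_dir):
        dirs[:] = [
            d for d in dirs if not d.startswith(".") and d not in skip_dirs
        ]
        for name in files:
            if not name.startswith(".") and not name.endswith((".pyc", ".tmp")):
                found.append(os.path.abspath(os.path.join(root, name)))
    return sorted(found)

Pruning dirs in place (dirs[:] = ...) is what keeps os.walk from recursing into the skipped folders; copying into a new list would not.
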
@@ -127,16 +120,21 @@ def collect_previous_task_context(working_directory: str, previous_task_content: # Add previous task result if previous_task_result: - context_parts.append(f"Previous Task Result:\n{previous_task_result}\n") + context_parts.append( + f"Previous Task Result:\n{previous_task_result}\n") # Collect generated files from working directory try: if os.path.exists(working_directory): generated_files = [] for root, dirs, files in os.walk(working_directory): - dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', '__pycache__', 'venv']] + dirs[:] = [ + d for d in dirs if not d.startswith('.') + and d not in ['node_modules', '__pycache__', 'venv'] + ] for file in files: - if not file.startswith('.') and not file.endswith(('.pyc', '.tmp')): + if not file.startswith('.') and not file.endswith( + ('.pyc', '.tmp')): file_path = os.path.join(root, file) absolute_path = os.path.abspath(file_path) generated_files.append(absolute_path) @@ -154,14 +152,18 @@ def collect_previous_task_context(working_directory: str, previous_task_content: return "\n".join(context_parts) -def check_conversation_history_length(task_lock: TaskLock, max_length: int = 200000) -> tuple[bool, int]: +def check_conversation_history_length(task_lock: TaskLock, + max_length: int = 200000 + ) -> tuple[bool, int]: """ Check if conversation history exceeds maximum length Returns: tuple: (is_exceeded, total_length) """ - if not hasattr(task_lock, 'conversation_history') or not task_lock.conversation_history: + if not hasattr( + task_lock, + 'conversation_history') or not task_lock.conversation_history: return False, 0 total_length = 0 @@ -171,12 +173,16 @@ def check_conversation_history_length(task_lock: TaskLock, max_length: int = 200 is_exceeded = total_length > max_length if is_exceeded: - logger.warning(f"Conversation history length {total_length} exceeds maximum {max_length}") + logger.warning( + f"Conversation history length {total_length} exceeds maximum {max_length}" + ) return is_exceeded, total_length -def build_conversation_context(task_lock: TaskLock, header: str = "=== CONVERSATION HISTORY ===") -> str: +def build_conversation_context( + task_lock: TaskLock, + header: str = "=== CONVERSATION HISTORY ===") -> str: """Build conversation context from task_lock history with files listed only once at the end. 
Args: @@ -195,10 +201,12 @@ def build_conversation_context(task_lock: TaskLock, header: str = "=== CONVERSAT for entry in task_lock.conversation_history: if entry['role'] == 'task_result': if isinstance(entry['content'], dict): - formatted_context = format_task_context(entry['content'], skip_files=True) + formatted_context = format_task_context(entry['content'], + skip_files=True) context += formatted_context + "\n\n" if entry['content'].get('working_directory'): - working_directories.add(entry['content']['working_directory']) + working_directories.add( + entry['content']['working_directory']) else: context += entry['content'] + "\n" elif entry['role'] == 'assistant': @@ -210,14 +218,21 @@ def build_conversation_context(task_lock: TaskLock, header: str = "=== CONVERSAT try: if os.path.exists(working_directory): for root, dirs, files in os.walk(working_directory): - dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ['node_modules', '__pycache__', 'venv']] + dirs[:] = [ + d for d in dirs if not d.startswith('.') and d + not in ['node_modules', '__pycache__', 'venv'] + ] for file in files: - if not file.startswith('.') and not file.endswith(('.pyc', '.tmp')): + if not file.startswith( + '.') and not file.endswith( + ('.pyc', '.tmp')): file_path = os.path.join(root, file) absolute_path = os.path.abspath(file_path) all_generated_files.add(absolute_path) except Exception as e: - logger.warning(f"Failed to collect generated files from {working_directory}: {e}") + logger.warning( + f"Failed to collect generated files from {working_directory}: {e}" + ) if all_generated_files: context += "Generated Files from Previous Tasks:\n" @@ -232,7 +247,8 @@ def build_conversation_context(task_lock: TaskLock, header: str = "=== CONVERSAT def build_context_for_workforce(task_lock: TaskLock, options: Chat) -> str: """Build context information for workforce.""" - return build_conversation_context(task_lock, header="=== CONVERSATION HISTORY ===") + return build_conversation_context(task_lock, + header="=== CONVERSATION HISTORY ===") @sync_step @@ -254,7 +270,9 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): if task_lock.question_agent is None: task_lock.question_agent = question_confirm_agent(options) else: - logger.debug(f"Reusing existing question_agent with {len(task_lock.conversation_history)} history entries") + logger.debug( + f"Reusing existing question_agent with {len(task_lock.conversation_history)} history entries" + ) question_agent = task_lock.question_agent @@ -269,47 +287,83 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): sub_tasks: list[Task] = [] logger.info("=" * 80) - logger.info("🚀 [LIFECYCLE] step_solve STARTED", extra={"project_id": options.project_id, "task_id": options.task_id}) + logger.info("🚀 [LIFECYCLE] step_solve STARTED", + extra={ + "project_id": options.project_id, + "task_id": options.task_id + }) logger.info("=" * 80) - logger.debug("Step solve options", extra={"task_id": options.task_id, "model_platform": options.model_platform}) + logger.debug("Step solve options", + extra={ + "task_id": options.task_id, + "model_platform": options.model_platform + }) while True: loop_iteration += 1 - logger.debug(f"[LIFECYCLE] step_solve loop iteration #{loop_iteration}", extra={"project_id": options.project_id, "task_id": options.task_id}) + logger.debug( + f"[LIFECYCLE] step_solve loop iteration #{loop_iteration}", + extra={ + "project_id": options.project_id, + "task_id": options.task_id + }) if await 
request.is_disconnected(): logger.warning("=" * 80) - logger.warning(f"⚠️ [LIFECYCLE] CLIENT DISCONNECTED for project {options.project_id}") + logger.warning( + f"⚠️ [LIFECYCLE] CLIENT DISCONNECTED for project {options.project_id}" + ) logger.warning("=" * 80) if workforce is not None: - logger.info(f"[LIFECYCLE] Stopping workforce due to client disconnect, workforce._running={workforce._running}") + logger.info( + f"[LIFECYCLE] Stopping workforce due to client disconnect, workforce._running={workforce._running}" + ) if workforce._running: workforce.stop() workforce.stop_gracefully() - logger.info(f"[LIFECYCLE] Workforce stopped after client disconnect") + logger.info( + "[LIFECYCLE] Workforce stopped after client disconnect") else: - logger.info(f"[LIFECYCLE] Workforce is None, no need to stop") + logger.info("[LIFECYCLE] Workforce is None, no need to stop") task_lock.status = Status.done try: await delete_task_lock(task_lock.id) - logger.info(f"[LIFECYCLE] Task lock deleted after client disconnect") + logger.info( + "[LIFECYCLE] Task lock deleted after client disconnect") except Exception as e: logger.error(f"Error deleting task lock on disconnect: {e}") - logger.info(f"[LIFECYCLE] Breaking out of step_solve loop due to client disconnect") + logger.info( + "[LIFECYCLE] Breaking out of step_solve loop due to client disconnect" + ) break try: item = await task_lock.get_queue() except Exception as e: - logger.error("Error getting item from queue", extra={"project_id": options.project_id, "task_id": options.task_id, "error": str(e)}, exc_info=True) + logger.error("Error getting item from queue", + extra={ + "project_id": options.project_id, + "task_id": options.task_id, + "error": str(e) + }, + exc_info=True) # Continue waiting instead of breaking on queue error continue try: if item.action == Action.improve or start_event_loop: logger.info("=" * 80) - logger.info(f"💬 [NEW-QUESTION] Action.improve received or start_event_loop", extra={"project_id": options.project_id, "start_event_loop": start_event_loop}) - logger.info(f"[NEW-QUESTION] Current workforce state: workforce={'None' if workforce is None else f'exists(id={id(workforce)})'}") - logger.info(f"[NEW-QUESTION] Current camel_task state: camel_task={'None' if camel_task is None else f'exists(id={camel_task.id})'}") + logger.info( + "💬 [NEW-QUESTION] Action.improve received or start_event_loop", + extra={ + "project_id": options.project_id, + "start_event_loop": start_event_loop + }) + logger.info( + f"[NEW-QUESTION] Current workforce state: workforce={'None' if workforce is None else f'exists(id={id(workforce)})'}" + ) + logger.info( + f"[NEW-QUESTION] Current camel_task state: camel_task={'None' if camel_task is None else f'exists(id={camel_task.id})'}" + ) logger.info("=" * 80) # from viztracer import VizTracer @@ -317,114 +371,175 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): # tracer.start() if start_event_loop is True: question = options.question - logger.info(f"[NEW-QUESTION] Initial question from options.question: '{question[:100]}...'") + logger.info( + f"[NEW-QUESTION] Initial question from options.question: '{question[:100]}...'" + ) start_event_loop = False else: assert isinstance(item, ActionImproveData) question = item.data - logger.info(f"[NEW-QUESTION] Follow-up question from ActionImproveData: '{question[:100]}...'") + logger.info( + f"[NEW-QUESTION] Follow-up question from ActionImproveData: '{question[:100]}...'" + ) - is_exceeded, total_length = 
check_conversation_history_length(task_lock) + is_exceeded, total_length = check_conversation_history_length( + task_lock) if is_exceeded: - logger.error("Conversation history too long", extra={"project_id": options.project_id, "current_length": total_length, "max_length": 100000}) - yield sse_json("context_too_long", { - "message": "The conversation history is too long. Please create a new project to continue.", - "current_length": total_length, - "max_length": 100000 - }) + logger.error("Conversation history too long", + extra={ + "project_id": options.project_id, + "current_length": total_length, + "max_length": 100000 + }) + yield sse_json( + "context_too_long", { + "message": + "The conversation history is too long. Please create a new project to continue.", + "current_length": total_length, + "max_length": 100000 + }) continue # Determine task complexity: attachments mean workforce, otherwise let agent decide is_complex_task: bool if len(options.attaches) > 0: is_complex_task = True - logger.info(f"[NEW-QUESTION] Has attachments, treating as complex task") + logger.info( + "[NEW-QUESTION] Has attachments, treating as complex task" + ) else: - is_complex_task = await question_confirm(question_agent, question, task_lock) - logger.info(f"[NEW-QUESTION] question_confirm result: is_complex={is_complex_task}") + is_complex_task = await question_confirm( + question_agent, question, task_lock) + logger.info( + f"[NEW-QUESTION] question_confirm result: is_complex={is_complex_task}" + ) if not is_complex_task: - logger.info(f"[NEW-QUESTION] ✅ Simple question, providing direct answer without workforce") + logger.info( + "[NEW-QUESTION] ✅ Simple question, providing direct answer without workforce" + ) simple_answer_prompt = f"{build_conversation_context(task_lock, header='=== Previous Conversation ===')}User Query: {question}\n\nProvide a direct, helpful answer to this simple question." try: simple_resp = question_agent.step(simple_answer_prompt) - answer_content = simple_resp.msgs[0].content if simple_resp and simple_resp.msgs else "I understand your question, but I'm having trouble generating a response right now." + answer_content = simple_resp.msgs[ + 0].content if simple_resp and simple_resp.msgs else "I understand your question, but I'm having trouble generating a response right now." 
task_lock.add_conversation('assistant', answer_content) - yield sse_json("wait_confirm", {"content": answer_content, "question": question}) + yield sse_json("wait_confirm", { + "content": answer_content, + "question": question + }) except Exception as e: logger.error(f"Error generating simple answer: {e}") - yield sse_json("wait_confirm", {"content": "I encountered an error while processing your question.", "question": question}) + yield sse_json( + "wait_confirm", { + "content": + "I encountered an error while processing your question.", + "question": question + }) # Clean up empty folder if it was created for this task - if hasattr(task_lock, 'new_folder_path') and task_lock.new_folder_path: + if hasattr( + task_lock, + 'new_folder_path') and task_lock.new_folder_path: try: folder_path = Path(task_lock.new_folder_path) if folder_path.exists() and folder_path.is_dir(): # Check if folder is empty if not any(folder_path.iterdir()): folder_path.rmdir() - logger.info(f"Cleaned up empty folder: {folder_path}") + logger.info( + f"Cleaned up empty folder: {folder_path}" + ) # Also clean up parent project folder if it becomes empty project_folder = folder_path.parent - if project_folder.exists() and not any(project_folder.iterdir()): + if project_folder.exists() and not any( + project_folder.iterdir()): project_folder.rmdir() - logger.info(f"Cleaned up empty project folder: {project_folder}") + logger.info( + f"Cleaned up empty project folder: {project_folder}" + ) else: - logger.info(f"Folder not empty, keeping: {folder_path}") + logger.info( + f"Folder not empty, keeping: {folder_path}" + ) # Reset the folder path task_lock.new_folder_path = None except Exception as e: logger.error(f"Error cleaning up folder: {e}") else: - logger.info(f"[NEW-QUESTION] Complex task, creating workforce and decomposing") + logger.info( + "[NEW-QUESTION] Complex task, creating workforce and decomposing" + ) # Update the sync_step with new task_id if hasattr(item, 'new_task_id') and item.new_task_id: - set_current_task_id(options.project_id, item.new_task_id) + set_current_task_id(options.project_id, + item.new_task_id) task_lock.summary_generated = False yield sse_json("confirmed", {"question": question}) - context_for_coordinator = build_context_for_workforce(task_lock, options) + context_for_coordinator = build_context_for_workforce( + task_lock, options) # Check if workforce exists - if so, reuse it; otherwise create new workforce if workforce is not None: - logger.debug(f"[NEW-QUESTION] Reusing existing workforce (id={id(workforce)})") + logger.debug( + f"[NEW-QUESTION] Reusing existing workforce (id={id(workforce)})" + ) else: - logger.info(f"[NEW-QUESTION] Creating NEW workforce instance") + logger.info( + "[NEW-QUESTION] Creating NEW workforce instance") (workforce, mcp) = await construct_workforce(options) for new_agent in options.new_agents: workforce.add_single_agent_worker( - format_agent_description(new_agent), await new_agent_model(new_agent, options) - ) + format_agent_description(new_agent), await + new_agent_model(new_agent, options)) task_lock.status = Status.confirmed # Create camel_task for the question clean_task_content = question + options.summary_prompt - camel_task = Task(content=clean_task_content, id=options.task_id) + camel_task = Task(content=clean_task_content, + id=options.task_id) if len(options.attaches) > 0: - camel_task.additional_info = {Path(file_path).name: file_path for file_path in options.attaches} + camel_task.additional_info = { + Path(file_path).name: file_path + for 
file_path in options.attaches + } # Stream decomposition in background - stream_state = {"subtasks": [], "seen_ids": set(), "last_content": ""} - state_holder: dict[str, Any] = {"sub_tasks": [], "summary_task": ""} + stream_state = { + "subtasks": [], + "seen_ids": set(), + "last_content": "" + } + state_holder: dict[str, Any] = { + "sub_tasks": [], + "summary_task": "" + } - def on_stream_batch(new_tasks: list[Task], is_final: bool = False): - fresh_tasks = [t for t in new_tasks if t.id not in stream_state["seen_ids"]] + def on_stream_batch(new_tasks: list[Task], + is_final: bool = False): + fresh_tasks = [ + t for t in new_tasks + if t.id not in stream_state["seen_ids"] + ] for t in fresh_tasks: stream_state["seen_ids"].add(t.id) stream_state["subtasks"].extend(fresh_tasks) def on_stream_text(chunk): try: - accumulated_content = chunk.msg.content if hasattr(chunk, 'msg') and chunk.msg else str(chunk) + accumulated_content = chunk.msg.content if hasattr( + chunk, 'msg') and chunk.msg else str(chunk) last_content = stream_state["last_content"] # Calculate delta: new content that wasn't in the previous chunk if accumulated_content.startswith(last_content): - delta_content = accumulated_content[len(last_content):] + delta_content = accumulated_content[ + len(last_content):] else: delta_content = accumulated_content @@ -435,16 +550,16 @@ def on_stream_text(chunk): task_lock.put_queue( ActionDecomposeTextData( data={ - "project_id": options.project_id, + "project_id": + options.project_id, "task_id": options.task_id, "content": delta_content, - } - ) - ), + })), event_loop, ) except Exception as e: - logger.warning(f"Failed to stream decomposition text: {e}") + logger.warning( + f"Failed to stream decomposition text: {e}") async def run_decomposition(): nonlocal camel_task, summary_task_content @@ -460,9 +575,12 @@ async def run_decomposition(): if stream_state["subtasks"]: sub_tasks = stream_state["subtasks"] state_holder["sub_tasks"] = sub_tasks - logger.info(f"Task decomposed into {len(sub_tasks)} subtasks") + logger.info( + f"Task decomposed into {len(sub_tasks)} subtasks" + ) try: - setattr(task_lock, "decompose_sub_tasks", sub_tasks) + setattr(task_lock, "decompose_sub_tasks", + sub_tasks) except Exception: pass @@ -470,42 +588,68 @@ async def run_decomposition(): summary_task_agent = task_summary_agent(options) try: summary_task_content = await asyncio.wait_for( - summary_task(summary_task_agent, camel_task), timeout=10 - ) + summary_task(summary_task_agent, + camel_task), + timeout=10) task_lock.summary_generated = True except asyncio.TimeoutError: - logger.warning("summary_task timeout", extra={"project_id": options.project_id, "task_id": options.task_id}) + logger.warning("summary_task timeout", + extra={ + "project_id": + options.project_id, + "task_id": options.task_id + }) task_lock.summary_generated = True - content_preview = camel_task.content if hasattr(camel_task, "content") else "" + content_preview = camel_task.content if hasattr( + camel_task, "content") else "" if content_preview is None: content_preview = "" - summary_task_content = (content_preview[:80] + "...") if len(content_preview) > 80 else content_preview + summary_task_content = ( + content_preview[:80] + + "...") if len(content_preview + ) > 80 else content_preview summary_task_content = f"Task|{summary_task_content}" except Exception: task_lock.summary_generated = True - content_preview = camel_task.content if hasattr(camel_task, "content") else "" + content_preview = camel_task.content if hasattr( + camel_task, 
"content") else "" if content_preview is None: content_preview = "" - summary_task_content = (content_preview[:80] + "...") if len(content_preview) > 80 else content_preview + summary_task_content = ( + content_preview[:80] + + "...") if len(content_preview + ) > 80 else content_preview summary_task_content = f"Task|{summary_task_content}" state_holder["summary_task"] = summary_task_content try: - setattr(task_lock, "summary_task_content", summary_task_content) + setattr(task_lock, "summary_task_content", + summary_task_content) except Exception: pass payload = { "project_id": options.project_id, "task_id": options.task_id, - "sub_tasks": tree_sub_tasks(camel_task.subtasks), + "sub_tasks": + tree_sub_tasks(camel_task.subtasks), "delta_sub_tasks": tree_sub_tasks(sub_tasks), "is_final": True, "summary_task": summary_task_content, } - await task_lock.put_queue(ActionDecomposeProgressData(data=payload)) + await task_lock.put_queue( + ActionDecomposeProgressData(data=payload)) + except ModelProcessingError as e: + # Log model errors during task decomposition (background task) + logger.error( + f"Task decomposition failed due to model error: {e}", + exc_info=True) + # TODO: Error is only logged, not sent to UI (background task limitation) + # To send to UI, we'd need to restore Action.error + ActionErrorData + handler except Exception as e: - logger.error(f"Error in background decomposition: {e}", exc_info=True) + logger.error( + f"Error in background decomposition: {e}", + exc_info=True) bg_task = asyncio.create_task(run_decomposition()) task_lock.add_background_task(bg_task) @@ -518,28 +662,39 @@ async def run_decomposition(): sub_tasks = getattr(task_lock, "decompose_sub_tasks", []) sub_tasks = update_sub_tasks(sub_tasks, update_tasks) add_sub_tasks(camel_task, item.data.task) - summary_task_content_local = getattr(task_lock, "summary_task_content", summary_task_content) + summary_task_content_local = getattr(task_lock, + "summary_task_content", + summary_task_content) yield to_sub_tasks(camel_task, summary_task_content_local) elif item.action == Action.add_task: # Check if this might be a misrouted second question if camel_task is None and workforce is None: - logger.error(f"Cannot add task: both camel_task and workforce are None for project {options.project_id}") - yield sse_json("error", {"message": "Cannot add task: task not initialized. Please start a task first."}) + logger.error( + f"Cannot add task: both camel_task and workforce are None for project {options.project_id}" + ) + yield sse_json( + "error", { + "message": + "Cannot add task: task not initialized. Please start a task first." + }) continue assert camel_task is not None if workforce is None: - logger.error(f"Cannot add task: workforce not initialized for project {options.project_id}") - yield sse_json("error", {"message": "Workforce not initialized. Please start the task first."}) + logger.error( + f"Cannot add task: workforce not initialized for project {options.project_id}" + ) + yield sse_json( + "error", { + "message": + "Workforce not initialized. Please start the task first." 
+ }) continue # Add task to the workforce queue - workforce.add_task( - item.content, - item.task_id, - item.additional_info - ) + workforce.add_task(item.content, item.task_id, + item.additional_info) returnData = { "project_id": item.project_id, @@ -548,8 +703,14 @@ async def run_decomposition(): yield sse_json("add_task", returnData) elif item.action == Action.remove_task: if workforce is None: - logger.error(f"Cannot remove task: workforce not initialized for project {options.project_id}") - yield sse_json("error", {"message": "Workforce not initialized. Please start the task first."}) + logger.error( + f"Cannot remove task: workforce not initialized for project {options.project_id}" + ) + yield sse_json( + "error", { + "message": + "Workforce not initialized. Please start the task first." + }) continue workforce.remove_task(item.task_id) @@ -560,34 +721,50 @@ async def run_decomposition(): yield sse_json("remove_task", returnData) elif item.action == Action.skip_task: logger.info("=" * 80) - logger.info(f"🛑 [LIFECYCLE] SKIP_TASK action received (User clicked Stop button)", extra={"project_id": options.project_id, "item_project_id": item.project_id}) + logger.info( + "🛑 [LIFECYCLE] SKIP_TASK action received (User clicked Stop button)", + extra={ + "project_id": options.project_id, + "item_project_id": item.project_id + }) logger.info("=" * 80) # Prevent duplicate skip processing if task_lock.status == Status.done: - logger.warning(f"⚠️ [LIFECYCLE] SKIP_TASK received but task already marked as done. Ignoring.") + logger.warning( + "⚠️ [LIFECYCLE] SKIP_TASK received but task already marked as done. Ignoring." + ) continue if workforce is not None and item.project_id == options.project_id: - logger.info(f"[LIFECYCLE] Workforce exists (id={id(workforce)}), state={workforce._state.name}, _running={workforce._running}") + logger.info( + f"[LIFECYCLE] Workforce exists (id={id(workforce)}), state={workforce._state.name}, _running={workforce._running}" + ) # Stop workforce completely - logger.info(f"[LIFECYCLE] 🛑 Stopping workforce") + logger.info("[LIFECYCLE] 🛑 Stopping workforce") if workforce._running: # Import correct BaseWorkforce from camel - from camel.societies.workforce.workforce import Workforce as BaseWorkforce + from camel.societies.workforce.workforce import \ + Workforce as BaseWorkforce BaseWorkforce.stop(workforce) - logger.info(f"[LIFECYCLE] ✅ BaseWorkforce.stop() completed, state={workforce._state.name}, _running={workforce._running}") + logger.info( + f"[LIFECYCLE] ✅ BaseWorkforce.stop() completed, state={workforce._state.name}, _running={workforce._running}" + ) workforce.stop_gracefully() - logger.info(f"[LIFECYCLE] ✅ Workforce stopped gracefully") + logger.info("[LIFECYCLE] ✅ Workforce stopped gracefully") # Clear workforce to avoid state issues # Next question will create fresh workforce workforce = None - logger.info(f"[LIFECYCLE] Workforce set to None, will be recreated on next question") + logger.info( + "[LIFECYCLE] Workforce set to None, will be recreated on next question" + ) else: - logger.warning(f"[LIFECYCLE] Cannot skip: workforce is None or project_id mismatch") + logger.warning( + "[LIFECYCLE] Cannot skip: workforce is None or project_id mismatch" + ) # Mark task as done and preserve context (like Action.end does) task_lock.status = Status.done @@ -598,35 +775,47 @@ async def run_decomposition(): if camel_task is not None: task_content: str = camel_task.content if "=== CURRENT TASK ===" in task_content: - task_content = task_content.split("=== CURRENT TASK 
===")[-1].strip() + task_content = task_content.split( + "=== CURRENT TASK ===")[-1].strip() else: task_content: str = f"Task {options.task_id}" - task_lock.add_conversation('task_result', { - 'task_content': task_content, - 'task_result': end_message, - 'working_directory': get_working_directory(options, task_lock) - }) + task_lock.add_conversation( + 'task_result', { + 'task_content': + task_content, + 'task_result': + end_message, + 'working_directory': + get_working_directory(options, task_lock) + }) # Clear camel_task as well (workforce is cleared, so camel_task should be too) camel_task = None - logger.info(f"[LIFECYCLE] ✅ Task marked as done, workforce and camel_task cleared, ready for multi-turn") + logger.info( + "[LIFECYCLE] ✅ Task marked as done, workforce and camel_task cleared, ready for multi-turn" + ) # Send end event to frontend with string format (matching normal end event format) yield sse_json("end", end_message) - logger.info(f"[LIFECYCLE] Sent 'end' SSE event to frontend") + logger.info("[LIFECYCLE] Sent 'end' SSE event to frontend") # Continue loop to accept new questions (don't break, don't delete task_lock) elif item.action == Action.start: # Check conversation history length before starting task - is_exceeded, total_length = check_conversation_history_length(task_lock) + is_exceeded, total_length = check_conversation_history_length( + task_lock) if is_exceeded: - logger.error(f"Cannot start task: conversation history too long ({total_length} chars) for project {options.project_id}") - yield sse_json("context_too_long", { - "message": "The conversation history is too long. Please create a new project to continue.", - "current_length": total_length, - "max_length": 100000 - }) + logger.error( + f"Cannot start task: conversation history too long ({total_length} chars) for project {options.project_id}" + ) + yield sse_json( + "context_too_long", { + "message": + "The conversation history is too long. Please create a new project to continue.", + "current_length": total_length, + "max_length": 100000 + }) continue if workforce is not None: @@ -648,136 +837,204 @@ async def run_decomposition(): task_state = item.data.get('state', 'unknown') task_result = item.data.get('result', '') - if task_state == 'DONE' and task_result: last_completed_task_result = task_result yield sse_json("task_state", item.data) elif item.action == Action.new_task_state: logger.info("=" * 80) - logger.info(f"🔄 [LIFECYCLE] NEW_TASK_STATE action received (Multi-turn)", extra={"project_id": options.project_id}) + logger.info( + "🔄 [LIFECYCLE] NEW_TASK_STATE action received (Multi-turn)", + extra={"project_id": options.project_id}) logger.info("=" * 80) # Log new task state details new_task_id = item.data.get('task_id', 'unknown') new_task_state = item.data.get('state', 'unknown') new_task_result = item.data.get('result', '') - logger.info(f"[LIFECYCLE] New task details: task_id={new_task_id}, state={new_task_state}") + logger.info( + f"[LIFECYCLE] New task details: task_id={new_task_id}, state={new_task_state}" + ) if camel_task is None: - logger.error(f"NEW_TASK_STATE action received but camel_task is None for project {options.project_id}, task {new_task_id}") - yield sse_json("error", {"message": "Cannot process new task state: current task not initialized."}) + logger.error( + f"NEW_TASK_STATE action received but camel_task is None for project {options.project_id}, task {new_task_id}" + ) + yield sse_json( + "error", { + "message": + "Cannot process new task state: current task not initialized." 
+ }) continue old_task_content: str = camel_task.content - old_task_result: str = await get_task_result_with_optional_summary(camel_task, options) + old_task_result: str = await get_task_result_with_optional_summary( + camel_task, options) old_task_content_clean: str = old_task_content if "=== CURRENT TASK ===" in old_task_content_clean: - old_task_content_clean = old_task_content_clean.split("=== CURRENT TASK ===")[-1].strip() - - task_lock.add_conversation('task_result', { - 'task_content': old_task_content_clean, - 'task_result': old_task_result, - 'working_directory': get_working_directory(options, task_lock) - }) + old_task_content_clean = old_task_content_clean.split( + "=== CURRENT TASK ===")[-1].strip() + + task_lock.add_conversation( + 'task_result', { + 'task_content': + old_task_content_clean, + 'task_result': + old_task_result, + 'working_directory': + get_working_directory(options, task_lock) + }) new_task_content = item.data.get('content', '') if new_task_content: import time - task_id = item.data.get('task_id', f"{int(time.time() * 1000)}-multi") + task_id = item.data.get( + 'task_id', f"{int(time.time() * 1000)}-multi") new_camel_task = Task(content=new_task_content, id=task_id) - if hasattr(camel_task, 'additional_info') and camel_task.additional_info: + if hasattr( + camel_task, + 'additional_info') and camel_task.additional_info: new_camel_task.additional_info = camel_task.additional_info camel_task = new_camel_task # Now trigger end of previous task using stored result yield sse_json("end", old_task_result) - + # Always yield new_task_state first - this is not optional yield sse_json("new_task_state", item.data) # Trigger Queue Removal - yield sse_json("remove_task", {"task_id": item.data.get("task_id")}) + yield sse_json("remove_task", + {"task_id": item.data.get("task_id")}) # Then handle multi-turn processing if workforce is not None and new_task_content: - logger.info(f"[LIFECYCLE] Multi-turn: workforce exists (id={id(workforce)}), pausing for question confirmation") + logger.info( + f"[LIFECYCLE] Multi-turn: workforce exists (id={id(workforce)}), pausing for question confirmation" + ) task_lock.status = Status.confirming workforce.pause() - logger.info(f"[LIFECYCLE] Multi-turn: workforce paused, state={workforce._state.name}") + logger.info( + f"[LIFECYCLE] Multi-turn: workforce paused, state={workforce._state.name}" + ) try: - logger.info(f"[LIFECYCLE] Multi-turn: calling question_confirm for new task") - is_multi_turn_complex = await question_confirm(question_agent, new_task_content, task_lock) - logger.info(f"[LIFECYCLE] Multi-turn: question_confirm result: is_complex={is_multi_turn_complex}") + logger.info( + "[LIFECYCLE] Multi-turn: calling question_confirm for new task" + ) + is_multi_turn_complex = await question_confirm( + question_agent, new_task_content, task_lock) + logger.info( + f"[LIFECYCLE] Multi-turn: question_confirm result: is_complex={is_multi_turn_complex}" + ) if not is_multi_turn_complex: - logger.info(f"[LIFECYCLE] Multi-turn: task is simple, providing direct answer without workforce") + logger.info( + "[LIFECYCLE] Multi-turn: task is simple, providing direct answer without workforce" + ) simple_answer_prompt = f"{build_conversation_context(task_lock, header='=== Previous Conversation ===')}User Query: {new_task_content}\n\nProvide a direct, helpful answer to this simple question." 
try: - simple_resp = question_agent.step(simple_answer_prompt) - answer_content = simple_resp.msgs[0].content if simple_resp and simple_resp.msgs else "I understand your question, but I'm having trouble generating a response right now." + simple_resp = question_agent.step( + simple_answer_prompt) + answer_content = simple_resp.msgs[ + 0].content if simple_resp and simple_resp.msgs else "I understand your question, but I'm having trouble generating a response right now." - task_lock.add_conversation('assistant', answer_content) + task_lock.add_conversation( + 'assistant', answer_content) # Send response to user (don't send confirmed if simple response) - yield sse_json("wait_confirm", {"content": answer_content, "question": new_task_content}) + yield sse_json( + "wait_confirm", { + "content": answer_content, + "question": new_task_content + }) except Exception as e: - logger.error(f"Error generating simple answer in multi-turn: {e}") - yield sse_json("wait_confirm", {"content": "I encountered an error while processing your question.", "question": new_task_content}) - - logger.info(f"[LIFECYCLE] Multi-turn: simple answer provided, resuming workforce") + logger.error( + f"Error generating simple answer in multi-turn: {e}" + ) + yield sse_json( + "wait_confirm", { + "content": + "I encountered an error while processing your question.", + "question": new_task_content + }) + + logger.info( + "[LIFECYCLE] Multi-turn: simple answer provided, resuming workforce" + ) workforce.resume() - logger.info(f"[LIFECYCLE] Multi-turn: workforce resumed, continuing to next iteration") + logger.info( + "[LIFECYCLE] Multi-turn: workforce resumed, continuing to next iteration" + ) continue # This continues the main while loop, waiting for next action # Update the sync_step with new task_id before sending new task sse events - logger.info(f"[LIFECYCLE] Multi-turn: task is complex, setting new task_id={task_id}") + logger.info( + f"[LIFECYCLE] Multi-turn: task is complex, setting new task_id={task_id}" + ) set_current_task_id(options.project_id, task_id) - yield sse_json("confirmed", {"question": new_task_content}) + yield sse_json("confirmed", + {"question": new_task_content}) task_lock.status = Status.confirmed - logger.info(f"[LIFECYCLE] Multi-turn: building context for workforce") - context_for_multi_turn = build_context_for_workforce(task_lock, options) + logger.info( + "[LIFECYCLE] Multi-turn: building context for workforce" + ) + context_for_multi_turn = build_context_for_workforce( + task_lock, options) - stream_state = {"subtasks": [], "seen_ids": set(), "last_content": ""} + stream_state = { + "subtasks": [], + "seen_ids": set(), + "last_content": "" + } - def on_stream_batch(new_tasks: list[Task], is_final: bool = False): - fresh_tasks = [t for t in new_tasks if t.id not in stream_state["seen_ids"]] + def on_stream_batch(new_tasks: list[Task], + is_final: bool = False): + fresh_tasks = [ + t for t in new_tasks + if t.id not in stream_state["seen_ids"] + ] for t in fresh_tasks: stream_state["seen_ids"].add(t.id) stream_state["subtasks"].extend(fresh_tasks) def on_stream_text(chunk): try: - accumulated_content = chunk.msg.content if hasattr(chunk, 'msg') and chunk.msg else str(chunk) + accumulated_content = chunk.msg.content if hasattr( + chunk, 'msg') and chunk.msg else str(chunk) last_content = stream_state["last_content"] - if accumulated_content.startswith(last_content): - delta_content = accumulated_content[len(last_content):] + if accumulated_content.startswith( + last_content): + delta_content = 
accumulated_content[ + len(last_content):] else: delta_content = accumulated_content - stream_state["last_content"] = accumulated_content + stream_state[ + "last_content"] = accumulated_content if delta_content: asyncio.run_coroutine_threadsafe( task_lock.put_queue( ActionDecomposeTextData( data={ - "project_id": options.project_id, + "project_id": + options.project_id, "task_id": options.task_id, "content": delta_content, - } - ) - ), + })), event_loop, ) except Exception as e: - logger.warning(f"Failed to stream decomposition text: {e}") + logger.warning( + f"Failed to stream decomposition text: {e}" + ) + new_sub_tasks = await workforce.handle_decompose_append_task( camel_task, reset=False, @@ -787,17 +1044,28 @@ def on_stream_text(chunk): ) if stream_state["subtasks"]: new_sub_tasks = stream_state["subtasks"] - logger.info(f"[LIFECYCLE] Multi-turn: task decomposed into {len(new_sub_tasks)} subtasks") + logger.info( + f"[LIFECYCLE] Multi-turn: task decomposed into {len(new_sub_tasks)} subtasks" + ) # Generate proper LLM summary for multi-turn tasks instead of hardcoded fallback try: - multi_turn_summary_agent = task_summary_agent(options) + multi_turn_summary_agent = task_summary_agent( + options) new_summary_content = await asyncio.wait_for( - summary_task(multi_turn_summary_agent, camel_task), timeout=10 - ) - logger.info("Generated LLM summary for multi-turn task", extra={"project_id": options.project_id}) + summary_task(multi_turn_summary_agent, + camel_task), + timeout=10) + logger.info( + "Generated LLM summary for multi-turn task", + extra={"project_id": options.project_id}) except asyncio.TimeoutError: - logger.warning("Multi-turn summary_task timeout", extra={"project_id": options.project_id, "task_id": task_id}) + logger.warning("Multi-turn summary_task timeout", + extra={ + "project_id": + options.project_id, + "task_id": task_id + }) # Fallback to descriptive but not generic summary task_content_for_summary = new_task_content if len(task_content_for_summary) > 100: @@ -805,7 +1073,9 @@ def on_stream_text(chunk): else: new_summary_content = f"Follow-up Task|{task_content_for_summary}" except Exception as e: - logger.error(f"Error generating multi-turn task summary: {e}") + logger.error( + f"Error generating multi-turn task summary: {e}" + ) # Fallback to descriptive but not generic summary task_content_for_summary = new_task_content if len(task_content_for_summary) > 100: @@ -822,23 +1092,39 @@ def on_stream_text(chunk): "is_final": True, "summary_task": new_summary_content, } - await task_lock.put_queue(ActionDecomposeProgressData(data=final_payload)) + await task_lock.put_queue( + ActionDecomposeProgressData(data=final_payload)) # Update the context with new task data sub_tasks = new_sub_tasks summary_task_content = new_summary_content + except ModelProcessingError as e: + # Handle model errors (especially invalid API keys) during multi-turn task decomposition + error_payload, _, _ = prepare_model_error_response( + e, options.project_id, task_id, + "multi-turn task decomposition") + # Send error notification to frontend + yield sse_json("error", error_payload) + + # Mark task as done (failed state) + task_lock.status = Status.done except Exception as e: import traceback - logger.error(f"[TRACE] Traceback: {traceback.format_exc()}") + logger.error( + f"[TRACE] Traceback: {traceback.format_exc()}") # Continue with existing context if decomposition fails - yield sse_json("error", {"message": f"Failed to process task: {str(e)}"}) + yield sse_json( + "error", + {"message": 
f"Failed to process task: {str(e)}"}) else: if workforce is None: - logger.warning(f"[TRACE] Workforce is None - this might be the issue") + logger.warning( + "[TRACE] Workforce is None - this might be the issue" + ) if not new_task_content: - logger.warning(f"[TRACE] No new task content provided") + logger.warning("[TRACE] No new task content provided") elif item.action == Action.create_agent: yield sse_json("create_agent", item.data) elif item.action == Action.activate_agent: @@ -854,41 +1140,62 @@ def on_stream_text(chunk): elif item.action == Action.write_file: yield sse_json( "write_file", - {"file_path": item.data, "process_task_id": item.process_task_id}, + { + "file_path": item.data, + "process_task_id": item.process_task_id + }, ) elif item.action == Action.ask: yield sse_json("ask", item.data) elif item.action == Action.notice: yield sse_json( "notice", - {"notice": item.data, "process_task_id": item.process_task_id}, + { + "notice": item.data, + "process_task_id": item.process_task_id + }, ) elif item.action == Action.search_mcp: yield sse_json("search_mcp", item.data) elif item.action == Action.install_mcp: if mcp is None: - logger.error(f"Cannot install MCP: mcp agent not initialized for project {options.project_id}") - yield sse_json("error", {"message": "MCP agent not initialized. Please start a complex task first."}) + logger.error( + f"Cannot install MCP: mcp agent not initialized for project {options.project_id}" + ) + yield sse_json( + "error", { + "message": + "MCP agent not initialized. Please start a complex task first." + }) continue task = asyncio.create_task(install_mcp(mcp, item)) task_lock.add_background_task(task) elif item.action == Action.terminal: yield sse_json( "terminal", - {"output": item.data, "process_task_id": item.process_task_id}, + { + "output": item.data, + "process_task_id": item.process_task_id + }, ) elif item.action == Action.pause: if workforce is not None: workforce.pause() - logger.info(f"Workforce paused for project {options.project_id}") + logger.info( + f"Workforce paused for project {options.project_id}") else: - logger.warning(f"Cannot pause: workforce is None for project {options.project_id}") + logger.warning( + f"Cannot pause: workforce is None for project {options.project_id}" + ) elif item.action == Action.resume: if workforce is not None: workforce.resume() - logger.info(f"Workforce resumed for project {options.project_id}") + logger.info( + f"Workforce resumed for project {options.project_id}") else: - logger.warning(f"Cannot resume: workforce is None for project {options.project_id}") + logger.warning( + f"Cannot resume: workforce is None for project {options.project_id}" + ) elif item.action == Action.decompose_text: yield sse_json("decompose_text", item.data) elif item.action == Action.decompose_progress: @@ -897,51 +1204,67 @@ def on_stream_text(chunk): if workforce is not None: workforce.pause() workforce.add_single_agent_worker( - format_agent_description(item), await new_agent_model(item, options) - ) + format_agent_description(item), await + new_agent_model(item, options)) workforce.resume() elif item.action == Action.timeout: logger.info("=" * 80) - logger.info(f"⏰ [LIFECYCLE] TIMEOUT action received for project {options.project_id}, task {options.task_id}") + logger.info( + f"⏰ [LIFECYCLE] TIMEOUT action received for project {options.project_id}, task {options.task_id}" + ) logger.info(f"[LIFECYCLE] Timeout data: {item.data}") logger.info("=" * 80) # Send timeout error to frontend - timeout_message = 
item.data.get("message", "Task execution timeout") + timeout_message = item.data.get("message", + "Task execution timeout") in_flight = item.data.get("in_flight_tasks", 0) pending = item.data.get("pending_tasks", 0) timeout_seconds = item.data.get("timeout_seconds", 0) - yield sse_json("error", { - "message": timeout_message, - "type": "timeout", - "details": { - "in_flight_tasks": in_flight, - "pending_tasks": pending, - "timeout_seconds": timeout_seconds, - } - }) + yield sse_json( + "error", { + "message": timeout_message, + "type": "timeout", + "details": { + "in_flight_tasks": in_flight, + "pending_tasks": pending, + "timeout_seconds": timeout_seconds, + } + }) elif item.action == Action.end: logger.info("=" * 80) - logger.info(f"🏁 [LIFECYCLE] END action received for project {options.project_id}, task {options.task_id}") - logger.info(f"[LIFECYCLE] camel_task exists: {camel_task is not None}, current status: {task_lock.status}, workforce exists: {workforce is not None}") + logger.info( + f"🏁 [LIFECYCLE] END action received for project {options.project_id}, task {options.task_id}" + ) + logger.info( + f"[LIFECYCLE] camel_task exists: {camel_task is not None}, current status: {task_lock.status}, workforce exists: {workforce is not None}" + ) if workforce is not None: - logger.info(f"[LIFECYCLE] Workforce state at END: _state={workforce._state.name}, _running={workforce._running}") + logger.info( + f"[LIFECYCLE] Workforce state at END: _state={workforce._state.name}, _running={workforce._running}" + ) logger.info("=" * 80) # Prevent duplicate end processing if task_lock.status == Status.done: - logger.warning(f"⚠️ [LIFECYCLE] END action received but task already marked as done. Ignoring duplicate END action.") + logger.warning( + "⚠️ [LIFECYCLE] END action received but task already marked as done. Ignoring duplicate END action." + ) continue - + if camel_task is None: - logger.warning(f"END action received but camel_task is None for project {options.project_id}, task {options.task_id}. This may indicate multiple END actions or improper task lifecycle management.") + logger.warning( + f"END action received but camel_task is None for project {options.project_id}, task {options.task_id}. This may indicate multiple END actions or improper task lifecycle management." 
+ ) # Use the item data as the final result if camel_task is None - final_result: str = str(item.data) if item.data else "Task completed" + final_result: str = str( + item.data) if item.data else "Task completed" else: - final_result: str = await get_task_result_with_optional_summary(camel_task, options) - + final_result: str = await get_task_result_with_optional_summary( + camel_task, options) + task_lock.status = Status.done task_lock.last_task_result = final_result @@ -950,40 +1273,58 @@ def on_stream_text(chunk): if camel_task is not None: task_content: str = camel_task.content if "=== CURRENT TASK ===" in task_content: - task_content = task_content.split("=== CURRENT TASK ===")[-1].strip() + task_content = task_content.split( + "=== CURRENT TASK ===")[-1].strip() else: task_content: str = f"Task {options.task_id}" - - task_lock.add_conversation('task_result', { - 'task_content': task_content, - 'task_result': final_result, - 'working_directory': get_working_directory(options, task_lock) - }) + task_lock.add_conversation( + 'task_result', { + 'task_content': + task_content, + 'task_result': + final_result, + 'working_directory': + get_working_directory(options, task_lock) + }) yield sse_json("end", final_result) if workforce is not None: - logger.info(f"[LIFECYCLE] 🛑 Calling workforce.stop_gracefully() for project {options.project_id}, workforce id={id(workforce)}") + logger.info( + f"[LIFECYCLE] 🛑 Calling workforce.stop_gracefully() for project {options.project_id}, workforce id={id(workforce)}" + ) workforce.stop_gracefully() - logger.info(f"[LIFECYCLE] ✅ Workforce stopped gracefully for project {options.project_id}") + logger.info( + f"[LIFECYCLE] ✅ Workforce stopped gracefully for project {options.project_id}" + ) workforce = None - logger.info(f"[LIFECYCLE] Workforce set to None") + logger.info("[LIFECYCLE] Workforce set to None") else: - logger.warning(f"[LIFECYCLE] ⚠️ Workforce already None at end action for project {options.project_id}") + logger.warning( + f"[LIFECYCLE] ⚠️ Workforce already None at end action for project {options.project_id}" + ) camel_task = None - logger.info(f"[LIFECYCLE] camel_task set to None") + logger.info("[LIFECYCLE] camel_task set to None") if question_agent is not None: question_agent.reset() - logger.info(f"[LIFECYCLE] question_agent reset for project {options.project_id}") + logger.info( + f"[LIFECYCLE] question_agent reset for project {options.project_id}" + ) elif item.action == Action.supplement: # Check if this might be a misrouted second question if camel_task is None: - logger.warning(f"SUPPLEMENT action received but camel_task is None for project {options.project_id}") - yield sse_json("error", {"message": "Cannot supplement task: task not initialized. Please start a task first."}) + logger.warning( + f"SUPPLEMENT action received but camel_task is None for project {options.project_id}" + ) + yield sse_json( + "error", { + "message": + "Cannot supplement task: task not initialized. Please start a task first." 
+                    })
                 continue
             else:
                 task_lock.status = Status.processing
@@ -991,50 +1332,82 @@ def on_stream_text(chunk):
                     Task(
                         content=item.data.question,
                         id=f"{camel_task.id}.{len(camel_task.subtasks)}",
-                    )
-                )
+                    ))
                 if workforce is not None:
-                    task = asyncio.create_task(workforce.eigent_start(camel_task.subtasks))
+                    task = asyncio.create_task(
+                        workforce.eigent_start(camel_task.subtasks))
                     task_lock.add_background_task(task)
         elif item.action == Action.budget_not_enough:
             if workforce is not None:
                 workforce.pause()
-            yield sse_json(Action.budget_not_enough, {"message": "budget not enouth"})
+            yield sse_json(Action.budget_not_enough,
+                           {"message": "budget not enough"})
         elif item.action == Action.stop:
             logger.info("=" * 80)
-            logger.info(f"⏹️ [LIFECYCLE] STOP action received for project {options.project_id}")
+            logger.info(
+                f"⏹️ [LIFECYCLE] STOP action received for project {options.project_id}"
+            )
             logger.info("=" * 80)
             if workforce is not None:
-                logger.info(f"[LIFECYCLE] Workforce exists (id={id(workforce)}), _running={workforce._running}, _state={workforce._state.name}")
+                logger.info(
+                    f"[LIFECYCLE] Workforce exists (id={id(workforce)}), _running={workforce._running}, _state={workforce._state.name}"
+                )
                 if workforce._running:
-                    logger.info(f"[LIFECYCLE] Calling workforce.stop() because _running=True")
+                    logger.info(
+                        "[LIFECYCLE] Calling workforce.stop() because _running=True"
+                    )
                     workforce.stop()
-                    logger.info(f"[LIFECYCLE] workforce.stop() completed")
-                logger.info(f"[LIFECYCLE] Calling workforce.stop_gracefully()")
+                    logger.info("[LIFECYCLE] workforce.stop() completed")
+                logger.info(
+                    "[LIFECYCLE] Calling workforce.stop_gracefully()")
                 workforce.stop_gracefully()
-                logger.info(f"[LIFECYCLE] ✅ Workforce stopped for project {options.project_id}")
+                logger.info(
+                    f"[LIFECYCLE] ✅ Workforce stopped for project {options.project_id}"
+                )
             else:
-                logger.warning(f"[LIFECYCLE] ⚠️ Workforce is None at stop action for project {options.project_id}")
-            logger.info(f"[LIFECYCLE] Deleting task lock")
+                logger.warning(
+                    f"[LIFECYCLE] ⚠️ Workforce is None at stop action for project {options.project_id}"
+                )
+            logger.info("[LIFECYCLE] Deleting task lock")
             await delete_task_lock(task_lock.id)
-            logger.info(f"[LIFECYCLE] Task lock deleted, breaking out of loop")
+            logger.info(
+                "[LIFECYCLE] Task lock deleted, breaking out of loop")
             break
         else:
             logger.warning(f"Unknown action: {item.action}")
     except ModelProcessingError as e:
         if "Budget has been exceeded" in str(e):
-            logger.warning(f"Budget exceeded for task {options.task_id}, action: {item.action}")
+            logger.warning(
+                f"Budget exceeded for task {options.task_id}, action: {item.action}"
+            )
             # workforce decompose task don't use ListenAgent, this need return sse
             if "workforce" in locals() and workforce is not None:
                 workforce.pause()
-                yield sse_json(Action.budget_not_enough, {"message": "budget not enouth"})
+                yield sse_json(Action.budget_not_enough,
+                               {"message": "budget not enough"})
         else:
-            logger.error(f"ModelProcessingError for task {options.task_id}, action {item.action}: {e}", exc_info=True)
-            yield sse_json("error", {"message": str(e)})
-            if "workforce" in locals() and workforce is not None and workforce._running:
+            logger.error(
+                f"ModelProcessingError for task {options.task_id}, action {item.action}: {e}",
+                exc_info=True)
+            # Use error formatter to send properly formatted error to frontend
+            from app.service.error_handler import \
+                prepare_model_error_response
+            error_payload, _, _ = prepare_model_error_response(
+                e, options.project_id,
options.task_id, + f"action {item.action}") + yield sse_json("error", error_payload) + + # Stop workforce if running + if "workforce" in locals( + ) and workforce is not None and workforce._running: workforce.stop() + + # Mark task as done + task_lock.status = Status.done except Exception as e: - logger.error(f"Unhandled exception for task {options.task_id}, action {item.action}: {e}", exc_info=True) + logger.error( + f"Unhandled exception for task {options.task_id}, action {item.action}: {e}", + exc_info=True) yield sse_json("error", {"message": str(e)}) # Continue processing other items instead of breaking @@ -1044,7 +1417,9 @@ async def install_mcp( mcp: ListenChatAgent, install_mcp: ActionInstallMcpData, ): - logger.info(f"Installing MCP tools: {list(install_mcp.data.get('mcpServers', {}).keys())}") + logger.info( + f"Installing MCP tools: {list(install_mcp.data.get('mcpServers', {}).keys())}" + ) try: mcp.add_tools(await get_mcp_tools(install_mcp.data)) logger.info("MCP tools installed successfully") @@ -1054,8 +1429,10 @@ async def install_mcp( def to_sub_tasks(task: Task, summary_task_content: str): - logger.info(f"[TO-SUB-TASKS] 📋 Creating to_sub_tasks SSE event") - logger.info(f"[TO-SUB-TASKS] task.id={task.id}, summary={summary_task_content[:50]}..., subtasks_count={len(task.subtasks)}") + logger.info("[TO-SUB-TASKS] 📋 Creating to_sub_tasks SSE event") + logger.info( + f"[TO-SUB-TASKS] task.id={task.id}, summary={summary_task_content[:50]}..., subtasks_count={len(task.subtasks)}" + ) result = sse_json( "to_sub_tasks", { @@ -1063,29 +1440,25 @@ def to_sub_tasks(task: Task, summary_task_content: str): "sub_tasks": tree_sub_tasks(task.subtasks), }, ) - logger.info(f"[TO-SUB-TASKS] ✅ to_sub_tasks SSE event created") + logger.info("[TO-SUB-TASKS] ✅ to_sub_tasks SSE event created") return result def tree_sub_tasks(sub_tasks: list[Task], depth: int = 0): if depth > 5: return [] - return ( - chain(sub_tasks) - .filter(lambda x: x.content != "") - .map( - lambda x: { - "id": x.id, - "content": x.content, - "state": x.state, - "subtasks": tree_sub_tasks(x.subtasks, depth + 1), - } - ) - .value() - ) - - -def update_sub_tasks(sub_tasks: list[Task], update_tasks: dict[str, TaskContent], depth: int = 0): + return (chain(sub_tasks).filter(lambda x: x.content != "").map( + lambda x: { + "id": x.id, + "content": x.content, + "state": x.state, + "subtasks": tree_sub_tasks(x.subtasks, depth + 1), + }).value()) + + +def update_sub_tasks(sub_tasks: list[Task], + update_tasks: dict[str, TaskContent], + depth: int = 0): if depth > 5: # limit the depth of the recursion return [] @@ -1108,16 +1481,18 @@ def add_sub_tasks(camel_task: Task, update_tasks: list[TaskContent]): Task( content=item.content, id=f"{camel_task.id}.{len(camel_task.subtasks) + 1}", - ) - ) + )) -async def question_confirm(agent: ListenChatAgent, prompt: str, task_lock: TaskLock | None = None) -> bool: +async def question_confirm(agent: ListenChatAgent, + prompt: str, + task_lock: TaskLock | None = None) -> bool: """Simple question confirmation - returns True for complex tasks, False for simple questions.""" context_prompt = "" if task_lock: - context_prompt = build_conversation_context(task_lock, header="=== Previous Conversation ===") + context_prompt = build_conversation_context( + task_lock, header="=== Previous Conversation ===") full_prompt = f"""{context_prompt}User Query: {prompt} @@ -1137,24 +1512,40 @@ async def question_confirm(agent: ListenChatAgent, prompt: str, task_lock: TaskL resp = agent.step(full_prompt) if not resp 
or not resp.msgs or len(resp.msgs) == 0: - logger.warning("No response from agent, defaulting to complex task") + logger.warning( + "No response from agent, defaulting to complex task") return True content = resp.msgs[0].content if not content: - logger.warning("Empty content from agent, defaulting to complex task") + logger.warning( + "Empty content from agent, defaulting to complex task") return True normalized = content.strip().lower() is_complex = "yes" in normalized - logger.info(f"Question confirm result: {'complex task' if is_complex else 'simple question'}", - extra={"response": content, "is_complex": is_complex}) + logger.info( + f"Question confirm result: {'complex task' if is_complex else 'simple question'}", + extra={ + "response": content, + "is_complex": is_complex + }) return is_complex + except ModelProcessingError as e: + logger.error(f"Model error in question_confirm: {e}") + raise ModelProcessingError( + f"Failed to determine task complexity due to model error: {str(e)}" + ) except Exception as e: logger.error(f"Error in question_confirm: {e}") + # Check if this is an authentication/API key error + error_str = str(e).lower() + if "401" in error_str or "authentication" in error_str or "api key" in error_str or "unauthorized" in error_str: + # This is an API key error, raise it as ModelProcessingError so it gets caught properly + raise ModelProcessingError(f"Invalid API key: {str(e)}") return True @@ -1179,7 +1570,9 @@ async def summary_task(agent: ListenChatAgent, task: Task) -> str: logger.info("Task summary generated", extra={"summary": summary}) return summary except Exception as e: - logger.error("Error generating task summary", extra={"error": str(e)}, exc_info=True) + logger.error("Error generating task summary", + extra={"error": str(e)}, + exc_info=True) raise @@ -1224,12 +1617,15 @@ async def summary_subtasks_result(agent: ListenChatAgent, task: Task) -> str: res = agent.step(prompt) summary = res.msgs[0].content - logger.info(f"Generated subtasks summary for task {task.id} with {len(task.subtasks)} subtasks") + logger.info( + f"Generated subtasks summary for task {task.id} with {len(task.subtasks)} subtasks" + ) return summary -async def get_task_result_with_optional_summary(task: Task, options: Chat) -> str: +async def get_task_result_with_optional_summary(task: Task, + options: Chat) -> str: """ Get the task result, with LLM summary if there are multiple subtasks. @@ -1243,10 +1639,13 @@ async def get_task_result_with_optional_summary(task: Task, options: Chat) -> st result = str(task.result or "") if task.subtasks and len(task.subtasks) > 1: - logger.info(f"Task {task.id} has {len(task.subtasks)} subtasks, generating summary") + logger.info( + f"Task {task.id} has {len(task.subtasks)} subtasks, generating summary" + ) try: summary_agent = task_summary_agent(options) - summarized_result = await summary_subtasks_result(summary_agent, task) + summarized_result = await summary_subtasks_result( + summary_agent, task) result = summarized_result logger.info(f"Successfully generated summary for task {task.id}") except Exception as e: @@ -1262,13 +1661,18 @@ async def get_task_result_with_optional_summary(task: Task, options: Chat) -> st @traceroot.trace() -async def construct_workforce(options: Chat) -> tuple[Workforce, ListenChatAgent]: +async def construct_workforce( + options: Chat) -> tuple[Workforce, ListenChatAgent]: """Construct a workforce with all required agents. This function creates all agents in PARALLEL to minimize startup time. 
Sync functions are run in thread pool, async functions are awaited concurrently. """ - logger.debug("construct_workforce started", extra={"project_id": options.project_id, "task_id": options.task_id}) + logger.debug("construct_workforce started", + extra={ + "project_id": options.project_id, + "task_id": options.task_id + }) # Store main event loop reference for thread-safe async task scheduling # This allows agent_model() to schedule tasks when called from worker threads @@ -1288,21 +1692,23 @@ def _create_coordinator_and_task_agents() -> list[ListenChatAgent]: prompt, options, [ - *( - ToolkitMessageIntegration( - message_handler=HumanToolkit(options.project_id, key).send_message_to_user - ).register_toolkits(NoteTakingToolkit(options.project_id, working_directory=working_directory)) - ).get_tools() + *(ToolkitMessageIntegration(message_handler=HumanToolkit( + options.project_id, key + ).send_message_to_user).register_toolkits( + NoteTakingToolkit( + options.project_id, + working_directory=working_directory))).get_tools() ], - ) - for key, prompt in { - Agents.coordinator_agent: f""" + ) for key, prompt in { + Agents.coordinator_agent: + f""" You are a helpful coordinator. - You are now working in system {platform.system()} with architecture {platform.machine()} at working directory `{working_directory}`. All local file operations must occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity. The current date is {datetime.date.today()}. For any date-related tasks, you MUST use this as the current date. """, - Agents.task_agent: f""" + Agents.task_agent: + f""" You are a helpful task planner. - You are now working in system {platform.system()} with architecture {platform.machine()} at working directory `{working_directory}`. All local file operations must occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity. 
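For reference, the parallel startup described in the docstring above (and performed by the asyncio.gather call in the following hunk) boils down to mixing thread-pooled sync factories and awaited async factories in a single gather. A minimal, runnable sketch of the pattern, using hypothetical stand-in factories rather than the real ListenChatAgent constructors:

import asyncio


def make_sync_agent(name: str) -> str:
    # Stand-in for a blocking factory like _create_coordinator_and_task_agents().
    return f"agent:{name}"


async def make_async_agent(name: str) -> str:
    # Stand-in for an async factory like mcp_agent(options).
    await asyncio.sleep(0)
    return f"agent:{name}"


async def build_all() -> list[str]:
    # Sync factories run in the default thread pool via asyncio.to_thread();
    # async factories are awaited concurrently. gather() returns results in
    # argument order, so positional unpacking of the results stays stable.
    return await asyncio.gather(
        asyncio.to_thread(make_sync_agent, "coordinator"),
        asyncio.to_thread(make_sync_agent, "new_worker"),
        make_async_agent("developer"),
        make_async_agent("mcp"),
    )


if __name__ == "__main__":
    print(asyncio.run(build_all()))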
@@ -1323,12 +1729,14 @@ def _create_new_worker_agent() -> ListenChatAgent: """, options, [ - *HumanToolkit.get_can_use_tools(options.project_id, Agents.new_worker_agent), - *( - ToolkitMessageIntegration( - message_handler=HumanToolkit(options.project_id, Agents.new_worker_agent).send_message_to_user - ).register_toolkits(NoteTakingToolkit(options.project_id, working_directory=working_directory)) - ).get_tools(), + *HumanToolkit.get_can_use_tools(options.project_id, + Agents.new_worker_agent), + *(ToolkitMessageIntegration(message_handler=HumanToolkit( + options.project_id, Agents.new_worker_agent + ).send_message_to_user).register_toolkits( + NoteTakingToolkit( + options.project_id, + working_directory=working_directory))).get_tools(), ], ) @@ -1349,7 +1757,8 @@ def _create_new_worker_agent() -> ListenChatAgent: mcp_agent(options), ) except Exception as e: - logger.error(f"Failed to create agents in parallel: {e}", exc_info=True) + logger.error(f"Failed to create agents in parallel: {e}", + exc_info=True) raise finally: # Always clear event loop reference after parallel agent creation completes @@ -1386,7 +1795,8 @@ def _create_new_worker_agent() -> ListenChatAgent: coordinator_agent=coordinator_agent, task_agent=task_agent, new_worker_agent=new_worker_agent, - use_structured_output_handler=False if model_platform_enum == ModelPlatformType.OPENAI else True, + use_structured_output_handler=False + if model_platform_enum == ModelPlatformType.OPENAI else True, ) workforce.add_single_agent_worker( @@ -1443,15 +1853,22 @@ def format_agent_description(agent_data: NewAgent | ActionNewAgent) -> str: tool_names.append(titleize(mcp_server)) if tool_names: - description_parts.append(f"with access to {', '.join(tool_names)} tools : <{tool_names}>") + description_parts.append( + f"with access to {', '.join(tool_names)} tools : <{tool_names}>") return " ".join(description_parts) @traceroot.trace() async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat): - logger.info("Creating new agent", extra={"agent_name": data.name, "project_id": options.project_id, "task_id": options.task_id}) - logger.debug("New agent data", extra={"agent_data": data.model_dump_json()}) + logger.info("Creating new agent", + extra={ + "agent_name": data.name, + "project_id": options.project_id, + "task_id": options.task_id + }) + logger.debug("New agent data", + extra={"agent_data": data.model_dump_json()}) working_directory = get_working_directory(options) tool_names = [] tools = [*await get_toolkits(data.tools, data.name, options.project_id)] @@ -1463,13 +1880,18 @@ async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat): tool_names.append(titleize(item)) for item in tools: logger.debug(f"Agent {data.name} tool: {item.func.__name__}") - logger.info(f"Agent {data.name} created with {len(tools)} tools: {tool_names}") + logger.info( + f"Agent {data.name} created with {len(tools)} tools: {tool_names}") # Enhanced system message with platform information enhanced_description = f"""{data.description} - You are now working in system {platform.system()} with architecture {platform.machine()} at working directory `{working_directory}`. All local file operations must occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity. -The current date is {datetime.date.today()}. For any date-related tasks, you +The current date is {datetime.date.today()}. 
For any date-related tasks, you MUST use this as the current date. """ - return agent_model(data.name, enhanced_description, options, tools, tool_names=tool_names) + return agent_model(data.name, + enhanced_description, + options, + tools, + tool_names=tool_names) diff --git a/backend/app/service/error_handler.py b/backend/app/service/error_handler.py new file mode 100644 index 000000000..45154330c --- /dev/null +++ b/backend/app/service/error_handler.py @@ -0,0 +1,50 @@ +from camel.models import ModelProcessingError +from app.component.error_format import normalize_error_to_openai_format +from utils import traceroot_wrapper as traceroot + +logger = traceroot.get_logger("error_handler") + + +def prepare_model_error_response( + error: ModelProcessingError, + project_id: str, + task_id: str, + context: str = "task decomposition" +) -> tuple[dict, str, str | None]: + """Prepare error response for ModelProcessingError. + + This function normalizes the error and prepares the payload for frontend notification. + + Args: + error: The ModelProcessingError to handle + project_id: Project ID for logging + task_id: Task ID for logging + context: Description of where the error occurred (for logging) + + Returns: + tuple: (error_payload, message, error_code) + - error_payload: SSE error payload ready to send to frontend + - message: Human-readable error message + - error_code: Error code (e.g., "invalid_api_key") + """ + message, error_code, error_obj = normalize_error_to_openai_format(error) + + logger.error( + f"{context.capitalize()} failed due to model error: {message}", + extra={ + "project_id": project_id, + "task_id": task_id, + "error_code": error_code, + "error": str(error) + }, + exc_info=True + ) + + # Prepare error payload + error_payload = { + "message": message, + "error_code": error_code, + "error": error_obj + } + + return error_payload, message, error_code diff --git a/backend/app/utils/agent.py b/backend/app/utils/agent.py index 91ef926ea..39af318c8 100644 --- a/backend/app/utils/agent.py +++ b/backend/app/utils/agent.py @@ -3,17 +3,18 @@ import json import os import platform +import uuid from threading import Event, Lock -import traceback from typing import Any, Callable, Dict, List, Tuple -import uuid + from utils import traceroot_wrapper as traceroot # Thread-safe reference to main event loop using contextvars # This ensures each request has its own event loop reference, avoiding race conditions -_main_event_loop_var: contextvars.ContextVar[asyncio.AbstractEventLoop | None] = contextvars.ContextVar( - '_main_event_loop', default=None -) +_main_event_loop_var: contextvars.ContextVar[asyncio.AbstractEventLoop + | None] = contextvars.ContextVar( + '_main_event_loop', + default=None) # Global fallback for main event loop reference # Used when contextvars don't propagate to worker threads (e.g., asyncio.to_thread) @@ -59,110 +60,97 @@ def _schedule_async_task(coro): "No event loop available for async task scheduling, task skipped. " "Ensure set_main_event_loop() is called before parallel agent creation." 
) -from camel.agents import ChatAgent -from camel.agents.chat_agent import ( - StreamingChatAgentResponse, - AsyncStreamingChatAgentResponse, -) -from camel.agents._types import ToolCallRequest -from camel.memories import AgentMemory -from camel.messages import BaseMessage -from camel.models import ( - BaseModelBackend, - ModelFactory, - ModelManager, - OpenAIAudioModels, - ModelProcessingError, -) -from camel.responses import ChatAgentResponse -from camel.terminators import ResponseTerminator -from camel.toolkits import FunctionTool, RegisteredAgentToolkit -from camel.types.agents import ToolCallingRecord + + +import datetime + from app.component.environment import env +from app.model.chat import Chat, McpServers from app.utils.file_utils import get_working_directory from app.utils.toolkit.abstract_toolkit import AbstractToolkit -from app.utils.toolkit.hybrid_browser_toolkit import HybridBrowserToolkit +from app.utils.toolkit.audio_analysis_toolkit import AudioAnalysisToolkit from app.utils.toolkit.excel_toolkit import ExcelToolkit from app.utils.toolkit.file_write_toolkit import FileToolkit +from app.utils.toolkit.github_toolkit import GithubToolkit from app.utils.toolkit.google_calendar_toolkit import GoogleCalendarToolkit from app.utils.toolkit.google_drive_mcp_toolkit import GoogleDriveMCPToolkit from app.utils.toolkit.google_gmail_mcp_toolkit import GoogleGmailMCPToolkit from app.utils.toolkit.human_toolkit import HumanToolkit +from app.utils.toolkit.hybrid_browser_toolkit import HybridBrowserToolkit +from app.utils.toolkit.image_analysis_toolkit import ImageAnalysisToolkit +from app.utils.toolkit.lark_toolkit import LarkToolkit +from app.utils.toolkit.linkedin_toolkit import LinkedInToolkit from app.utils.toolkit.markitdown_toolkit import MarkItDownToolkit from app.utils.toolkit.mcp_search_toolkit import McpSearchToolkit from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit from app.utils.toolkit.notion_mcp_toolkit import NotionMCPToolkit +from app.utils.toolkit.openai_image_toolkit import OpenAIImageToolkit from app.utils.toolkit.pptx_toolkit import PPTXToolkit +from app.utils.toolkit.reddit_toolkit import RedditToolkit from app.utils.toolkit.screenshot_toolkit import ScreenshotToolkit -from app.utils.toolkit.terminal_toolkit import TerminalToolkit -from app.utils.toolkit.github_toolkit import GithubToolkit from app.utils.toolkit.search_toolkit import SearchToolkit -from app.utils.toolkit.video_download_toolkit import VideoDownloaderToolkit -from app.utils.toolkit.audio_analysis_toolkit import AudioAnalysisToolkit +from app.utils.toolkit.slack_toolkit import SlackToolkit +from app.utils.toolkit.terminal_toolkit import TerminalToolkit +from app.utils.toolkit.twitter_toolkit import TwitterToolkit from app.utils.toolkit.video_analysis_toolkit import VideoAnalysisToolkit -from app.utils.toolkit.image_analysis_toolkit import ImageAnalysisToolkit -from app.utils.toolkit.openai_image_toolkit import OpenAIImageToolkit +from app.utils.toolkit.video_download_toolkit import VideoDownloaderToolkit from app.utils.toolkit.web_deploy_toolkit import WebDeployToolkit from app.utils.toolkit.whatsapp_toolkit import WhatsAppToolkit -from app.utils.toolkit.twitter_toolkit import TwitterToolkit -from app.utils.toolkit.linkedin_toolkit import LinkedInToolkit -from app.utils.toolkit.reddit_toolkit import RedditToolkit -from app.utils.toolkit.slack_toolkit import SlackToolkit -from app.utils.toolkit.lark_toolkit import LarkToolkit +from camel.agents import ChatAgent +from camel.agents._types 
import ToolCallRequest +from camel.agents.chat_agent import (AsyncStreamingChatAgentResponse, + StreamingChatAgentResponse) +from camel.memories import AgentMemory +from camel.messages import BaseMessage +from camel.models import (BaseModelBackend, ModelFactory, ModelManager, + ModelProcessingError, OpenAIAudioModels) +from camel.responses import ChatAgentResponse +from camel.terminators import ResponseTerminator +from camel.toolkits import (FunctionTool, MCPToolkit, RegisteredAgentToolkit, + ToolkitMessageIntegration) from camel.types import ModelPlatformType, ModelType -from camel.toolkits import MCPToolkit, ToolkitMessageIntegration -import datetime +from camel.types.agents import ToolCallingRecord from pydantic import BaseModel -from app.model.chat import Chat, McpServers # Create traceroot logger for agent tracking traceroot_logger = traceroot.get_logger("agent") -from app.service.task import ( - Action, - ActionActivateAgentData, - ActionActivateToolkitData, - ActionBudgetNotEnough, - ActionCreateAgentData, - ActionDeactivateAgentData, - ActionDeactivateToolkitData, - Agents, - get_task_lock, -) -from app.service.task import set_process_task +from app.service.task import (Action, ActionActivateAgentData, + ActionActivateToolkitData, ActionBudgetNotEnough, + ActionCreateAgentData, ActionDeactivateAgentData, + ActionDeactivateToolkitData, Agents, + get_task_lock, set_process_task) NOW_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:00:00") class ListenChatAgent(ChatAgent): + @traceroot.trace() def __init__( self, api_task_id: str, agent_name: str, system_message: BaseMessage | str | None = None, - model: ( - BaseModelBackend - | ModelManager - | Tuple[str, str] - | str - | ModelType - | Tuple[ModelPlatformType, ModelType] - | List[BaseModelBackend] - | List[str] - | List[ModelType] - | List[Tuple[str, str]] - | List[Tuple[ModelPlatformType, ModelType]] - | None - ) = None, + model: (BaseModelBackend + | ModelManager + | Tuple[str, str] + | str + | ModelType + | Tuple[ModelPlatformType, ModelType] + | List[BaseModelBackend] + | List[str] + | List[ModelType] + | List[Tuple[str, str]] + | List[Tuple[ModelPlatformType, ModelType]] + | None) = None, memory: AgentMemory | None = None, message_window_size: int | None = None, token_limit: int | None = None, output_language: str | None = None, tools: List[FunctionTool | Callable[..., Any]] | None = None, toolkits_to_register_agent: List[RegisteredAgentToolkit] | None = None, - external_tools: ( - List[FunctionTool | Callable[..., Any] | Dict[str, Any]] | None - ) = None, + external_tools: (List[FunctionTool | Callable[..., Any] + | Dict[str, Any]] | None) = None, response_terminators: List[ResponseTerminator] | None = None, scheduling_strategy: str = "round_robin", max_iteration: int | None = None, @@ -213,20 +201,16 @@ def step( task_lock = get_task_lock(self.api_task_id) asyncio.create_task( task_lock.put_queue( - ActionActivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": ( - input_message.content - if isinstance(input_message, BaseMessage) - else input_message - ), - }, - ) - ) - ) + ActionActivateAgentData(data={ + "agent_name": + self.agent_name, + "process_task_id": + self.process_task_id, + "agent_id": + self.agent_id, + "message": (input_message.content if isinstance( + input_message, BaseMessage) else input_message), + }, ))) error_info = None message = None res = None @@ -240,20 +224,21 @@ def step( error_info = e if "Budget has been 
exceeded" in str(e): message = "Budget has been exceeded" - traceroot_logger.warning(f"Agent {self.agent_name} budget exceeded") - asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough())) + traceroot_logger.warning( + f"Agent {self.agent_name} budget exceeded") + asyncio.create_task( + task_lock.put_queue(ActionBudgetNotEnough())) else: message = str(e) traceroot_logger.error( - f"Agent {self.agent_name} model processing error: {e}" - ) + f"Agent {self.agent_name} model processing error: {e}") total_tokens = 0 except Exception as e: res = None error_info = e traceroot_logger.error( - f"Agent {self.agent_name} unexpected error in step: {e}", exc_info=True - ) + f"Agent {self.agent_name} unexpected error in step: {e}", + exc_info=True) message = f"Error processing message: {e!s}" total_tokens = 0 @@ -274,32 +259,34 @@ def _stream_with_deactivate(): finally: total_tokens = 0 if last_response: - usage_info = ( - last_response.info.get("usage") - or last_response.info.get("token_usage") - or {} - ) + usage_info = (last_response.info.get("usage") or + last_response.info.get("token_usage") + or {}) if usage_info: - total_tokens = usage_info.get("total_tokens", 0) + total_tokens = usage_info.get( + "total_tokens", 0) asyncio.create_task( task_lock.put_queue( - ActionDeactivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": accumulated_content, - "tokens": total_tokens, - }, - ) - ) - ) + ActionDeactivateAgentData(data={ + "agent_name": + self.agent_name, + "process_task_id": + self.process_task_id, + "agent_id": + self.agent_id, + "message": + accumulated_content, + "tokens": + total_tokens, + }, ))) return StreamingChatAgentResponse(_stream_with_deactivate()) message = res.msg.content if res.msg else "" - usage_info = res.info.get("usage") or res.info.get("token_usage") or {} - total_tokens = usage_info.get("total_tokens", 0) if usage_info else 0 + usage_info = res.info.get("usage") or res.info.get( + "token_usage") or {} + total_tokens = usage_info.get("total_tokens", + 0) if usage_info else 0 traceroot_logger.info( f"Agent {self.agent_name} completed step, tokens used: {total_tokens}" ) @@ -308,17 +295,13 @@ def _stream_with_deactivate(): asyncio.create_task( task_lock.put_queue( - ActionDeactivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": message, - "tokens": total_tokens, - }, - ) - ) - ) + ActionDeactivateAgentData(data={ + "agent_name": self.agent_name, + "process_task_id": self.process_task_id, + "agent_id": self.agent_id, + "message": message, + "tokens": total_tokens, + }, ))) if error_info is not None: raise error_info @@ -336,17 +319,16 @@ async def astep( ActionActivateAgentData( action=Action.activate_agent, data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": ( - input_message.content - if isinstance(input_message, BaseMessage) - else input_message - ), + "agent_name": + self.agent_name, + "process_task_id": + self.process_task_id, + "agent_id": + self.agent_id, + "message": (input_message.content if isinstance( + input_message, BaseMessage) else input_message), }, - ) - ) + )) error_info = None message = None @@ -364,13 +346,14 @@ async def astep( error_info = e if "Budget has been exceeded" in str(e): message = "Budget has been exceeded" - traceroot_logger.warning(f"Agent {self.agent_name} budget exceeded") - 
asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough())) + traceroot_logger.warning( + f"Agent {self.agent_name} budget exceeded") + asyncio.create_task( + task_lock.put_queue(ActionBudgetNotEnough())) else: message = str(e) traceroot_logger.error( - f"Agent {self.agent_name} model processing error: {e}" - ) + f"Agent {self.agent_name} model processing error: {e}") total_tokens = 0 except Exception as e: res = None @@ -393,17 +376,13 @@ async def astep( asyncio.create_task( task_lock.put_queue( - ActionDeactivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": message, - "tokens": total_tokens, - }, - ) - ) - ) + ActionDeactivateAgentData(data={ + "agent_name": self.agent_name, + "process_task_id": self.process_task_id, + "agent_id": self.agent_id, + "message": message, + "tokens": total_tokens, + }, ))) if error_info is not None: raise error_info @@ -411,7 +390,8 @@ async def astep( return res @traceroot.trace() - def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord: + def _execute_tool(self, + tool_call_request: ToolCallRequest) -> ToolCallingRecord: func_name = tool_call_request.tool_name tool: FunctionTool = self._internal_tools[func_name] # Route async functions to async execution even if they have __wrapped__ @@ -432,11 +412,8 @@ def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord try: task_lock = get_task_lock(self.api_task_id) - toolkit_name = ( - getattr(tool, "_toolkit_name") - if hasattr(tool, "_toolkit_name") - else "mcp_toolkit" - ) + toolkit_name = (getattr(tool, "_toolkit_name") if hasattr( + tool, "_toolkit_name") else "mcp_toolkit") traceroot_logger.debug( f"Agent {self.agent_name} executing tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}" ) @@ -445,17 +422,18 @@ def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord if not has_listen_decorator: asyncio.create_task( task_lock.put_queue( - ActionActivateToolkitData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "toolkit_name": toolkit_name, - "method_name": func_name, - "message": json.dumps(args, ensure_ascii=False), - }, - ) - ) - ) + ActionActivateToolkitData(data={ + "agent_name": + self.agent_name, + "process_task_id": + self.process_task_id, + "toolkit_name": + toolkit_name, + "method_name": + func_name, + "message": + json.dumps(args, ensure_ascii=False), + }, ))) # Set process_task context for all tool executions with set_process_task(self.process_task_id): raw_result = tool(**args) @@ -464,8 +442,7 @@ def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord self._secure_result_store[tool_call_id] = raw_result result = ( "[The tool has been executed successfully, but the output" - " from the tool is masked. You can move forward]" - ) + " from the tool is masked. You can move forward]") mask_flag = True else: result = raw_result @@ -478,8 +455,8 @@ def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord MAX_RESULT_LENGTH = 500 if len(result_str) > MAX_RESULT_LENGTH: result_msg = ( - result_str[:MAX_RESULT_LENGTH] - + f"... (truncated, total length: {len(result_str)} chars)" + result_str[:MAX_RESULT_LENGTH] + + f"... 
(truncated, total length: {len(result_str)} chars)" ) else: result_msg = result_str @@ -488,25 +465,20 @@ def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord if not has_listen_decorator: asyncio.create_task( task_lock.put_queue( - ActionDeactivateToolkitData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "toolkit_name": toolkit_name, - "method_name": func_name, - "message": result_msg, - }, - ) - ) - ) + ActionDeactivateToolkitData(data={ + "agent_name": self.agent_name, + "process_task_id": self.process_task_id, + "toolkit_name": toolkit_name, + "method_name": func_name, + "message": result_msg, + }, ))) except Exception as e: # Capture the error message to prevent framework crash error_msg = f"Error executing tool '{func_name}': {e!s}" result = f"Tool execution failed: {error_msg}" mask_flag = False traceroot_logger.error( - f"Tool execution failed for {func_name}: {e}", exc_info=True - ) + f"Tool execution failed for {func_name}: {e}", exc_info=True) return self._record_tool_calling( func_name, @@ -519,8 +491,7 @@ def _execute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord @traceroot.trace() async def _aexecute_tool( - self, tool_call_request: ToolCallRequest - ) -> ToolCallingRecord: + self, tool_call_request: ToolCallRequest) -> ToolCallingRecord: func_name = tool_call_request.tool_name tool: FunctionTool = self._internal_tools[func_name] @@ -537,24 +508,20 @@ async def _aexecute_tool( toolkit_name = tool._toolkit_name # Method 2: For MCP tools, check if func has __self__ (the toolkit instance) - if ( - not toolkit_name - and hasattr(tool, "func") - and hasattr(tool.func, "__self__") - ): + if (not toolkit_name and hasattr(tool, "func") + and hasattr(tool.func, "__self__")): toolkit_instance = tool.func.__self__ if hasattr(toolkit_instance, "toolkit_name") and callable( - toolkit_instance.toolkit_name - ): + toolkit_instance.toolkit_name): toolkit_name = toolkit_instance.toolkit_name() # Method 3: Check if tool.func is a bound method with toolkit if not toolkit_name and hasattr(tool, "func"): - if hasattr(tool.func, "func") and hasattr(tool.func.func, "__self__"): + if hasattr(tool.func, "func") and hasattr(tool.func.func, + "__self__"): toolkit_instance = tool.func.func.__self__ if hasattr(toolkit_instance, "toolkit_name") and callable( - toolkit_instance.toolkit_name - ): + toolkit_instance.toolkit_name): toolkit_name = toolkit_instance.toolkit_name() # Default fallback @@ -572,16 +539,18 @@ async def _aexecute_tool( # Only send activate event if tool is NOT wrapped by @listen_toolkit if not has_listen_decorator: await task_lock.put_queue( - ActionActivateToolkitData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "toolkit_name": toolkit_name, - "method_name": func_name, - "message": json.dumps(args, ensure_ascii=False), - }, - ) - ) + ActionActivateToolkitData(data={ + "agent_name": + self.agent_name, + "process_task_id": + self.process_task_id, + "toolkit_name": + toolkit_name, + "method_name": + func_name, + "message": + json.dumps(args, ensure_ascii=False), + }, )) try: # Set process_task context for all tool executions with set_process_task(self.process_task_id): @@ -611,7 +580,8 @@ async def _aexecute_tool( # Async tool: use async_call result = await tool.async_call(**args) - elif hasattr(tool, "func") and asyncio.iscoroutinefunction(tool.func): + elif hasattr(tool, "func") and asyncio.iscoroutinefunction( + tool.func): # Case: tool wraps a direct 
async function result = await tool.func(**args) @@ -632,8 +602,8 @@ async def _aexecute_tool( error_msg = f"Error executing async tool '{func_name}': {e!s}" result = {"error": error_msg} traceroot_logger.error( - f"Async tool execution failed for {func_name}: {e}", exc_info=True - ) + f"Async tool execution failed for {func_name}: {e}", + exc_info=True) # Prepare result message with truncation if isinstance(result, str): @@ -643,25 +613,21 @@ async def _aexecute_tool( MAX_RESULT_LENGTH = 500 if len(result_str) > MAX_RESULT_LENGTH: result_msg = ( - result_str[:MAX_RESULT_LENGTH] - + f"... (truncated, total length: {len(result_str)} chars)" - ) + result_str[:MAX_RESULT_LENGTH] + + f"... (truncated, total length: {len(result_str)} chars)") else: result_msg = result_str # Only send deactivate event if tool is NOT wrapped by @listen_toolkit if not has_listen_decorator: await task_lock.put_queue( - ActionDeactivateToolkitData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "toolkit_name": toolkit_name, - "method_name": func_name, - "message": result_msg, - }, - ) - ) + ActionDeactivateToolkitData(data={ + "agent_name": self.agent_name, + "process_task_id": self.process_task_id, + "toolkit_name": toolkit_name, + "method_name": func_name, + "message": result_msg, + }, )) return self._record_tool_calling( func_name, args, @@ -685,13 +651,17 @@ def clone(self, with_memory: bool = False) -> ChatAgent: model=self.model_backend.models, # Pass the existing model_backend memory=None, # clone memory later message_window_size=getattr(self.memory, "window_size", None), - token_limit=getattr(self.memory.get_context_creator(), "token_limit", None), + token_limit=getattr(self.memory.get_context_creator(), + "token_limit", None), output_language=self._output_language, tools=cloned_tools, toolkits_to_register_agent=toolkits_to_register, - external_tools=[schema for schema in self._external_tool_schemas.values()], + external_tools=[ + schema for schema in self._external_tool_schemas.values() + ], response_terminators=self.response_terminators, - scheduling_strategy=self.model_backend.scheduling_strategy.__name__, + scheduling_strategy=self.model_backend.scheduling_strategy. 
+            __name__,
             max_iteration=self.max_iteration,
             stop_event=self.stop_event,
             tool_execution_timeout=self.tool_execution_timeout,
@@ -740,10 +710,7 @@ def agent_model(
                     "agent_name": agent_name,
                     "agent_id": agent_id,
                     "tools": tool_names or [],
-                }
-            )
-        )
-    )
+                })))

     # Build model config, defaulting to streaming for planner
     extra_params = options.extra_params or {}
@@ -783,13 +750,14 @@ def agent_model(
             model_config["stream"] = True
     if agent_name == Agents.browser_agent:
         try:
-            model_platform_enum = ModelPlatformType(options.model_platform.lower())
+            model_platform_enum = ModelPlatformType(
+                options.model_platform.lower())
             if model_platform_enum in {
-                ModelPlatformType.OPENAI,
-                ModelPlatformType.AZURE,
-                ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
-                ModelPlatformType.LITELLM,
-                ModelPlatformType.OPENROUTER,
+                    ModelPlatformType.OPENAI,
+                    ModelPlatformType.AZURE,
+                    ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
+                    ModelPlatformType.LITELLM,
+                    ModelPlatformType.OPENROUTER,
             }:
                 model_config["parallel_tool_calls"] = False
         except (ValueError, AttributeError):
@@ -799,10 +767,10 @@
             )
             model_platform_enum = None

     model = ModelFactory.create(
         model_platform=options.model_platform,
         model_type=options.model_type,
         api_key=options.api_key,
         url=options.api_url,
         model_config_dict=model_config or None,
         timeout=600,  # 10 minutes
@@ -849,9 +822,7 @@ async def developer_agent(options: Chat):
     )
     message_integration = ToolkitMessageIntegration(
         message_handler=HumanToolkit(
-            options.project_id, Agents.developer_agent
-        ).send_message_to_user
-    )
+            options.project_id, Agents.developer_agent).send_message_to_user)
     note_toolkit = NoteTakingToolkit(
         api_task_id=options.project_id,
         agent_name=Agents.developer_agent,
@@ -859,11 +830,12 @@ async def developer_agent(options: Chat):
     )
     note_toolkit = message_integration.register_toolkits(note_toolkit)
     web_deploy_toolkit = WebDeployToolkit(api_task_id=options.project_id)
-    web_deploy_toolkit = message_integration.register_toolkits(web_deploy_toolkit)
-    screenshot_toolkit = ScreenshotToolkit(
-        options.project_id, working_directory=working_directory
-    )
-    screenshot_toolkit = message_integration.register_toolkits(screenshot_toolkit)
+    web_deploy_toolkit = message_integration.register_toolkits(
+        web_deploy_toolkit)
+    screenshot_toolkit = ScreenshotToolkit(options.project_id,
+                                           working_directory=working_directory)
+    screenshot_toolkit = message_integration.register_toolkits(
+        screenshot_toolkit)

     terminal_toolkit = TerminalToolkit(
         options.project_id,
@@ -875,7 +847,8 @@ async def developer_agent(options: Chat):
     terminal_toolkit = message_integration.register_toolkits(terminal_toolkit)

     tools = [
-        *HumanToolkit.get_can_use_tools(options.project_id, Agents.developer_agent),
+        *HumanToolkit.get_can_use_tools(options.project_id,
+                                        Agents.developer_agent),
         *note_toolkit.get_tools(),
         *web_deploy_toolkit.get_tools(),
         *terminal_toolkit.get_tools(),
@@ -1041,9 +1014,7 @@ def browser_agent(options: Chat):

     message_integration = ToolkitMessageIntegration(
         message_handler=HumanToolkit(
-            options.project_id, Agents.browser_agent
-        ).send_message_to_user
-    )
+            options.project_id, Agents.browser_agent).send_message_to_user)

     web_toolkit_custom = HybridBrowserToolkit(
         options.project_id,
@@ -1073,7 +1044,8 @@
def browser_agent(options: Chat): # Save reference before registering for toolkits_to_register_agent web_toolkit_for_agent_registration = web_toolkit_custom - web_toolkit_custom = message_integration.register_toolkits(web_toolkit_custom) + web_toolkit_custom = message_integration.register_toolkits( + web_toolkit_custom) terminal_toolkit = TerminalToolkit( options.project_id, @@ -1083,12 +1055,11 @@ def browser_agent(options: Chat): clone_current_env=True, ) terminal_toolkit = message_integration.register_functions( - [terminal_toolkit.shell_exec] - ) + [terminal_toolkit.shell_exec]) - note_toolkit = NoteTakingToolkit( - options.project_id, Agents.browser_agent, working_directory=working_directory - ) + note_toolkit = NoteTakingToolkit(options.project_id, + Agents.browser_agent, + working_directory=working_directory) note_toolkit = message_integration.register_toolkits(note_toolkit) search_tools = SearchToolkit.get_can_use_tools(options.project_id) @@ -1098,7 +1069,8 @@ def browser_agent(options: Chat): search_tools = [] tools = [ - *HumanToolkit.get_can_use_tools(options.project_id, Agents.browser_agent), + *HumanToolkit.get_can_use_tools(options.project_id, + Agents.browser_agent), *web_toolkit_custom.get_tools(), *terminal_toolkit, *note_toolkit.get_tools(), @@ -1246,24 +1218,22 @@ async def document_agent(options: Chat): ) message_integration = ToolkitMessageIntegration( - message_handler=HumanToolkit( - options.project_id, Agents.task_agent - ).send_message_to_user - ) - file_write_toolkit = FileToolkit( - options.project_id, working_directory=working_directory - ) - pptx_toolkit = PPTXToolkit(options.project_id, working_directory=working_directory) + message_handler=HumanToolkit(options.project_id, + Agents.task_agent).send_message_to_user) + file_write_toolkit = FileToolkit(options.project_id, + working_directory=working_directory) + pptx_toolkit = PPTXToolkit(options.project_id, + working_directory=working_directory) pptx_toolkit = message_integration.register_toolkits(pptx_toolkit) mark_it_down_toolkit = MarkItDownToolkit(options.project_id) - mark_it_down_toolkit = message_integration.register_toolkits(mark_it_down_toolkit) - excel_toolkit = ExcelToolkit( - options.project_id, working_directory=working_directory - ) + mark_it_down_toolkit = message_integration.register_toolkits( + mark_it_down_toolkit) + excel_toolkit = ExcelToolkit(options.project_id, + working_directory=working_directory) excel_toolkit = message_integration.register_toolkits(excel_toolkit) - note_toolkit = NoteTakingToolkit( - options.project_id, Agents.document_agent, working_directory=working_directory - ) + note_toolkit = NoteTakingToolkit(options.project_id, + Agents.document_agent, + working_directory=working_directory) note_toolkit = message_integration.register_toolkits(note_toolkit) terminal_toolkit = TerminalToolkit( @@ -1276,13 +1246,13 @@ async def document_agent(options: Chat): terminal_toolkit = message_integration.register_toolkits(terminal_toolkit) google_drive_tools = await GoogleDriveMCPToolkit.get_can_use_tools( - options.project_id, options.get_bun_env() - ) + options.project_id, options.get_bun_env()) tools = [ *file_write_toolkit.get_tools(), *pptx_toolkit.get_tools(), - *HumanToolkit.get_can_use_tools(options.project_id, Agents.document_agent), + *HumanToolkit.get_can_use_tools(options.project_id, + Agents.document_agent), *mark_it_down_toolkit.get_tools(), *excel_toolkit.get_tools(), *note_toolkit.get_tools(), @@ -1476,19 +1446,14 @@ def multi_modal_agent(options: Chat): 
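# --- Illustrative sketch (not part of the patch): the wrapping idiom
# repeated across these hunks, shown once in isolation.
# ToolkitMessageIntegration rebinds a toolkit's tools so each call also
# surfaces a progress message to the user. A hedged sketch using the camel
# APIs this patch already imports; `project_id`, `agent_name`, and
# `working_directory` are assumed to be in scope.
message_integration = ToolkitMessageIntegration(
    message_handler=HumanToolkit(project_id,
                                 agent_name).send_message_to_user)
note_toolkit = NoteTakingToolkit(api_task_id=project_id,
                                 agent_name=agent_name,
                                 working_directory=working_directory)
# register_toolkits returns the wrapped instance, so the name must be
# rebound or get_tools() would yield the message-unaware originals.
note_toolkit = message_integration.register_toolkits(note_toolkit)
tools = [*note_toolkit.get_tools()]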
message_integration = ToolkitMessageIntegration( message_handler=HumanToolkit( - options.project_id, Agents.multi_modal_agent - ).send_message_to_user - ) + options.project_id, Agents.multi_modal_agent).send_message_to_user) video_download_toolkit = VideoDownloaderToolkit( - options.project_id, working_directory=working_directory - ) + options.project_id, working_directory=working_directory) video_download_toolkit = message_integration.register_toolkits( - video_download_toolkit - ) + video_download_toolkit) image_analysis_toolkit = ImageAnalysisToolkit(options.project_id) image_analysis_toolkit = message_integration.register_toolkits( - image_analysis_toolkit - ) + image_analysis_toolkit) terminal_toolkit = TerminalToolkit( options.project_id, @@ -1508,7 +1473,8 @@ def multi_modal_agent(options: Chat): tools = [ *video_download_toolkit.get_tools(), *image_analysis_toolkit.get_tools(), - *HumanToolkit.get_can_use_tools(options.project_id, Agents.multi_modal_agent), + *HumanToolkit.get_can_use_tools(options.project_id, + Agents.multi_modal_agent), *terminal_toolkit.get_tools(), *note_toolkit.get_tools(), ] @@ -1524,8 +1490,7 @@ def multi_modal_agent(options: Chat): url=options.api_url, ) open_ai_image_toolkit = message_integration.register_toolkits( - open_ai_image_toolkit - ) + open_ai_image_toolkit) tools = [ *tools, *open_ai_image_toolkit.get_tools(), @@ -1546,8 +1511,7 @@ def multi_modal_agent(options: Chat): ), ) audio_analysis_toolkit = message_integration.register_toolkits( - audio_analysis_toolkit - ) + audio_analysis_toolkit) tools.extend(audio_analysis_toolkit.get_tools()) # if env("EXA_API_KEY") or options.is_cloud(): @@ -1680,11 +1644,11 @@ async def social_medium_agent(options: Chat): *RedditToolkit.get_can_use_tools(options.project_id), *await NotionMCPToolkit.get_can_use_tools(options.project_id), # *SlackToolkit.get_can_use_tools(options.project_id), - *await GoogleGmailMCPToolkit.get_can_use_tools( - options.project_id, options.get_bun_env() - ), + *await GoogleGmailMCPToolkit.get_can_use_tools(options.project_id, + options.get_bun_env()), *GoogleCalendarToolkit.get_can_use_tools(options.project_id), - *HumanToolkit.get_can_use_tools(options.project_id, Agents.social_medium_agent), + *HumanToolkit.get_can_use_tools(options.project_id, + Agents.social_medium_agent), *TerminalToolkit( options.project_id, agent_name=Agents.social_medium_agent, @@ -1809,14 +1773,9 @@ async def mcp_agent(options: Chat): f"Retrieved {len(mcp_tools)} MCP tools for task {options.project_id}" ) if mcp_tools: - tool_names = [ - ( - tool.get_function_name() - if hasattr(tool, "get_function_name") - else str(tool) - ) - for tool in mcp_tools - ] + tool_names = [(tool.get_function_name() if hasattr( + tool, "get_function_name") else str(tool)) + for tool in mcp_tools] traceroot_logger.debug(f"MCP tools: {tool_names}") tools = [*tools, *mcp_tools] except Exception as e: @@ -1831,31 +1790,28 @@ async def mcp_agent(options: Chat): task_lock.put_queue( ActionCreateAgentData( data={ - "agent_name": Agents.mcp_agent, - "agent_id": agent_id, + "agent_name": + Agents.mcp_agent, + "agent_id": + agent_id, "tools": [ - key for key in options.installed_mcp["mcpServers"].keys() + key + for key in options.installed_mcp["mcpServers"].keys() ], - } - ) - ) - ) + }))) return ListenChatAgent( options.project_id, Agents.mcp_agent, - system_message="You are a helpful assistant that can help users search mcp servers. 
The found mcp services will be returned to the user, and you will ask the user via ask_human_via_gui whether they want to install these mcp services.", + system_message= + "You are a helpful assistant that can help users search mcp servers. The found mcp services will be returned to the user, and you will ask the user via ask_human_via_gui whether they want to install these mcp services.", model=ModelFactory.create( model_platform=options.model_platform, model_type=options.model_type, api_key=options.api_key, url=options.api_url, - model_config_dict=( - { - "user": str(options.project_id), - } - if options.is_cloud() - else None - ), + model_config_dict=({ + "user": str(options.project_id), + } if options.is_cloud() else None), timeout=600, # 10 minutes **{ k: v @@ -1904,22 +1860,20 @@ async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str): toolkit: AbstractToolkit = toolkits[item] toolkit.agent_name = agent_name toolkit_tools = toolkit.get_can_use_tools(api_task_id) - toolkit_tools = ( - await toolkit_tools - if asyncio.iscoroutine(toolkit_tools) - else toolkit_tools - ) + toolkit_tools = (await toolkit_tools + if asyncio.iscoroutine(toolkit_tools) else + toolkit_tools) res.extend(toolkit_tools) else: - traceroot_logger.warning(f"Toolkit {item} not found for agent {agent_name}") + traceroot_logger.warning( + f"Toolkit {item} not found for agent {agent_name}") return res @traceroot.trace() async def get_mcp_tools(mcp_server: McpServers): traceroot_logger.info( - f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers" - ) + f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers") if len(mcp_server["mcpServers"]) == 0: return [] @@ -1931,8 +1885,7 @@ async def get_mcp_tools(mcp_server: McpServers): # Set global auth directory to persist authentication across tasks if "MCP_REMOTE_CONFIG_DIR" not in server_config["env"]: server_config["env"]["MCP_REMOTE_CONFIG_DIR"] = env( - "MCP_REMOTE_CONFIG_DIR", os.path.expanduser("~/.mcp-auth") - ) + "MCP_REMOTE_CONFIG_DIR", os.path.expanduser("~/.mcp-auth")) mcp_toolkit = None try: @@ -1944,19 +1897,14 @@ async def get_mcp_tools(mcp_server: McpServers): ) tools = mcp_toolkit.get_tools() if tools: - tool_names = [ - ( - tool.get_function_name() - if hasattr(tool, "get_function_name") - else str(tool) - ) - for tool in tools - ] + tool_names = [(tool.get_function_name() if hasattr( + tool, "get_function_name") else str(tool)) for tool in tools] traceroot_logger.debug(f"MCP tool names: {tool_names}") return tools except asyncio.CancelledError: traceroot_logger.info("MCP connection cancelled during get_mcp_tools") return [] except Exception as e: - traceroot_logger.error(f"Failed to connect MCP toolkit: {e}", exc_info=True) + traceroot_logger.error(f"Failed to connect MCP toolkit: {e}", + exc_info=True) return [] diff --git a/utils/traceroot_wrapper.py b/utils/traceroot_wrapper.py index 5ad9c8e32..3b7c5c258 100644 --- a/utils/traceroot_wrapper.py +++ b/utils/traceroot_wrapper.py @@ -33,7 +33,8 @@ def _get_module_name(): load_dotenv(env_path) -if TRACEROOT_AVAILABLE and traceroot.init(): +# DISABLED: Do not initialize traceroot to avoid OpenTelemetry connection errors +if False: # TRACEROOT_AVAILABLE and traceroot.init(): from traceroot.logger import get_logger as _get_traceroot_logger trace = traceroot.trace From da4a0a4fb07c27be0c4f2620d5e888b72b548c98 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Wed, 21 Jan 2026 17:57:27 -0800 Subject: [PATCH 02/18] update --- backend/app/controller/chat_controller.py | 6 +----- 
backend/app/utils/agent.py | 7 +------ 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index 83756df81..18901f224 100644 --- a/backend/app/controller/chat_controller.py +++ b/backend/app/controller/chat_controller.py @@ -137,11 +137,7 @@ async def post(data: Chat, request: Request): os.environ["file_save_path"] = data.file_save_path() os.environ["browser_port"] = str(data.browser_port) - # TEMPORARY: Force fake API key to test error handling - fake_key = "sk-fake-invalid-key-for-testing-12345" - chat_logger.error(f"🔧 TESTING: Using fake API key: {fake_key}") - os.environ[ - "OPENAI_API_KEY"] = fake_key # TODO: Change back to data.api_key + os.environ["OPENAI_API_KEY"] = data.api_key os.environ[ "OPENAI_API_BASE_URL"] = data.api_url or "https://api.openai.com/v1" os.environ["CAMEL_MODEL_LOG_ENABLED"] = "true" diff --git a/backend/app/utils/agent.py b/backend/app/utils/agent.py index 39af318c8..ac3e8753d 100644 --- a/backend/app/utils/agent.py +++ b/backend/app/utils/agent.py @@ -767,15 +767,10 @@ def agent_model( ) model_platform_enum = None - # TEMPORARY: Force fake API key to test error handling - fake_api_key = "sk-fake-invalid-key-for-testing-12345" # TODO: Change back to options.api_key - traceroot_logger.error( - f"🔧 TESTING: Agent '{agent_name}' using fake API key: {fake_api_key}") - model = ModelFactory.create( model_platform=options.model_platform, model_type=options.model_type, - api_key=fake_api_key, + api_key=options.api_key, url=options.api_url, model_config_dict=model_config or None, timeout=600, # 10 minutes From dc1974d932bfa15fd24c332219775222b7775fa9 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Wed, 21 Jan 2026 18:18:54 -0800 Subject: [PATCH 03/18] update --- .github/workflows/test.yml | 41 +++++ backend/app/service/chat_service.py | 48 +++--- backend/app/service/error_handler.py | 36 +++-- .../tests/app/service/test_chat_service.py | 117 ++++++++++++++ .../tests/app/service/test_error_handler.py | 144 ++++++++++++++++++ 5 files changed, 358 insertions(+), 28 deletions(-) create mode 100644 .github/workflows/test.yml create mode 100644 backend/tests/app/service/test_chat_service.py create mode 100644 backend/tests/app/service/test_error_handler.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..698a268f0 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,41 @@ +name: Test + +'on': + pull_request: + branches: + - main + push: + branches: + - main + +jobs: + pytest: + name: Run Python Tests + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Python + run: uv python install 3.10 + + - name: Install dependencies + run: | + cd backend + uv sync + + - name: Run error_handler tests + run: | + cd backend + PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_error_handler.py -v + + - name: Run chat_service tests + run: | + cd backend + PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_chat_service.py -v diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 146052621..038159c92 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -6,7 +6,8 @@ from typing import Any from app.model.chat import Chat, NewAgent, Status, 
TaskContent, sse_json -from app.service.error_handler import prepare_model_error_response +from app.service.error_handler import (prepare_model_error_response, + should_stop_task) from app.service.task import (Action, ActionDecomposeProgressData, ActionDecomposeTextData, ActionImproveData, ActionInstallMcpData, ActionNewAgent, Agents, @@ -1100,16 +1101,26 @@ def on_stream_text(chunk): summary_task_content = new_summary_content except ModelProcessingError as e: - # Handle model errors (especially invalid API keys) during multi-turn task decomposition - error_payload, _, _ = prepare_model_error_response( + # Handle model errors (especially invalid API keys) + # during multi-turn task decomposition + error_payload, _, error_code = prepare_model_error_response( e, options.project_id, task_id, "multi-turn task decomposition") - # Send error notification to frontend - yield sse_json("error", error_payload) + logger.error(f"Multi-turn error_code: {error_code}") - # Mark task as done (failed state) - task_lock.status = Status.done + # Only send error and stop workforce for critical errors + if should_stop_task(error_code): + # Send error notification to frontend + yield sse_json("error", error_payload) + + # Stop workforce if running + if "workforce" in locals( + ) and workforce is not None and workforce._running: + workforce.stop() + + # Mark task as done (failed state) + task_lock.status = Status.done except Exception as e: import traceback logger.error( @@ -1390,20 +1401,23 @@ def on_stream_text(chunk): f"ModelProcessingError for task {options.task_id}, action {item.action}: {e}", exc_info=True) # Use error formatter to send properly formatted error to frontend - from app.service.error_handler import \ - prepare_model_error_response - error_payload, _, _ = prepare_model_error_response( + error_payload, _, error_code = prepare_model_error_response( e, options.project_id, options.task_id, f"action {item.action}") - yield sse_json("error", error_payload) - # Stop workforce if running - if "workforce" in locals( - ) and workforce is not None and workforce._running: - workforce.stop() + logger.error(f"Main flow error_code: {error_code}") - # Mark task as done - task_lock.status = Status.done + # Only send error and stop workforce for critical errors + if should_stop_task(error_code): + yield sse_json("error", error_payload) + + # Stop workforce if running + if "workforce" in locals( + ) and workforce is not None and workforce._running: + workforce.stop() + + # Mark task as done + task_lock.status = Status.done except Exception as e: logger.error( f"Unhandled exception for task {options.task_id}, action {item.action}: {e}", diff --git a/backend/app/service/error_handler.py b/backend/app/service/error_handler.py index 45154330c..940b1975b 100644 --- a/backend/app/service/error_handler.py +++ b/backend/app/service/error_handler.py @@ -1,19 +1,34 @@ -from camel.models import ModelProcessingError from app.component.error_format import normalize_error_to_openai_format +from camel.models import ModelProcessingError + from utils import traceroot_wrapper as traceroot logger = traceroot.get_logger("error_handler") -def prepare_model_error_response( - error: ModelProcessingError, - project_id: str, - task_id: str, - context: str = "task decomposition" -) -> tuple[dict, str, str | None]: - """Prepare error response for ModelProcessingError. +def should_stop_task(error_code: str | None) -> bool: + """Check if the error code represents a critical error that should stop + the task. 
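# --- Illustrative sketch (not part of the patch): a condensed view of how
# this gate is consumed. The function is hypothetical;
# prepare_model_error_response, should_stop_task, sse_json, and Status all
# come from this patch.
def handle_model_error(e, project_id, task_id, task_lock, context):
    error_payload, _, error_code = prepare_model_error_response(
        e, project_id, task_id, context)
    if should_stop_task(error_code):  # fatal: bad key, quota, missing model
        task_lock.status = Status.done
        return sse_json("error", error_payload)
    return None  # transient codes (e.g. rate limits) let the task continue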
This includes invalid API keys, quota errors, and model not + found errors. + + Args: + error_code: Error code from normalize_error_to_openai_format - This function normalizes the error and prepares the payload for frontend notification. + Returns: + bool: True if this is a critical error that should stop the task + """ + return error_code in ("invalid_api_key", "insufficient_quota", + "model_not_found") + + +def prepare_model_error_response( + error: ModelProcessingError, + project_id: str, + task_id: str, + context: str = "task decomposition") -> tuple[dict, str, str | None]: + """Prepare error response for ModelProcessingError. This function + normalizes the error and prepares the payload for frontend + notification. Args: error: The ModelProcessingError to handle @@ -37,8 +52,7 @@ def prepare_model_error_response( "error_code": error_code, "error": str(error) }, - exc_info=True - ) + exc_info=True) # Prepare error payload error_payload = { diff --git a/backend/tests/app/service/test_chat_service.py b/backend/tests/app/service/test_chat_service.py new file mode 100644 index 000000000..c99efea8a --- /dev/null +++ b/backend/tests/app/service/test_chat_service.py @@ -0,0 +1,117 @@ +from unittest.mock import patch + +from app.service.error_handler import (prepare_model_error_response, + should_stop_task) +from camel.models import ModelProcessingError + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +def test_invalid_api_key_error_should_stop_task(mock_normalize): + """Test that invalid API key error results in task being stopped.""" + # Setup mock to return invalid_api_key error + mock_normalize.return_value = ("Invalid key. Validation failed.", + "invalid_api_key", { + "message": + "Invalid key. Validation failed.", + "type": "invalid_request_error", + "param": None, + "code": "invalid_api_key", + }) + + error = ModelProcessingError("Error code: 401 - unauthorized") + _, _, error_code = prepare_model_error_response(error, "project-id", + "task-id", "test context") + + # Verify this is an invalid API key error + assert should_stop_task(error_code) is True + + # Simulate the chat_service logic + should_stop = should_stop_task(error_code) + assert should_stop is True + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +def test_model_not_found_error_should_stop_task(mock_normalize): + """Test that model_not_found error SHOULD stop task.""" + # Setup mock to return model_not_found error + mock_normalize.return_value = ( + "Invalid model name. Validation failed.", "model_not_found", { + "message": "Invalid model name. 
Validation failed.", + "type": "invalid_request_error", + "param": None, + "code": "model_not_found", + }) + + error = ModelProcessingError("Error code: 404 - model does not exist") + _, _, error_code = prepare_model_error_response(error, "project-id", + "task-id", "test context") + + # Verify this is a critical error that should stop the task + assert should_stop_task(error_code) is True + + # Simulate the chat_service logic + should_stop = should_stop_task(error_code) + assert should_stop is True + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +def test_quota_error_should_stop_task(mock_normalize): + """Test that insufficient_quota error SHOULD stop task.""" + # Setup mock to return insufficient_quota error + mock_normalize.return_value = (( + "You exceeded your current quota, please check " + "your plan and billing details."), "insufficient_quota", { + "message": ("You exceeded your current quota, please " + "check your plan and billing details."), + "type": + "insufficient_quota", + "param": + None, + "code": + "insufficient_quota", + }) + + error = ModelProcessingError("Error code: 429 - quota exceeded") + _, _, error_code = prepare_model_error_response(error, "project-id", + "task-id", "test context") + + # Verify this is a critical error that should stop the task + assert should_stop_task(error_code) is True + + # Simulate the chat_service logic + should_stop = should_stop_task(error_code) + assert should_stop is True + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +def test_unknown_error_should_not_stop_task(mock_normalize): + """Test that unknown/generic errors do NOT stop task.""" + # Setup mock to return no specific error code + mock_normalize.return_value = ("Some generic error message", None, None) + + error = ModelProcessingError("Some generic error") + _, _, error_code = prepare_model_error_response(error, "project-id", + "task-id", "test context") + + # Verify this is NOT an invalid API key error + assert should_stop_task(error_code) is False + + # Simulate the chat_service logic + should_stop = should_stop_task(error_code) + assert should_stop is False + + +def test_task_stop_logic_workflow(): + """Test the complete workflow of error handling and task stopping logic.""" + test_cases = [ + # (error_code, should_stop_task, description) + ("invalid_api_key", True, "Invalid API key should stop task"), + ("insufficient_quota", True, "Insufficient quota should stop task"), + ("model_not_found", True, "Model not found should stop task"), + ("rate_limit_exceeded", False, "Rate limit should not stop task"), + (None, False, "Unknown error should not stop task"), + ] + + for error_code, expected_stop, description in test_cases: + should_stop = should_stop_task(error_code) + assert should_stop == expected_stop, f"Failed: {description}" diff --git a/backend/tests/app/service/test_error_handler.py b/backend/tests/app/service/test_error_handler.py new file mode 100644 index 000000000..45c62c5b8 --- /dev/null +++ b/backend/tests/app/service/test_error_handler.py @@ -0,0 +1,144 @@ +from unittest.mock import patch + +from app.service.error_handler import (prepare_model_error_response, + should_stop_task) +from camel.models import ModelProcessingError + + +def test_should_stop_task_with_invalid_key(): + """Test that should_stop_task returns True for + invalid_api_key error code. 
+ """ + assert should_stop_task("invalid_api_key") is True + + +def test_should_stop_task_with_quota_error(): + """Test that should_stop_task returns True for quota errors.""" + assert should_stop_task("insufficient_quota") is True + + +def test_should_stop_task_with_model_not_found(): + """Test that should_stop_task returns True for model_not_found errors.""" + assert should_stop_task("model_not_found") is True + + +def test_should_stop_task_with_other_error(): + """Test that should_stop_task returns False for non-critical errors.""" + assert should_stop_task("rate_limit_exceeded") is False + assert should_stop_task(None) is False + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +@patch('app.service.error_handler.logger') +def test_prepare_model_error_response_with_invalid_api_key( + mock_logger, mock_normalize): + """Test prepare_model_error_response with invalid API key error.""" + # Setup mock + mock_normalize.return_value = ("Invalid key. Validation failed.", + "invalid_api_key", { + "message": + "Invalid key. Validation failed.", + "type": "invalid_request_error", + "param": None, + "code": "invalid_api_key", + }) + + error = ModelProcessingError("Error code: 401 - unauthorized") + project_id = "test-project" + task_id = "test-task" + + # Call function + error_payload, message, error_code = prepare_model_error_response( + error, project_id, task_id, "test context") + + # Assertions + assert error_code == "invalid_api_key" + assert message == "Invalid key. Validation failed." + assert error_payload["error_code"] == "invalid_api_key" + assert error_payload["message"] == "Invalid key. Validation failed." + + # Verify logger was called + mock_logger.error.assert_called_once() + mock_normalize.assert_called_once_with(error) + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +@patch('app.service.error_handler.logger') +def test_prepare_model_error_response_with_model_not_found( + mock_logger, mock_normalize): + """Test prepare_model_error_response with model_not_found + error (should stop task). + """ + # Setup mock + mock_normalize.return_value = ( + "Invalid model name. Validation failed.", "model_not_found", { + "message": "Invalid model name. Validation failed.", + "type": "invalid_request_error", + "param": None, + "code": "model_not_found", + }) + + error = ModelProcessingError("Error code: 404 - model does not exist") + project_id = "test-project" + task_id = "test-task" + + # Call function + error_payload, message, error_code = prepare_model_error_response( + error, project_id, task_id, "test context") + + # Assertions + assert error_code == "model_not_found" + assert message == "Invalid model name. Validation failed." + assert error_payload["error_code"] == "model_not_found" + assert error_payload["message"] == "Invalid model name. Validation failed." + + # Verify this SHOULD stop the task + # (checked by caller using should_stop_task) + assert should_stop_task(error_code) is True + + # Verify logger was called + mock_logger.error.assert_called_once() + mock_normalize.assert_called_once_with(error) + + +@patch('app.service.error_handler.normalize_error_to_openai_format') +@patch('app.service.error_handler.logger') +def test_prepare_model_error_response_with_quota_error(mock_logger, + mock_normalize): + """Test prepare_model_error_response with + insufficient_quota error (should stop task). 
+ """ + # Setup mock + mock_normalize.return_value = (( + "You exceeded your current quota, please " + "check your plan and billing details."), "insufficient_quota", { + "message": ("You exceeded your current quota, please " + "check your plan and billing details."), + "type": + "insufficient_quota", + "param": + None, + "code": + "insufficient_quota", + }) + + error = ModelProcessingError("Error code: 429 - quota exceeded") + project_id = "test-project" + task_id = "test-task" + + # Call function + error_payload, message, error_code = prepare_model_error_response( + error, project_id, task_id, "test context") + + # Assertions + assert error_code == "insufficient_quota" + assert message == ("You exceeded your current quota, " + "please check your plan and billing details.") + + # Verify this SHOULD stop the task + # (checked by caller using should_stop_task) + assert should_stop_task(error_code) is True + + # Verify logger was called + mock_logger.error.assert_called_once() + mock_normalize.assert_called_once_with(error) From 239b4462442c19ad0bea03809bcd60f45944fffa Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Wed, 21 Jan 2026 18:24:25 -0800 Subject: [PATCH 04/18] update --- .env.development | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.env.development b/.env.development index f9436b1bf..ce47a762c 100644 --- a/.env.development +++ b/.env.development @@ -1,10 +1,10 @@ VITE_BASE_URL=/api -# VITE_PROXY_URL=https://dev.eigent.ai -# VITE_USE_LOCAL_PROXY=false +VITE_PROXY_URL=https://dev.eigent.ai +VITE_USE_LOCAL_PROXY=false -VITE_PROXY_URL=http://localhost:3001 -VITE_USE_LOCAL_PROXY=true +# VITE_PROXY_URL=http://localhost:3001 +# VITE_USE_LOCAL_PROXY=true TRACEROOT_TOKEN=your_traceroot_token_here @@ -27,6 +27,3 @@ TRACEROOT_ENABLE_LOG_CONSOLE_EXPORT=false TRACEROOT_TRACER_VERBOSE=false TRACEROOT_LOGGER_VERBOSE=false - -# Disable OpenTelemetry SDK completely -OTEL_SDK_DISABLED=true \ No newline at end of file From 6ff0fe9524da84decc2f00e2b0e00ad5efe39815 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Wed, 21 Jan 2026 18:27:57 -0800 Subject: [PATCH 05/18] update --- backend/tests/app/service/test_error_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/tests/app/service/test_error_handler.py b/backend/tests/app/service/test_error_handler.py index 45c62c5b8..e252260a7 100644 --- a/backend/tests/app/service/test_error_handler.py +++ b/backend/tests/app/service/test_error_handler.py @@ -127,7 +127,7 @@ def test_prepare_model_error_response_with_quota_error(mock_logger, task_id = "test-task" # Call function - error_payload, message, error_code = prepare_model_error_response( + _, message, error_code = prepare_model_error_response( error, project_id, task_id, "test context") # Assertions From e0d034e83e24ffc4c087dbd0da6d2c992bdd215d Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 22 Jan 2026 10:28:24 -0800 Subject: [PATCH 06/18] update --- .github/workflows/test.yml | 4 ++-- ...st_chat_service.py => test_chat_service_error_handling.py} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename backend/tests/app/service/{test_chat_service.py => test_chat_service_error_handling.py} (100%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 698a268f0..bfddd1b94 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -35,7 +35,7 @@ jobs: cd backend PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_error_handler.py -v - - name: Run chat_service 
tests + - name: Run chat_service_error_handling tests run: | cd backend - PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_chat_service.py -v + PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_chat_service_error_handling.py -v diff --git a/backend/tests/app/service/test_chat_service.py b/backend/tests/app/service/test_chat_service_error_handling.py similarity index 100% rename from backend/tests/app/service/test_chat_service.py rename to backend/tests/app/service/test_chat_service_error_handling.py From c2dd9658fcdde57db2b5193513b23d9e145f0320 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 22 Jan 2026 13:27:24 -0800 Subject: [PATCH 07/18] update --- .github/workflows/test.yml | 8 +- backend/app/component/model_validation.py | 18 +- backend/app/controller/chat_controller.py | 3 + backend/app/service/chat_service.py | 96 +++--- backend/app/service/error_handler.py | 64 ---- backend/app/service/task.py | 293 +++++++++++++----- backend/app/utils/agent.py | 2 +- .../test_chat_service_error_handling.py | 117 ------- .../tests/app/service/test_error_handler.py | 144 --------- .../tests/app/service/test_task_validate.py | 161 ++++++++++ 10 files changed, 437 insertions(+), 469 deletions(-) delete mode 100644 backend/app/service/error_handler.py delete mode 100644 backend/tests/app/service/test_chat_service_error_handling.py delete mode 100644 backend/tests/app/service/test_error_handler.py create mode 100644 backend/tests/app/service/test_task_validate.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bfddd1b94..2513545d4 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,12 +30,12 @@ jobs: cd backend uv sync - - name: Run error_handler tests + - name: Run model validation tests run: | cd backend - PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_error_handler.py -v + PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_model_validation.py -v || echo "No validation tests yet" - - name: Run chat_service_error_handling tests + - name: Run task validation tests run: | cd backend - PYTHONPATH=/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_chat_service_error_handling.py -v + PYTHONPATH=/home/runner/work/eigent/eigent/backend:/home/runner/work/eigent/eigent:$PYTHONPATH uv run pytest tests/app/service/test_task_validate.py -v diff --git a/backend/app/component/model_validation.py b/backend/app/component/model_validation.py index c0b52ea6c..bc8feb799 100644 --- a/backend/app/component/model_validation.py +++ b/backend/app/component/model_validation.py @@ -1,6 +1,5 @@ from camel.agents import ChatAgent from camel.models import ModelFactory -from camel.types import ModelPlatformType, ModelType def get_website_content(url: str) -> str: @@ -12,12 +11,17 @@ def get_website_content(url: str) -> str: Returns: str: The content of the website. """ - return f"Tool execution completed successfully for https://www.camel-ai.org, Website Content: Welcome to CAMEL AI!" 
+ return ("Tool execution completed successfully for " + "https://www.camel-ai.org, " + "Website Content: Welcome to CAMEL AI!") -def create_agent( - model_platform: str, model_type: str, api_key: str = None, url: str = None, model_config_dict: dict = None, **kwargs -) -> ChatAgent: +def create_agent(model_platform: str, + model_type: str, + api_key: str = None, + url: str = None, + model_config_dict: dict = None, + **kwargs) -> ChatAgent: platform = model_platform mtype = model_type if mtype is None: @@ -34,7 +38,9 @@ def create_agent( **kwargs, ) agent = ChatAgent( - system_message="You are a helpful assistant that must use the tool get_website_content to get the content of a website.", + system_message=("You are a helpful assistant that must use " + "the tool get_website_content to get the content " + "of a website."), model=model, tools=[get_website_content], step_timeout=1800, # 30 minutes diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index 18901f224..f2700f755 100644 --- a/backend/app/controller/chat_controller.py +++ b/backend/app/controller/chat_controller.py @@ -137,6 +137,9 @@ async def post(data: Chat, request: Request): os.environ["file_save_path"] = data.file_save_path() os.environ["browser_port"] = str(data.browser_port) + + # Override API key with fake key for testing + data.api_key = "sk-fake-invalid-key-for-testing" os.environ["OPENAI_API_KEY"] = data.api_key os.environ[ "OPENAI_API_BASE_URL"] = data.api_url or "https://api.openai.com/v1" diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index b2fa6d187..a6c38b9ff 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -6,12 +6,11 @@ from typing import Any from app.model.chat import Chat, NewAgent, Status, TaskContent, sse_json -from app.service.error_handler import (prepare_model_error_response, - should_stop_task) from app.service.task import (Action, ActionDecomposeProgressData, ActionDecomposeTextData, ActionImproveData, ActionInstallMcpData, ActionNewAgent, Agents, - TaskLock, delete_task_lock, set_current_task_id) + TaskLock, delete_task_lock, set_current_task_id, + validate_model_before_task) from app.utils.agent import (ListenChatAgent, agent_model, browser_agent, developer_agent, document_agent, get_mcp_tools, get_toolkits, mcp_agent, multi_modal_agent, @@ -255,8 +254,33 @@ def build_context_for_workforce(task_lock: TaskLock, options: Chat) -> str: @sync_step @traceroot.trace() async def step_solve(options: Chat, request: Request, task_lock: TaskLock): - start_event_loop = True + """Main task execution loop. Called when POST /chat endpoint + is hit to start a new chat session. + + Validates model configuration, processes task queue, manages + workforce lifecycle, and streams responses back to the client + via SSE. + + Args: + options (Chat): Chat configuration containing task details and + model settings. + request (Request): FastAPI request object for client connection + management. + task_lock (TaskLock): Shared task state and queue for the project. 
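# --- Illustrative sketch (not part of the patch): what "validate before
# starting" means concretely. Build the same kind of model-backed agent the
# task will use and force one round trip. A hedged usage sketch of
# create_agent from model_validation.py; the platform, model name, and key
# below are placeholders, not values from this repo.
agent = create_agent(
    model_platform="openai",
    model_type="gpt-4o-mini",
    api_key="sk-...",       # an expired or invalid key fails here, up front
)
reply = agent.step("test")  # raises if the key, model, or endpoint is bad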
+ + Yields: + SSE formatted responses for task progress, errors, and results + """ + # Validate model configuration before starting task + is_valid, error_msg = await validate_model_before_task(options) + if not is_valid: + yield sse_json("error", { + "message": f"Model validation failed: {error_msg}" + }) + task_lock.status = Status.done + return + start_event_loop = True # Initialize task_lock attributes if not hasattr(task_lock, 'conversation_history'): task_lock.conversation_history = [] @@ -409,11 +433,17 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): "[NEW-QUESTION] Has attachments, treating as complex task" ) else: - is_complex_task = await question_confirm( - question_agent, question, task_lock) - logger.info( - f"[NEW-QUESTION] question_confirm result: is_complex={is_complex_task}" - ) + try: + is_complex_task = await question_confirm( + question_agent, question, task_lock) + logger.info( + f"[NEW-QUESTION] question_confirm result: is_complex={is_complex_task}" + ) + except Exception as e: + # Log the error and treat as complex task + # (Model validation should have caught critical errors upfront) + logger.error(f"Error in question_confirm: {e}", exc_info=True) + is_complex_task = True if not is_complex_task: logger.info( @@ -1106,26 +1136,12 @@ def on_stream_text(chunk): summary_task_content = new_summary_content except ModelProcessingError as e: - # Handle model errors (especially invalid API keys) - # during multi-turn task decomposition - error_payload, _, error_code = prepare_model_error_response( - e, options.project_id, task_id, - "multi-turn task decomposition") - - logger.error(f"Multi-turn error_code: {error_code}") - - # Only send error and stop workforce for critical errors - if should_stop_task(error_code): - # Send error notification to frontend - yield sse_json("error", error_payload) - - # Stop workforce if running - if "workforce" in locals( - ) and workforce is not None and workforce._running: - workforce.stop() - - # Mark task as done (failed state) - task_lock.status = Status.done + # Log error - validation should have caught config issues + logger.error(f"Multi-turn task decomposition error: {e}", exc_info=True) + yield sse_json("error", {"message": f"Task decomposition failed: {str(e)}"}) + if "workforce" in locals() and workforce is not None and workforce._running: + workforce.stop() + task_lock.status = Status.done except Exception as e: import traceback logger.error( @@ -1401,28 +1417,6 @@ def on_stream_text(chunk): workforce.pause() yield sse_json(Action.budget_not_enough, {"message": "budget not enouth"}) - else: - logger.error( - f"ModelProcessingError for task {options.task_id}, action {item.action}: {e}", - exc_info=True) - # Use error formatter to send properly formatted error to frontend - error_payload, _, error_code = prepare_model_error_response( - e, options.project_id, options.task_id, - f"action {item.action}") - - logger.error(f"Main flow error_code: {error_code}") - - # Only send error and stop workforce for critical errors - if should_stop_task(error_code): - yield sse_json("error", error_payload) - - # Stop workforce if running - if "workforce" in locals( - ) and workforce is not None and workforce._running: - workforce.stop() - - # Mark task as done - task_lock.status = Status.done except Exception as e: logger.error( f"Unhandled exception for task {options.task_id}, action {item.action}: {e}", diff --git a/backend/app/service/error_handler.py b/backend/app/service/error_handler.py deleted file mode 100644 
index 940b1975b..000000000 --- a/backend/app/service/error_handler.py +++ /dev/null @@ -1,64 +0,0 @@ -from app.component.error_format import normalize_error_to_openai_format -from camel.models import ModelProcessingError - -from utils import traceroot_wrapper as traceroot - -logger = traceroot.get_logger("error_handler") - - -def should_stop_task(error_code: str | None) -> bool: - """Check if the error code represents a critical error that should stop - the task. This includes invalid API keys, quota errors, and model not - found errors. - - Args: - error_code: Error code from normalize_error_to_openai_format - - Returns: - bool: True if this is a critical error that should stop the task - """ - return error_code in ("invalid_api_key", "insufficient_quota", - "model_not_found") - - -def prepare_model_error_response( - error: ModelProcessingError, - project_id: str, - task_id: str, - context: str = "task decomposition") -> tuple[dict, str, str | None]: - """Prepare error response for ModelProcessingError. This function - normalizes the error and prepares the payload for frontend - notification. - - Args: - error: The ModelProcessingError to handle - project_id: Project ID for logging - task_id: Task ID for logging - context: Description of where the error occurred (for logging) - - Returns: - tuple: (error_payload, message, error_code) - - error_payload: SSE error payload ready to send to frontend - - message: Human-readable error message - - error_code: Error code (e.g., "invalid_api_key") - """ - message, error_code, error_obj = normalize_error_to_openai_format(error) - - logger.error( - f"{context.capitalize()} failed due to model error: {message}", - extra={ - "project_id": project_id, - "task_id": task_id, - "error_code": error_code, - "error": str(error) - }, - exc_info=True) - - # Prepare error payload - error_payload = { - "message": message, - "error_code": error_code, - "error": error_obj - } - - return error_payload, message, error_code diff --git a/backend/app/service/task.py b/backend/app/service/task.py index d4958dd9c..304f76abe 100644 --- a/backend/app/service/task.py +++ b/backend/app/service/task.py @@ -1,15 +1,18 @@ -from typing_extensions import Any, Literal, TypedDict -from typing import List, Dict, Optional -from pydantic import BaseModel -from app.exception.exception import ProgramException -from app.model.chat import McpServers, Status, SupplementChat, Chat, UpdateData import asyncio -from enum import Enum -from camel.tasks import Task +import weakref from contextlib import contextmanager from contextvars import ContextVar from datetime import datetime, timedelta -import weakref +from enum import Enum +from typing import Dict, List, Optional + +from app.component.model_validation import create_agent +from app.exception.exception import ProgramException +from app.model.chat import Chat, McpServers, Status, SupplementChat, UpdateData +from camel.tasks import Task +from pydantic import BaseModel +from typing_extensions import Any, Literal, TypedDict + from utils import traceroot_wrapper as traceroot logger = traceroot.get_logger("task_service") @@ -20,8 +23,10 @@ class Action(str, Enum): update_task = "update_task" # user -> backend task_state = "task_state" # backend -> user new_task_state = "new_task_state" # backend -> user - decompose_progress = "decompose_progress" # backend -> user (streaming decomposition) - decompose_text = "decompose_text" # backend -> user (raw streaming text) + # backend -> user (streaming decomposition) + decompose_progress = 
"decompose_progress" + # backend -> user (raw streaming text) + decompose_text = "decompose_text" start = "start" # user -> backend create_agent = "create_agent" # backend -> user activate_agent = "activate_agent" # backend -> user @@ -65,7 +70,8 @@ class ActionUpdateTaskData(BaseModel): class ActionTaskStateData(BaseModel): action: Literal[Action.task_state] = Action.task_state - data: dict[Literal["task_id", "content", "state", "result", "failure_count"], str | int] + data: dict[Literal["task_id", "content", "state", "result", + "failure_count"], str | int] class ActionDecomposeProgressData(BaseModel): @@ -80,7 +86,8 @@ class ActionDecomposeTextData(BaseModel): class ActionNewTaskStateData(BaseModel): action: Literal[Action.new_task_state] = Action.new_task_state - data: dict[Literal["task_id", "content", "state", "result", "failure_count"], str | int] + data: dict[Literal["task_id", "content", "state", "result", + "failure_count"], str | int] class ActionAskData(BaseModel): @@ -101,7 +108,8 @@ class ActionCreateAgentData(BaseModel): class ActionActivateAgentData(BaseModel): action: Literal[Action.activate_agent] = Action.activate_agent - data: dict[Literal["agent_name", "process_task_id", "agent_id", "message"], str] + data: dict[Literal["agent_name", "process_task_id", "agent_id", "message"], + str] class DataDict(TypedDict): @@ -119,13 +127,15 @@ class ActionDeactivateAgentData(BaseModel): class ActionAssignTaskData(BaseModel): action: Literal[Action.assign_task] = Action.assign_task - data: dict[Literal["assignee_id", "task_id", "content", "state", "failure_count"], str | int] + data: dict[Literal["assignee_id", "task_id", "content", "state", + "failure_count"], str | int] class ActionActivateToolkitData(BaseModel): action: Literal[Action.activate_toolkit] = Action.activate_toolkit data: dict[ - Literal["agent_name", "toolkit_name", "process_task_id", "method_name", "message"], + Literal["agent_name", "toolkit_name", "process_task_id", "method_name", + "message"], str, ] @@ -133,7 +143,8 @@ class ActionActivateToolkitData(BaseModel): class ActionDeactivateToolkitData(BaseModel): action: Literal[Action.deactivate_toolkit] = Action.deactivate_toolkit data: dict[ - Literal["agent_name", "toolkit_name", "process_task_id", "method_name", "message"], + Literal["agent_name", "toolkit_name", "process_task_id", "method_name", + "message"], str, ] @@ -176,7 +187,8 @@ class ActionEndData(BaseModel): class ActionTimeoutData(BaseModel): action: Literal[Action.timeout] = Action.timeout - data: dict[Literal["message", "in_flight_tasks", "pending_tasks", "timeout_seconds"], str | int] + data: dict[Literal["message", "in_flight_tasks", "pending_tasks", + "timeout_seconds"], str | int] class ActionSupplementData(BaseModel): @@ -220,36 +232,34 @@ class ActionSkipTaskData(BaseModel): project_id: str -ActionData = ( - ActionImproveData - | ActionStartData - | ActionUpdateTaskData - | ActionTaskStateData - | ActionAskData - | ActionCreateAgentData - | ActionActivateAgentData - | ActionDeactivateAgentData - | ActionAssignTaskData - | ActionActivateToolkitData - | ActionDeactivateToolkitData - | ActionWriteFileData - | ActionNoticeData - | ActionSearchMcpData - | ActionInstallMcpData - | ActionTerminalData - | ActionStopData - | ActionEndData - | ActionTimeoutData - | ActionSupplementData - | ActionTakeControl - | ActionNewAgent - | ActionBudgetNotEnough - | ActionAddTaskData - | ActionRemoveTaskData - | ActionSkipTaskData - | ActionDecomposeTextData - | ActionDecomposeProgressData -) +ActionData = 
(ActionImproveData + | ActionStartData + | ActionUpdateTaskData + | ActionTaskStateData + | ActionAskData + | ActionCreateAgentData + | ActionActivateAgentData + | ActionDeactivateAgentData + | ActionAssignTaskData + | ActionActivateToolkitData + | ActionDeactivateToolkitData + | ActionWriteFileData + | ActionNoticeData + | ActionSearchMcpData + | ActionInstallMcpData + | ActionTerminalData + | ActionStopData + | ActionEndData + | ActionTimeoutData + | ActionSupplementData + | ActionTakeControl + | ActionNewAgent + | ActionBudgetNotEnough + | ActionAddTaskData + | ActionRemoveTaskData + | ActionSkipTaskData + | ActionDecomposeTextData + | ActionDecomposeProgressData) class Agents(str, Enum): @@ -272,7 +282,8 @@ class TaskLock: queue: asyncio.Queue[ActionData] """Queue monitoring for SSE response""" human_input: dict[str, asyncio.Queue[str]] - """After receiving user's reply, put the reply into the corresponding agent's queue""" + """After receiving user's reply, put the reply into the + corresponding agent's queue""" created_at: datetime last_accessed: datetime background_tasks: set[asyncio.Task] @@ -292,7 +303,8 @@ class TaskLock: current_task_id: Optional[str] """Current task ID to be used in SSE responses""" - def __init__(self, id: str, queue: asyncio.Queue, human_input: dict) -> None: + def __init__(self, id: str, queue: asyncio.Queue, + human_input: dict) -> None: self.id = id self.queue = queue self.human_input = human_input @@ -308,39 +320,69 @@ def __init__(self, id: str, queue: asyncio.Queue, human_input: dict) -> None: self.question_agent = None self.current_task_id = None - logger.info("Task lock initialized", extra={"task_id": id, "created_at": self.created_at.isoformat()}) + logger.info("Task lock initialized", + extra={ + "task_id": id, + "created_at": self.created_at.isoformat() + }) async def put_queue(self, data: ActionData): self.last_accessed = datetime.now() - logger.debug("Adding item to task queue", extra={"task_id": self.id, "action": data.action}) + logger.debug("Adding item to task queue", + extra={ + "task_id": self.id, + "action": data.action + }) await self.queue.put(data) async def get_queue(self): self.last_accessed = datetime.now() - logger.debug("Getting item from task queue", extra={"task_id": self.id}) + logger.debug("Getting item from task queue", + extra={"task_id": self.id}) return await self.queue.get() async def put_human_input(self, agent: str, data: Any = None): - logger.debug("Adding human input", extra={"task_id": self.id, "agent": agent, "has_data": data is not None}) + logger.debug("Adding human input", + extra={ + "task_id": self.id, + "agent": agent, + "has_data": data is not None + }) await self.human_input[agent].put(data) async def get_human_input(self, agent: str): - logger.debug("Getting human input", extra={"task_id": self.id, "agent": agent}) + logger.debug("Getting human input", + extra={ + "task_id": self.id, + "agent": agent + }) return await self.human_input[agent].get() def add_human_input_listen(self, agent: str): - logger.debug("Adding human input listener", extra={"task_id": self.id, "agent": agent}) + logger.debug("Adding human input listener", + extra={ + "task_id": self.id, + "agent": agent + }) self.human_input[agent] = asyncio.Queue(1) def add_background_task(self, task: asyncio.Task) -> None: r"""Add a task to track and clean up weak references""" - logger.debug("Adding background task", extra={"task_id": self.id, "background_tasks_count": len(self.background_tasks)}) + logger.debug("Adding background task", + extra={ + 
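# --- Illustrative sketch (not part of the patch): the end-to-end shape of
# one event under the ActionData protocol above, inside an async context.
# Payload values are illustrative; the names come from this patch.
event = ActionTaskStateData(data={
    "task_id": "t1",
    "content": "summarize the report",
    "state": "running",
    "result": "",
    "failure_count": 0,
})
await task_lock.put_queue(event)    # producer: workforce/agent callbacks
item = await task_lock.get_queue()  # consumer: the SSE streaming loop
assert item.action == Action.task_state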
"task_id": self.id, + "background_tasks_count": len(self.background_tasks) + }) self.background_tasks.add(task) task.add_done_callback(lambda t: self.background_tasks.discard(t)) async def cleanup(self): r"""Cancel all background tasks and clean up resources""" - logger.info("Starting task lock cleanup", extra={"task_id": self.id, "background_tasks_count": len(self.background_tasks)}) + logger.info("Starting task lock cleanup", + extra={ + "task_id": self.id, + "background_tasks_count": len(self.background_tasks) + }) for task in list(self.background_tasks): if not task.done(): task.cancel() @@ -349,17 +391,25 @@ async def cleanup(self): except asyncio.CancelledError: pass self.background_tasks.clear() - + # Clean up registered toolkits (e.g., remove TerminalToolkit venvs) for toolkit in self.registered_toolkits: try: if hasattr(toolkit, 'cleanup'): toolkit.cleanup() - logger.info("Toolkit cleanup completed", extra={"task_id": self.id, "toolkit": type(toolkit).__name__}) + logger.info("Toolkit cleanup completed", + extra={ + "task_id": self.id, + "toolkit": type(toolkit).__name__ + }) except Exception as e: - logger.warning(f"Failed to cleanup toolkit: {e}", extra={"task_id": self.id, "toolkit": type(toolkit).__name__}) + logger.warning(f"Failed to cleanup toolkit: {e}", + extra={ + "task_id": self.id, + "toolkit": type(toolkit).__name__ + }) self.registered_toolkits.clear() - + logger.info("Task lock cleanup completed", extra={"task_id": self.id}) def register_toolkit(self, toolkit: Any) -> None: @@ -372,26 +422,36 @@ def register_toolkit(self, toolkit: Any) -> None: """ # Prevent duplicate registration of the same toolkit instance if any(t is toolkit for t in self.registered_toolkits): - logger.debug("Toolkit already registered, skipping", extra={ - "task_id": self.id, - "toolkit": type(toolkit).__name__ - }) + logger.debug("Toolkit already registered, skipping", + extra={ + "task_id": self.id, + "toolkit": type(toolkit).__name__ + }) return self.registered_toolkits.append(toolkit) - logger.debug("Toolkit registered for cleanup", extra={ - "task_id": self.id, - "toolkit": type(toolkit).__name__, - "total_registered": len(self.registered_toolkits) - }) + logger.debug("Toolkit registered for cleanup", + extra={ + "task_id": self.id, + "toolkit": type(toolkit).__name__, + "total_registered": len(self.registered_toolkits) + }) def add_conversation(self, role: str, content: str | dict): """Add a conversation entry to history""" - logger.debug("Adding conversation entry", extra={"task_id": self.id, "role": role, "content_length": len(str(content))}) + logger.debug("Adding conversation entry", + extra={ + "task_id": self.id, + "role": role, + "content_length": len(str(content)) + }) self.conversation_history.append({ - 'role': role, - 'content': content, - 'timestamp': datetime.now().isoformat() + 'role': + role, + 'content': + content, + 'timestamp': + datetime.now().isoformat() }) def get_recent_context(self, max_entries: int = None) -> str: @@ -400,7 +460,10 @@ def get_recent_context(self, max_entries: int = None) -> str: return "" context = "=== Recent Conversation ===\n" - history_to_use = self.conversation_history if max_entries is None else self.conversation_history[-max_entries:] + if max_entries is None: + history_to_use = self.conversation_history + else: + history_to_use = self.conversation_history[-max_entries:] for entry in history_to_use: context += f"{entry['role']}: {entry['content']}\n" return context @@ -429,12 +492,17 @@ def set_current_task_id(project_id: str, task_id: str) 
-> None: """Set the current task ID for a project's task lock""" task_lock = get_task_lock(project_id) task_lock.current_task_id = task_id - logger.info("Updated current task ID", extra={"project_id": project_id, "task_id": task_id}) + logger.info("Updated current task ID", + extra={ + "project_id": project_id, + "task_id": task_id + }) def create_task_lock(id: str) -> TaskLock: if id in task_locks: - logger.warning("Attempting to create task lock that already exists", extra={"task_id": id}) + logger.warning("Attempting to create task lock that already exists", + extra={"task_id": id}) raise ProgramException("Task already exists") logger.info("Creating new task lock", extra={"task_id": id}) @@ -445,7 +513,11 @@ def create_task_lock(id: str) -> TaskLock: # if _cleanup_task is None or _cleanup_task.done(): # _cleanup_task = asyncio.create_task(_periodic_cleanup()) - logger.info("Task lock created successfully", extra={"task_id": id, "total_task_locks": len(task_locks)}) + logger.info("Task lock created successfully", + extra={ + "task_id": id, + "total_task_locks": len(task_locks) + }) return task_locks[id] @@ -460,16 +532,25 @@ def get_or_create_task_lock(id: str) -> TaskLock: async def delete_task_lock(id: str): if id not in task_locks: - logger.warning("Attempting to delete non-existent task lock", extra={"task_id": id}) + logger.warning("Attempting to delete non-existent task lock", + extra={"task_id": id}) raise ProgramException("Task not found") # Clean up background tasks before deletion task_lock = task_locks[id] - logger.info("Cleaning up task lock", extra={"task_id": id, "background_tasks": len(task_lock.background_tasks)}) + logger.info("Cleaning up task lock", + extra={ + "task_id": id, + "background_tasks": len(task_lock.background_tasks) + }) await task_lock.cleanup() del task_locks[id] - logger.info("Task lock deleted successfully", extra={"task_id": id, "remaining_task_locks": len(task_locks)}) + logger.info("Task lock deleted successfully", + extra={ + "task_id": id, + "remaining_task_locks": len(task_locks) + }) def get_camel_task(id: str, tasks: list[Task]) -> None | Task: @@ -503,7 +584,8 @@ async def _periodic_cleanup(): await asyncio.sleep(300) # Run every 5 minutes current_time = datetime.now() - stale_timeout = timedelta(hours=4) # Consider tasks stale after 4 hours + stale_timeout = timedelta( + hours=4) # Consider tasks stale after 4 hours stale_ids = [] for task_id, task_lock in task_locks.items(): @@ -530,3 +612,50 @@ def set_process_task(process_task_id: str): yield finally: process_task.reset(origin) + + +async def validate_model_before_task(options: Chat) -> tuple[bool, str | None]: + """ + Validate model configuration before starting a task. + Makes a simple test request to ensure the API key and model are valid. + + Args: + options (Chat): Chat options containing model configuration. 
+ + Returns: + (is_valid, error_message) + - is_valid: True if validation passed + - error_message: Raw error message if validation failed, + None otherwise + """ + try: + logger.info(f"Validating model configuration " + f"for task {options.task_id}") + + # Create test agent with same config as task will use + agent = create_agent( + model_platform=options.model_platform, + model_type=options.model_type, + api_key=options.api_key, + url=options.api_url, + model_config_dict=options.model_config, + ) + + # Make a simple test call in executor to avoid blocking + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, lambda: agent.step("test")) + + logger.info(f"Model validation passed for task {options.task_id}") + return True, None + + except Exception as e: + error_msg = str(e) + logger.error( + f"Model validation failed for task {options.task_id}: {error_msg}", + extra={ + "project_id": options.project_id, + "task_id": options.task_id, + "error": error_msg + }, + exc_info=True) + return False, error_msg diff --git a/backend/app/utils/agent.py b/backend/app/utils/agent.py index ac3e8753d..a43825bc0 100644 --- a/backend/app/utils/agent.py +++ b/backend/app/utils/agent.py @@ -770,7 +770,7 @@ def agent_model( model = ModelFactory.create( model_platform=options.model_platform, model_type=options.model_type, - api_key=options.api_key, + api_key="sk-fake-invalid-key-for-testing", # Fake key for testing url=options.api_url, model_config_dict=model_config or None, timeout=600, # 10 minutes diff --git a/backend/tests/app/service/test_chat_service_error_handling.py b/backend/tests/app/service/test_chat_service_error_handling.py deleted file mode 100644 index c99efea8a..000000000 --- a/backend/tests/app/service/test_chat_service_error_handling.py +++ /dev/null @@ -1,117 +0,0 @@ -from unittest.mock import patch - -from app.service.error_handler import (prepare_model_error_response, - should_stop_task) -from camel.models import ModelProcessingError - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -def test_invalid_api_key_error_should_stop_task(mock_normalize): - """Test that invalid API key error results in task being stopped.""" - # Setup mock to return invalid_api_key error - mock_normalize.return_value = ("Invalid key. Validation failed.", - "invalid_api_key", { - "message": - "Invalid key. Validation failed.", - "type": "invalid_request_error", - "param": None, - "code": "invalid_api_key", - }) - - error = ModelProcessingError("Error code: 401 - unauthorized") - _, _, error_code = prepare_model_error_response(error, "project-id", - "task-id", "test context") - - # Verify this is an invalid API key error - assert should_stop_task(error_code) is True - - # Simulate the chat_service logic - should_stop = should_stop_task(error_code) - assert should_stop is True - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -def test_model_not_found_error_should_stop_task(mock_normalize): - """Test that model_not_found error SHOULD stop task.""" - # Setup mock to return model_not_found error - mock_normalize.return_value = ( - "Invalid model name. Validation failed.", "model_not_found", { - "message": "Invalid model name. 
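
The run_in_executor call above is what keeps the probe from stalling the server: agent.step is a synchronous SDK call, so it is pushed onto the default thread pool while other coroutines (including concurrent SSE streams) keep running. A self-contained illustration of the mechanism, with a sleep standing in for the blocking request:

    import asyncio
    import time


    def blocking_step(prompt: str) -> str:
        time.sleep(0.2)  # stands in for a synchronous HTTP round-trip
        return f"echo: {prompt}"


    async def main():
        loop = asyncio.get_running_loop()
        # Offload the blocking call to the default thread pool; the loop
        # keeps servicing other coroutines while the worker thread waits.
        result, _ = await asyncio.gather(
            loop.run_in_executor(None, blocking_step, "test"),
            asyncio.sleep(0.05),  # proves the loop was not blocked
        )
        print(result)


    asyncio.run(main())
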
Validation failed.", - "type": "invalid_request_error", - "param": None, - "code": "model_not_found", - }) - - error = ModelProcessingError("Error code: 404 - model does not exist") - _, _, error_code = prepare_model_error_response(error, "project-id", - "task-id", "test context") - - # Verify this is a critical error that should stop the task - assert should_stop_task(error_code) is True - - # Simulate the chat_service logic - should_stop = should_stop_task(error_code) - assert should_stop is True - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -def test_quota_error_should_stop_task(mock_normalize): - """Test that insufficient_quota error SHOULD stop task.""" - # Setup mock to return insufficient_quota error - mock_normalize.return_value = (( - "You exceeded your current quota, please check " - "your plan and billing details."), "insufficient_quota", { - "message": ("You exceeded your current quota, please " - "check your plan and billing details."), - "type": - "insufficient_quota", - "param": - None, - "code": - "insufficient_quota", - }) - - error = ModelProcessingError("Error code: 429 - quota exceeded") - _, _, error_code = prepare_model_error_response(error, "project-id", - "task-id", "test context") - - # Verify this is a critical error that should stop the task - assert should_stop_task(error_code) is True - - # Simulate the chat_service logic - should_stop = should_stop_task(error_code) - assert should_stop is True - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -def test_unknown_error_should_not_stop_task(mock_normalize): - """Test that unknown/generic errors do NOT stop task.""" - # Setup mock to return no specific error code - mock_normalize.return_value = ("Some generic error message", None, None) - - error = ModelProcessingError("Some generic error") - _, _, error_code = prepare_model_error_response(error, "project-id", - "task-id", "test context") - - # Verify this is NOT an invalid API key error - assert should_stop_task(error_code) is False - - # Simulate the chat_service logic - should_stop = should_stop_task(error_code) - assert should_stop is False - - -def test_task_stop_logic_workflow(): - """Test the complete workflow of error handling and task stopping logic.""" - test_cases = [ - # (error_code, should_stop_task, description) - ("invalid_api_key", True, "Invalid API key should stop task"), - ("insufficient_quota", True, "Insufficient quota should stop task"), - ("model_not_found", True, "Model not found should stop task"), - ("rate_limit_exceeded", False, "Rate limit should not stop task"), - (None, False, "Unknown error should not stop task"), - ] - - for error_code, expected_stop, description in test_cases: - should_stop = should_stop_task(error_code) - assert should_stop == expected_stop, f"Failed: {description}" diff --git a/backend/tests/app/service/test_error_handler.py b/backend/tests/app/service/test_error_handler.py deleted file mode 100644 index e252260a7..000000000 --- a/backend/tests/app/service/test_error_handler.py +++ /dev/null @@ -1,144 +0,0 @@ -from unittest.mock import patch - -from app.service.error_handler import (prepare_model_error_response, - should_stop_task) -from camel.models import ModelProcessingError - - -def test_should_stop_task_with_invalid_key(): - """Test that should_stop_task returns True for - invalid_api_key error code. 
- """ - assert should_stop_task("invalid_api_key") is True - - -def test_should_stop_task_with_quota_error(): - """Test that should_stop_task returns True for quota errors.""" - assert should_stop_task("insufficient_quota") is True - - -def test_should_stop_task_with_model_not_found(): - """Test that should_stop_task returns True for model_not_found errors.""" - assert should_stop_task("model_not_found") is True - - -def test_should_stop_task_with_other_error(): - """Test that should_stop_task returns False for non-critical errors.""" - assert should_stop_task("rate_limit_exceeded") is False - assert should_stop_task(None) is False - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -@patch('app.service.error_handler.logger') -def test_prepare_model_error_response_with_invalid_api_key( - mock_logger, mock_normalize): - """Test prepare_model_error_response with invalid API key error.""" - # Setup mock - mock_normalize.return_value = ("Invalid key. Validation failed.", - "invalid_api_key", { - "message": - "Invalid key. Validation failed.", - "type": "invalid_request_error", - "param": None, - "code": "invalid_api_key", - }) - - error = ModelProcessingError("Error code: 401 - unauthorized") - project_id = "test-project" - task_id = "test-task" - - # Call function - error_payload, message, error_code = prepare_model_error_response( - error, project_id, task_id, "test context") - - # Assertions - assert error_code == "invalid_api_key" - assert message == "Invalid key. Validation failed." - assert error_payload["error_code"] == "invalid_api_key" - assert error_payload["message"] == "Invalid key. Validation failed." - - # Verify logger was called - mock_logger.error.assert_called_once() - mock_normalize.assert_called_once_with(error) - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -@patch('app.service.error_handler.logger') -def test_prepare_model_error_response_with_model_not_found( - mock_logger, mock_normalize): - """Test prepare_model_error_response with model_not_found - error (should stop task). - """ - # Setup mock - mock_normalize.return_value = ( - "Invalid model name. Validation failed.", "model_not_found", { - "message": "Invalid model name. Validation failed.", - "type": "invalid_request_error", - "param": None, - "code": "model_not_found", - }) - - error = ModelProcessingError("Error code: 404 - model does not exist") - project_id = "test-project" - task_id = "test-task" - - # Call function - error_payload, message, error_code = prepare_model_error_response( - error, project_id, task_id, "test context") - - # Assertions - assert error_code == "model_not_found" - assert message == "Invalid model name. Validation failed." - assert error_payload["error_code"] == "model_not_found" - assert error_payload["message"] == "Invalid model name. Validation failed." - - # Verify this SHOULD stop the task - # (checked by caller using should_stop_task) - assert should_stop_task(error_code) is True - - # Verify logger was called - mock_logger.error.assert_called_once() - mock_normalize.assert_called_once_with(error) - - -@patch('app.service.error_handler.normalize_error_to_openai_format') -@patch('app.service.error_handler.logger') -def test_prepare_model_error_response_with_quota_error(mock_logger, - mock_normalize): - """Test prepare_model_error_response with - insufficient_quota error (should stop task). 
- """ - # Setup mock - mock_normalize.return_value = (( - "You exceeded your current quota, please " - "check your plan and billing details."), "insufficient_quota", { - "message": ("You exceeded your current quota, please " - "check your plan and billing details."), - "type": - "insufficient_quota", - "param": - None, - "code": - "insufficient_quota", - }) - - error = ModelProcessingError("Error code: 429 - quota exceeded") - project_id = "test-project" - task_id = "test-task" - - # Call function - _, message, error_code = prepare_model_error_response( - error, project_id, task_id, "test context") - - # Assertions - assert error_code == "insufficient_quota" - assert message == ("You exceeded your current quota, " - "please check your plan and billing details.") - - # Verify this SHOULD stop the task - # (checked by caller using should_stop_task) - assert should_stop_task(error_code) is True - - # Verify logger was called - mock_logger.error.assert_called_once() - mock_normalize.assert_called_once_with(error) diff --git a/backend/tests/app/service/test_task_validate.py b/backend/tests/app/service/test_task_validate.py new file mode 100644 index 000000000..b086ec29d --- /dev/null +++ b/backend/tests/app/service/test_task_validate.py @@ -0,0 +1,161 @@ +""" +Unit tests for validate_model_before_task function. + +TODO: Rename this file to test_task.py after fixing errors +in backend/tests/unit/service/test_task.py +""" +from unittest.mock import Mock, patch + +import pytest +from app.model.chat import Chat +from app.service.task import validate_model_before_task +from camel.types import ModelPlatformType + +# Test data constants +TEST_PROJECT_ID = "test_project" +TEST_TASK_ID = "test_task_123" +TEST_QUESTION = "Test question" +TEST_EMAIL = "test@example.com" +TEST_MODEL_PLATFORM = ModelPlatformType.OPENAI +TEST_MODEL_TYPE = "gpt-4o" +TEST_API_URL = "https://api.openai.com/v1" +TEST_VALID_API_KEY = "sk-valid-key" +TEST_INVALID_API_KEY = "sk-invalid-key" + + +@pytest.mark.asyncio +async def test_validate_model_success(): + """Test successful model validation.""" + options = Chat(project_id=TEST_PROJECT_ID, + task_id=TEST_TASK_ID, + question=TEST_QUESTION, + email=TEST_EMAIL, + model_platform=TEST_MODEL_PLATFORM, + model_type=TEST_MODEL_TYPE, + api_key=TEST_VALID_API_KEY, + api_url=TEST_API_URL, + model_config={}) + + # Mock the create_agent and agent.step + mock_agent = Mock() + mock_agent.step = Mock(return_value="test response") + + with patch('app.service.task.create_agent', return_value=mock_agent): + is_valid, error_msg = await validate_model_before_task(options) + + assert is_valid is True + assert error_msg is None + + +@pytest.mark.asyncio +async def test_validate_model_invalid_api_key(): + """Test model validation with invalid API key.""" + options = Chat(project_id=TEST_PROJECT_ID, + task_id=TEST_TASK_ID, + question=TEST_QUESTION, + email=TEST_EMAIL, + model_platform=TEST_MODEL_PLATFORM, + model_type=TEST_MODEL_TYPE, + api_key=TEST_INVALID_API_KEY, + api_url=TEST_API_URL, + model_config={}) + + # Mock the create_agent to raise authentication error + with patch('app.service.task.create_agent') as mock_create: + mock_agent = Mock() + mock_agent.step = Mock( + side_effect=Exception("Error code: 401 - Invalid API key")) + mock_create.return_value = mock_agent + + is_valid, error_msg = await validate_model_before_task(options) + + assert is_valid is False + assert error_msg is not None + assert "401" in error_msg or "Invalid API key" in error_msg + + +@pytest.mark.asyncio +async def 
test_validate_model_network_error(): + """Test model validation with network error.""" + options = Chat(project_id=TEST_PROJECT_ID, + task_id=TEST_TASK_ID, + question=TEST_QUESTION, + email=TEST_EMAIL, + model_platform=TEST_MODEL_PLATFORM, + model_type=TEST_MODEL_TYPE, + api_key=TEST_VALID_API_KEY, + api_url="https://invalid-url.com", + model_config={}) + + # Mock the create_agent to raise network error + with patch('app.service.task.create_agent') as mock_create: + mock_agent = Mock() + mock_agent.step = Mock(side_effect=Exception("Connection error")) + mock_create.return_value = mock_agent + + is_valid, error_msg = await validate_model_before_task(options) + + assert is_valid is False + assert error_msg is not None + assert "Connection error" in error_msg + + +@pytest.mark.asyncio +async def test_validate_model_with_custom_config(): + """Test model validation with custom model configuration.""" + custom_config = {"temperature": 0.7, "max_tokens": 1000} + + options = Chat(project_id=TEST_PROJECT_ID, + task_id=TEST_TASK_ID, + question=TEST_QUESTION, + email=TEST_EMAIL, + model_platform=TEST_MODEL_PLATFORM, + model_type=TEST_MODEL_TYPE, + api_key=TEST_VALID_API_KEY, + api_url=TEST_API_URL, + model_config=custom_config) + + mock_agent = Mock() + mock_agent.step = Mock(return_value="test response") + + with patch('app.service.task.create_agent', + return_value=mock_agent) as mock_create: + is_valid, error_msg = await validate_model_before_task(options) + + # Verify create_agent was called + mock_create.assert_called_once() + call_args = mock_create.call_args + assert call_args.kwargs['model_platform'] == options.model_platform + assert call_args.kwargs['model_type'] == options.model_type + assert call_args.kwargs['api_key'] == options.api_key + assert call_args.kwargs['url'] == options.api_url + + assert is_valid is True + assert error_msg is None + + +@pytest.mark.asyncio +async def test_validate_model_rate_limit_error(): + """Test model validation with rate limit error.""" + options = Chat(project_id=TEST_PROJECT_ID, + task_id=TEST_TASK_ID, + question=TEST_QUESTION, + email=TEST_EMAIL, + model_platform=TEST_MODEL_PLATFORM, + model_type=TEST_MODEL_TYPE, + api_key=TEST_VALID_API_KEY, + api_url=TEST_API_URL, + model_config={}) + + # Mock the create_agent to raise rate limit error + with patch('app.service.task.create_agent') as mock_create: + mock_agent = Mock() + mock_agent.step = Mock( + side_effect=Exception("Error code: 429 - Rate limit exceeded")) + mock_create.return_value = mock_agent + + is_valid, error_msg = await validate_model_before_task(options) + + assert is_valid is False + assert error_msg is not None + assert "429" in error_msg or "Rate limit" in error_msg From b040f5183888218040a692e393ea95855ec7dafe Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 22 Jan 2026 13:28:21 -0800 Subject: [PATCH 08/18] update --- backend/app/service/chat_service.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index a6c38b9ff..ad9a98a02 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -1135,13 +1135,6 @@ def on_stream_text(chunk): sub_tasks = new_sub_tasks summary_task_content = new_summary_content - except ModelProcessingError as e: - # Log error - validation should have caught config issues - logger.error(f"Multi-turn task decomposition error: {e}", exc_info=True) - yield sse_json("error", {"message": f"Task decomposition failed: {str(e)}"}) - if "workforce" in 
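
One detail worth noting in the new tests: they patch 'app.service.task.create_agent' — the path where validate_model_before_task looks the name up — and then inspect call_args.kwargs to verify how the factory was invoked. The inspection pattern in isolation (Factory here is a stand-in object, not from the codebase):

    from unittest.mock import Mock, patch


    class Factory:
        @staticmethod
        def create_agent(**kwargs):
            raise RuntimeError("the real factory should not run under patch")


    with patch.object(Factory, "create_agent",
                      return_value=Mock()) as mock_create:
        Factory.create_agent(model_type="gpt-4o", api_key="sk-valid-key")
        mock_create.assert_called_once()
        # call_args.kwargs exposes the keyword arguments of the recorded call.
        assert mock_create.call_args.kwargs["model_type"] == "gpt-4o"
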
locals() and workforce is not None and workforce._running: - workforce.stop() - task_lock.status = Status.done except Exception as e: import traceback logger.error( From c71aecef33905010635efd64503c726cb1af6718 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 22 Jan 2026 14:31:15 -0800 Subject: [PATCH 09/18] update --- backend/app/service/chat_service.py | 33 +++++------------------------ 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index ad9a98a02..1e41293b6 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -433,17 +433,11 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): "[NEW-QUESTION] Has attachments, treating as complex task" ) else: - try: - is_complex_task = await question_confirm( - question_agent, question, task_lock) - logger.info( - f"[NEW-QUESTION] question_confirm result: is_complex={is_complex_task}" - ) - except Exception as e: - # Log the error and treat as complex task - # (Model validation should have caught critical errors upfront) - logger.error(f"Error in question_confirm: {e}", exc_info=True) - is_complex_task = True + is_complex_task = await question_confirm( + question_agent, question, task_lock) + logger.info( + f"[NEW-QUESTION] question_confirm result: is_complex={is_complex_task}" + ) if not is_complex_task: logger.info( @@ -670,13 +664,6 @@ async def run_decomposition(): } await task_lock.put_queue( ActionDecomposeProgressData(data=payload)) - except ModelProcessingError as e: - # Log model errors during task decomposition (background task) - logger.error( - f"Task decomposition failed due to model error: {e}", - exc_info=True) - # TODO: Error is only logged, not sent to UI (background task limitation) - # To send to UI, we'd need to restore Action.error + ActionErrorData + handler except Exception as e: logger.error( f"Error in background decomposition: {e}", @@ -1544,18 +1531,8 @@ async def question_confirm(agent: ListenChatAgent, return is_complex - except ModelProcessingError as e: - logger.error(f"Model error in question_confirm: {e}") - raise ModelProcessingError( - f"Failed to determine task complexity due to model error: {str(e)}" - ) except Exception as e: logger.error(f"Error in question_confirm: {e}") - # Check if this is an authentication/API key error - error_str = str(e).lower() - if "401" in error_str or "authentication" in error_str or "api key" in error_str or "unauthorized" in error_str: - # This is an API key error, raise it as ModelProcessingError so it gets caught properly - raise ModelProcessingError(f"Invalid API key: {str(e)}") return True From 71cc904cf7cf8e1bc3e052639ee2501b37e0e7f7 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 22 Jan 2026 15:21:17 -0800 Subject: [PATCH 10/18] update --- backend/app/controller/chat_controller.py | 3 --- backend/app/utils/agent.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index f2700f755..18901f224 100644 --- a/backend/app/controller/chat_controller.py +++ b/backend/app/controller/chat_controller.py @@ -137,9 +137,6 @@ async def post(data: Chat, request: Request): os.environ["file_save_path"] = data.file_save_path() os.environ["browser_port"] = str(data.browser_port) - - # Override API key with fake key for testing - data.api_key = "sk-fake-invalid-key-for-testing" os.environ["OPENAI_API_KEY"] = data.api_key os.environ[ 
"OPENAI_API_BASE_URL"] = data.api_url or "https://api.openai.com/v1" diff --git a/backend/app/utils/agent.py b/backend/app/utils/agent.py index a43825bc0..ac3e8753d 100644 --- a/backend/app/utils/agent.py +++ b/backend/app/utils/agent.py @@ -770,7 +770,7 @@ def agent_model( model = ModelFactory.create( model_platform=options.model_platform, model_type=options.model_type, - api_key="sk-fake-invalid-key-for-testing", # Fake key for testing + api_key=options.api_key, url=options.api_url, model_config_dict=model_config or None, timeout=600, # 10 minutes From 33fadc810f4ecc5e783f76c09f1c241d54733885 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 29 Jan 2026 03:20:05 -0800 Subject: [PATCH 11/18] update --- backend/app/service/chat_service.py | 4 ++ backend/app/service/task.py | 3 + utils/traceroot_wrapper.py | 86 ----------------------------- 3 files changed, 7 insertions(+), 86 deletions(-) delete mode 100644 utils/traceroot_wrapper.py diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 8952cfabe..88f0efb3a 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -19,6 +19,10 @@ from pathlib import Path from typing import Any +from fastapi import Request +from camel.toolkits import ToolkitMessageIntegration +from inflection import titleize + from app.model.chat import Chat, NewAgent, Status, TaskContent, sse_json from app.service.task import (Action, ActionDecomposeProgressData, ActionDecomposeTextData, ActionImproveData, diff --git a/backend/app/service/task.py b/backend/app/service/task.py index 9ebf8de78..f83a81c50 100644 --- a/backend/app/service/task.py +++ b/backend/app/service/task.py @@ -12,11 +12,13 @@ # limitations under the License. # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. 
========= +from enum import Enum from typing_extensions import Any, Literal, TypedDict from typing import List, Dict, Optional from pydantic import BaseModel from app.exception.exception import ProgramException from app.model.chat import AgentModelConfig, McpServers, Status, SupplementChat, Chat, UpdateData +from app.component.model_validation import create_agent import asyncio import weakref from contextlib import contextmanager @@ -24,6 +26,7 @@ from datetime import datetime, timedelta import weakref import logging +from camel.tasks import Task logger = logging.getLogger("task_service") diff --git a/utils/traceroot_wrapper.py b/utils/traceroot_wrapper.py deleted file mode 100644 index 3b7c5c258..000000000 --- a/utils/traceroot_wrapper.py +++ /dev/null @@ -1,86 +0,0 @@ -from pathlib import Path -from typing import Callable -import logging -from dotenv import load_dotenv - -# Try to import traceroot, but handle gracefully if not available -try: - import traceroot - TRACEROOT_AVAILABLE = True -except ImportError: - TRACEROOT_AVAILABLE = False - traceroot = None - -# Auto-detect module name based on caller's path -def _get_module_name(): - """Automatically detect if this is being called from backend or server.""" - import inspect - frame = inspect.currentframe() - try: - # Go up the stack to find the caller - caller_frame = frame.f_back.f_back if frame and frame.f_back else None - if caller_frame: - caller_file = caller_frame.f_globals.get('__file__', '') - if 'backend' in caller_file: - return 'backend' - elif 'server' in caller_file: - return 'server' - finally: - del frame - return 'unknown' - -env_path = Path(__file__).resolve().parents[1] / '.env' - -load_dotenv(env_path) - -# DISABLED: Do not initialize traceroot to avoid OpenTelemetry connection errors -if False: # TRACEROOT_AVAILABLE and traceroot.init(): - from traceroot.logger import get_logger as _get_traceroot_logger - - trace = traceroot.trace - - def get_logger(name: str = __name__): - """Get TraceRoot logger instance.""" - return _get_traceroot_logger(name) - - def is_enabled() -> bool: - """Check if TraceRoot is enabled.""" - return True - - # Log successful initialization - module_name = _get_module_name() - _init_logger = _get_traceroot_logger("traceroot_wrapper") - _init_logger.info("TraceRoot initialized successfully", extra={"backend": "traceroot", "service_module": module_name}) -else: - # No-op implementations when TraceRoot is not configured - def trace(*args, **kwargs): - """No-op trace decorator.""" - def decorator(func: Callable) -> Callable: - return func - return decorator - - def get_logger(name: str = __name__): - """Get standard Python logger when TraceRoot is disabled.""" - logger = logging.getLogger(name) - if not logger.handlers: - # Configure basic logging if no handlers exist - handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - handler.setFormatter(formatter) - logger.addHandler(handler) - logger.setLevel(logging.INFO) - return logger - - def is_enabled() -> bool: - """Check if TraceRoot is enabled.""" - return False - - # Log fallback mode - _fallback_logger = logging.getLogger("traceroot_wrapper") - if TRACEROOT_AVAILABLE: - _fallback_logger.warning("TraceRoot available but not initialized - using Python logging as fallback") - else: - _fallback_logger.warning("TraceRoot not available - using Python logging as fallback") - - -__all__ = ['trace', 'get_logger', 'is_enabled'] From 54c7a7264049fb91764675362f4ea46f2b0d62a3 Mon Sep 17 
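
The wrapper deleted above implemented a common graceful-degradation idiom: attempt the optional import, and otherwise expose no-op stand-ins with the same surface so callers never branch on availability. Condensed to its core (a sketch, not the removed file verbatim):

    import logging
    from typing import Callable

    try:
        import traceroot  # optional dependency
        TRACEROOT_AVAILABLE = True
    except ImportError:
        TRACEROOT_AVAILABLE = False
        traceroot = None


    def trace(*args, **kwargs):
        # No-op decorator matching traceroot.trace's call shape.
        def decorator(func: Callable) -> Callable:
            return func
        return decorator


    def get_logger(name: str = __name__) -> logging.Logger:
        # Plain stdlib logger when tracing is unavailable.
        return logging.getLogger(name)
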
00:00:00 2001 From: bytecraftii Date: Thu, 29 Jan 2026 03:20:57 -0800 Subject: [PATCH 12/18] update --- .env.development | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .env.development diff --git a/.env.development b/.env.development new file mode 100644 index 000000000..5314e7834 --- /dev/null +++ b/.env.development @@ -0,0 +1,7 @@ +VITE_BASE_URL=/api + +VITE_PROXY_URL=https://dev.eigent.ai +VITE_USE_LOCAL_PROXY=false + +# VITE_PROXY_URL=http://localhost:3001 +# VITE_USE_LOCAL_PROXY=true From ef38bb2bbebb21dc09d8f8c9a2a0c95106e284e5 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 29 Jan 2026 03:27:21 -0800 Subject: [PATCH 13/18] update --- .gitignore | 1 - backend/app/controller/chat_controller.py | 1 - backend/app/service/chat_service.py | 33 ++++++++++++----------- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index e0411bce3..7e0663f86 100644 --- a/.gitignore +++ b/.gitignore @@ -37,7 +37,6 @@ yarn.lock .env .env.local .env.production -.env.development .cursor diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index c9f45054b..45c90c46b 100644 --- a/backend/app/controller/chat_controller.py +++ b/backend/app/controller/chat_controller.py @@ -60,7 +60,6 @@ from fastapi import APIRouter, Request, Response from fastapi.responses import StreamingResponse -from utils import traceroot_wrapper as traceroot router = APIRouter() diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 88f0efb3a..8658688d4 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -14,28 +14,29 @@ import asyncio import datetime -import os -import platform +import json from pathlib import Path -from typing import Any +import platform +from typing import Any, Literal from fastapi import Request -from camel.toolkits import ToolkitMessageIntegration from inflection import titleize +from pydash import chain -from app.model.chat import Chat, NewAgent, Status, TaskContent, sse_json -from app.service.task import (Action, ActionDecomposeProgressData, - ActionDecomposeTextData, ActionImproveData, - ActionInstallMcpData, ActionNewAgent, Agents, - TaskLock, delete_task_lock, set_current_task_id, - validate_model_before_task) -from app.utils.agent import (ListenChatAgent, agent_model, browser_agent, - developer_agent, document_agent, get_mcp_tools, - get_toolkits, mcp_agent, multi_modal_agent, - question_confirm_agent, set_main_event_loop, - task_summary_agent) from app.utils.file_utils import get_working_directory -from app.utils.server.sync_step import sync_step +from app.service.task import ( + ActionImproveData, + ActionInstallMcpData, + ActionNewAgent, + ActionTimeoutData, + TaskLock, + delete_task_lock, + set_current_task_id, + ActionDecomposeProgressData, + ActionDecomposeTextData, + validate_model_before_task, +) +from camel.toolkits import AgentCommunicationToolkit, ToolkitMessageIntegration from app.utils.toolkit.human_toolkit import HumanToolkit from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit from app.utils.toolkit.terminal_toolkit import TerminalToolkit From 746a7e00ba61e0b6cb4a3cf60fd9fc758b481f36 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 29 Jan 2026 03:29:26 -0800 Subject: [PATCH 14/18] update --- backend/app/service/chat_service.py | 104 +++++++++++----------------- backend/app/service/task.py | 19 ++--- 2 files changed, 49 insertions(+), 74 deletions(-) diff --git 
a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 8658688d4..7b29c4668 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -14,57 +14,37 @@ import asyncio import datetime -import json -from pathlib import Path +import logging +import os import platform -from typing import Any, Literal - -from fastapi import Request -from inflection import titleize -from pydash import chain - +from pathlib import Path +from typing import Any + +from app.model.chat import Chat, NewAgent, Status, TaskContent, sse_json +from app.service.task import (Action, ActionDecomposeProgressData, + ActionDecomposeTextData, ActionImproveData, + ActionInstallMcpData, ActionNewAgent, Agents, + TaskLock, delete_task_lock, set_current_task_id, + validate_model_before_task) +from app.utils.agent import (ListenChatAgent, agent_model, browser_agent, + developer_agent, document_agent, get_mcp_tools, + get_toolkits, mcp_agent, multi_modal_agent, + question_confirm_agent, set_main_event_loop, + task_summary_agent) from app.utils.file_utils import get_working_directory -from app.service.task import ( - ActionImproveData, - ActionInstallMcpData, - ActionNewAgent, - ActionTimeoutData, - TaskLock, - delete_task_lock, - set_current_task_id, - ActionDecomposeProgressData, - ActionDecomposeTextData, - validate_model_before_task, -) -from camel.toolkits import AgentCommunicationToolkit, ToolkitMessageIntegration +from app.utils.server.sync_step import sync_step +from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback from app.utils.toolkit.human_toolkit import HumanToolkit from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit from app.utils.toolkit.terminal_toolkit import TerminalToolkit from app.utils.workforce import Workforce -from app.utils.telemetry.workforce_metrics import WorkforceMetricsCallback -from app.model.chat import Chat, NewAgent, Status, sse_json, TaskContent +from camel.models import ModelProcessingError from camel.tasks import Task -from app.utils.agent import ( - ListenChatAgent, - agent_model, - get_mcp_tools, - get_toolkits, - mcp_agent, - developer_agent, - document_agent, - multi_modal_agent, - browser_agent, - social_medium_agent, - task_summary_agent, - question_confirm_agent, - set_main_event_loop, -) -from app.service.task import Action, Agents -from app.utils.server.sync_step import sync_step +from camel.toolkits import ToolkitMessageIntegration from camel.types import ModelPlatformType -from camel.models import ModelProcessingError -import logging -import os +from fastapi import Request +from inflection import titleize +from pydash import chain logger = logging.getLogger("chat_service") @@ -308,9 +288,8 @@ async def step_solve(options: Chat, request: Request, task_lock: TaskLock): # Validate model configuration before starting task is_valid, error_msg = await validate_model_before_task(options) if not is_valid: - yield sse_json("error", { - "message": f"Model validation failed: {error_msg}" - }) + yield sse_json("error", + {"message": f"Model validation failed: {error_msg}"}) task_lock.status = Status.done return @@ -721,7 +700,9 @@ async def run_decomposition(): sub_tasks.extend(new_tasks) # Save updated sub_tasks back to task_lock so Action.start uses the correct list setattr(task_lock, "decompose_sub_tasks", sub_tasks) - summary_task_content_local = getattr(task_lock, "summary_task_content", summary_task_content) + summary_task_content_local = getattr(task_lock, + "summary_task_content", + 
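
The hunk above is where the new helper pays off: validation runs before any workforce is built, so an expired key surfaces as a single SSE error event instead of a task that never stops. A self-contained sketch of that fail-fast shape (sse_json and validate below are stand-ins for the real app.model.chat.sse_json and validate_model_before_task):

    import asyncio
    import json


    def sse_json(step: str, data: dict) -> str:
        # Stand-in for app.model.chat.sse_json.
        return f"data: {json.dumps({'step': step, 'data': data})}\n\n"


    async def validate(options):
        # Stand-in for validate_model_before_task.
        return False, "Error code: 401 - invalid api key"


    async def solve(options):
        is_valid, error_msg = await validate(options)
        if not is_valid:
            # Fail fast: one error event, then close the stream.
            yield sse_json("error",
                           {"message": f"Model validation failed: {error_msg}"})
            return
        yield sse_json("start", {})


    async def main():
        async for event in solve(options=None):
            print(event, end="")


    asyncio.run(main())
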
summary_task_content) yield to_sub_tasks(camel_task, summary_task_content_local) elif item.action == Action.add_task: @@ -1474,19 +1455,13 @@ def tree_sub_tasks(sub_tasks: list[Task], depth: int = 0): if depth > 5: return [] - result = ( - chain(sub_tasks) - .filter(lambda x: x.content != "") - .map( - lambda x: { - "id": x.id, - "content": x.content, - "state": x.state, - "subtasks": tree_sub_tasks(x.subtasks, depth + 1), - } - ) - .value() - ) + result = (chain(sub_tasks).filter(lambda x: x.content != "").map( + lambda x: { + "id": x.id, + "content": x.content, + "state": x.state, + "subtasks": tree_sub_tasks(x.subtasks, depth + 1), + }).value()) return result @@ -1509,7 +1484,8 @@ def update_sub_tasks(sub_tasks: list[Task], return sub_tasks -def add_sub_tasks(camel_task: Task, update_tasks: list[TaskContent]) -> list[Task]: +def add_sub_tasks(camel_task: Task, + update_tasks: list[TaskContent]) -> list[Task]: """Add new tasks (with empty id) to camel_task and return the list of added tasks.""" added_tasks = [] for item in update_tasks: @@ -1815,10 +1791,8 @@ def _create_new_worker_agent() -> ListenChatAgent: model_platform_enum = None # Create workforce metrics callback for workforce analytics - workforce_metrics = WorkforceMetricsCallback( - project_id=options.project_id, - task_id=options.task_id - ) + workforce_metrics = WorkforceMetricsCallback(project_id=options.project_id, + task_id=options.task_id) workforce = Workforce( options.project_id, diff --git a/backend/app/service/task.py b/backend/app/service/task.py index f83a81c50..562907d54 100644 --- a/backend/app/service/task.py +++ b/backend/app/service/task.py @@ -12,21 +12,22 @@ # limitations under the License. # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= -from enum import Enum -from typing_extensions import Any, Literal, TypedDict -from typing import List, Dict, Optional -from pydantic import BaseModel -from app.exception.exception import ProgramException -from app.model.chat import AgentModelConfig, McpServers, Status, SupplementChat, Chat, UpdateData -from app.component.model_validation import create_agent import asyncio +import logging import weakref from contextlib import contextmanager from contextvars import ContextVar from datetime import datetime, timedelta -import weakref -import logging +from enum import Enum +from typing import Dict, List, Optional + +from app.component.model_validation import create_agent +from app.exception.exception import ProgramException +from app.model.chat import (AgentModelConfig, Chat, McpServers, Status, + SupplementChat, UpdateData) from camel.tasks import Task +from pydantic import BaseModel +from typing_extensions import Any, Literal, TypedDict logger = logging.getLogger("task_service") From da3d655a1ae471311519b3ff441228e2c0f120ec Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 29 Jan 2026 03:31:23 -0800 Subject: [PATCH 15/18] update --- .github/workflows/test.yml | 3 +++ backend/app/controller/chat_controller.py | 30 ++--------------------- 2 files changed, 5 insertions(+), 28 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2513545d4..0b3726510 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,6 +8,9 @@ name: Test branches: - main +permissions: + contents: read + jobs: pytest: name: Run Python Tests diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index 45c90c46b..34d85d181 100644 --- a/backend/app/controller/chat_controller.py +++ 
b/backend/app/controller/chat_controller.py @@ -13,39 +13,14 @@ # ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= import asyncio +import logging import os import re import time from pathlib import Path -from dotenv import load_dotenv -from fastapi import APIRouter, Request, Response -from fastapi.responses import StreamingResponse -import logging -from app.component import code -from app.exception.exception import UserException -from app.model.chat import Chat, HumanReply, McpServers, Status, SupplementChat, AddTaskRequest, sse_json -from app.service.chat_service import step_solve -from app.service.task import ( - Action, - ActionImproveData, - ActionInstallMcpData, - ActionStopData, - ActionSupplementData, - ActionAddTaskData, - ActionRemoveTaskData, - ActionSkipTaskData, - get_or_create_task_lock, - get_task_lock, - set_current_task_id, - delete_task_lock, - task_locks, -) -from app.component.environment import set_user_env_path, sanitize_env_path -from app.utils.workforce import Workforce -from camel.tasks.task import Task from app.component import code -from app.component.environment import set_user_env_path +from app.component.environment import sanitize_env_path, set_user_env_path from app.exception.exception import UserException from app.model.chat import (AddTaskRequest, Chat, HumanReply, McpServers, Status, SupplementChat, sse_json) @@ -60,7 +35,6 @@ from fastapi import APIRouter, Request, Response from fastapi.responses import StreamingResponse - router = APIRouter() # Logger for chat controller From 2d76b7171566223b7ac3a2e3b98ac9cb8d4e3e10 Mon Sep 17 00:00:00 2001 From: bytecraftii Date: Thu, 29 Jan 2026 03:32:23 -0800 Subject: [PATCH 16/18] update --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7e0663f86..e0411bce3 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,7 @@ yarn.lock .env .env.local .env.production +.env.development .cursor From 766c8f566a957e8fbff585d2bdb25422b4117066 Mon Sep 17 00:00:00 2001 From: bytecii Date: Thu, 5 Feb 2026 21:31:51 -0800 Subject: [PATCH 17/18] update --- backend/app/utils/agent.py | 1855 ------------------------------------ 1 file changed, 1855 deletions(-) delete mode 100644 backend/app/utils/agent.py diff --git a/backend/app/utils/agent.py b/backend/app/utils/agent.py deleted file mode 100644 index fdd566efe..000000000 --- a/backend/app/utils/agent.py +++ /dev/null @@ -1,1855 +0,0 @@ -# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. 
========= - -import asyncio -import contextvars -import json -import os -import platform -from threading import Event, Lock -from typing import Any, Callable, Dict, List, Tuple -import uuid -import logging - -from app.model.chat import AgentModelConfig - -# Thread-safe reference to main event loop using contextvars -# This ensures each request has its own event loop reference, avoiding race conditions -_main_event_loop_var: contextvars.ContextVar[asyncio.AbstractEventLoop | None] = contextvars.ContextVar( - "_main_event_loop", default=None -) - -# Global fallback for main event loop reference -# Used when contextvars don't propagate to worker threads (e.g., asyncio.to_thread) -_GLOBAL_MAIN_LOOP: asyncio.AbstractEventLoop | None = None -_GLOBAL_MAIN_LOOP_LOCK = Lock() - - -def set_main_event_loop(loop: asyncio.AbstractEventLoop | None): - """Set the main event loop reference for thread-safe task scheduling. - - This should be called from the main async context before spawning threads - that need to schedule async tasks. Uses both contextvars (for request isolation) - and a global fallback (for thread pool workers where contextvars may not propagate). - """ - global _GLOBAL_MAIN_LOOP - _main_event_loop_var.set(loop) - with _GLOBAL_MAIN_LOOP_LOCK: - _GLOBAL_MAIN_LOOP = loop - - -def _schedule_async_task(coro): - """Schedule an async coroutine as a task, thread-safe. - - This function handles scheduling from both the main event loop thread - and from worker threads (e.g., when using asyncio.to_thread). - """ - try: - # Try to get the running loop (works in main event loop thread) - loop = asyncio.get_running_loop() - loop.create_task(coro) - except RuntimeError: - # No running loop in this thread (we're in a worker thread) - # First try contextvars, then fallback to global reference - main_loop = _main_event_loop_var.get() - if main_loop is None: - with _GLOBAL_MAIN_LOOP_LOCK: - main_loop = _GLOBAL_MAIN_LOOP - if main_loop is not None and main_loop.is_running(): - asyncio.run_coroutine_threadsafe(coro, main_loop) - else: - # This should not happen in normal operation - log error and skip - logging.error( - "No event loop available for async task scheduling, task skipped. " - "Ensure set_main_event_loop() is called before parallel agent creation." 
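
The helper being removed above exists because asyncio.create_task only works on the thread that runs the loop; from a worker thread (e.g. under asyncio.to_thread) the coroutine has to be handed over with run_coroutine_threadsafe. The mechanism in isolation:

    import asyncio
    import threading


    async def main():
        loop = asyncio.get_running_loop()
        done = asyncio.Event()

        async def on_event(msg: str):
            print("scheduled from worker:", msg)
            done.set()

        def worker():
            # No loop is running in this thread, so create_task would
            # raise; hand the coroutine to the main loop instead.
            asyncio.run_coroutine_threadsafe(on_event("hello"), loop)

        threading.Thread(target=worker).start()
        await done.wait()


    asyncio.run(main())
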
- ) - - -from camel.agents import ChatAgent -from camel.agents.chat_agent import ( - StreamingChatAgentResponse, - AsyncStreamingChatAgentResponse, -) -from camel.agents._types import ToolCallRequest -from camel.memories import AgentMemory -from camel.messages import BaseMessage -from camel.models import ( - BaseModelBackend, - ModelFactory, - ModelManager, - OpenAIAudioModels, - ModelProcessingError, -) -from camel.responses import ChatAgentResponse -from camel.terminators import ResponseTerminator -from camel.toolkits import FunctionTool, RegisteredAgentToolkit -from camel.types.agents import ToolCallingRecord -from app.component.environment import env -from app.utils.file_utils import get_working_directory -from app.utils.toolkit.abstract_toolkit import AbstractToolkit -from app.utils.toolkit.hybrid_browser_toolkit import HybridBrowserToolkit -from app.utils.toolkit.excel_toolkit import ExcelToolkit -from app.utils.toolkit.file_write_toolkit import FileToolkit -from app.utils.toolkit.google_calendar_toolkit import GoogleCalendarToolkit -from app.utils.toolkit.google_drive_mcp_toolkit import GoogleDriveMCPToolkit -from app.utils.toolkit.google_gmail_mcp_toolkit import GoogleGmailMCPToolkit -from app.utils.toolkit.human_toolkit import HumanToolkit -from app.utils.toolkit.markitdown_toolkit import MarkItDownToolkit -from app.utils.toolkit.mcp_search_toolkit import McpSearchToolkit -from app.utils.toolkit.note_taking_toolkit import NoteTakingToolkit -from app.utils.toolkit.notion_mcp_toolkit import NotionMCPToolkit -from app.utils.toolkit.pptx_toolkit import PPTXToolkit -from app.utils.toolkit.screenshot_toolkit import ScreenshotToolkit -from app.utils.toolkit.terminal_toolkit import TerminalToolkit -from app.utils.toolkit.github_toolkit import GithubToolkit -from app.utils.toolkit.search_toolkit import SearchToolkit -from app.utils.toolkit.video_download_toolkit import VideoDownloaderToolkit -from app.utils.toolkit.audio_analysis_toolkit import AudioAnalysisToolkit -from app.utils.toolkit.video_analysis_toolkit import VideoAnalysisToolkit -from app.utils.toolkit.image_analysis_toolkit import ImageAnalysisToolkit -from app.utils.toolkit.openai_image_toolkit import OpenAIImageToolkit -from app.utils.toolkit.web_deploy_toolkit import WebDeployToolkit -from app.utils.toolkit.whatsapp_toolkit import WhatsAppToolkit -from app.utils.toolkit.twitter_toolkit import TwitterToolkit -from app.utils.toolkit.linkedin_toolkit import LinkedInToolkit -from app.utils.toolkit.reddit_toolkit import RedditToolkit -from app.utils.toolkit.slack_toolkit import SlackToolkit -from app.utils.toolkit.lark_toolkit import LarkToolkit -from camel.types import ModelPlatformType, ModelType -from camel.toolkits import MCPToolkit, ToolkitMessageIntegration -import datetime -from pydantic import BaseModel -from app.model.chat import Chat, McpServers - -# Logger for agent tracking -logger = logging.getLogger("agent") -from app.service.task import ( - Action, - ActionActivateAgentData, - ActionActivateToolkitData, - ActionBudgetNotEnough, - ActionCreateAgentData, - ActionDeactivateAgentData, - ActionDeactivateToolkitData, - Agents, - get_task_lock, -) -from app.service.task import set_process_task - -NOW_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:00:00") - - -class ListenChatAgent(ChatAgent): - def __init__( - self, - api_task_id: str, - agent_name: str, - system_message: BaseMessage | str | None = None, - model: ( - BaseModelBackend - | ModelManager - | Tuple[str, str] - | str - | ModelType - | 
Tuple[ModelPlatformType, ModelType] - | List[BaseModelBackend] - | List[str] - | List[ModelType] - | List[Tuple[str, str]] - | List[Tuple[ModelPlatformType, ModelType]] - | None - ) = None, - memory: AgentMemory | None = None, - message_window_size: int | None = None, - token_limit: int | None = None, - output_language: str | None = None, - tools: List[FunctionTool | Callable[..., Any]] | None = None, - toolkits_to_register_agent: List[RegisteredAgentToolkit] | None = None, - external_tools: (List[FunctionTool | Callable[..., Any] | Dict[str, Any]] | None) = None, - response_terminators: List[ResponseTerminator] | None = None, - scheduling_strategy: str = "round_robin", - max_iteration: int | None = None, - agent_id: str | None = None, - stop_event: Event | None = None, - tool_execution_timeout: float | None = None, - mask_tool_output: bool = False, - pause_event: asyncio.Event | None = None, - prune_tool_calls_from_memory: bool = False, - enable_snapshot_clean: bool = False, - step_timeout: float | None = 1800, # 30 minutes - **kwargs: Any, - ) -> None: - super().__init__( - system_message=system_message, - model=model, - memory=memory, - message_window_size=message_window_size, - token_limit=token_limit, - output_language=output_language, - tools=tools, - toolkits_to_register_agent=toolkits_to_register_agent, - external_tools=external_tools, - response_terminators=response_terminators, - scheduling_strategy=scheduling_strategy, - max_iteration=max_iteration, - agent_id=agent_id, - stop_event=stop_event, - tool_execution_timeout=tool_execution_timeout, - mask_tool_output=mask_tool_output, - pause_event=pause_event, - prune_tool_calls_from_memory=prune_tool_calls_from_memory, - enable_snapshot_clean=enable_snapshot_clean, - step_timeout=step_timeout, - **kwargs, - ) - self.api_task_id = api_task_id - self.agent_name = agent_name - - process_task_id: str = "" - - def step( - self, - input_message: BaseMessage | str, - response_format: type[BaseModel] | None = None, - ) -> ChatAgentResponse | StreamingChatAgentResponse: - task_lock = get_task_lock(self.api_task_id) - asyncio.create_task( - task_lock.put_queue( - ActionActivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": (input_message.content if isinstance(input_message, BaseMessage) else input_message), - }, - ) - ) - ) - error_info = None - message = None - res = None - logger.info( - f"Agent {self.agent_name} starting step with message: {input_message.content if isinstance(input_message, BaseMessage) else input_message}" - ) - try: - res = super().step(input_message, response_format) - except ModelProcessingError as e: - res = None - error_info = e - if "Budget has been exceeded" in str(e): - message = "Budget has been exceeded" - logger.warning(f"Agent {self.agent_name} budget exceeded") - asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough())) - else: - message = str(e) - logger.error(f"Agent {self.agent_name} model processing error: {e}") - total_tokens = 0 - except Exception as e: - res = None - error_info = e - logger.error(f"Agent {self.agent_name} unexpected error in step: {e}", exc_info=True) - message = f"Error processing message: {e!s}" - total_tokens = 0 - - if res is not None: - if isinstance(res, StreamingChatAgentResponse): - - def _stream_with_deactivate(): - last_response: ChatAgentResponse | None = None - # With stream_accumulate=False, we need to accumulate delta content - accumulated_content = "" - try: - for chunk in 
res: - last_response = chunk - # Accumulate content from each chunk (delta mode) - if chunk.msg and chunk.msg.content: - accumulated_content += chunk.msg.content - yield chunk - finally: - total_tokens = 0 - if last_response: - usage_info = last_response.info.get("usage") or last_response.info.get("token_usage") or {} - if usage_info: - total_tokens = usage_info.get("total_tokens", 0) - asyncio.create_task( - task_lock.put_queue( - ActionDeactivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": accumulated_content, - "tokens": total_tokens, - }, - ) - ) - ) - - return StreamingChatAgentResponse(_stream_with_deactivate()) - - message = res.msg.content if res.msg else "" - usage_info = res.info.get("usage") or res.info.get("token_usage") or {} - total_tokens = usage_info.get("total_tokens", 0) if usage_info else 0 - logger.info(f"Agent {self.agent_name} completed step, tokens used: {total_tokens}") - - assert message is not None - - asyncio.create_task( - task_lock.put_queue( - ActionDeactivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": message, - "tokens": total_tokens, - }, - ) - ) - ) - - if error_info is not None: - raise error_info - assert res is not None - return res - - async def astep( - self, - input_message: BaseMessage | str, - response_format: type[BaseModel] | None = None, - ) -> ChatAgentResponse | AsyncStreamingChatAgentResponse: - task_lock = get_task_lock(self.api_task_id) - await task_lock.put_queue( - ActionActivateAgentData( - action=Action.activate_agent, - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": (input_message.content if isinstance(input_message, BaseMessage) else input_message), - }, - ) - ) - - error_info = None - message = None - res = None - logger.debug( - f"Agent {self.agent_name} starting async step with message: {input_message.content if isinstance(input_message, BaseMessage) else input_message}" - ) - - try: - res = await super().astep(input_message, response_format) - if isinstance(res, AsyncStreamingChatAgentResponse): - res = await res._get_final_response() - except ModelProcessingError as e: - res = None - error_info = e - if "Budget has been exceeded" in str(e): - message = "Budget has been exceeded" - logger.warning(f"Agent {self.agent_name} budget exceeded") - asyncio.create_task(task_lock.put_queue(ActionBudgetNotEnough())) - else: - message = str(e) - logger.error(f"Agent {self.agent_name} model processing error: {e}") - total_tokens = 0 - except Exception as e: - res = None - error_info = e - logger.error(f"Agent {self.agent_name} unexpected error in async step: {e}", exc_info=True) - message = f"Error processing message: {e!s}" - total_tokens = 0 - - if res is not None: - message = res.msg.content if res.msg else "" - total_tokens = res.info["usage"]["total_tokens"] - logger.info(f"Agent {self.agent_name} completed step, tokens used: {total_tokens}") - - assert message is not None - - asyncio.create_task( - task_lock.put_queue( - ActionDeactivateAgentData( - data={ - "agent_name": self.agent_name, - "process_task_id": self.process_task_id, - "agent_id": self.agent_id, - "message": message, - "tokens": total_tokens, - }, - ) - ) - ) - - if error_info is not None: - raise error_info - assert res is not None - return res - - def _execute_tool(self, tool_call_request: ToolCallRequest) -> 
ToolCallingRecord:
-        func_name = tool_call_request.tool_name
-        tool: FunctionTool = self._internal_tools[func_name]
-        # Route async functions to async execution even if they have __wrapped__
-        if asyncio.iscoroutinefunction(tool.func):
-            # For async functions, we need to use the async execution path
-            return asyncio.run(self._aexecute_tool(tool_call_request))
-
-        # Handle all sync tools ourselves to maintain ContextVar context
-        args = tool_call_request.args
-        tool_call_id = tool_call_request.tool_call_id
-
-        # Check if tool is wrapped by @listen_toolkit decorator
-        # If so, the decorator will handle activate/deactivate events
-        # TODO: Refactor - current marker detection is a workaround. The proper fix is to
-        # unify event sending: remove activate/deactivate from @listen_toolkit, only send here
-        has_listen_decorator = getattr(tool.func, "__listen_toolkit__", False)
-
-        try:
-            task_lock = get_task_lock(self.api_task_id)
-
-            toolkit_name = getattr(tool, "_toolkit_name") if hasattr(tool, "_toolkit_name") else "mcp_toolkit"
-            logger.debug(
-                f"Agent {self.agent_name} executing tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}"
-            )
-
-            # Only send activate event if tool is NOT wrapped by @listen_toolkit
-            if not has_listen_decorator:
-                asyncio.create_task(
-                    task_lock.put_queue(
-                        ActionActivateToolkitData(
-                            data={
-                                "agent_name": self.agent_name,
-                                "process_task_id": self.process_task_id,
-                                "toolkit_name": toolkit_name,
-                                "method_name": func_name,
-                                "message": json.dumps(args, ensure_ascii=False),
-                            },
-                        )
-                    )
-                )
-            # Set process_task context for all tool executions
-            with set_process_task(self.process_task_id):
-                raw_result = tool(**args)
-            logger.debug(f"Tool {func_name} executed successfully")
-            if self.mask_tool_output:
-                self._secure_result_store[tool_call_id] = raw_result
-                result = (
-                    "[The tool has been executed successfully, but the output"
-                    " from the tool is masked. You can move forward]"
-                )
-                mask_flag = True
-            else:
-                result = raw_result
-                mask_flag = False
-            # Prepare result message with truncation
-            if isinstance(result, str):
-                result_msg = result
-            else:
-                result_str = repr(result)
-                MAX_RESULT_LENGTH = 500
-                if len(result_str) > MAX_RESULT_LENGTH:
-                    result_msg = (
-                        result_str[:MAX_RESULT_LENGTH] + f"... (truncated, total length: {len(result_str)} chars)"
-                    )
-                else:
-                    result_msg = result_str
-
-            # Only send deactivate event if tool is NOT wrapped by @listen_toolkit
-            if not has_listen_decorator:
-                asyncio.create_task(
-                    task_lock.put_queue(
-                        ActionDeactivateToolkitData(
-                            data={
-                                "agent_name": self.agent_name,
-                                "process_task_id": self.process_task_id,
-                                "toolkit_name": toolkit_name,
-                                "method_name": func_name,
-                                "message": result_msg,
-                            },
-                        )
-                    )
-                )
-        except Exception as e:
-            # Capture the error message to prevent framework crash
-            error_msg = f"Error executing tool '{func_name}': {e!s}"
-            result = f"Tool execution failed: {error_msg}"
-            mask_flag = False
-            logger.error(f"Tool execution failed for {func_name}: {e}", exc_info=True)
-
-        return self._record_tool_calling(
-            func_name,
-            args,
-            result,
-            tool_call_id,
-            mask_output=mask_flag,
-            extra_content=tool_call_request.extra_content,
-        )
-
-    async def _aexecute_tool(self, tool_call_request: ToolCallRequest) -> ToolCallingRecord:
-        func_name = tool_call_request.tool_name
-        tool: FunctionTool = self._internal_tools[func_name]
-
-        # Always handle tool execution ourselves to maintain ContextVar context
-        args = tool_call_request.args
-        tool_call_id = tool_call_request.tool_call_id
-        task_lock = get_task_lock(self.api_task_id)
-
-        # Try to get the real toolkit name
-        toolkit_name = None
-
-        # Method 1: Check _toolkit_name attribute
-        if hasattr(tool, "_toolkit_name"):
-            toolkit_name = tool._toolkit_name
-
-        # Method 2: For MCP tools, check if func has __self__ (the toolkit instance)
-        if not toolkit_name and hasattr(tool, "func") and hasattr(tool.func, "__self__"):
-            toolkit_instance = tool.func.__self__
-            if hasattr(toolkit_instance, "toolkit_name") and callable(toolkit_instance.toolkit_name):
-                toolkit_name = toolkit_instance.toolkit_name()
-
-        # Method 3: Check if tool.func is a bound method with toolkit
-        if not toolkit_name and hasattr(tool, "func"):
-            if hasattr(tool.func, "func") and hasattr(tool.func.func, "__self__"):
-                toolkit_instance = tool.func.func.__self__
-                if hasattr(toolkit_instance, "toolkit_name") and callable(toolkit_instance.toolkit_name):
-                    toolkit_name = toolkit_instance.toolkit_name()
-
-        # Default fallback
-        if not toolkit_name:
-            toolkit_name = "mcp_toolkit"
-
-        logger.info(
-            f"Agent {self.agent_name} executing async tool: {func_name} from toolkit: {toolkit_name} with args: {json.dumps(args, ensure_ascii=False)}"
-        )
-
-        # Check if tool is wrapped by @listen_toolkit decorator
-        # If so, the decorator will handle activate/deactivate events
-        has_listen_decorator = getattr(tool.func, "__listen_toolkit__", False)
-
-        # Only send activate event if tool is NOT wrapped by @listen_toolkit
-        if not has_listen_decorator:
-            await task_lock.put_queue(
-                ActionActivateToolkitData(
-                    data={
-                        "agent_name": self.agent_name,
-                        "process_task_id": self.process_task_id,
-                        "toolkit_name": toolkit_name,
-                        "method_name": func_name,
-                        "message": json.dumps(args, ensure_ascii=False),
-                    },
-                )
-            )
-        try:
-            # Set process_task context for all tool executions
-            with set_process_task(self.process_task_id):
-                # Try different invocation paths in order of preference
-                if hasattr(tool, "func") and hasattr(tool.func, "async_call"):
-                    # Case: FunctionTool wrapping an MCP tool
-                    # Check if the wrapped tool is sync to avoid run_in_executor
-                    if hasattr(tool, "is_async") and not tool.is_async:
-                        # Sync tool: call directly to preserve ContextVar
-                        result = tool(**args)
-                        if asyncio.iscoroutine(result):
-                            result = await result
-                    else:
-                        # Async tool: use async_call
-                        result = await tool.func.async_call(**args)
-
-                elif hasattr(tool, "async_call") and callable(tool.async_call):
-                    # Case: tool itself has async_call
-                    # Check if this is a sync tool to avoid run_in_executor (which breaks ContextVar)
-                    if hasattr(tool, "is_async") and not tool.is_async:
-                        # Sync tool: call directly to preserve ContextVar in same thread
-                        result = tool(**args)
-                        # Handle case where synchronous call returns a coroutine
-                        if asyncio.iscoroutine(result):
-                            result = await result
-                    else:
-                        # Async tool: use async_call
-                        result = await tool.async_call(**args)
-
-                elif hasattr(tool, "func") and asyncio.iscoroutinefunction(tool.func):
-                    # Case: tool wraps a direct async function
-                    result = await tool.func(**args)
-
-                elif asyncio.iscoroutinefunction(tool):
-                    # Case: tool is itself a coroutine function
-                    result = await tool(**args)
-
-                else:
-                    # Fallback: synchronous call - call directly in current context
-                    # DO NOT use run_in_executor to preserve ContextVar
-                    result = tool(**args)
-                    # Handle case where synchronous call returns a coroutine
-                    if asyncio.iscoroutine(result):
-                        result = await result
-
-        except Exception as e:
-            # Capture the error message to prevent framework crash
-            error_msg = f"Error executing async tool '{func_name}': {e!s}"
-            result = {"error": error_msg}
-            logger.error(f"Async tool execution failed for {func_name}: {e}", exc_info=True)
-
-        # Prepare result message with truncation
-        if isinstance(result, str):
-            result_msg = result
-        else:
-            result_str = repr(result)
-            MAX_RESULT_LENGTH = 500
-            if len(result_str) > MAX_RESULT_LENGTH:
-                result_msg = result_str[:MAX_RESULT_LENGTH] + f"... (truncated, total length: {len(result_str)} chars)"
-            else:
-                result_msg = result_str
-
-        # Only send deactivate event if tool is NOT wrapped by @listen_toolkit
-        if not has_listen_decorator:
-            await task_lock.put_queue(
-                ActionDeactivateToolkitData(
-                    data={
-                        "agent_name": self.agent_name,
-                        "process_task_id": self.process_task_id,
-                        "toolkit_name": toolkit_name,
-                        "method_name": func_name,
-                        "message": result_msg,
-                    },
-                )
-            )
-        return self._record_tool_calling(
-            func_name,
-            args,
-            result,
-            tool_call_id,
-            extra_content=tool_call_request.extra_content,
-        )
-
-    def clone(self, with_memory: bool = False) -> ChatAgent:
-        """Please see super.clone()"""
-        system_message = None if with_memory else self._original_system_message
-
-        # Clone tools and collect toolkits that need registration
-        cloned_tools, toolkits_to_register = self._clone_tools()
-
-        new_agent = ListenChatAgent(
-            api_task_id=self.api_task_id,
-            agent_name=self.agent_name,
-            system_message=system_message,
-            model=self.model_backend.models,  # Pass the existing model_backend
-            memory=None,  # clone memory later
-            message_window_size=getattr(self.memory, "window_size", None),
-            token_limit=getattr(self.memory.get_context_creator(), "token_limit", None),
-            output_language=self._output_language,
-            tools=cloned_tools,
-            toolkits_to_register_agent=toolkits_to_register,
-            external_tools=[schema for schema in self._external_tool_schemas.values()],
-            response_terminators=self.response_terminators,
-            scheduling_strategy=self.model_backend.scheduling_strategy.__name__,
-            max_iteration=self.max_iteration,
-            stop_event=self.stop_event,
-            tool_execution_timeout=self.tool_execution_timeout,
-            mask_tool_output=self.mask_tool_output,
-            pause_event=self.pause_event,
-            prune_tool_calls_from_memory=self.prune_tool_calls_from_memory,
-            enable_snapshot_clean=self._enable_snapshot_clean,
-            step_timeout=self.step_timeout,
-            stream_accumulate=self.stream_accumulate,
-        )
-
-        new_agent.process_task_id = self.process_task_id
-
-        # Copy memory if requested
-        if with_memory:
-            # Get all records from the current memory
-            context_records = self.memory.retrieve()
-            # Write them to the new agent's memory
-            for context_record in context_records:
-                new_agent.memory.write_record(context_record.memory_record)
-
-        return new_agent
-
-
-def agent_model(
-    agent_name: str,
-    system_message: str | BaseMessage,
-    options: Chat,
-    tools: list[FunctionTool | Callable] | None = None,
-    prune_tool_calls_from_memory: bool = False,
-    tool_names: list[str] | None = None,
-    toolkits_to_register_agent: list[RegisteredAgentToolkit] | None = None,
-    enable_snapshot_clean: bool = False,
-    custom_model_config: AgentModelConfig | None = None,
-):
-    task_lock = get_task_lock(options.project_id)
-    agent_id = str(uuid.uuid4())
-    logger.info(f"Creating agent: {agent_name} with id: {agent_id} for project: {options.project_id}")
-    # Use thread-safe scheduling to support parallel agent creation
-    _schedule_async_task(
-        task_lock.put_queue(
-            ActionCreateAgentData(
-                data={
-                    "agent_name": agent_name,
-                    "agent_id": agent_id,
-                    "tools": tool_names or [],
-                }
-            )
-        )
-    )
-
-    # Determine model configuration - use custom config if provided, otherwise use task defaults
-    config_attrs = ["model_platform", "model_type", "api_key", "api_url"]
-    effective_config = {}
-
-    if custom_model_config and custom_model_config.has_custom_config():
-        for attr in config_attrs:
-            effective_config[attr] = getattr(custom_model_config, attr, None) or getattr(options, attr)
-        extra_params = custom_model_config.extra_params or options.extra_params or {}
-        logger.info(
-            f"Agent {agent_name} using custom model config: platform={effective_config['model_platform']}, type={effective_config['model_type']}"
-        )
-    else:
-        for attr in config_attrs:
-            effective_config[attr] = getattr(options, attr)
-        extra_params = options.extra_params or {}
-    init_param_keys = {
-        "api_version",
-        "azure_ad_token",
-        "azure_ad_token_provider",
-        "max_retries",
-        "timeout",
-        "client",
-        "async_client",
-        "azure_deployment_name",
-    }
-
-    init_params = {}
-    model_config: dict[str, Any] = {}
-
-    if options.is_cloud():
-        model_config["user"] = str(options.project_id)
-
-    excluded_keys = {"model_platform", "model_type", "api_key", "url"}
-
-    # Distribute extra_params between init_params and model_config
-    for k, v in extra_params.items():
-        if k in excluded_keys:
-            continue
-        # Skip empty values
-        if v is None or (isinstance(v, str) and not v.strip()):
-            continue
-
-        if k in init_param_keys:
-            init_params[k] = v
-        else:
-            model_config[k] = v
-
-    if agent_name == Agents.task_agent:
-        model_config["stream"] = True
-    if agent_name == Agents.browser_agent:
-        try:
-            model_platform_enum = ModelPlatformType(effective_config["model_platform"].lower())
-            if model_platform_enum in {
-                ModelPlatformType.OPENAI,
-                ModelPlatformType.AZURE,
-                ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
-                ModelPlatformType.LITELLM,
-                ModelPlatformType.OPENROUTER,
-            }:
-                model_config["parallel_tool_calls"] = False
-        except (ValueError, AttributeError):
-            logging.error(
-                f"Invalid model platform for browser agent: {effective_config['model_platform']}",
-                exc_info=True,
-            )
-            model_platform_enum = None
-
-    model = ModelFactory.create(
-        model_platform=effective_config["model_platform"],
-        model_type=effective_config["model_type"],
-        api_key=effective_config["api_key"],
-        url=effective_config["api_url"],
-        model_config_dict=model_config or None,
-        timeout=600,  # 10 minutes
-        **init_params,
-    )
-
-    return ListenChatAgent(
-        options.project_id,
-        agent_name,
-        system_message,
-        model=model,
-        tools=tools,
-        agent_id=agent_id,
-        prune_tool_calls_from_memory=prune_tool_calls_from_memory,
-        toolkits_to_register_agent=toolkits_to_register_agent,
-        enable_snapshot_clean=enable_snapshot_clean,
-        stream_accumulate=False,
-    )
-
-
-def question_confirm_agent(options: Chat):
-    return agent_model(
-        "question_confirm_agent",
-        f"You are a highly capable agent. Your primary function is to analyze a user's request and determine the appropriate course of action. The current date is {NOW_STR}(Accurate to the hour). For any date-related tasks, you MUST use this as the current date.",
-        options,
-    )
-
-
-def task_summary_agent(options: Chat):
-    return agent_model(
-        "task_summary_agent",
-        "You are a helpful task assistant that can help users summarize the content of their tasks",
-        options,
-    )
-
-
-async def developer_agent(options: Chat):
-    working_directory = get_working_directory(options)
-    logger.info(f"Creating developer agent for project: {options.project_id} in directory: {working_directory}")
-    message_integration = ToolkitMessageIntegration(
-        message_handler=HumanToolkit(options.project_id, Agents.developer_agent).send_message_to_user
-    )
-    note_toolkit = NoteTakingToolkit(
-        api_task_id=options.project_id,
-        agent_name=Agents.developer_agent,
-        working_directory=working_directory,
-    )
-    note_toolkit = message_integration.register_toolkits(note_toolkit)
-    web_deploy_toolkit = WebDeployToolkit(api_task_id=options.project_id)
-    web_deploy_toolkit = message_integration.register_toolkits(web_deploy_toolkit)
-    screenshot_toolkit = ScreenshotToolkit(options.project_id, working_directory=working_directory)
-    screenshot_toolkit = message_integration.register_toolkits(screenshot_toolkit)
-
-    terminal_toolkit = TerminalToolkit(
-        options.project_id,
-        Agents.developer_agent,
-        working_directory=working_directory,
-        safe_mode=True,
-        clone_current_env=True,
-    )
-    terminal_toolkit = message_integration.register_toolkits(terminal_toolkit)
-
-    tools = [
-        *HumanToolkit.get_can_use_tools(options.project_id, Agents.developer_agent),
-        *note_toolkit.get_tools(),
-        *web_deploy_toolkit.get_tools(),
-        *terminal_toolkit.get_tools(),
-        *screenshot_toolkit.get_tools(),
-    ]
-    system_message = f"""
-
-You are a Lead Software Engineer, a master-level coding assistant with a
-powerful and unrestricted terminal. Your primary role is to solve any
-technical task by writing and executing code, installing necessary libraries,
-interacting with the operating system, and deploying applications. You are the
-team's go-to expert for all technical implementation.
-
-
-
-You collaborate with the following agents who can work in parallel:
-- **Senior Research Analyst**: Gathers information from the web to support
-your development tasks.
-- **Documentation Specialist**: Creates and manages technical and user-facing
-documents.
-- **Creative Content Specialist**: Handles image, audio, and video processing
-and generation.
-
-
-
-- **System**: {platform.system()} ({platform.machine()})
-- **Working Directory**: `{working_directory}`. All local file operations must
-occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity.
-The current date is {NOW_STR}(Accurate to the hour). For any date-related tasks, you MUST use this as the current date.
-
-
-
-- You MUST use the `read_note` tool to read the ALL notes from other agents.
-
-You SHOULD keep the user informed by providing message_title and message_description
-  parameters when calling tools. These optional parameters are available on all tools
-  and will automatically notify the user of your progress.
-
-- When you complete your task, your final response must be a comprehensive
-summary of your work and the outcome, presented in a clear, detailed, and
-easy-to-read format. Avoid using markdown tables for presenting data; use
-plain text formatting instead.
-
-
-
-Your capabilities are extensive and powerful:
-- **Unrestricted Code Execution**: You can write and execute code in any
-  language to solve a task. You MUST first save your code to a file (e.g.,
-  `script.py`) and then run it from the terminal (e.g.,
-  `python script.py`).
-- **Full Terminal Control**: You have root-level access to the terminal. You
-  can run any command-line tool, manage files, and interact with the OS. If
-  a tool is missing, you MUST install it with the appropriate package manager
-  (e.g., `pip3`, `uv`, or `apt-get`). Your capabilities include:
-    - **Text & Data Processing**: `awk`, `sed`, `grep`, `jq`.
-    - **File System & Execution**: `find`, `xargs`, `tar`, `zip`, `unzip`,
-      `chmod`.
-    - **Networking & Web**: `curl`, `wget` for web requests; `ssh` for
-      remote access.
-- **Screen Observation**: You can take screenshots to analyze GUIs and visual
-  context, enabling you to perform tasks that require sight.
-- **Desktop Automation**: You can control desktop applications
-  programmatically.
-    - **On macOS**, you MUST prioritize using **AppleScript** for its robust
-      control over native applications. Execute simple commands with
-      `osascript -e '...'` or run complex scripts from a `.scpt` file.
-    - **On other systems**, use **pyautogui** for cross-platform GUI
-      automation.
-    - **IMPORTANT**: Always complete the full automation workflow—do not just
-      prepare or suggest actions. Execute them to completion.
-- **Solution Verification**: You can immediately test and verify your
-  solutions by executing them in the terminal.
-- **Web Deployment**: You can deploy web applications and content, serve
-  files, and manage deployments.
-- **Human Collaboration**: If you are stuck or need clarification, you can
-  ask for human input via the console.
-- **Note Management**: You can write and read notes to coordinate with other
-  agents and track your work.
-
-
-
-- **Bias for Action**: Your purpose is to take action. Don't just suggest
-solutions—implement them. Write code, run commands, and build things.
-- **Complete the Full Task**: When automating GUI applications, always finish
-what you start. If the task involves sending something, send it. If it
-involves submitting data, submit it. Never stop at just preparing or
-drafting—execute the complete workflow to achieve the desired outcome.
-- **Embrace Challenges**: Never say "I can't." If you
-encounter a limitation, find a way to overcome it.
-- **Resourcefulness**: If a tool is missing, install it. If information is
-lacking, find it. You have the full power of a terminal to acquire any
-resource you need.
-- **Think Like an Engineer**: Approach problems methodically. Analyze
-requirements, execute it, and verify the results. Your
-strength lies in your ability to engineer solutions.
-
-
-
-The terminal tools are session-based, identified by a unique `id`. Master
-these tips to maximize your effectiveness:
-
-- **GUI Automation Strategy**:
-    - **AppleScript (macOS Priority)**: For robust control of macOS apps, use
-      `osascript`.
-        - Example (open Slack):
-          `osascript -e 'tell application "Slack" to activate'`
-        - Example (run script file): `osascript my_script.scpt`
-    - **pyautogui (Cross-Platform)**: For other OSes or simple automation.
-        - Key functions: `pyautogui.click(x, y)`, `pyautogui.typewrite("text")`,
-          `pyautogui.hotkey('ctrl', 'c')`, `pyautogui.press('enter')`.
-        - Safety: Always use `time.sleep()` between actions to ensure stability
-          and add `pyautogui.FAILSAFE = True` to your scripts.
-        - Workflow: Your scripts MUST complete the entire task, from start to
-          final submission.
-
-- **Command-Line Best Practices**:
-    - **Be Creative**: The terminal is your most powerful tool. Use it boldly.
-    - **Automate Confirmation**: Use `-y` or `-f` flags to avoid interactive
-      prompts.
-    - **Manage Output**: Redirect long outputs to a file (e.g., `> output.txt`).
-    - **Chain Commands**: Use `&&` to link commands for sequential execution.
-    - **Piping**: Use `|` to pass output from one command to another.
-    - **Permissions**: Use `ls -F` to check file permissions.
-    - **Installation**: Use `pip3 install` or `apt-get install` for new
-      packages.If you encounter `ModuleNotFoundError` or `ImportError`, install
-      the missing package with `pip install `.
-
-- Stop a Process: If a process needs to be terminated, use
-  `shell_kill_process(id="...")`.
-
-
-
-- If you get stuck, encounter an issue you cannot solve (like a CAPTCHA),
-  or need clarification, use the `ask_human_via_console` tool.
-- Document your progress and findings in notes so other agents can build
-  upon your work.
-
-"""
-
-    return agent_model(
-        Agents.developer_agent,
-        BaseMessage.make_assistant_message(
-            role_name="Developer Agent",
-            content=system_message,
-        ),
-        options,
-        tools,
-        tool_names=[
-            HumanToolkit.toolkit_name(),
-            TerminalToolkit.toolkit_name(),
-            NoteTakingToolkit.toolkit_name(),
-            WebDeployToolkit.toolkit_name(),
-        ],
-    )
-
-
-def browser_agent(options: Chat):
-    working_directory = get_working_directory(options)
-    logger.info(f"Creating browser agent for project: {options.project_id} in directory: {working_directory}")
-    message_integration = ToolkitMessageIntegration(
-        message_handler=HumanToolkit(options.project_id, Agents.browser_agent).send_message_to_user
-    )
-
-    web_toolkit_custom = HybridBrowserToolkit(
-        options.project_id,
-        headless=False,
-        browser_log_to_file=True,
-        stealth=True,
-        session_id=str(uuid.uuid4())[:8],
-        default_start_url="about:blank",
-        cdp_url=f"http://localhost:{env('browser_port', '9222')}",
-        enabled_tools=[
-            "browser_click",
-            "browser_type",
-            "browser_back",
-            "browser_forward",
-            "browser_select",
-            "browser_console_exec",
-            "browser_console_view",
-            "browser_switch_tab",
-            "browser_enter",
-            "browser_visit_page",
-            "browser_scroll",
-            "browser_sheet_read",
-            "browser_sheet_input",
-            "browser_get_page_snapshot",
-        ],
-    )
-
-    # Save reference before registering for toolkits_to_register_agent
-    web_toolkit_for_agent_registration = web_toolkit_custom
-    web_toolkit_custom = message_integration.register_toolkits(web_toolkit_custom)
-
-    terminal_toolkit = TerminalToolkit(
-        options.project_id,
-        Agents.browser_agent,
-        working_directory=working_directory,
-        safe_mode=True,
-        clone_current_env=True,
-    )
-    terminal_toolkit = message_integration.register_functions([terminal_toolkit.shell_exec])
-
-    note_toolkit = NoteTakingToolkit(options.project_id, Agents.browser_agent, working_directory=working_directory)
-    note_toolkit = message_integration.register_toolkits(note_toolkit)
-
-    search_tools = SearchToolkit.get_can_use_tools(options.project_id)
-    if search_tools:
-        search_tools = message_integration.register_functions(search_tools)
-    else:
-        search_tools = []
-
-    tools = [
-        *HumanToolkit.get_can_use_tools(options.project_id, Agents.browser_agent),
-        *web_toolkit_custom.get_tools(),
-        *terminal_toolkit,
-        *note_toolkit.get_tools(),
-        *search_tools,
-    ]
-
-    system_message = f"""
-
-You are a Senior Research Analyst, a key member of a multi-agent team. Your
-primary responsibility is to conduct expert-level web research to gather,
-analyze, and document information required to solve the user's task. You
-operate with precision, efficiency, and a commitment to data quality.
-You must use the search/browser tools to get the information you need.
-
-
-
-You collaborate with the following agents who can work in parallel:
-- **Developer Agent**: Writes and executes code, handles technical
-implementation.
-- **Document Agent**: Creates and manages documents and presentations.
-- **Multi-Modal Agent**: Processes and generates images and audio.
-Your research is the foundation of the team's work. Provide them with
-comprehensive and well-documented information.
-
-
-
-- **System**: {platform.system()} ({platform.machine()})
-- **Working Directory**: `{working_directory}`. All local file operations must
-occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity.
-The current date is {NOW_STR}(Accurate to the hour). For any date-related tasks, you MUST use this as the current date.
-
-
-
-- You MUST use the note-taking tools to record your findings. This is a
-  critical part of your role. Your notes are the primary source of
-  information for your teammates. To avoid information loss, you must not
-  summarize your findings. Instead, record all information in detail.
-  For every piece of information you gather, you must:
-  1. **Extract ALL relevant details**: Quote all important sentences,
-     statistics, or data points. Your goal is to capture the information
-     as completely as possible.
-  2. **Cite your source**: Include the exact URL where you found the
-     information.
-  Your notes should be a detailed and complete record of the information
-  you have discovered. High-quality, detailed notes are essential for the
-  team's success.
-
-- **CRITICAL URL POLICY**: You are STRICTLY FORBIDDEN from inventing,
-  guessing, or constructing URLs yourself. You MUST only use URLs from
-  trusted sources:
-  1. URLs returned by search tools (`search_google`)
-  2. URLs found on webpages you have visited through browser tools
-  3. URLs provided by the user in their request
-  Fabricating or guessing URLs is considered a critical error and must
-  never be done under any circumstances.
-
-- You SHOULD keep the user informed by providing message_title and
-  message_description
-  parameters when calling tools. These optional parameters are available on
-  all tools and will automatically notify the user of your progress.
-
-- You MUST NOT answer from your own knowledge. All information
-  MUST be sourced from the web using the available tools. If you don't know
-  something, find it out using your tools.
-
-- When you complete your task, your final response must be a comprehensive
-  summary of your findings, presented in a clear, detailed, and
-  easy-to-read format. Avoid using markdown tables for presenting data;
-  use plain text formatting instead.
-
-
-
-Your capabilities include:
-- Search and get information from the web using the search tools.
-- Use the rich browser related toolset to investigate websites.
-- Use the terminal tools to perform local operations. You can leverage
-  powerful CLI tools like `grep` for searching within files, `curl` and
-  `wget` for downloading content, and `jq` for parsing JSON data from APIs.
-- Use the note-taking tools to record your findings.
-- Use the human toolkit to ask for help when you are stuck.
-
-
-
-Your approach depends on available search tools:
-
-**If Google Search is Available:**
-- Initial Search: Start with `search_google` to get a list of relevant URLs
-- Browser-Based Exploration: Use the browser tools to investigate the URLs
-
-**If Google Search is NOT Available:**
-- **MUST start with direct website search**: Use `browser_visit_page` to go
-  directly to popular search engines and informational websites such as:
-    * General search: google.com, bing.com, duckduckgo.com
-    * Academic: scholar.google.com, pubmed.ncbi.nlm.nih.gov
-    * News: news.google.com, bbc.com/news, reuters.com
-    * Technical: stackoverflow.com, github.com
-    * Reference: wikipedia.org, britannica.com
-- **Manual search process**: Type your query into the search boxes on these
-  sites using `browser_type` and submit with `browser_enter`
-- **Extract URLs from results**: Only use URLs that appear in the search
-  results on these websites
-
-**Common Browser Operations (both scenarios):**
-- **Navigation and Exploration**: Use `browser_visit_page` to open URLs.
-  `browser_visit_page` provides a snapshot of currently visible
-  interactive elements, not the full page text. To see more content on
-  long pages, Navigate with `browser_click`, `browser_back`, and
-  `browser_forward`. Manage multiple pages with `browser_switch_tab`.
-- **Interaction**: Use `browser_type` to fill out forms and
-  `browser_enter` to submit or confirm search.
-
-- In your response, you should mention the URLs you have visited and processed.
-
-- When encountering verification challenges (like login, CAPTCHAs or
-  robot checks), you MUST request help using the human toolkit.
-
-"""
-
-    return agent_model(
-        Agents.browser_agent,
-        BaseMessage.make_assistant_message(
-            role_name="Browser Agent",
-            content=system_message,
-        ),
-        options,
-        tools,
-        prune_tool_calls_from_memory=True,
-        tool_names=[
-            SearchToolkit.toolkit_name(),
-            HybridBrowserToolkit.toolkit_name(),
-            HumanToolkit.toolkit_name(),
-            NoteTakingToolkit.toolkit_name(),
-            TerminalToolkit.toolkit_name(),
-        ],
-        toolkits_to_register_agent=[web_toolkit_for_agent_registration],
-        enable_snapshot_clean=True,
-    )
-
-
-async def document_agent(options: Chat):
-    working_directory = get_working_directory(options)
-    logger.info(f"Creating document agent for project: {options.project_id} in directory: {working_directory}")
-
-    message_integration = ToolkitMessageIntegration(
-        message_handler=HumanToolkit(options.project_id, Agents.task_agent).send_message_to_user
-    )
-    file_write_toolkit = FileToolkit(options.project_id, working_directory=working_directory)
-    pptx_toolkit = PPTXToolkit(options.project_id, working_directory=working_directory)
-    pptx_toolkit = message_integration.register_toolkits(pptx_toolkit)
-    mark_it_down_toolkit = MarkItDownToolkit(options.project_id)
-    mark_it_down_toolkit = message_integration.register_toolkits(mark_it_down_toolkit)
-    excel_toolkit = ExcelToolkit(options.project_id, working_directory=working_directory)
-    excel_toolkit = message_integration.register_toolkits(excel_toolkit)
-    note_toolkit = NoteTakingToolkit(options.project_id, Agents.document_agent, working_directory=working_directory)
-    note_toolkit = message_integration.register_toolkits(note_toolkit)
-
-    terminal_toolkit = TerminalToolkit(
-        options.project_id,
-        Agents.document_agent,
-        working_directory=working_directory,
-        safe_mode=True,
-        clone_current_env=True,
-    )
-    terminal_toolkit = message_integration.register_toolkits(terminal_toolkit)
-
-    google_drive_tools = await GoogleDriveMCPToolkit.get_can_use_tools(options.project_id, options.get_bun_env())
-
-    tools = [
-        *file_write_toolkit.get_tools(),
-        *pptx_toolkit.get_tools(),
-        *HumanToolkit.get_can_use_tools(options.project_id, Agents.document_agent),
-        *mark_it_down_toolkit.get_tools(),
-        *excel_toolkit.get_tools(),
-        *note_toolkit.get_tools(),
-        *terminal_toolkit.get_tools(),
-        *google_drive_tools,
-    ]
-    # if env("EXA_API_KEY") or options.is_cloud():
-    #     search_toolkit = SearchToolkit(options.project_id, Agents.document_agent).search_exa
-    #     search_toolkit = message_integration.register_functions([search_toolkit])
-    #     tools.extend(search_toolkit)
-    system_message = f"""
-
-You are a Documentation Specialist, responsible for creating, modifying, and
-managing a wide range of documents. Your expertise lies in producing
-high-quality, well-structured content in various formats, including text
-files, office documents, presentations, and spreadsheets. You are the team's
-authority on all things related to documentation.
-
-
-
-You collaborate with the following agents who can work in parallel:
-- **Lead Software Engineer**: Provides technical details and code examples for
-documentation.
-- **Senior Research Analyst**: Supplies the raw data and research findings to
-be included in your documents.
-- **Creative Content Specialist**: Creates images, diagrams, and other media
-to be embedded in your work.
-
-
-
-- **System**: {platform.system()} ({platform.machine()})
-- **Working Directory**: `{working_directory}`. All local file operations must
-occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity.
-The current date is {NOW_STR}(Accurate to the hour). For any date-related tasks, you MUST use this as the current date.
-
-
-
-- Before creating any document, you MUST use the `read_note` tool to gather
-  all information collected by other team members by reading ALL notes.
-
-- You MUST use the available tools to create or modify documents (e.g.,
-  `write_to_file`, `create_presentation`). Your primary output should be
-  a file, not just content within your response.
-
-- If there's no specified format for the document/report/paper, you should use
-  the `write_to_file` tool to create a HTML file.
-
-- If the document has many data, you MUST use the terminal tool to
-  generate charts and graphs and add them to the document.
-
-- When you complete your task, your final response must be a summary of
-  your work and the path to the final document, presented in a clear,
-  detailed, and easy-to-read format. Avoid using markdown tables for
-  presenting data; use plain text formatting instead.
-
-- You SHOULD keep the user informed by providing message_title and
-  message_description
-  parameters when calling tools. These optional parameters are available on
-  all tools and will automatically notify the user of your progress.
-
-
-
-Your capabilities include:
-- Document Reading:
-    - Read and understand the content of various file formats including
-        - PDF (.pdf)
-        - Microsoft Office: Word (.doc, .docx), Excel (.xls, .xlsx),
-          PowerPoint (.ppt, .pptx)
-        - EPUB (.epub)
-        - HTML (.html, .htm)
-        - Images (.jpg, .jpeg, .png) for OCR
-        - Audio (.mp3, .wav) for transcription
-        - Text-based formats (.csv, .json, .xml, .txt)
-        - ZIP archives (.zip) using the `read_files` tool.
-
-- Document Creation & Editing:
-    - Create and write to various file formats including Markdown (.md),
-      Word documents (.docx), PDFs, CSV files, JSON, YAML, and HTML using
-      UTF-8 encoding for default.
-    - Apply formatting options including custom encoding, font styles, and
-      layout settings
-    - Modify existing files with automatic backup functionality
-    - Support for mathematical expressions in PDF documents through LaTeX
-      rendering
-
-- PowerPoint Presentation Creation:
-    - Create professional PowerPoint presentations with title slides and
-      content slides
-    - Format text with bold and italic styling
-    - Create bullet point lists with proper hierarchical structure
-    - Support for step-by-step process slides with visual indicators
-    - Create tables with headers and rows of data
-    - Support for custom templates and slide layouts
-    - IMPORTANT: The `create_presentation` tool requires content to be a JSON
-      string, not plain text. You must format your content as a JSON array of
-      slide objects, then use `json.dumps()` to convert it to a string.
-      Example:
-      ```python
-      import json
-      slides = [
-          {{"title": "Main Title", "subtitle": "Subtitle"}},
-          {{"heading": "Slide Title", "bullet_points": ["Point 1", "Point 2"]}},
-          {{"heading": "Data", "table": {{"headers": ["Col1", "Col2"], "rows": [["A", "B"]]}}}}
-      ]
-      content_json = json.dumps(slides)
-      create_presentation(content=content_json, filename="presentation.pptx")
-      ```
-
-- Excel Spreadsheet Management:
-    - Extract and analyze content from Excel files (.xlsx, .xls, .csv)
-      with detailed cell information and markdown formatting
-    - Create new Excel workbooks from scratch with multiple sheets
-    - Perform comprehensive spreadsheet operations including:
-        * Sheet creation, deletion, and data clearing
-        * Cell-level operations (read, write, find specific values)
-        * Row and column manipulation (add, update, delete)
-        * Range operations for bulk data processing
-        * Data export to CSV format for compatibility
-    - Handle complex data structures with proper formatting and validation
-    - Support for both programmatic data entry and manual cell updates
-
-- Terminal and File System:
-    - You have access to a full suite of terminal tools to interact with
-      the file system within your working directory (`{working_directory}`).
-    - You can execute shell commands (`shell_exec`), list files, and manage
-      your workspace as needed to support your document creation tasks. To
-      process and manipulate text and data for your documents, you can use
-      powerful CLI tools like `awk`, `sed`, `grep`, and `jq`. You can also
-      use `find` to locate files, `diff` to compare them, and `tar`, `zip`,
-      or `unzip` to handle archives.
-    - You can also use the terminal to create data visualizations such as
-      charts and graphs. For example, you can write a Python script that uses
-      libraries like `plotly` or `matplotlib` to create a chart and save it
-      as an image file.
-
-- Human Interaction:
-    - Ask questions to users and receive their responses
-    - Send informative messages to users without requiring responses
-
-
-
-When working with documents, you should:
-- Suggest appropriate file formats based on content requirements
-- Maintain proper formatting and structure in all created documents
-- Provide clear feedback about document creation and modification processes
-- Ask clarifying questions when user requirements are ambiguous
-- Recommend best practices for document organization and presentation
-- For PowerPoint presentations, ALWAYS convert your slide content to JSON
-  format before calling `create_presentation`. Never pass plain text or
-  instructions - only properly formatted JSON strings as shown in the
-  capabilities section
-- For Excel files, always provide clear data structure and organization
-- When creating spreadsheets, consider data relationships and use
-appropriate sheet naming conventions
-- To include data visualizations, write and execute Python scripts using
-  the terminal. Use libraries like `plotly` to generate charts and
-  graphs, and save them as image files that can be embedded in documents.
-
-
-Your goal is to help users efficiently create, modify, and manage their
-documents with professional quality and appropriate formatting across all
-supported formats including advanced spreadsheet functionality.
-""" - - return agent_model( - Agents.document_agent, - BaseMessage.make_assistant_message( - role_name="Document Agent", - content=system_message, - ), - options, - tools, - tool_names=[ - FileToolkit.toolkit_name(), - PPTXToolkit.toolkit_name(), - HumanToolkit.toolkit_name(), - MarkItDownToolkit.toolkit_name(), - ExcelToolkit.toolkit_name(), - NoteTakingToolkit.toolkit_name(), - TerminalToolkit.toolkit_name(), - GoogleDriveMCPToolkit.toolkit_name(), - ], - ) - - -def multi_modal_agent(options: Chat): - working_directory = get_working_directory(options) - logger.info(f"Creating multi-modal agent for project: {options.project_id} in directory: {working_directory}") - - message_integration = ToolkitMessageIntegration( - message_handler=HumanToolkit(options.project_id, Agents.multi_modal_agent).send_message_to_user - ) - video_download_toolkit = VideoDownloaderToolkit(options.project_id, working_directory=working_directory) - video_download_toolkit = message_integration.register_toolkits(video_download_toolkit) - image_analysis_toolkit = ImageAnalysisToolkit(options.project_id) - image_analysis_toolkit = message_integration.register_toolkits(image_analysis_toolkit) - - terminal_toolkit = TerminalToolkit( - options.project_id, - agent_name=Agents.multi_modal_agent, - working_directory=working_directory, - safe_mode=True, - clone_current_env=True, - ) - terminal_toolkit = message_integration.register_toolkits(terminal_toolkit) - - note_toolkit = NoteTakingToolkit( - options.project_id, - Agents.multi_modal_agent, - working_directory=working_directory, - ) - note_toolkit = message_integration.register_toolkits(note_toolkit) - tools = [ - *video_download_toolkit.get_tools(), - *image_analysis_toolkit.get_tools(), - *HumanToolkit.get_can_use_tools(options.project_id, Agents.multi_modal_agent), - *terminal_toolkit.get_tools(), - *note_toolkit.get_tools(), - ] - if options.is_cloud(): - open_ai_image_toolkit = OpenAIImageToolkit( # todo check llm has this model - options.project_id, - model="dall-e-3", - response_format="b64_json", - size="1024x1024", - quality="standard", - working_directory=working_directory, - api_key=options.api_key, - url=options.api_url, - ) - open_ai_image_toolkit = message_integration.register_toolkits(open_ai_image_toolkit) - tools = [ - *tools, - *open_ai_image_toolkit.get_tools(), - ] - # Convert string model_platform to enum for comparison - try: - model_platform_enum = ModelPlatformType(options.model_platform.lower()) - except (ValueError, AttributeError): - model_platform_enum = None - - if model_platform_enum == ModelPlatformType.OPENAI: - audio_analysis_toolkit = AudioAnalysisToolkit( - options.project_id, - working_directory, - OpenAIAudioModels( - api_key=options.api_key, - url=options.api_url, - ), - ) - audio_analysis_toolkit = message_integration.register_toolkits(audio_analysis_toolkit) - tools.extend(audio_analysis_toolkit.get_tools()) - - # if env("EXA_API_KEY") or options.is_cloud(): - # search_toolkit = SearchToolkit(options.project_id, Agents.multi_modal_agent).search_exa - # search_toolkit = message_integration.register_functions([search_toolkit]) - # tools.extend(search_toolkit) - - system_message = f""" - -You are a Creative Content Specialist, specializing in analyzing and -generating various types of media content. Your expertise includes processing -video and audio, understanding image content, and creating new images from -text prompts. You are the team's expert for all multi-modal tasks. 
- - - -You collaborate with the following agents who can work in parallel: -- **Lead Software Engineer**: Integrates your generated media into -applications and websites. -- **Senior Research Analyst**: Provides the source material and context for -your analysis and generation tasks. -- **Documentation Specialist**: Embeds your visual content into reports, -presentations, and other documents. - - - -- **System**: {platform.system()} ({platform.machine()}) -- **Working Directory**: `{working_directory}`. All local file operations must -occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity. -The current date is {NOW_STR}(Accurate to the hour). For any date-related tasks, you MUST use this as the current date. - - - -- You MUST use the `read_note` tool to to gather all information collected - by other team members by reading ALL notes and write down your findings in - the notes. - -- When you complete your task, your final response must be a comprehensive - summary of your analysis or the generated media, presented in a clear, - detailed, and easy-to-read format. Avoid using markdown tables for - presenting data; use plain text formatting instead. - -- You SHOULD keep the user informed by providing message_title and - message_description - parameters when calling tools. These optional parameters are available on - all tools and will automatically notify the user of your progress. - - - -Your capabilities include: -- Video & Audio Analysis: - - Download videos from URLs for analysis. - - Transcribe speech from audio files to text with high accuracy - - Answer specific questions about audio content - - Process audio from both local files and URLs - - Handle various audio formats including MP3, WAV, and OGG - -- Image Analysis & Understanding: - - Generate detailed descriptions of image content - - Answer specific questions about images - - Identify objects, text, people, and scenes in images - - Process images from both local files and URLs - -- Image Generation: - - Create high-quality images based on detailed text prompts using DALL-E - - Generate images in 1024x1792 resolution - - Save generated images to specified directories - -- Terminal and File System: - - You have access to terminal tools to manage media files. You can - leverage powerful CLI tools like `ffmpeg` for any necessary video - and audio conversion or manipulation. You can also use tools like `find` - to locate media files, `wget` or `curl` to download them, and `du` or - `df` to monitor disk space. - -- Human Interaction: - - Ask questions to users and receive their responses - - Send informative messages to users without requiring responses - - - - -When working with multi-modal content, you should: -- Provide detailed and accurate descriptions of media content -- Extract relevant information based on user queries -- Generate appropriate media when requested -- Explain your analysis process and reasoning -- Ask clarifying questions when user requirements are ambiguous - - -Your goal is to help users effectively process, understand, and create -multi-modal content across audio and visual domains. 
-""" - - return agent_model( - Agents.multi_modal_agent, - BaseMessage.make_assistant_message( - role_name="Multi Modal Agent", - content=system_message, - ), - options, - tools, - tool_names=[ - VideoDownloaderToolkit.toolkit_name(), - AudioAnalysisToolkit.toolkit_name(), - ImageAnalysisToolkit.toolkit_name(), - OpenAIImageToolkit.toolkit_name(), - HumanToolkit.toolkit_name(), - TerminalToolkit.toolkit_name(), - NoteTakingToolkit.toolkit_name(), - SearchToolkit.toolkit_name(), - ], - ) - - -async def social_medium_agent(options: Chat): - """ - Agent to handling tasks related to social media: - include toolkits: WhatsApp, Twitter, LinkedIn, Reddit, Notion, Slack, Discord and Google Suite. - """ - working_directory = get_working_directory(options) - logger.info(f"Creating social medium agent for project: {options.project_id} in directory: {working_directory}") - tools = [ - *WhatsAppToolkit.get_can_use_tools(options.project_id), - *TwitterToolkit.get_can_use_tools(options.project_id), - *LinkedInToolkit.get_can_use_tools(options.project_id), - *RedditToolkit.get_can_use_tools(options.project_id), - *await NotionMCPToolkit.get_can_use_tools(options.project_id), - # *SlackToolkit.get_can_use_tools(options.project_id), - *await GoogleGmailMCPToolkit.get_can_use_tools(options.project_id, options.get_bun_env()), - *GoogleCalendarToolkit.get_can_use_tools(options.project_id), - *HumanToolkit.get_can_use_tools(options.project_id, Agents.social_medium_agent), - *TerminalToolkit( - options.project_id, - agent_name=Agents.social_medium_agent, - working_directory=working_directory, - clone_current_env=True, - ).get_tools(), - *NoteTakingToolkit( - options.project_id, - Agents.social_medium_agent, - working_directory=working_directory, - ).get_tools(), - # *DiscordToolkit(options.project_id).get_tools(), # Not supported temporarily - # *GoogleSuiteToolkit(options.project_id).get_tools(), # Not supported temporarily - ] - # if env("EXA_API_KEY") or options.is_cloud(): - # tools.append(FunctionTool(SearchToolkit(options.project_id, Agents.social_medium_agent).search_exa)) - return agent_model( - Agents.social_medium_agent, - BaseMessage.make_assistant_message( - role_name="Social Medium Agent", - content=f""" -You are a Social Media Management Assistant with comprehensive capabilities -across multiple platforms. You MUST use the `send_message_to_user` tool to -inform the user of every decision and action you take. Your message must -include a short title and a one-sentence description. This is a mandatory -part of your workflow. When you complete your task, your final response must -be a comprehensive summary of your actions, presented in a clear, detailed, -and easy-to-read format. Avoid using markdown tables for presenting data; -use plain text formatting instead. - -- **Working Directory**: `{working_directory}`. All local file operations must -occur here, but you can access files from any place in the file system. For all file system operations, you MUST use absolute paths to ensure precision and avoid ambiguity. -The current date is {NOW_STR}(Accurate to the hour). For any date-related tasks, you MUST use this as the current date. - -Your integrated toolkits enable you to: - -1. WhatsApp Business Management (WhatsAppToolkit): - - Send text and template messages to customers via the WhatsApp Business - API. - - Retrieve business profile information. - -2. Twitter Account Management (TwitterToolkit): - - Create tweets with text content, polls, or as quote tweets. - - Delete existing tweets. 
-   - Retrieve user profile information.
-
-3. LinkedIn Professional Networking (LinkedInToolkit):
-   - Create posts on LinkedIn.
-   - Delete existing posts.
-   - Retrieve authenticated user's profile information.
-
-4. Reddit Content Analysis (RedditToolkit):
-   - Collect top posts and comments from specified subreddits.
-   - Perform sentiment analysis on Reddit comments.
-   - Track keyword discussions across multiple subreddits.
-
-5. Notion Workspace Management (NotionToolkit):
-   - List all pages and users in a Notion workspace.
-   - Retrieve and extract text content from Notion blocks.
-
-6. Slack Workspace Interaction (SlackToolkit):
-   - Create new Slack channels (public or private).
-   - Join or leave existing channels.
-   - Send and delete messages in channels.
-   - Retrieve channel information and message history.
-
-7. Human Interaction (HumanToolkit):
-   - Ask questions to users and send messages via console.
-
-8. Agent Communication:
-   - Communicate with other agents using messaging tools when collaboration
-     is needed. Use `list_available_agents` to see available team members and
-     `send_message` to coordinate with them, especially when you need content
-     from document agents or research from browser agents.
-
-9. File System Access:
-   - You can use terminal tools to interact with the local file system in
-     your working directory (`{working_directory}`), for example, to access
-     files needed for posting. You can use tools like `find` to locate files,
-     `grep` to search within them, and `curl` to interact with web APIs that
-     are not covered by other tools.
-
-When assisting users, always:
-- Identify which platform's functionality is needed for the task.
-- Check if required API credentials are available before attempting
-operations.
-- Provide clear explanations of what actions you're taking.
-- Handle rate limits and API restrictions appropriately.
-- Ask clarifying questions when user requests are ambiguous.
-""",
-        ),
-        options,
-        tools,
-        tool_names=[
-            WhatsAppToolkit.toolkit_name(),
-            TwitterToolkit.toolkit_name(),
-            LinkedInToolkit.toolkit_name(),
-            RedditToolkit.toolkit_name(),
-            NotionMCPToolkit.toolkit_name(),
-            GoogleGmailMCPToolkit.toolkit_name(),
-            GoogleCalendarToolkit.toolkit_name(),
-            HumanToolkit.toolkit_name(),
-            TerminalToolkit.toolkit_name(),
-            NoteTakingToolkit.toolkit_name(),
-        ],
-    )
-
-
-async def mcp_agent(options: Chat):
-    logger.info(
-        f"Creating MCP agent for project: {options.project_id} with {len(options.installed_mcp['mcpServers'])} MCP servers"
-    )
-    tools = [
-        # *HumanToolkit.get_can_use_tools(options.project_id, Agents.mcp_agent),
-        *McpSearchToolkit(options.project_id).get_tools(),
-    ]
-    if len(options.installed_mcp["mcpServers"]) > 0:
-        try:
-            mcp_tools = await get_mcp_tools(options.installed_mcp)
-            logger.info(f"Retrieved {len(mcp_tools)} MCP tools for task {options.project_id}")
-            if mcp_tools:
-                tool_names = [
-                    (tool.get_function_name() if hasattr(tool, "get_function_name") else str(tool))
-                    for tool in mcp_tools
-                ]
-                logger.debug(f"MCP tools: {tool_names}")
-                tools = [*tools, *mcp_tools]
-        except Exception as e:
-            logger.debug(repr(e))
-
-    task_lock = get_task_lock(options.project_id)
-    agent_id = str(uuid.uuid4())
-    logger.info(f"Creating MCP agent: {Agents.mcp_agent} with id: {agent_id} for task: {options.project_id}")
-    asyncio.create_task(
-        task_lock.put_queue(
-            ActionCreateAgentData(
-                data={
-                    "agent_name": Agents.mcp_agent,
-                    "agent_id": agent_id,
-                    "tools": [key for key in options.installed_mcp["mcpServers"].keys()],
-                }
-            )
-        )
-    )
-    return ListenChatAgent(
-        options.project_id,
-        Agents.mcp_agent,
-        system_message="You are a helpful assistant that can help users search mcp servers. The found mcp services will be returned to the user, and you will ask the user via ask_human_via_gui whether they want to install these mcp services.",
-        model=ModelFactory.create(
-            model_platform=options.model_platform,
-            model_type=options.model_type,
-            api_key=options.api_key,
-            url=options.api_url,
-            model_config_dict=(
-                {
-                    "user": str(options.project_id),
-                }
-                if options.is_cloud()
-                else None
-            ),
-            timeout=600,  # 10 minutes
-            **{
-                k: v
-                for k, v in (options.extra_params or {}).items()
-                if k not in ["model_platform", "model_type", "api_key", "url"]
-            },
-        ),
-        # output_language=options.language,
-        tools=tools,
-        agent_id=agent_id,
-    )
-
-
-async def get_toolkits(tools: list[str], agent_name: str, api_task_id: str):
-    logger.info(f"Getting toolkits for agent: {agent_name}, task: {api_task_id}, tools: {tools}")
-    toolkits = {
-        "audio_analysis_toolkit": AudioAnalysisToolkit,
-        "openai_image_toolkit": OpenAIImageToolkit,
-        "excel_toolkit": ExcelToolkit,
-        "file_write_toolkit": FileToolkit,
-        "github_toolkit": GithubToolkit,
-        "google_calendar_toolkit": GoogleCalendarToolkit,
-        "google_drive_mcp_toolkit": GoogleDriveMCPToolkit,
-        "google_gmail_mcp_toolkit": GoogleGmailMCPToolkit,
-        "image_analysis_toolkit": ImageAnalysisToolkit,
-        "linkedin_toolkit": LinkedInToolkit,
-        "lark_toolkit": LarkToolkit,
-        "mcp_search_toolkit": McpSearchToolkit,
-        "notion_mcp_toolkit": NotionMCPToolkit,
-        "pptx_toolkit": PPTXToolkit,
-        "reddit_toolkit": RedditToolkit,
-        "search_toolkit": SearchToolkit,
-        "slack_toolkit": SlackToolkit,
-        "terminal_toolkit": TerminalToolkit,
-        "twitter_toolkit": TwitterToolkit,
-        "video_analysis_toolkit": VideoAnalysisToolkit,
-        "video_download_toolkit": VideoDownloaderToolkit,
-        "whatsapp_toolkit": WhatsAppToolkit,
-    }
-    res = []
-    for item in tools:
-        if item in toolkits:
-            toolkit: AbstractToolkit = toolkits[item]
-            toolkit.agent_name = agent_name
-            toolkit_tools = toolkit.get_can_use_tools(api_task_id)
-            toolkit_tools = await toolkit_tools if asyncio.iscoroutine(toolkit_tools) else toolkit_tools
-            res.extend(toolkit_tools)
-        else:
-            logger.warning(f"Toolkit {item} not found for agent {agent_name}")
-    return res
-
-
-async def get_mcp_tools(mcp_server: McpServers):
-    logger.info(f"Getting MCP tools for {len(mcp_server['mcpServers'])} servers")
-    if len(mcp_server["mcpServers"]) == 0:
-        return []
-
-    # Ensure unified auth directory for all mcp-remote servers to avoid re-authentication on each task
-    config_dict = {**mcp_server}
-    for server_config in config_dict["mcpServers"].values():
-        if "env" not in server_config:
-            server_config["env"] = {}
-        # Set global auth directory to persist authentication across tasks
-        if "MCP_REMOTE_CONFIG_DIR" not in server_config["env"]:
-            server_config["env"]["MCP_REMOTE_CONFIG_DIR"] = env(
-                "MCP_REMOTE_CONFIG_DIR", os.path.expanduser("~/.mcp-auth")
-            )
-
-    mcp_toolkit = None
-    try:
-        mcp_toolkit = MCPToolkit(config_dict=config_dict, timeout=180)
-        await mcp_toolkit.connect()
-
-        logger.info(f"Successfully connected to MCP toolkit with {len(mcp_server['mcpServers'])} servers")
-        tools = mcp_toolkit.get_tools()
-        if tools:
-            tool_names = [
-                (tool.get_function_name() if hasattr(tool, "get_function_name") else str(tool)) for tool in tools
-            ]
-            logging.debug(f"MCP tool names: {tool_names}")
-        return tools
-    except asyncio.CancelledError:
-        logger.info("MCP connection cancelled during get_mcp_tools")
-        return []
-    except Exception as e:
-        logger.error(f"Failed to connect MCP toolkit: {e}", exc_info=True)
-        return []

From 5f5d3d0fc6700d6a09882ad50655fb3e0f5c24e7 Mon Sep 17 00:00:00 2001
From: bytecii
Date: Thu, 5 Feb 2026 21:34:25 -0800
Subject: [PATCH 18/18] update

---
 .../tests/app/service/test_task_validate.py  | 131 ++++++++++--------
 1 file changed, 73 insertions(+), 58 deletions(-)

diff --git a/backend/tests/app/service/test_task_validate.py b/backend/tests/app/service/test_task_validate.py
index b086ec29d..662dfa91e 100644
--- a/backend/tests/app/service/test_task_validate.py
+++ b/backend/tests/app/service/test_task_validate.py
@@ -4,12 +4,14 @@
 TODO: Rename this file to test_task.py after fixing errors in backend/tests/unit/service/test_task.py
 """
+
 from unittest.mock import Mock, patch

 import pytest
+from camel.types import ModelPlatformType
+
 from app.model.chat import Chat
 from app.service.task import validate_model_before_task
-from camel.types import ModelPlatformType

 # Test data constants
 TEST_PROJECT_ID = "test_project"
 TEST_TASK_ID = "test_task"
 TEST_QUESTION = "test question"
 TEST_EMAIL = "test@example.com"
 TEST_MODEL_PLATFORM = "openai"
 TEST_MODEL_TYPE = "gpt-4o-mini"
 TEST_VALID_API_KEY = "valid_api_key"
 TEST_INVALID_API_KEY = "invalid_api_key"
 TEST_API_URL = "https://api.openai.com/v1"


 @pytest.mark.asyncio
 async def test_validate_model_success():
     """Test successful model validation."""
-    options = Chat(project_id=TEST_PROJECT_ID,
-                   task_id=TEST_TASK_ID,
-                   question=TEST_QUESTION,
-                   email=TEST_EMAIL,
-                   model_platform=TEST_MODEL_PLATFORM,
-                   model_type=TEST_MODEL_TYPE,
-                   api_key=TEST_VALID_API_KEY,
-                   api_url=TEST_API_URL,
-                   model_config={})
+    options = Chat(
+        project_id=TEST_PROJECT_ID,
+        task_id=TEST_TASK_ID,
+        question=TEST_QUESTION,
+        email=TEST_EMAIL,
+        model_platform=TEST_MODEL_PLATFORM,
+        model_type=TEST_MODEL_TYPE,
+        api_key=TEST_VALID_API_KEY,
+        api_url=TEST_API_URL,
+        model_config={},
+    )

     # Mock the create_agent and agent.step
     mock_agent = Mock()
     mock_agent.step = Mock(return_value="test response")

-    with patch('app.service.task.create_agent', return_value=mock_agent):
+    with patch("app.service.task.create_agent", return_value=mock_agent):
         is_valid, error_msg = await validate_model_before_task(options)

     assert is_valid is True
     assert error_msg is None


 @pytest.mark.asyncio
 async def test_validate_model_invalid_api_key():
     """Test model validation with invalid API key."""
-    options = Chat(project_id=TEST_PROJECT_ID,
-                   task_id=TEST_TASK_ID,
-                   question=TEST_QUESTION,
-                   email=TEST_EMAIL,
-                   model_platform=TEST_MODEL_PLATFORM,
-                   model_type=TEST_MODEL_TYPE,
-                   api_key=TEST_INVALID_API_KEY,
-                   api_url=TEST_API_URL,
-                   model_config={})
+    options = Chat(
+        project_id=TEST_PROJECT_ID,
+        task_id=TEST_TASK_ID,
+        question=TEST_QUESTION,
+        email=TEST_EMAIL,
+        model_platform=TEST_MODEL_PLATFORM,
+        model_type=TEST_MODEL_TYPE,
+        api_key=TEST_INVALID_API_KEY,
+        api_url=TEST_API_URL,
+        model_config={},
+    )

     # Mock the create_agent to raise authentication error
-    with patch('app.service.task.create_agent') as mock_create:
+    with patch("app.service.task.create_agent") as mock_create:
         mock_agent = Mock()
         mock_agent.step = Mock(
-            side_effect=Exception("Error code: 401 - Invalid API key"))
+            side_effect=Exception("Error code: 401 - Invalid API key")
+        )
         mock_create.return_value = mock_agent

         is_valid, error_msg = await validate_model_before_task(options)


 @pytest.mark.asyncio
 async def test_validate_model_network_error():
     """Test model validation with network error."""
-    options = Chat(project_id=TEST_PROJECT_ID,
-                   task_id=TEST_TASK_ID,
-                   question=TEST_QUESTION,
-                   email=TEST_EMAIL,
-                   model_platform=TEST_MODEL_PLATFORM,
-                   model_type=TEST_MODEL_TYPE,
-                   api_key=TEST_VALID_API_KEY,
-                   api_url="https://invalid-url.com",
-                   model_config={})
+    options = Chat(
+        project_id=TEST_PROJECT_ID,
+        task_id=TEST_TASK_ID,
+        question=TEST_QUESTION,
+        email=TEST_EMAIL,
+        model_platform=TEST_MODEL_PLATFORM,
+        model_type=TEST_MODEL_TYPE,
+        api_key=TEST_VALID_API_KEY,
+        api_url="https://invalid-url.com",
+        model_config={},
+    )

     # Mock the create_agent to raise network error
-    with patch('app.service.task.create_agent') as mock_create:
+    with patch("app.service.task.create_agent") as mock_create:
         mock_agent = Mock()
         mock_agent.step = Mock(side_effect=Exception("Connection error"))
         mock_create.return_value = mock_agent

         is_valid, error_msg = await validate_model_before_task(options)


 @pytest.mark.asyncio
 async def test_validate_model_with_custom_config():
     """Test model validation with custom model configuration."""
     custom_config = {"temperature": 0.7, "max_tokens": 1000}

-    options = Chat(project_id=TEST_PROJECT_ID,
-                   task_id=TEST_TASK_ID,
-                   question=TEST_QUESTION,
-                   email=TEST_EMAIL,
-                   model_platform=TEST_MODEL_PLATFORM,
-                   model_type=TEST_MODEL_TYPE,
-                   api_key=TEST_VALID_API_KEY,
-                   api_url=TEST_API_URL,
-                   model_config=custom_config)
+    options = Chat(
+        project_id=TEST_PROJECT_ID,
+        task_id=TEST_TASK_ID,
+        question=TEST_QUESTION,
+        email=TEST_EMAIL,
+        model_platform=TEST_MODEL_PLATFORM,
+        model_type=TEST_MODEL_TYPE,
+        api_key=TEST_VALID_API_KEY,
+        api_url=TEST_API_URL,
+        model_config=custom_config,
+    )

     mock_agent = Mock()
     mock_agent.step = Mock(return_value="test response")

-    with patch('app.service.task.create_agent',
-               return_value=mock_agent) as mock_create:
+    with patch(
+        "app.service.task.create_agent", return_value=mock_agent
+    ) as mock_create:
         is_valid, error_msg = await validate_model_before_task(options)

         # Verify create_agent was called
         mock_create.assert_called_once()
         call_args = mock_create.call_args
-        assert call_args.kwargs['model_platform'] == options.model_platform
-        assert call_args.kwargs['model_type'] == options.model_type
-        assert call_args.kwargs['api_key'] == options.api_key
-        assert call_args.kwargs['url'] == options.api_url
+        assert call_args.kwargs["model_platform"] == options.model_platform
+        assert call_args.kwargs["model_type"] == options.model_type
+        assert call_args.kwargs["api_key"] == options.api_key
+        assert call_args.kwargs["url"] == options.api_url

     assert is_valid is True
     assert error_msg is None


 @pytest.mark.asyncio
 async def test_validate_model_rate_limit_error():
     """Test model validation with rate limit error."""
-    options = Chat(project_id=TEST_PROJECT_ID,
-                   task_id=TEST_TASK_ID,
-                   question=TEST_QUESTION,
-                   email=TEST_EMAIL,
-                   model_platform=TEST_MODEL_PLATFORM,
-                   model_type=TEST_MODEL_TYPE,
-                   api_key=TEST_VALID_API_KEY,
-                   api_url=TEST_API_URL,
-                   model_config={})
+    options = Chat(
+        project_id=TEST_PROJECT_ID,
+        task_id=TEST_TASK_ID,
+        question=TEST_QUESTION,
+        email=TEST_EMAIL,
+        model_platform=TEST_MODEL_PLATFORM,
+        model_type=TEST_MODEL_TYPE,
+        api_key=TEST_VALID_API_KEY,
+        api_url=TEST_API_URL,
+        model_config={},
+    )

     # Mock the create_agent to raise rate limit error
-    with patch('app.service.task.create_agent') as mock_create:
+    with patch("app.service.task.create_agent") as mock_create:
         mock_agent = Mock()
         mock_agent.step = Mock(
-            side_effect=Exception("Error code: 429 - Rate limit exceeded"))
+            side_effect=Exception("Error code: 429 - Rate limit exceeded")
+        )
         mock_create.return_value = mock_agent

         is_valid, error_msg = await validate_model_before_task(options)