2 changes: 1 addition & 1 deletion .env-sample
@@ -1,3 +1,3 @@
CEREBRAS_API_KEY=
CEREBRAS_BASE_URL=https://api.cerebras.ai/v1
-CEREBRAS_CHAT_MODEL=qwen-3-coder-480b
+CEREBRAS_CHAT_MODEL=gpt-oss-120b
50 changes: 18 additions & 32 deletions README.md
@@ -1,8 +1,8 @@
# DevDuck agents

A multi-agent system for Node.js programming assistance built with Google Agent Development Kit (ADK). This
-project features a coordinating agent (DevDuck) that manages two specialized sub-agents (Local Agent and
-Cerebras) for different programming tasks.
+project features a coordinating agent (DevDuck) that manages two specialized sub-agents (
+Cerebras Agents) for different programming tasks.

## Architecture

@@ -12,7 +12,7 @@ The system consists of three main agents orchestrated by Docker Compose, which p
### 🐙 Docker Compose Orchestration

- **Central Role**: Docker Compose serves as the foundation for the entire multi-agent system
-- **Service Orchestration**: Manages the lifecycle of all three agents (DevDuck, Local Agent, and Cerebras)
+- **Service Orchestration**: Manages the lifecycle of all agents (DevDuck and two Cerebras sub-agents)
- **Configuration Management**: Defines agent prompts, model configurations, and service dependencies
directly in the compose file
- **Network Coordination**: Establishes secure inter-agent communication channels
@@ -23,21 +23,16 @@ The system consists of three main agents orchestrated by Docker Compose, which p
### 🦆 DevDuck (Main Agent)

- **Role**: Main development assistant and project coordinator
-- **Model**: Qwen3 (unsloth/qwen3-gguf:4B-UD-Q4_K_XL)
- **Capabilities**: Routes requests to appropriate sub-agents based on user needs

-### 👨‍💻 Local Agent Agent
+### 🧠 Cerebras Agents

-- **Role**: General development tasks and project coordination
-- **Model**: Qwen2.5 (ai/qwen2.5:latest)
-- **Specialization**: Node.js programming expert for understanding code, explaining concepts, and generating code snippets
-
-### 🧠 Cerebras Agent
-
-- **Role**: Advanced computational tasks and complex problem-solving
-- **Model**: Llama-4 Scout (llama-4-scout-17b-16e-instruct)
+- **Role**: The system features two specialized Cerebras sub-agents for different programming tasks.
+- **Model**: gpt-oss-120b
- **Provider**: Cerebras API
-- **Specialization**: Node.js programming expert for complex problem-solving scenarios
+- **Specializations**:
+  - General development tasks, code explanation, and conceptual understanding.
+  - Advanced computational tasks, code generation, and complex problem-solving.

## Features

@@ -52,11 +47,6 @@ The system consists of three main agents orchestrated by Docker Compose, which p
### Prerequisites

- **[Docker Desktop] 4.43.0+ or [Docker Engine]** installed.
-- **A laptop or workstation with a GPU** (e.g., a MacBook) for running open models locally. If you
-  don't have a GPU, you can alternatively use **[Docker Offload]**.
-- If you're using [Docker Engine] on Linux or [Docker Desktop] on Windows, ensure that the
-  [Docker Model Runner requirements] are met (specifically that GPU
-  support is enabled) and the necessary drivers are installed.
- If you're using Docker Engine on Linux, ensure you have [Docker Compose] 2.38.1 or later installed.

### Configuration
@@ -67,7 +57,7 @@ The system consists of three main agents orchestrated by Docker Compose, which p
```env
CEREBRAS_API_KEY=<your_cerebras_api_key>
CEREBRAS_BASE_URL=https://api.cerebras.ai/v1
-CEREBRAS_CHAT_MODEL=llama-4-scout-17b-16e-instruct
+CEREBRAS_CHAT_MODEL=gpt-oss-120b
```

> look at the `.env.sample` file
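The `CEREBRAS_*` variables above are read by the agents at startup via `os.environ`. As a minimal sketch (the `cerebras_model_id` helper below is hypothetical, not part of the repo), this is roughly how a LiteLLM model id in the `cerebras/<model>` form used by this change can be derived:

```python
import os

# Hypothetical helper illustrating how an agent could derive its LiteLLM model
# id from the CEREBRAS_CHAT_MODEL variable; the "cerebras/" prefix matches the
# model strings this change introduces in agents/devduck/agent.py.
def cerebras_model_id(default_model: str = "gpt-oss-120b") -> str:
    model = os.environ.get("CEREBRAS_CHAT_MODEL") or default_model
    return f"cerebras/{model}"

print(cerebras_model_id())
```

With `CEREBRAS_CHAT_MODEL=gpt-oss-120b` (as in `.env-sample`), this yields `cerebras/gpt-oss-120b`.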
@@ -89,37 +79,33 @@ The agents can be accessed through the web interface or API endpoints.

> Activate Token Streaming

-**You can try this**:
+**Quick Example**:

```text
Hello I'm Phil

-Local Agent generate a Node.js hello world program
+Cerebras, generate a Node.js hello world program.

-Add a Person class with a greet method
+Now, add a Person class with a greet method.

-Cerebras can you analyse and comment this code
+Can you analyze and comment this code?

-Can you generate the tests
+Finally, generate the tests.
```

-> ✋ For a public demo, stay simple, the above examples are working.

**🎥 How to use the demo**: [https://youtu.be/WYB31bzfXnM](https://youtu.be/WYB31bzfXnM)

#### Routing Requests

- **General requests**: Handled by DevDuck, who routes to appropriate sub-agents
-- **Specific agent requests**
-  + "I want to speak with Local Agent" → Routes to Local Agent agent
-  + "I want to speak with Cerebras" → Routes to Cerebras agent
+- **Specific agent requests**: To direct a request to a specific agent, mention its name (e.g., "Cerebras, analyze this code"). DevDuck will route the request accordingly.

## Tips

-If for any reason, you cannot go back from the Cerebras agent to the Local Agent agent, try this:
+If you need to reset the conversation and return to the main coordinator, you can say:

```text
-go back to devduck
+Go back to DevDuck
```

[Docker Compose]: https://github.com/docker/compose
7 changes: 3 additions & 4 deletions agents/devduck/agent.py
@@ -2,14 +2,13 @@

from google.adk.agents import Agent
from google.adk.models.lite_llm import LiteLlm

from .sub_agents import local_agent, cerebras_agent

root_agent = Agent(
    model=LiteLlm(
-        model=f"openai/{os.environ.get('DEVDUCK_CHAT_MODEL')}",
-        api_base=os.environ.get("DEVDUCK_BASE_URL"),
-        api_key="tada",
+        model=f"cerebras/{os.environ.get('CEREBRAS_CHAT_MODEL')}",
+        api_base=os.environ.get("CEREBRAS_BASE_URL"),
+        api_key=os.environ.get("CEREBRAS_API_KEY"),
        temperature=0.0,
    ),
    name=os.environ.get("DEVDUCK_AGENT_NAME"),
48 changes: 2 additions & 46 deletions agents/devduck/sub_agents/cerebras/agent.py
@@ -1,57 +1,13 @@
import os
-from typing import Any, Dict

from google.adk.agents import Agent
from google.adk.models.lite_llm import LiteLlm
from .tools import create_mcp_toolsets

-
-class CerebrasCompatibleLiteLlm(LiteLlm):
-    """LiteLLM wrapper that filters out Cerebras-unsupported JSON schema fields."""
-
-    def _filter_json_schema(self, schema: Dict[str, Any]) -> Dict[str, Any]:
-        """Recursively remove unsupported JSON schema fields."""
-        if not isinstance(schema, dict):
-            return schema
-
-        filtered = {}
-        unsupported_fields = {'min_length', 'minLength', 'max_length', 'maxLength', 'pattern'}
-
-        for key, value in schema.items():
-            if key in unsupported_fields:
-                continue  # Skip unsupported fields
-            elif isinstance(value, dict):
-                filtered[key] = self._filter_json_schema(value)
-            elif isinstance(value, list):
-                filtered[key] = [self._filter_json_schema(item) if isinstance(item, dict) else item for item in value]
-            else:
-                filtered[key] = value
-
-        return filtered
-
-    async def acompletion(self, *args, **kwargs):
-        """Override acompletion to filter tool schemas."""
-        if 'tools' in kwargs and kwargs['tools']:
-            filtered_tools = []
-            for tool in kwargs['tools']:
-                if isinstance(tool, dict) and 'function' in tool:
-                    filtered_tool = tool.copy()
-                    if 'parameters' in filtered_tool['function']:
-                        filtered_tool['function']['parameters'] = self._filter_json_schema(
-                            filtered_tool['function']['parameters']
-                        )
-                    filtered_tools.append(filtered_tool)
-                else:
-                    filtered_tools.append(tool)
-            kwargs['tools'] = filtered_tools
-
-        return await super().acompletion(*args, **kwargs)
-
tools = create_mcp_toolsets()

cerebras_agent = Agent(
-    model=CerebrasCompatibleLiteLlm(
-        model=f"openai/{os.environ.get('CEREBRAS_CHAT_MODEL')}",
+    model=LiteLlm(
+        model=f"cerebras/{os.environ.get('CEREBRAS_CHAT_MODEL')}",
        api_base=os.environ.get("CEREBRAS_BASE_URL"),
        api_key=os.environ.get("CEREBRAS_API_KEY"),
        temperature=0.0,
6 changes: 3 additions & 3 deletions agents/devduck/sub_agents/localagent/agent.py
@@ -5,9 +5,9 @@

local_agent = Agent(
    model=LiteLlm(
-        model=f"openai/{os.environ.get('LOCAL_AGENT_CHAT_MODEL')}",
-        api_base=os.environ.get("LOCAL_AGENT_BASE_URL"),
-        api_key="tada",
+        model=f"cerebras/{os.environ.get('CEREBRAS_CHAT_MODEL')}",
+        api_base=os.environ.get("CEREBRAS_BASE_URL"),
+        api_key=os.environ.get("CEREBRAS_API_KEY"),
        temperature=0.0,
    ),
    name=os.environ.get("LOCAL_AGENT_NAME"),
5 changes: 0 additions & 5 deletions agents/main.py
@@ -3,11 +3,6 @@
from fastapi import FastAPI
from google.adk.cli.fast_api import get_fast_api_app
import uvicorn
-import litellm
-
-# Enable LiteLLM debug mode
-# litellm.set_verbose = True
-# litellm._turn_on_debug()

# Get the directory where main.py is located
AGENT_DIR = os.path.dirname(os.path.abspath(__file__))
12 changes: 11 additions & 1 deletion agents/requirements.txt
@@ -1,2 +1,12 @@
-litellm==1.73.6
google-adk==1.5.0
+litellm>=1.73.6
+openai>=1.68.2
+pydantic==2.11.1
+fastapi==0.118.3
+starlette==0.48.0
+click==8.3.0
+google-api-python-client==2.186.0
+google-auth-httplib2==0.2.0
+google-cloud-aiplatform==1.95.1
+google-cloud-storage==2.18.0
+google-genai==1.27.0
27 changes: 9 additions & 18 deletions compose.yml
@@ -4,19 +4,19 @@ services:
context: ./agents
dockerfile: Dockerfile

-models:
-  qwen2_5:
-    endpoint_var: LOCAL_AGENT_BASE_URL
-    model_var: LOCAL_AGENT_CHAT_MODEL
-  qwen3:
-    endpoint_var: DEVDUCK_BASE_URL
-    model_var: DEVDUCK_CHAT_MODEL
-
ports:
- 8000:8000
environment:
MCPGATEWAY_ENDPOINT: http://mcp-gateway:8811/sse
PORT: 8000

+# --------------------------------------
+# Cerebras API Configuration (used by all agents)
+# --------------------------------------
+CEREBRAS_BASE_URL: ${CEREBRAS_BASE_URL}
+CEREBRAS_CHAT_MODEL: ${CEREBRAS_CHAT_MODEL}
+CEREBRAS_API_KEY: ${CEREBRAS_API_KEY}

# --------------------------------------
# Parent Agent configuration
# --------------------------------------
@@ -50,9 +50,6 @@ services:
# --------------------------------------
# Cerebras Agent Configuration
# --------------------------------------
-CEREBRAS_BASE_URL: ${CEREBRAS_BASE_URL}
-CEREBRAS_CHAT_MODEL: ${CEREBRAS_CHAT_MODEL}
-CEREBRAS_API_KEY: ${CEREBRAS_API_KEY}
CEREBRAS_AGENT_NAME: cerebras_agent
CEREBRAS_AGENT_DESCRIPTION: Specialized agent for advanced computational tasks and complex problem-solving
CEREBRAS_AGENT_INSTRUCTION: |
@@ -76,10 +73,4 @@
- --servers=context7,node-code-sandbox
- --catalog=/mcp-gateway-catalog.yaml
volumes:
-  - ./mcp-gateway-catalog.yaml:/mcp-gateway-catalog.yaml:ro
-# NOTE: Define models
-models:
-  qwen2_5:
-    model: hf.co/unsloth/qwen3-30b-a3b-instruct-2507-gguf:q5_k_m
-  qwen3:
-    model: hf.co/unsloth/qwen3-30b-a3b-instruct-2507-gguf:q5_k_m
+  - ./mcp-gateway-catalog.yaml:/mcp-gateway-catalog.yaml:ro
16 changes: 0 additions & 16 deletions pyproject.toml

This file was deleted.
