codec_agent_plan.py: 33 changes (28 additions & 5 deletions)
@@ -230,8 +230,27 @@ def validate_plan_skills(plan: Plan, registry=None) -> Tuple[bool, List[str]]:
 
 
 # ── Qwen-3.6 client ───────────────────────────────────────────────────────────
-QWEN_URL = "http://127.0.0.1:8090/v1/chat/completions"
-QWEN_MODEL = "qwen3.6"
+# Hotfix: read URL+model from ~/.codec/config.json:llm_base_url+llm_model.
+# Falls back to codec_config defaults. Hardcoded values were wrong (8090 is
+# the dashboard port; the LLM lives at 8083).
+def _qwen_url() -> str:
+    try:
+        from codec_config import QWEN_BASE_URL
+        return f"{QWEN_BASE_URL.rstrip('/')}/chat/completions"
+    except Exception:
+        return "http://localhost:8083/v1/chat/completions"
+
+
+def _qwen_model() -> str:
+    try:
+        from codec_config import QWEN_MODEL as _m
+        return _m
+    except Exception:
+        return "mlx-community/Qwen3.6-35B-A3B-4bit"
+
+
+QWEN_URL = _qwen_url()      # back-compat — module-level constant for tests
+QWEN_MODEL = _qwen_model()  # back-compat
 QWEN_TIMEOUT = 60  # seconds
 
 
@@ -247,11 +266,15 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "",
                max_tokens: int = 4000) -> str:
     """Call local Qwen-3.6 OpenAI-compatible endpoint. Returns the
     assistant's content string. Raises QwenUnavailableError on
-    network failure or non-2xx response."""
+    network failure or non-2xx response.
+
+    URL + model resolved at call time via _qwen_url() / _qwen_model()
+    so they pick up ~/.codec/config.json:llm_base_url + :llm_model
+    rather than the deploy-time hardcoded values."""
     import requests  # lazy import — avoid forcing requests on test machines without it
 
     payload = {
-        "model": QWEN_MODEL,
+        "model": _qwen_model(),
         "messages": [
             {"role": "system", "content": system_prompt or ""},
             {"role": "user", "content": user_prompt},
@@ -260,7 +283,7 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "",
         "temperature": 0.2,
     }
     try:
-        r = requests.post(QWEN_URL, json=payload, timeout=QWEN_TIMEOUT)
+        r = requests.post(_qwen_url(), json=payload, timeout=QWEN_TIMEOUT)
     except requests.exceptions.ConnectionError as e:
         raise QwenUnavailableError(f"qwen3.6 unreachable: {e}")
     except requests.exceptions.Timeout:
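Not included in the diff: the codec_config module both helpers import from. Below is a minimal sketch of what it presumably provides, assuming ~/.codec/config.json carries the llm_base_url and llm_model keys named in the hotfix comments; the keys and fallback values come from the diff, while the PEP 562 module __getattr__ is an assumption that would make each call-time import re-read the file, which is what the "picked up without a process restart" claim in the runner's docstring relies on.

# codec_config.py (sketch, not part of this PR): resolves QWEN_BASE_URL and
# QWEN_MODEL from ~/.codec/config.json, falling back to the same defaults the
# hotfix uses. Keys and defaults are taken from the diff; the __getattr__
# live-reload mechanism is an assumption.
import json
from pathlib import Path

_CONFIG_PATH = Path.home() / ".codec" / "config.json"
_DEFAULTS = {
    "llm_base_url": "http://localhost:8083/v1",
    "llm_model": "mlx-community/Qwen3.6-35B-A3B-4bit",
}
_ALIASES = {"QWEN_BASE_URL": "llm_base_url", "QWEN_MODEL": "llm_model"}


def _load() -> dict:
    # Re-read on every access; a missing or malformed file means defaults.
    try:
        return {**_DEFAULTS, **json.loads(_CONFIG_PATH.read_text())}
    except Exception:
        return dict(_DEFAULTS)


def __getattr__(name: str):
    # PEP 562: called for attributes not found in the module dict, i.e. on
    # every `from codec_config import QWEN_BASE_URL`, so config.json edits
    # take effect without restarting the process.
    if name in _ALIASES:
        return _load()[_ALIASES[name]]
    raise AttributeError(name)

If codec_config instead bound these names as plain module constants, they would be frozen the first time codec_config itself was imported, and the call-time helpers would only help across interpreter restarts.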
codec_agent_runner.py: 31 changes (26 additions & 5 deletions)
@@ -135,8 +135,26 @@ def permission_gate(action: Action, agent_grants: Dict[str, Any],
 
 
 # ── Qwen-3.6 client (mirrors codec_agent_plan pattern) ────────────────────────
-QWEN_URL = "http://127.0.0.1:8090/v1/chat/completions"
-QWEN_MODEL = "qwen3.6"
+# Hotfix: read URL+model from ~/.codec/config.json via codec_config (8090
+# was the dashboard port; LLM lives at 8083).
+def _qwen_url() -> str:
+    try:
+        from codec_config import QWEN_BASE_URL
+        return f"{QWEN_BASE_URL.rstrip('/')}/chat/completions"
+    except Exception:
+        return "http://localhost:8083/v1/chat/completions"
+
+
+def _qwen_model() -> str:
+    try:
+        from codec_config import QWEN_MODEL as _m
+        return _m
+    except Exception:
+        return "mlx-community/Qwen3.6-35B-A3B-4bit"
+
+
+QWEN_URL = _qwen_url()
+QWEN_MODEL = _qwen_model()
 QWEN_TIMEOUT = 60
 
 
@@ -176,10 +194,13 @@ class QwenUnavailableError(RuntimeError):
 def _qwen_chat(user_prompt: str, system_prompt: str = "",
                max_tokens: int = 2000) -> str:
     """Local Qwen-3.6 OpenAI-compatible call. Same shape as
-    codec_agent_plan._qwen_chat — keep them parallel."""
+    codec_agent_plan._qwen_chat — keep them parallel.
+
+    URL + model resolved at call time so config.json changes are picked
+    up without a process restart."""
     import requests
     payload = {
-        "model": QWEN_MODEL,
+        "model": _qwen_model(),
         "messages": [
             {"role": "system", "content": system_prompt or ""},
             {"role": "user", "content": user_prompt},
@@ -188,7 +209,7 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "",
         "temperature": 0.2,
     }
     try:
-        r = requests.post(QWEN_URL, json=payload, timeout=QWEN_TIMEOUT)
+        r = requests.post(_qwen_url(), json=payload, timeout=QWEN_TIMEOUT)
     except requests.exceptions.ConnectionError as e:
         raise QwenUnavailableError(f"qwen3.6 unreachable: {e}")
     except requests.exceptions.Timeout:
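Both files now resolve the endpoint at call time, while the module-level QWEN_URL / QWEN_MODEL remain import-time snapshots kept only for back-compat. A pytest sketch of that distinction follows; the test names and the monkeypatch approach are illustrative, not part of the PR.

# test_qwen_resolution.py (illustrative only). Assumes pytest and the
# codec_config sketch above, where attributes resolve per access.
import codec_agent_runner as runner


def test_url_resolves_at_call_time(monkeypatch):
    import codec_config
    # Simulate editing ~/.codec/config.json after import by pinning the
    # attribute directly on the module (raising=False because it may not
    # exist in the module dict when served via __getattr__).
    monkeypatch.setattr(codec_config, "QWEN_BASE_URL",
                        "http://localhost:9999/v1", raising=False)
    assert runner._qwen_url() == "http://localhost:9999/v1/chat/completions"


def test_back_compat_constants_are_import_time_snapshots():
    # QWEN_URL / QWEN_MODEL were evaluated once when the module loaded;
    # only the _qwen_url() / _qwen_model() helpers track later changes.
    assert runner.QWEN_URL.endswith("/chat/completions")
    assert isinstance(runner.QWEN_MODEL, str) and runner.QWEN_MODEL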
codec_dashboard.py: 10 changes (7 additions & 3 deletions)
@@ -2444,19 +2444,23 @@ def _try_skill_by_name(name: str, query: str):
 
 def _qwen_chat_classify(user_text: str, max_tokens: int = 300) -> str:
     """Call Qwen-3.6 with the auto-escalation classifier prompt. Returns
-    raw response string. Caller handles JSON parsing + error fallback."""
+    raw response string. Caller handles JSON parsing + error fallback.
+
+    Hotfix: URL + model resolved from codec_config (was hardcoded to the
+    wrong dashboard port 8090; LLM lives at 8083 per ~/.codec/config.json)."""
     try:
         import requests
+        from codec_config import QWEN_BASE_URL, QWEN_MODEL as _qmodel
         payload = {
-            "model": "qwen3.6",
+            "model": _qmodel,
             "messages": [
                 {"role": "system", "content": _AUTO_ESCALATE_SYSTEM_PROMPT},
                 {"role": "user", "content": user_text[:2000]},
             ],
             "max_tokens": max_tokens,
             "temperature": 0.1,
         }
-        r = requests.post("http://127.0.0.1:8090/v1/chat/completions",
+        r = requests.post(f"{QWEN_BASE_URL.rstrip('/')}/chat/completions",
                           json=payload, timeout=15)
         if r.status_code != 200:
             return ""
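For a quick manual check of the fix, something like the following could confirm the resolved endpoint answers on the corrected port, assuming the local OpenAI-compatible server named in ~/.codec/config.json is running; the script name and prompt are placeholders.

# smoke_qwen.py (sketch): hit the resolved endpoint using the helpers
# this PR introduces, and print the reply.
import requests

from codec_agent_plan import _qwen_model, _qwen_url

payload = {
    "model": _qwen_model(),
    "messages": [{"role": "user", "content": "ping"}],
    "max_tokens": 8,
}
r = requests.post(_qwen_url(), json=payload, timeout=30)
r.raise_for_status()
# Standard OpenAI-compatible response shape.
print(r.json()["choices"][0]["message"]["content"])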