codec_agent_plan.py: 33 changes (28 additions & 5 deletions)
@@ -230,8 +230,27 @@ def validate_plan_skills(plan: Plan, registry=None) -> Tuple[bool, List[str]]:
 
 
 # ── Qwen-3.6 client ───────────────────────────────────────────────────────────
-QWEN_URL = "http://127.0.0.1:8090/v1/chat/completions"
-QWEN_MODEL = "qwen3.6"
+# Hotfix: read URL+model from ~/.codec/config.json:llm_base_url+llm_model.
+# Falls back to codec_config defaults. Hardcoded values were wrong (8090 is
+# the dashboard port; the LLM lives at 8083).
+def _qwen_url() -> str:
+    try:
+        from codec_config import QWEN_BASE_URL
+        return f"{QWEN_BASE_URL.rstrip('/')}/chat/completions"
+    except Exception:
+        return "http://localhost:8083/v1/chat/completions"
+
+
+def _qwen_model() -> str:
+    try:
+        from codec_config import QWEN_MODEL as _m
+        return _m
+    except Exception:
+        return "mlx-community/Qwen3.6-35B-A3B-4bit"
+
+
+QWEN_URL = _qwen_url()      # back-compat — module-level constant for tests
+QWEN_MODEL = _qwen_model()  # back-compat
 QWEN_TIMEOUT = 60  # seconds
 
 
@@ -247,11 +266,15 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "",
                max_tokens: int = 4000) -> str:
     """Call local Qwen-3.6 OpenAI-compatible endpoint. Returns the
     assistant's content string. Raises QwenUnavailableError on
-    network failure or non-2xx response."""
+    network failure or non-2xx response.
+
+    URL + model resolved at call time via _qwen_url() / _qwen_model()
+    so they pick up ~/.codec/config.json:llm_base_url + :llm_model
+    rather than the deploy-time hardcoded values."""
     import requests  # lazy import — avoid forcing requests on test machines without it
 
     payload = {
-        "model": QWEN_MODEL,
+        "model": _qwen_model(),
         "messages": [
             {"role": "system", "content": system_prompt or ""},
             {"role": "user", "content": user_prompt},
@@ -260,7 +283,7 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "",
         "temperature": 0.2,
     }
     try:
-        r = requests.post(QWEN_URL, json=payload, timeout=QWEN_TIMEOUT)
+        r = requests.post(_qwen_url(), json=payload, timeout=QWEN_TIMEOUT)
     except requests.exceptions.ConnectionError as e:
         raise QwenUnavailableError(f"qwen3.6 unreachable: {e}")
     except requests.exceptions.Timeout:
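Not included in the diff: the codec_config module both helpers import from. Below is a minimal sketch of what it presumably provides, assuming ~/.codec/config.json carries the llm_base_url and llm_model keys named in the hotfix comments; the keys and fallback values come from the diff, while the PEP 562 module __getattr__ is an assumption that would make each call-time import re-read the file, which is what the "picked up without a process restart" claim in the runner's docstring relies on.

# codec_config.py (sketch, not part of this PR): resolves QWEN_BASE_URL and
# QWEN_MODEL from ~/.codec/config.json, falling back to the same defaults the
# hotfix uses. Keys and defaults are taken from the diff; the __getattr__
# live-reload mechanism is an assumption.
import json
from pathlib import Path

_CONFIG_PATH = Path.home() / ".codec" / "config.json"
_DEFAULTS = {
    "llm_base_url": "http://localhost:8083/v1",
    "llm_model": "mlx-community/Qwen3.6-35B-A3B-4bit",
}
_ALIASES = {"QWEN_BASE_URL": "llm_base_url", "QWEN_MODEL": "llm_model"}


def _load() -> dict:
    # Re-read on every access; a missing or malformed file means defaults.
    try:
        return {**_DEFAULTS, **json.loads(_CONFIG_PATH.read_text())}
    except Exception:
        return dict(_DEFAULTS)


def __getattr__(name: str):
    # PEP 562: called for attributes not found in the module dict, i.e. on
    # every `from codec_config import QWEN_BASE_URL`, so config.json edits
    # take effect without restarting the process.
    if name in _ALIASES:
        return _load()[_ALIASES[name]]
    raise AttributeError(name)

If codec_config instead bound these names as plain module constants, they would be frozen the first time codec_config itself was imported, and the call-time helpers would only help across interpreter restarts.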
codec_agent_runner.py: 31 changes (26 additions & 5 deletions)
@@ -135,8 +135,26 @@ def permission_gate(action: Action, agent_grants: Dict[str, Any],
 
 
 # ── Qwen-3.6 client (mirrors codec_agent_plan pattern) ────────────────────────
-QWEN_URL = "http://127.0.0.1:8090/v1/chat/completions"
-QWEN_MODEL = "qwen3.6"
+# Hotfix: read URL+model from ~/.codec/config.json via codec_config (8090
+# was the dashboard port; LLM lives at 8083).
+def _qwen_url() -> str:
+    try:
+        from codec_config import QWEN_BASE_URL
+        return f"{QWEN_BASE_URL.rstrip('/')}/chat/completions"
+    except Exception:
+        return "http://localhost:8083/v1/chat/completions"
+
+
+def _qwen_model() -> str:
+    try:
+        from codec_config import QWEN_MODEL as _m
+        return _m
+    except Exception:
+        return "mlx-community/Qwen3.6-35B-A3B-4bit"
+
+
+QWEN_URL = _qwen_url()
+QWEN_MODEL = _qwen_model()
 QWEN_TIMEOUT = 60
 
 
@@ -176,10 +194,13 @@ class QwenUnavailableError(RuntimeError):
 def _qwen_chat(user_prompt: str, system_prompt: str = "",
                max_tokens: int = 2000) -> str:
     """Local Qwen-3.6 OpenAI-compatible call. Same shape as
-    codec_agent_plan._qwen_chat — keep them parallel."""
+    codec_agent_plan._qwen_chat — keep them parallel.
+
+    URL + model resolved at call time so config.json changes are picked
+    up without a process restart."""
     import requests
     payload = {
-        "model": QWEN_MODEL,
+        "model": _qwen_model(),
         "messages": [
             {"role": "system", "content": system_prompt or ""},
             {"role": "user", "content": user_prompt},
@@ -188,7 +209,7 @@ def _qwen_chat(user_prompt: str, system_prompt: str = "",
         "temperature": 0.2,
     }
     try:
-        r = requests.post(QWEN_URL, json=payload, timeout=QWEN_TIMEOUT)
+        r = requests.post(_qwen_url(), json=payload, timeout=QWEN_TIMEOUT)
     except requests.exceptions.ConnectionError as e:
         raise QwenUnavailableError(f"qwen3.6 unreachable: {e}")
     except requests.exceptions.Timeout:
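Both files now resolve the endpoint at call time, while the module-level QWEN_URL / QWEN_MODEL remain import-time snapshots kept only for back-compat. A pytest sketch of that distinction follows; the test names and the monkeypatch approach are illustrative, not part of the PR.

# test_qwen_resolution.py (illustrative only). Assumes pytest and the
# codec_config sketch above, where attributes resolve per access.
import codec_agent_runner as runner


def test_url_resolves_at_call_time(monkeypatch):
    import codec_config
    # Simulate editing ~/.codec/config.json after import by pinning the
    # attribute directly on the module (raising=False because it may not
    # exist in the module dict when served via __getattr__).
    monkeypatch.setattr(codec_config, "QWEN_BASE_URL",
                        "http://localhost:9999/v1", raising=False)
    assert runner._qwen_url() == "http://localhost:9999/v1/chat/completions"


def test_back_compat_constants_are_import_time_snapshots():
    # QWEN_URL / QWEN_MODEL were evaluated once when the module loaded;
    # only the _qwen_url() / _qwen_model() helpers track later changes.
    assert runner.QWEN_URL.endswith("/chat/completions")
    assert isinstance(runner.QWEN_MODEL, str) and runner.QWEN_MODEL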
codec_dashboard.py: 10 changes (7 additions & 3 deletions)
@@ -2444,19 +2444,23 @@ def _try_skill_by_name(name: str, query: str):
 
 def _qwen_chat_classify(user_text: str, max_tokens: int = 300) -> str:
     """Call Qwen-3.6 with the auto-escalation classifier prompt. Returns
-    raw response string. Caller handles JSON parsing + error fallback."""
+    raw response string. Caller handles JSON parsing + error fallback.
+
+    Hotfix: URL + model resolved from codec_config (was hardcoded to the
+    wrong dashboard port 8090; LLM lives at 8083 per ~/.codec/config.json)."""
     try:
         import requests
+        from codec_config import QWEN_BASE_URL, QWEN_MODEL as _qmodel
         payload = {
-            "model": "qwen3.6",
+            "model": _qmodel,
             "messages": [
                 {"role": "system", "content": _AUTO_ESCALATE_SYSTEM_PROMPT},
                 {"role": "user", "content": user_text[:2000]},
             ],
             "max_tokens": max_tokens,
             "temperature": 0.1,
         }
-        r = requests.post("http://127.0.0.1:8090/v1/chat/completions",
+        r = requests.post(f"{QWEN_BASE_URL.rstrip('/')}/chat/completions",
                           json=payload, timeout=15)
         if r.status_code != 200:
             return ""
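For a quick manual check of the fix, something like the following could confirm the resolved endpoint answers on the corrected port, assuming the local OpenAI-compatible server named in ~/.codec/config.json is running; the script name and prompt are placeholders.

# smoke_qwen.py (sketch): hit the resolved endpoint using the helpers
# this PR introduces, and print the reply.
import requests

from codec_agent_plan import _qwen_model, _qwen_url

payload = {
    "model": _qwen_model(),
    "messages": [{"role": "user", "content": "ping"}],
    "max_tokens": 8,
}
r = requests.post(_qwen_url(), json=payload, timeout=30)
r.raise_for_status()
# Standard OpenAI-compatible response shape.
print(r.json()["choices"][0]["message"]["content"])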