Merged
proxy/main.py (0 additions, 21 deletions)
@@ -253,27 +253,6 @@ async def inference_endpoint(request: InferenceRequest, http_request: Request):
     # request.model = "deepseek-ai/DeepSeek-V3-0324"
 
     try:
-        # Don't log this stuff it provides no value
-
-        # Log only the last incoming message to avoid flooding the console
-        # if request.messages:
-        #     last_msg = request.messages[-1]
-        #     snippet = (last_msg.content[:300] + "…") if last_msg.content and len(last_msg.content) > 300 else last_msg.content
-        #     logger.info(
-        #         "Inference request | model=%s | run_id=%s | total_msgs=%d | last_role=%s | last_preview=%s",
-        #         request.model,
-        #         request.run_id,
-        #         len(request.messages),
-        #         last_msg.role,
-        #         snippet,
-        #     )
-        # else:
-        #     logger.info(
-        #         "Inference request | model=%s | run_id=%s | total_msgs=0",
-        #         request.model,
-        #         request.run_id,
-        #     )
-
         if ENV != 'dev':
             # Production mode - run_id is required
             if not request.run_id:
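The trailing context of this hunk shows an environment-gated run_id check. Since the rest of that branch is collapsed in the diff, here is a purely illustrative sketch of the pattern, assuming a FastAPI handler that rejects missing run_ids with a 400 (the exception type and status code are assumptions, not the collapsed code):

```python
# Illustrative only: the environment-gated guard suggested by the
# trailing context above. HTTPException and the 400 status are
# assumptions; the real collapsed body may differ.
import os
from fastapi import HTTPException

ENV = os.getenv("ENV", "dev")

def require_run_id(run_id: str | None) -> None:
    if ENV != "dev":
        # Production mode - run_id is required
        if not run_id:
            raise HTTPException(status_code=400, detail="run_id is required in production")
```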
proxy/providers/inference_manager.py (8 additions, 3 deletions)
@@ -33,9 +33,14 @@ def __init__(self):
 
     def _find_provider(self, model: str) -> InferenceProvider:
         """Find the primary provider for the given model"""
-        # Prioritize Targon for models it supports (avoid unnecessary Chutes attempts)
-        # if self.targon.supports_model(model) and self.targon.is_available():
-        #     return self.targon
+        # Prioritize Targon for models it supports, 25% of the time
+        import random
+        if (
+            self.targon.supports_model(model)
+            and self.targon.is_available()
+            and random.random() < 0.25
+        ):
+            return self.targon
 
         for provider in self.providers:
             if provider.supports_model(model) and provider.is_available():
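The net effect of this change: when Targon supports the requested model and is healthy, roughly one request in four is routed to it first, and everything else falls through to the ordered provider scan. A minimal, self-contained sketch of that split (the stub classes and fallback ordering are assumptions, not the real proxy/providers code):

```python
# Self-contained sketch of the weighted routing above. The 0.25 weight
# and the Targon-first check mirror the diff; StubProvider and the
# provider ordering are hypothetical stand-ins.
import random
from dataclasses import dataclass

@dataclass
class StubProvider:
    name: str

    def supports_model(self, model: str) -> bool:
        return True  # assume every stub supports every model

    def is_available(self) -> bool:
        return True  # assume every stub is healthy

targon = StubProvider("targon")
chutes = StubProvider("chutes")
providers = [chutes, targon]  # fallback order is an assumption

def find_provider(model: str) -> StubProvider:
    # ~25% of eligible requests are steered to Targon first
    if targon.supports_model(model) and targon.is_available() and random.random() < 0.25:
        return targon
    # otherwise fall through to the ordered provider list
    for provider in providers:
        if provider.supports_model(model) and provider.is_available():
            return provider
    raise RuntimeError("no provider available for model")

random.seed(42)  # reproducible for the sketch
counts = {"targon": 0, "chutes": 0}
for _ in range(10_000):
    counts[find_provider("deepseek-ai/DeepSeek-V3-0324").name] += 1
print(counts)  # roughly {'targon': 2500, 'chutes': 7500}
```

One design note: the diff imports random inside _find_provider, which works but re-executes the import lookup on every call; a module-level import is the more conventional placement.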