diff --git a/proxy/main.py b/proxy/main.py
index c0395a54..a0f10d38 100644
--- a/proxy/main.py
+++ b/proxy/main.py
@@ -253,27 +253,6 @@ async def inference_endpoint(request: InferenceRequest, http_request: Request):
     # request.model = "deepseek-ai/DeepSeek-V3-0324"
 
     try:
-        # Don't log this stuff it provides no value
-
-        # Log only the last incoming message to avoid flooding the console
-        # if request.messages:
-        #     last_msg = request.messages[-1]
-        #     snippet = (last_msg.content[:300] + "…") if last_msg.content and len(last_msg.content) > 300 else last_msg.content
-        #     logger.info(
-        #         "Inference request | model=%s | run_id=%s | total_msgs=%d | last_role=%s | last_preview=%s",
-        #         request.model,
-        #         request.run_id,
-        #         len(request.messages),
-        #         last_msg.role,
-        #         snippet,
-        #     )
-        # else:
-        #     logger.info(
-        #         "Inference request | model=%s | run_id=%s | total_msgs=0",
-        #         request.model,
-        #         request.run_id,
-        #     )
-
         if ENV != 'dev':
             # Production mode - run_id is required
             if not request.run_id:
diff --git a/proxy/providers/inference_manager.py b/proxy/providers/inference_manager.py
index fb24ff48..b9fee9cf 100644
--- a/proxy/providers/inference_manager.py
+++ b/proxy/providers/inference_manager.py
@@ -33,9 +33,14 @@ def __init__(self):
 
     def _find_provider(self, model: str) -> InferenceProvider:
        """Find the primary provider for the given model"""
-        # Prioritize Targon for models it supports (avoid unnecessary Chutes attempts)
-        # if self.targon.supports_model(model) and self.targon.is_available():
-        #     return self.targon
+        # Prioritize Targon for models it supports, 25% of the time
+        import random
+        if (
+            self.targon.supports_model(model)
+            and self.targon.is_available()
+            and random.random() < 0.25
+        ):
+            return self.targon
 
         for provider in self.providers:
             if provider.supports_model(model) and provider.is_available():
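
For reference, a minimal standalone sketch of the probabilistic routing the second hunk introduces. The `Provider`, `find_provider`, and `preference_rate` names are hypothetical stand-ins for `InferenceProvider`, `_find_provider`, and the hard-coded `0.25`; the module-level `import random` is also an assumption (the diff imports it inside the method).

```python
import random
from dataclasses import dataclass, field


@dataclass
class Provider:
    # Hypothetical stand-in exposing the two checks the diff relies on:
    # supports_model() and is_available().
    name: str
    models: set = field(default_factory=set)
    available: bool = True

    def supports_model(self, model: str) -> bool:
        return model in self.models

    def is_available(self) -> bool:
        return self.available


def find_provider(providers, preferred, model, preference_rate=0.25):
    # Route a fraction of eligible requests to the preferred provider;
    # otherwise fall through to the ordered provider list, exactly as
    # the unchanged loop in _find_provider does.
    if (
        preferred.supports_model(model)
        and preferred.is_available()
        and random.random() < preference_rate
    ):
        return preferred
    for provider in providers:
        if provider.supports_model(model) and provider.is_available():
            return provider
    raise LookupError(f"no provider available for {model}")


# Usage: over many calls, roughly 25% of eligible requests land on targon.
targon = Provider("targon", {"deepseek-ai/DeepSeek-V3-0324"})
chutes = Provider("chutes", {"deepseek-ai/DeepSeek-V3-0324"})
provider = find_provider([chutes, targon], preferred=targon,
                         model="deepseek-ai/DeepSeek-V3-0324")
```

Because `random.random()` draws per request, the split is only approximate for small request counts; a `preference_rate` of 0 reproduces the pre-change behavior of always walking the ordered provider list.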