diff --git a/internal/providers/configs/openrouter.json b/internal/providers/configs/openrouter.json index ec8b3fc2..741e3e7c 100644 --- a/internal/providers/configs/openrouter.json +++ b/internal/providers/configs/openrouter.json @@ -190,7 +190,7 @@ "cost_per_1m_in_cached": 3.75, "cost_per_1m_out_cached": 0.3, "context_window": 200000, - "default_max_tokens": 32000, + "default_max_tokens": 64000, "can_reason": true, "reasoning_levels": [ "low", @@ -731,7 +731,7 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 1048576, - "default_max_tokens": 32768, + "default_max_tokens": 32767, "can_reason": true, "reasoning_levels": [ "low", @@ -1437,10 +1437,10 @@ { "id": "moonshotai/kimi-k2", "name": "MoonshotAI: Kimi K2 0711", - "cost_per_1m_in": 0.5, - "cost_per_1m_out": 2.4, + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 2.5, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.15, "context_window": 131072, "default_max_tokens": 13107, "can_reason": false, @@ -1760,9 +1760,9 @@ "cost_per_1m_in": 0.09999999999999999, "cost_per_1m_out": 0.39999999999999997, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.024999999999999998, + "cost_per_1m_out_cached": 0.03, "context_window": 1047576, - "default_max_tokens": 16384, + "default_max_tokens": 104757, "can_reason": false, "supports_attachments": true, "options": {} @@ -3056,7 +3056,7 @@ "id": "alibaba/tongyi-deepresearch-30b-a3b", "name": "Tongyi DeepResearch 30B A3B", "cost_per_1m_in": 0.09, - "cost_per_1m_out": 0.44999999999999996, + "cost_per_1m_out": 0.39999999999999997, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, @@ -3242,9 +3242,9 @@ "cost_per_1m_in": 0.3, "cost_per_1m_out": 0.8999999999999999, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_out_cached": 0.055, "context_window": 131072, - "default_max_tokens": 13107, + "default_max_tokens": 16384, "can_reason": true, "reasoning_levels": [ 
"low",
@@ -3284,10 +3284,10 @@
     {
       "id": "x-ai/grok-3-mini",
       "name": "xAI: Grok 3 Mini",
-      "cost_per_1m_in": 0.3,
-      "cost_per_1m_out": 0.5,
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 4,
       "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0.075,
+      "cost_per_1m_out_cached": 0.15,
       "context_window": 131072,
       "default_max_tokens": 13107,
       "can_reason": true,
@@ -3303,10 +3303,10 @@
     {
       "id": "x-ai/grok-3-mini-beta",
       "name": "xAI: Grok 3 Mini Beta",
-      "cost_per_1m_in": 0.3,
-      "cost_per_1m_out": 0.5,
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 4,
       "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0.075,
+      "cost_per_1m_out_cached": 0.15,
       "context_window": 131072,
       "default_max_tokens": 13107,
       "can_reason": true,
diff --git a/internal/providers/configs/vultr.json b/internal/providers/configs/vultr.json
new file mode 100644
index 00000000..02b33293
--- /dev/null
+++ b/internal/providers/configs/vultr.json
@@ -0,0 +1,53 @@
+{
+  "name": "Vultr",
+  "id": "vultr",
+  "type": "openai-compat",
+  "api_key": "$VULTR_API_KEY",
+  "api_endpoint": "https://api.vultrinference.com/v1",
+  "default_large_model_id": "kimi-k2-instruct",
+  "default_small_model_id": "qwen2.5-32b-instruct",
+  "models": [
+    {
+      "id": "kimi-k2-instruct",
+      "name": "Kimi K2 Instruct",
+      "cost_per_1m_in": 1,
+      "cost_per_1m_out": 3,
+      "context_window": 262144,
+      "default_max_tokens": 8192,
+      "can_reason": false
+    },
+    {
+      "id": "llama-3.1-70b-instruct-fp8",
+      "name": "Llama 3.1 70B Instruct FP8",
+      "cost_per_1m_in": 0.35,
+      "cost_per_1m_out": 0.35,
+      "context_window": 131072,
+      "default_max_tokens": 4096
+    },
+    {
+      "id": "mistral-7b-v0.3",
+      "name": "Mistral 7B v0.3",
+      "cost_per_1m_in": 0.05,
+      "cost_per_1m_out": 0.05,
+      "context_window": 32768,
+      "default_max_tokens": 4096
+    },
+    {
+      "id": "deepseek-r1",
+      "name": "DeepSeek R1",
+      "cost_per_1m_in": 0.55,
+      "cost_per_1m_out": 2.19,
+      "context_window": 128000,
+      "default_max_tokens": 4096,
+      "can_reason": true
+    },
+    {
+      "id": "qwen2.5-32b-instruct",
+      "name":
"Qwen 2.5 32B Instruct",
+      "cost_per_1m_in": 0.14,
+      "cost_per_1m_out": 0.42,
+      "context_window": 32768,
+      "default_max_tokens": 4096
+    }
+  ]
+}
diff --git a/internal/providers/providers.go b/internal/providers/providers.go
index d5299c49..5cf74309 100644
--- a/internal/providers/providers.go
+++ b/internal/providers/providers.go
@@ -63,6 +63,9 @@ var aiHubMixConfig []byte
 
 //go:embed configs/kimi.json
 var kimiCodingConfig []byte
 
+//go:embed configs/vultr.json
+var vultrConfig []byte
+
 //go:embed configs/copilot.json
 var copilotConfig []byte
@@ -87,6 +90,7 @@ var providerRegistry = []ProviderFunc{
 	deepSeekProvider,
 	huggingFaceProvider,
 	aiHubMixProvider,
+	vultrProvider,
 	syntheticProvider,
 	copilotProvider,
 }
@@ -181,6 +185,10 @@ func kimiCodingProvider() catwalk.Provider {
 	return loadProviderFromConfig(kimiCodingConfig)
 }
 
+func vultrProvider() catwalk.Provider {
+	return loadProviderFromConfig(vultrConfig)
+}
+
 func copilotProvider() catwalk.Provider {
 	return loadProviderFromConfig(copilotConfig)
 }