diff --git a/Taskfile.yaml b/Taskfile.yaml index 9bb3f6ec..1da403fe 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -61,6 +61,7 @@ tasks: - task: gen:huggingface - task: gen:openrouter - task: gen:synthetic + - task: gen:venice gen:copilot: desc: Generate copilot provider configurations @@ -81,3 +82,8 @@ tasks: desc: Generate synthetic provider configurations cmds: - go run cmd/synthetic/main.go + + gen:venice: + desc: Generate venice provider configurations + cmds: + - go run cmd/venice/main.go
diff --git a/cmd/venice/main.go b/cmd/venice/main.go
new file mode 100644
index 00000000..c80f7141
--- /dev/null
+++ b/cmd/venice/main.go
@@ -0,0 +1,281 @@
+// Package main provides a command-line tool to fetch models from Venice
+// and generate a configuration file for the provider.
+package main
+
+import (
+	"cmp"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"os"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/charmbracelet/catwalk/pkg/catwalk"
+)
+
+const (
+	// outputPath is the generated provider configuration, relative to the
+	// directory the tool is run from.
+	outputPath = "internal/providers/configs/venice.json"
+
+	// Bounds applied to the default completion budget of every model.
+	minDefaultMaxTokens int64 = 2048
+	maxDefaultMaxTokens int64 = 32768
+
+	// maxErrorBodyBytes caps how much of a failed response body is quoted in
+	// the returned error.
+	maxErrorBodyBytes = 4096
+)
+
+// ModelsResponse is the envelope decoded from the Venice /models endpoint.
+type ModelsResponse struct {
+	Data []VeniceModel `json:"data"`
+}
+
+// VeniceModel is one entry of the model listing.
+type VeniceModel struct {
+	Created   int64           `json:"created"`
+	ID        string          `json:"id"`
+	ModelSpec VeniceModelSpec `json:"model_spec"`
+	Object    string          `json:"object"`
+	OwnedBy   string          `json:"owned_by"`
+	Type      string          `json:"type"`
+}
+
+// VeniceModelSpec carries the per-model details used to build the catwalk
+// configuration: context size, capabilities, sampling defaults and pricing.
+type VeniceModelSpec struct {
+	AvailableContextTokens int64                   `json:"availableContextTokens"`
+	Capabilities           VeniceModelCapabilities `json:"capabilities"`
+	Constraints            VeniceModelConstraints  `json:"constraints"`
+	Name                   string                  `json:"name"`
+	ModelSource            string                  `json:"modelSource"`
+	Offline                bool                    `json:"offline"`
+	Pricing                VeniceModelPricing      `json:"pricing"`
+	Traits                 []string                `json:"traits"`
+	Beta                   bool                    `json:"beta"`
+}
+
+// VeniceModelCapabilities lists the feature flags reported for a model.
+type VeniceModelCapabilities struct {
+	OptimizedForCode        bool   `json:"optimizedForCode"`
+	Quantization            string `json:"quantization"`
+	SupportsFunctionCalling bool   `json:"supportsFunctionCalling"`
+	SupportsReasoning       bool   `json:"supportsReasoning"`
+	SupportsResponseSchema  bool   `json:"supportsResponseSchema"`
+	SupportsVision          bool   `json:"supportsVision"`
+	SupportsWebSearch       bool   `json:"supportsWebSearch"`
+	SupportsLogProbs        bool   `json:"supportsLogProbs"`
+}
+
+// VeniceModelConstraints holds optional sampling constraints; a nil field
+// means the API response did not include that constraint.
+type VeniceModelConstraints struct {
+	Temperature *VeniceDefaultFloat `json:"temperature"`
+	TopP        *VeniceDefaultFloat `json:"top_p"`
+}
+
+// VeniceDefaultFloat wraps the default value of a numeric constraint.
+type VeniceDefaultFloat struct {
+	Default float64 `json:"default"`
+}
+
+// VeniceModelPricing holds the input and output prices of a model.
+type VeniceModelPricing struct {
+	Input  VeniceModelPricingValue `json:"input"`
+	Output VeniceModelPricingValue `json:"output"`
+}
+
+// VeniceModelPricingValue is a single price with its "usd" and "diem" amounts.
+type VeniceModelPricingValue struct {
+	USD  float64 `json:"usd"`
+	Diem float64 `json:"diem"`
+}
+
+// fetchVeniceModels downloads and decodes the model listing served at
+// apiEndpoint + "/models". When VENICE_API_KEY holds a real value (not an
+// unexpanded "$..." placeholder) it is sent as a bearer token. Request
+// construction failures, transport errors, non-200 responses and undecodable
+// bodies are all reported as errors.
+func fetchVeniceModels(apiEndpoint string) (*ModelsResponse, error) {
+	url := strings.TrimRight(apiEndpoint, "/") + "/models"
+
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("building request: %w", err)
+	}
+	req.Header.Set("User-Agent", "Crush-Client/1.0")
+
+	apiKey := strings.TrimSpace(os.Getenv("VENICE_API_KEY"))
+	if apiKey != "" && !strings.HasPrefix(apiKey, "$") {
+		req.Header.Set("Authorization", "Bearer "+apiKey)
+	}
+
+	client := &http.Client{Timeout: 30 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("requesting models: %w", err)
+	}
+	defer resp.Body.Close() //nolint:errcheck
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body only enriches the error message.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body)
+	}
+
+	var mr ModelsResponse
+	if err := json.NewDecoder(resp.Body).Decode(&mr); err != nil {
+		return nil, fmt.Errorf("decoding models response: %w", err)
+	}
+	return &mr, nil
+}
+
+// compareModels orders models by combined input and output price, breaking
+// ties by context window size.
+func compareModels(a, b catwalk.Model) int {
+	costA := a.CostPer1MIn + a.CostPer1MOut
+	costB := b.CostPer1MIn + b.CostPer1MOut
+	if c := cmp.Compare(costA, costB); c != 0 {
+		return c
+	}
+	return cmp.Compare(a.ContextWindow, b.ContextWindow)
+}
+
+// bestLargeModelID returns the ID of the most expensive model, preferring the
+// larger context window on a price tie and the earlier entry on a full tie.
+// It returns "" for an empty slice.
+func bestLargeModelID(models []catwalk.Model) string {
+	if len(models) == 0 {
+		return ""
+	}
+	return slices.MaxFunc(models, compareModels).ID
+}
+
+// bestSmallModelID returns the ID of the cheapest model, preferring the
+// smaller context window on a price tie and the earlier entry on a full tie.
+// It returns "" for an empty slice.
+func bestSmallModelID(models []catwalk.Model) string {
+	if len(models) == 0 {
+		return ""
+	}
+	return slices.MinFunc(models, compareModels).ID
+}
+
+// defaultOption returns a pointer to a copy of the constraint's default, or
+// nil when the constraint is absent or not a number.
+func defaultOption(c *VeniceDefaultFloat) *float64 {
+	if c == nil || math.IsNaN(c.Default) {
+		return nil
+	}
+	v := c.Default
+	return &v
+}
+
+// toCatwalkModel converts a Venice listing entry into a catwalk model. The
+// second result is false for entries that are skipped: anything that is not
+// an online, non-beta text model with function calling and a positive
+// context window.
+func toCatwalkModel(model VeniceModel) (catwalk.Model, bool) {
+	spec := model.ModelSpec
+	skip := !strings.EqualFold(model.Type, "text") ||
+		spec.Offline ||
+		spec.Beta ||
+		!spec.Capabilities.SupportsFunctionCalling ||
+		spec.AvailableContextTokens <= 0
+	if skip {
+		return catwalk.Model{}, false
+	}
+
+	// Default to a quarter of the context window, clamped to the configured
+	// bounds but never above the window itself.
+	contextWindow := spec.AvailableContextTokens
+	defaultMaxTokens := max(min(contextWindow/4, maxDefaultMaxTokens), minDefaultMaxTokens)
+	defaultMaxTokens = min(defaultMaxTokens, contextWindow)
+
+	options := catwalk.ModelOptions{
+		Temperature: defaultOption(spec.Constraints.Temperature),
+		TopP:        defaultOption(spec.Constraints.TopP),
+	}
+
+	m := catwalk.Model{
+		ID:                 model.ID,
+		Name:               spec.Name,
+		CostPer1MIn:        spec.Pricing.Input.USD,
+		CostPer1MOut:       spec.Pricing.Output.USD,
+		CostPer1MInCached:  0,
+		CostPer1MOutCached: 0,
+		ContextWindow:      contextWindow,
+		DefaultMaxTokens:   defaultMaxTokens,
+		CanReason:          spec.Capabilities.SupportsReasoning,
+		SupportsImages:     spec.Capabilities.SupportsVision,
+		Options:            options,
+	}
+	if m.CanReason {
+		m.ReasoningLevels = []string{"low", "medium", "high"}
+		m.DefaultReasoningEffort = "medium"
+	}
+	return m, true
+}
+
+// main fetches the Venice model listing, converts the usable entries, picks
+// default large and small models, and writes the provider configuration as
+// indented JSON.
+func main() {
+	veniceProvider := catwalk.Provider{
+		Name:        "Venice AI",
+		ID:          catwalk.InferenceProviderVenice,
+		APIKey:      "$VENICE_API_KEY",
+		APIEndpoint: "https://api.venice.ai/api/v1",
+		Type:        catwalk.TypeOpenAICompat,
+		Models:      []catwalk.Model{},
+	}
+
+	modelsResp, err := fetchVeniceModels(veniceProvider.APIEndpoint)
+	if err != nil {
+		log.Fatalf("Error fetching Venice models: %v", err)
+	}
+
+	var codeOptimizedModels []catwalk.Model
+	for _, model := range modelsResp.Data {
+		m, ok := toCatwalkModel(model)
+		if !ok {
+			continue
+		}
+		veniceProvider.Models = append(veniceProvider.Models, m)
+		if model.ModelSpec.Capabilities.OptimizedForCode {
+			codeOptimizedModels = append(codeOptimizedModels, m)
+		}
+	}
+	if len(veniceProvider.Models) == 0 {
+		log.Fatalf("No usable Venice models returned; not overwriting %s", outputPath)
+	}
+
+	// Prefer code-optimized models as defaults whenever any were returned.
+	candidateModels := veniceProvider.Models
+	if len(codeOptimizedModels) > 0 {
+		candidateModels = codeOptimizedModels
+	}
+	veniceProvider.DefaultLargeModelID = bestLargeModelID(candidateModels)
+	veniceProvider.DefaultSmallModelID = bestSmallModelID(candidateModels)
+
+	slices.SortFunc(veniceProvider.Models, func(a, b catwalk.Model) int {
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	data, err := json.MarshalIndent(veniceProvider, "", "  ")
+	if err != nil {
+		log.Fatalf("Error marshaling Venice provider: %v", err)
+	}
+	if err := os.WriteFile(outputPath, data, 0o600); err != nil {
+		log.Fatalf("Error writing Venice provider config: %v", err)
+	}
+
+	fmt.Printf("Generated venice.json with %d models\n", len(veniceProvider.Models))
+}
diff --git a/internal/providers/configs/venice.json b/internal/providers/configs/venice.json index 145d965d..71abbd96 100644 --- a/internal/providers/configs/venice.json +++ b/internal/providers/configs/venice.json @@ -1,47 +1,175 @@ { "name": "Venice AI", "id": "venice", - "type": "openai-compat", "api_key": "$VENICE_API_KEY", "api_endpoint": "https://api.venice.ai/api/v1", - "default_large_model_id": 
"qwen3-235b:strip_thinking_response=true", - "default_small_model_id": "mistral-31-24b", + "type": "openai-compat", + "default_large_model_id": "claude-opus-45", + "default_small_model_id": "qwen3-coder-480b-a35b-instruct", "models": [ { - "id": "qwen3-235b:strip_thinking_response=true", - "name": "Venice Large (qwen3-235b)", - "cost_per_1m_in": 1.5, - "cost_per_1m_out": 6, + "id": "claude-opus-45", + "name": "Claude Opus 4.5", + "cost_per_1m_in": 6, + "cost_per_1m_out": 30, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 131072, - "default_max_tokens": 50000, + "context_window": 202752, + "default_max_tokens": 32768, "can_reason": true, - "supports_attachments": false + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": { + "temperature": 0.7, + "top_p": 0.9 + } }, { - "id": "qwen3-4b:strip_thinking_response=true", - "name": "Venice Small (qwen3-4b)", - "cost_per_1m_in": 0.15, - "cost_per_1m_out": 0.6, + "id": "zai-org-glm-4.6", + "name": "GLM 4.6", + "cost_per_1m_in": 0.85, + "cost_per_1m_out": 2.75, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 32768, - "default_max_tokens": 25000, + "context_window": 202752, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.9 + } + }, + { + "id": "openai-gpt-52", + "name": "GPT-5.2", + "cost_per_1m_in": 2.19, + "cost_per_1m_out": 17.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 32768, "can_reason": true, - "supports_attachments": false + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.9 + } }, { - "id": "mistral-31-24b", - "name": "Venice Medium (mistral-31-24b)", - "cost_per_1m_in": 0.5, - 
"cost_per_1m_out": 2, + "id": "gemini-3-flash-preview", + "name": "Gemini 3 Flash Preview", + "cost_per_1m_in": 0.7, + "cost_per_1m_out": 3.75, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 131072, - "default_max_tokens": 50000, + "context_window": 262144, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": { + "temperature": 1, + "top_p": 0.95 + } + }, + { + "id": "gemini-3-pro-preview", + "name": "Gemini 3 Pro Preview", + "cost_per_1m_in": 2.5, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": { + "temperature": 0.7, + "top_p": 0.8 + } + }, + { + "id": "google-gemma-3-27b-it", + "name": "Google Gemma 3 27B Instruct", + "cost_per_1m_in": 0.12, + "cost_per_1m_out": 0.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 32768, "can_reason": false, - "supports_attachments": true + "supports_attachments": true, + "options": { + "temperature": 0.7, + "top_p": 0.9 + } + }, + { + "id": "grok-41-fast", + "name": "Grok 4.1 Fast", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 1.25, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": { + "temperature": 0.7, + "top_p": 0.8 + } + }, + { + "id": "kimi-k2-thinking", + "name": "Kimi K2 Thinking", + "cost_per_1m_in": 0.75, + "cost_per_1m_out": 3.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + 
"context_window": 262144, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.8 + } }, { "id": "llama-3.2-3b", @@ -51,9 +179,13 @@ "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 25000, + "default_max_tokens": 32768, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": { + "temperature": 0.6, + "top_p": 0.95 + } }, { "id": "llama-3.3-70b", @@ -62,10 +194,138 @@ "cost_per_1m_out": 2.8, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 65536, - "default_max_tokens": 32000, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": { + "temperature": 0.6, + "top_p": 0.95 + } + }, + { + "id": "openai-gpt-oss-120b", + "name": "OpenAI GPT OSS 120B", + "cost_per_1m_in": 0.07, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.9 + } + }, + { + "id": "qwen3-235b-a22b-instruct-2507", + "name": "Qwen 3 235B A22B Instruct 2507", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.8 + } + }, + { + "id": "qwen3-235b-a22b-thinking-2507", + "name": "Qwen 3 235B A22B Thinking 2507", + "cost_per_1m_in": 0.45, + "cost_per_1m_out": 3.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": true, + 
"reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": { + "temperature": 0.6, + "top_p": 0.95 + } + }, + { + "id": "qwen3-coder-480b-a35b-instruct", + "name": "Qwen 3 Coder 480b", + "cost_per_1m_in": 0.75, + "cost_per_1m_out": 3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 32768, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.8 + } + }, + { + "id": "qwen3-next-80b", + "name": "Qwen 3 Next 80b", + "cost_per_1m_in": 0.35, + "cost_per_1m_out": 1.9, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": { + "temperature": 0.7, + "top_p": 0.8 + } + }, + { + "id": "mistral-31-24b", + "name": "Venice Medium", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": true, + "options": { + "temperature": 0.15, + "top_p": 1 + } + }, + { + "id": "qwen3-4b", + "name": "Venice Small", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": { + "temperature": 0.6, + "top_p": 0.95 + } } ] -} +} \ No newline at end of file