-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy path4-custom-provider.js
More file actions
134 lines (116 loc) · 3.78 KB
/
4-custom-provider.js
File metadata and controls
134 lines (116 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
/**
* Example 4: Custom Provider - Add Your Own LLM
*
* This example shows how to:
* - Register a custom LLM provider adapter
* - Set custom pricing
* - Track usage for self-hosted or new providers
*/
const {
createBudgetGuard,
patchGlobalFetch,
registerAdapter,
registerPricing,
getBudgetStatus
} = require("../dist/index.js");
// Step 1: Register custom adapter for your LLM provider
// Example: Ollama (self-hosted)
registerAdapter({
  name: "ollama",

  /**
   * Detect whether a parsed response body came from Ollama.
   * Returns a strict boolean (the original could return a truthy
   * non-boolean such as the model string, which reads poorly for a
   * predicate and leaks data into the result).
   * @param {unknown} response - Parsed JSON body from the provider.
   * @returns {boolean} true when the body matches Ollama's shape.
   */
  detect: (response) =>
    typeof response === "object" &&
    response !== null &&
    Boolean(response.model) &&
    response.prompt_eval_count !== undefined,

  /**
   * Normalize an Ollama response into the standard usage record.
   * @param {{model: string, prompt_eval_count?: number, eval_count?: number}} response
   * @returns {{provider: string, model: string, inputTokens: number, outputTokens: number, totalTokens: number}}
   */
  normalize: (response) => {
    // ?? (not ||) so only null/undefined fall back to 0; the counts are
    // then reused rather than re-derived for totalTokens.
    const inputTokens = response.prompt_eval_count ?? 0;
    const outputTokens = response.eval_count ?? 0;
    return {
      provider: "ollama",
      model: response.model,
      inputTokens,
      outputTokens,
      totalTokens: inputTokens + outputTokens
    };
  }
});
// Step 2: Register pricing for your models.
// Rates are dollars per 1M tokens.
const ollamaPricing = {
  // Self-hosted models cost nothing, but registering them at $0
  // keeps their token usage tracked and logged.
  "llama3.2": { input: 0.0, output: 0.0 },
  // Paid custom providers get their real per-million-token rates:
  // $0.50 per 1M input tokens, $1.00 per 1M output tokens.
  "custom-model": { input: 0.5, output: 1.0 }
};

for (const [model, rates] of Object.entries(ollamaPricing)) {
  registerPricing("ollama", model, rates);
}
// Step 3: Set up tracking — warn (rather than block) once the
// $100/month cap is reached.
const guardOptions = {
  monthlyLimit: 100,
  mode: "warn"
};
createBudgetGuard(guardOptions);

// Intercept the global fetch so provider responses are metered
// automatically.
patchGlobalFetch();
// Step 4: Use your custom provider
/**
 * Run the example: call a local Ollama server through the patched
 * global fetch, print a snippet of the reply, then report the
 * tracked budget status.
 * @returns {Promise<void>}
 */
async function main() {
  console.log("🔥 TokenFirewall - Custom Provider Example\n");
  console.log("=".repeat(60) + "\n");

  try {
    console.log("📞 Calling custom Ollama provider...\n");

    const response = await fetch("http://localhost:11434/api/generate", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: "llama3.2",
        prompt: "Why is the sky blue?",
        stream: false
      })
    });

    // Fail fast on HTTP errors instead of trying to parse an error page.
    if (!response.ok) {
      throw new Error(`Ollama returned HTTP ${response.status}`);
    }

    const data = await response.json();

    // Guard against a malformed payload: the original called
    // data.response.substring(...) and threw a TypeError when the
    // `response` field was missing.
    const text = typeof data.response === "string" ? data.response : "";
    console.log("✅ Response:", text.slice(0, 100) + "...");

    // Check budget status
    const status = getBudgetStatus();
    console.log("\n📊 Budget Status:");
    console.log(`   Spent: $${status.totalSpent.toFixed(4)}`);
    console.log(`   Usage tracked even for free models!`);
  } catch (error) {
    // The original swallowed the error entirely; surface it so users can
    // distinguish "Ollama not running" from any other failure.
    console.log(`ℹ️  Error: ${error.message}`);
    console.log("ℹ️  Make sure Ollama is running: ollama serve");
    console.log("   Or replace with your own custom provider\n");
  }

  console.log("\n" + "=".repeat(60));
}

// Attach a rejection handler so a failure that escapes main()'s
// try/catch never becomes an unhandled promise rejection.
main().catch((err) => {
  console.error("Unexpected error:", err);
});
// 💡 Key Points:
// - Works with ANY LLM provider (self-hosted, new APIs, etc.)
// - detect() identifies your provider's response format
// - normalize() converts to standard { provider, model, inputTokens, outputTokens }
// - Set pricing to $0 for free/self-hosted models
// - Usage is still tracked and logged even at $0 cost
// 🔧 Other Custom Provider Examples:
//
// Example: Hugging Face Inference API
// registerAdapter({
// name: "huggingface",
// detect: (response) => response?.model && response?.usage,
// normalize: (response) => ({
// provider: "huggingface",
// model: response.model,
// inputTokens: response.usage.prompt_tokens || 0,
// outputTokens: response.usage.completion_tokens || 0,
// totalTokens: response.usage.total_tokens || 0
// })
// });
//
// Example: Azure OpenAI (if different format)
// registerAdapter({
// name: "azure-openai",
// detect: (response) => response?.id && response?.object === "chat.completion",
// normalize: (response) => ({
// provider: "azure-openai",
// model: response.model,
// inputTokens: response.usage.prompt_tokens || 0,
// outputTokens: response.usage.completion_tokens || 0,
// totalTokens: response.usage.total_tokens || 0
// })
// });