@@ -41,7 +41,16 @@ function log(...args) {
4141}
// Default cap on generated tokens — presumably used when no per-model output
// history is available yet; TODO(review): confirm against createProxy usage.
const DEFAULT_MAX_TOKENS = 4096;
4343// Per-model last output tokens for adaptive max_tokens (avoids cross-request pollution)
// Cap on how many models we remember, so the map cannot grow without bound
// in a long-lived proxy process.
const MAX_TRACKED_MODELS = 50;
// Per-model last output tokens for adaptive max_tokens (avoids cross-request pollution)
const lastOutputByModel = new Map();

/**
 * Record the most recent output-token count for a model, evicting the
 * least-recently-updated entry once MAX_TRACKED_MODELS is reached.
 *
 * @param {string} model - model identifier used as the map key
 * @param {number} tokens - output token count observed in the latest response
 */
function trackOutputTokens(model, tokens) {
  // Evict only when inserting a NEW key would exceed the cap. Updating an
  // existing key does not grow the map, so evicting in that case would
  // needlessly drop an unrelated model's entry (the original bug).
  if (!lastOutputByModel.has(model) && lastOutputByModel.size >= MAX_TRACKED_MODELS) {
    const oldestKey = lastOutputByModel.keys().next().value;
    // Compare against undefined rather than truthiness so a '' key would
    // still be evicted; next().value is undefined only for an empty map,
    // which the size check above already rules out.
    if (oldestKey !== undefined) {
      lastOutputByModel.delete(oldestKey);
    }
  }
  // Delete-then-set refreshes Map insertion order, turning the FIFO cap
  // into LRU: a frequently updated model is never the eviction victim.
  lastOutputByModel.delete(model);
  lastOutputByModel.set(model, tokens);
}
4554// Model shortcuts for quick switching
4655const MODEL_SHORTCUTS = {
4756 // Routing profiles
@@ -388,7 +397,7 @@ export function createProxy(options) {
388397 const inputMatch = fullResponse . match ( / " i n p u t _ t o k e n s " \s * : \s * ( \d + ) / ) ;
389398 if ( lastOutputMatch ) {
390399 const outputTokens = parseInt ( lastOutputMatch [ 1 ] , 10 ) ;
391- lastOutputByModel . set ( finalModel , outputTokens ) ;
400+ trackOutputTokens ( finalModel , outputTokens ) ;
392401 const inputTokens = inputMatch
393402 ? parseInt ( inputMatch [ 1 ] , 10 )
394403 : 0 ;
@@ -419,7 +428,7 @@ export function createProxy(options) {
419428 const parsed = JSON . parse ( text ) ;
420429 if ( parsed . usage ?. output_tokens ) {
421430 const outputTokens = parsed . usage . output_tokens ;
422- lastOutputByModel . set ( finalModel , outputTokens ) ;
431+ trackOutputTokens ( finalModel , outputTokens ) ;
423432 const inputTokens = parsed . usage ?. input_tokens || 0 ;
424433 const latencyMs = Date . now ( ) - requestStartTime ;
425434 const cost = estimateCost ( finalModel , inputTokens , outputTokens ) ;
0 commit comments