@@ -9,7 +9,7 @@
 import logging
 from collections import defaultdict

-DEFAULT_MAX_TOKENS = 8000
+DEFAULT_MAX_TOKENS = 4000


 def set_all_loggers_to_ERROR():
@@ -36,7 +36,7 @@ def set_all_loggers_to_ERROR():


 class LLMProvider:
-    DEFAULT_MODEL = "gpt-3.5-turbo-1106"
+    DEFAULT_MODEL = "gpt-4o-mini"
     DEFAULT_MAX_TOKENS = 4000
     DEFAULT_TEMPERATURE = 0
     DEFAULT_MODEL_CONFIG = {"model": DEFAULT_MODEL}
@@ -233,7 +233,12 @@ def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool:
             {"role": "user", "content": PROMPT},
         ]
         token_count = litellm.token_counter(model=self.model, messages=messages)
-        max_tokens = litellm.get_max_tokens(self.model)
+        if token_count is None:
+            token_count = litellm.token_counter(model=self.DEFAULT_MODEL, text=PROMPT)
+        try:
+            max_tokens = litellm.get_max_tokens(self.model)
+        except Exception:
+            max_tokens = DEFAULT_MAX_TOKENS
         if not max_tokens:
             max_tokens = DEFAULT_MAX_TOKENS
         return token_count <= max_tokens * percentage
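
Context for the try/except above: litellm.get_max_tokens() raises for models it has no cost/context mapping for, so the fallback keeps the token check usable with custom or unmapped model names. A minimal standalone sketch of that behaviour, assuming the custom model name below is hypothetical:

import litellm

DEFAULT_MAX_TOKENS = 4000

def safe_max_tokens(model: str) -> int:
    # get_max_tokens() raises when litellm has no mapping for the model,
    # so fall back to the module-level default instead of crashing.
    try:
        limit = litellm.get_max_tokens(model)
    except Exception:
        limit = DEFAULT_MAX_TOKENS
    return limit or DEFAULT_MAX_TOKENS  # also covers a None/0 result

print(safe_max_tokens("gpt-4o-mini"))          # mapped model: litellm's own limit
print(safe_max_tokens("my-org/custom-model"))  # unmapped (hypothetical name): 4000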
@@ -243,7 +248,10 @@ def available_tokens(
     ) -> int:
         if not model:
             model = self.model
-        max_tokens = litellm.get_max_tokens(model)
+        try:
+            max_tokens = litellm.get_max_tokens(model)
+        except Exception:
+            max_tokens = DEFAULT_MAX_TOKENS
         used_tokens = litellm.token_counter(model=model, text=message)
         if max_tokens:
             return int(max_tokens * percentage) - used_tokens
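
Rough arithmetic for the fallback path in available_tokens(); the token counts below are illustrative, real values come from the model's tokenizer via litellm.token_counter():

max_tokens = 4000   # fallback when litellm.get_max_tokens() raises for an unmapped model
used_tokens = 100   # e.g. litellm.token_counter(model=model, text=message)
percentage = 0.8

available = int(max_tokens * percentage) - used_tokens
print(available)    # 3100 tokens left for the response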