@@ -561,6 +561,7 @@ def _request_with_payment(self, endpoint: str, body: Dict[str, Any]) -> ChatResp
561561 # Auto-retry on transient server errors
562562 if response .status_code in (502 , 503 ):
563563 import time
564+
564565 time .sleep (1 )
565566 response = self ._client .post (url , json = body , headers = req_headers )
566567
@@ -665,6 +666,7 @@ def _handle_payment_and_retry(
665666 )
666667 if retry_response .status_code in (502 , 503 ):
667668 import time
669+
668670 time .sleep (1 )
669671 retry_response = self ._client .post (
670672 url , json = body , headers = payment_headers , timeout = request_timeout
@@ -696,6 +698,7 @@ def _handle_payment_and_retry(
696698
697699 # Save full response locally (cost log + response archive)
698700 from .cache import save_to_cache
701+
699702 save_to_cache ("/v1/chat/completions" , body , response_data , cost_usd = cost_usd )
700703
701704 return chat_response
@@ -723,6 +726,7 @@ def _request_with_payment_raw(self, endpoint: str, body: Dict[str, Any]) -> Dict
723726 # Auto-retry on transient server errors
724727 if response .status_code in (502 , 503 ):
725728 import time
729+
726730 time .sleep (1 )
727731 response = self ._client .post (url , json = body , headers = req_headers )
728732
@@ -806,6 +810,7 @@ def _handle_payment_and_retry_raw(
806810 )
807811 if retry_response .status_code in (502 , 503 ):
808812 import time
813+
809814 time .sleep (1 )
810815 retry_response = self ._client .post (
811816 url , json = body , headers = payment_headers , timeout = self .timeout
@@ -1522,6 +1527,7 @@ async def _request_with_payment(self, endpoint: str, body: Dict[str, Any]) -> Ch
15221527 # Auto-retry on transient server errors
15231528 if response .status_code in (502 , 503 ):
15241529 import asyncio
1530+
15251531 await asyncio .sleep (1 )
15261532 response = await self ._client .post (url , json = body , headers = req_headers )
15271533
@@ -1605,6 +1611,7 @@ async def _handle_payment_and_retry(
16051611 )
16061612 if retry_response .status_code in (502 , 503 ):
16071613 import asyncio
1614+
16081615 await asyncio .sleep (1 )
16091616 retry_response = await self ._client .post (
16101617 url , json = body , headers = payment_headers , timeout = request_timeout
@@ -1631,11 +1638,16 @@ async def _handle_payment_and_retry(
16311638 price_info = resp_body .get ("price" , {})
16321639 except Exception :
16331640 pass
1634- cost_usd = float (price_info .get ("amount" , 0 )) if price_info else float (details .get ("amount" , 0 )) / 1e6
1641+ cost_usd = (
1642+ float (price_info .get ("amount" , 0 ))
1643+ if price_info
1644+ else float (details .get ("amount" , 0 )) / 1e6
1645+ )
16351646 self ._last_call_cost = cost_usd
16361647
16371648 response_data = retry_response .json ()
16381649 from .cache import save_to_cache
1650+
16391651 save_to_cache ("/v1/chat/completions" , body , response_data , cost_usd = cost_usd )
16401652
16411653 return ChatResponse (** response_data )
@@ -1659,6 +1671,7 @@ async def _request_with_payment_raw(
16591671 # Auto-retry on transient server errors
16601672 if response .status_code in (502 , 503 ):
16611673 import asyncio
1674+
16621675 await asyncio .sleep (1 )
16631676 response = await self ._client .post (url , json = body , headers = req_headers )
16641677
@@ -1733,6 +1746,7 @@ async def _handle_payment_and_retry_raw(
17331746 )
17341747 if retry_response .status_code in (502 , 503 ):
17351748 import asyncio
1749+
17361750 await asyncio .sleep (1 )
17371751 retry_response = await self ._client .post (
17381752 url , json = body , headers = payment_headers , timeout = self .timeout
0 commit comments