1414 List ,
1515 Optional ,
1616 Sequence ,
17- Tuple ,
1817 Union ,
1918 cast ,
2019)
@@ -569,9 +568,14 @@ def _capture_generation(
569568 event_properties ["$ai_is_error" ] = True
570569 else :
571570 # Add usage
572- input_tokens , output_tokens = _parse_usage (output )
573- event_properties ["$ai_input_tokens" ] = input_tokens
574- event_properties ["$ai_output_tokens" ] = output_tokens
571+ usage = _parse_usage (output )
572+ event_properties ["$ai_input_tokens" ] = usage .input_tokens
573+ event_properties ["$ai_output_tokens" ] = usage .output_tokens
574+ event_properties ["$ai_cache_creation_input_tokens" ] = (
575+ usage .cache_write_tokens
576+ )
577+ event_properties ["$ai_cache_read_input_tokens" ] = usage .cache_read_tokens
578+ event_properties ["$ai_reasoning_tokens" ] = usage .reasoning_tokens
575579
576580 # Generation results
577581 generation_result = output .generations [- 1 ]
@@ -647,25 +651,42 @@ def _convert_message_to_dict(message: BaseMessage) -> Dict[str, Any]:
647651 return message_dict
648652
649653
@dataclass
class ModelUsage:
    """Token counts extracted from a single LLM response.

    Every field is optional and defaults to ``None``, so an "unknown usage"
    value can be built with ``ModelUsage()`` and individual counts filled in
    by keyword. Positional and fully-keyworded construction keep working
    exactly as before.
    """

    # Reported by the caller as the ``$ai_input_tokens`` event property.
    input_tokens: Optional[int] = None
    # Reported as ``$ai_output_tokens``.
    output_tokens: Optional[int] = None
    # Reported as ``$ai_cache_creation_input_tokens``.
    cache_write_tokens: Optional[int] = None
    # Reported as ``$ai_cache_read_input_tokens``.
    cache_read_tokens: Optional[int] = None
    # Reported as ``$ai_reasoning_tokens``.
    reasoning_tokens: Optional[int] = None
661+
662+
650663def _parse_usage_model (
651- usage : Union [BaseModel , Dict ],
652- ) -> Tuple [ Union [ int , None ], Union [ int , None ]] :
664+ usage : Union [BaseModel , dict ],
665+ ) -> ModelUsage :
653666 if isinstance (usage , BaseModel ):
654667 usage = usage .__dict__
655668
656669 conversion_list = [
657670 # https://pypi.org/project/langchain-anthropic/ (works also for Bedrock-Anthropic)
658671 ("input_tokens" , "input" ),
659672 ("output_tokens" , "output" ),
673+ ("cache_creation_input_tokens" , "cache_write" ),
674+ ("cache_read_input_tokens" , "cache_read" ),
660675 # https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
661676 ("prompt_token_count" , "input" ),
662677 ("candidates_token_count" , "output" ),
678+ ("cached_content_token_count" , "cache_read" ),
679+ ("thoughts_token_count" , "reasoning" ),
663680 # Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
664681 ("inputTokenCount" , "input" ),
665682 ("outputTokenCount" , "output" ),
683+ ("cacheCreationInputTokenCount" , "cache_write" ),
684+ ("cacheReadInputTokenCount" , "cache_read" ),
666685 # Bedrock Anthropic
667686 ("prompt_tokens" , "input" ),
668687 ("completion_tokens" , "output" ),
688+ ("cache_creation_input_tokens" , "cache_write" ),
689+ ("cache_read_input_tokens" , "cache_read" ),
669690 # langchain-ibm https://pypi.org/project/langchain-ibm/
670691 ("input_token_count" , "input" ),
671692 ("generated_token_count" , "output" ),
@@ -683,13 +704,45 @@ def _parse_usage_model(
683704
684705 parsed_usage [type_key ] = final_count
685706
686- return parsed_usage .get ("input" ), parsed_usage .get ("output" )
707+ # Caching (OpenAI & langchain 0.3.9+)
708+ if "input_token_details" in usage and isinstance (
709+ usage ["input_token_details" ], dict
710+ ):
711+ parsed_usage ["cache_write" ] = usage ["input_token_details" ].get ("cache_creation" )
712+ parsed_usage ["cache_read" ] = usage ["input_token_details" ].get ("cache_read" )
713+
714+ # Reasoning (OpenAI & langchain 0.3.9+)
715+ if "output_token_details" in usage and isinstance (
716+ usage ["output_token_details" ], dict
717+ ):
718+ parsed_usage ["reasoning" ] = usage ["output_token_details" ].get ("reasoning" )
719+
720+ field_mapping = {
721+ "input" : "input_tokens" ,
722+ "output" : "output_tokens" ,
723+ "cache_write" : "cache_write_tokens" ,
724+ "cache_read" : "cache_read_tokens" ,
725+ "reasoning" : "reasoning_tokens" ,
726+ }
727+ return ModelUsage (
728+ ** {
729+ dataclass_key : parsed_usage .get (mapped_key ) or 0
730+ for mapped_key , dataclass_key in field_mapping .items ()
731+ },
732+ )
687733
688734
689- def _parse_usage (response : LLMResult ):
735+ def _parse_usage (response : LLMResult ) -> ModelUsage :
690736 # langchain-anthropic uses the usage field
691737 llm_usage_keys = ["token_usage" , "usage" ]
692- llm_usage : Tuple [Union [int , None ], Union [int , None ]] = (None , None )
738+ llm_usage : ModelUsage = ModelUsage (
739+ input_tokens = None ,
740+ output_tokens = None ,
741+ cache_write_tokens = None ,
742+ cache_read_tokens = None ,
743+ reasoning_tokens = None ,
744+ )
745+
693746 if response .llm_output is not None :
694747 for key in llm_usage_keys :
695748 if response .llm_output .get (key ):
0 commit comments