Skip to content

Commit 52df246

Browse files
authored
feat(ai): langchain cached and reasoning tokens (#258)
* fix: reasoning and cached tokens
* test: new flows
* fix: missing field
* chore: bump
* fix: make sure we send write/read/reasoning tokens
1 parent f1f9ecf commit 52df246

File tree

5 files changed

+328
-21
lines changed

5 files changed

+328
-21
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 4.9.0 - 2025-06-13
2+
3+
- feat(ai): track reasoning and cache tokens in the LangChain callback
4+
15
## 4.8.0 - 2025-06-10
26

37
- fix: export scoped, rather than tracked, decorator

posthog/ai/langchain/callbacks.py

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
List,
1515
Optional,
1616
Sequence,
17-
Tuple,
1817
Union,
1918
cast,
2019
)
@@ -569,9 +568,14 @@ def _capture_generation(
569568
event_properties["$ai_is_error"] = True
570569
else:
571570
# Add usage
572-
input_tokens, output_tokens = _parse_usage(output)
573-
event_properties["$ai_input_tokens"] = input_tokens
574-
event_properties["$ai_output_tokens"] = output_tokens
571+
usage = _parse_usage(output)
572+
event_properties["$ai_input_tokens"] = usage.input_tokens
573+
event_properties["$ai_output_tokens"] = usage.output_tokens
574+
event_properties["$ai_cache_creation_input_tokens"] = (
575+
usage.cache_write_tokens
576+
)
577+
event_properties["$ai_cache_read_input_tokens"] = usage.cache_read_tokens
578+
event_properties["$ai_reasoning_tokens"] = usage.reasoning_tokens
575579

576580
# Generation results
577581
generation_result = output.generations[-1]
@@ -647,25 +651,42 @@ def _convert_message_to_dict(message: BaseMessage) -> Dict[str, Any]:
647651
return message_dict
648652

649653

@dataclass
class ModelUsage:
    """Normalized token-usage counts parsed from a provider's LLM response.

    Each field is ``None`` when the provider did not report that counter.
    """

    # Tokens consumed by the prompt/input.
    input_tokens: Optional[int]
    # Tokens produced in the completion/output.
    output_tokens: Optional[int]
    # Tokens written to the provider's prompt cache (cache creation).
    cache_write_tokens: Optional[int]
    # Tokens served from the provider's prompt cache.
    cache_read_tokens: Optional[int]
    # Hidden reasoning/"thinking" tokens reported by the model.
    reasoning_tokens: Optional[int]
650663
def _parse_usage_model(
651-
usage: Union[BaseModel, Dict],
652-
) -> Tuple[Union[int, None], Union[int, None]]:
664+
usage: Union[BaseModel, dict],
665+
) -> ModelUsage:
653666
if isinstance(usage, BaseModel):
654667
usage = usage.__dict__
655668

656669
conversion_list = [
657670
# https://pypi.org/project/langchain-anthropic/ (works also for Bedrock-Anthropic)
658671
("input_tokens", "input"),
659672
("output_tokens", "output"),
673+
("cache_creation_input_tokens", "cache_write"),
674+
("cache_read_input_tokens", "cache_read"),
660675
# https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/get-token-count
661676
("prompt_token_count", "input"),
662677
("candidates_token_count", "output"),
678+
("cached_content_token_count", "cache_read"),
679+
("thoughts_token_count", "reasoning"),
663680
# Bedrock: https://docs.aws.amazon.com/bedrock/latest/userguide/monitoring-cw.html#runtime-cloudwatch-metrics
664681
("inputTokenCount", "input"),
665682
("outputTokenCount", "output"),
683+
("cacheCreationInputTokenCount", "cache_write"),
684+
("cacheReadInputTokenCount", "cache_read"),
666685
# Bedrock Anthropic
667686
("prompt_tokens", "input"),
668687
("completion_tokens", "output"),
688+
("cache_creation_input_tokens", "cache_write"),
689+
("cache_read_input_tokens", "cache_read"),
669690
# langchain-ibm https://pypi.org/project/langchain-ibm/
670691
("input_token_count", "input"),
671692
("generated_token_count", "output"),
@@ -683,13 +704,45 @@ def _parse_usage_model(
683704

684705
parsed_usage[type_key] = final_count
685706

686-
return parsed_usage.get("input"), parsed_usage.get("output")
707+
# Caching (OpenAI & langchain 0.3.9+)
708+
if "input_token_details" in usage and isinstance(
709+
usage["input_token_details"], dict
710+
):
711+
parsed_usage["cache_write"] = usage["input_token_details"].get("cache_creation")
712+
parsed_usage["cache_read"] = usage["input_token_details"].get("cache_read")
713+
714+
# Reasoning (OpenAI & langchain 0.3.9+)
715+
if "output_token_details" in usage and isinstance(
716+
usage["output_token_details"], dict
717+
):
718+
parsed_usage["reasoning"] = usage["output_token_details"].get("reasoning")
719+
720+
field_mapping = {
721+
"input": "input_tokens",
722+
"output": "output_tokens",
723+
"cache_write": "cache_write_tokens",
724+
"cache_read": "cache_read_tokens",
725+
"reasoning": "reasoning_tokens",
726+
}
727+
return ModelUsage(
728+
**{
729+
dataclass_key: parsed_usage.get(mapped_key) or 0
730+
for mapped_key, dataclass_key in field_mapping.items()
731+
},
732+
)
687733

688734

689-
def _parse_usage(response: LLMResult):
735+
def _parse_usage(response: LLMResult) -> ModelUsage:
690736
# langchain-anthropic uses the usage field
691737
llm_usage_keys = ["token_usage", "usage"]
692-
llm_usage: Tuple[Union[int, None], Union[int, None]] = (None, None)
738+
llm_usage: ModelUsage = ModelUsage(
739+
input_tokens=None,
740+
output_tokens=None,
741+
cache_write_tokens=None,
742+
cache_read_tokens=None,
743+
reasoning_tokens=None,
744+
)
745+
693746
if response.llm_output is not None:
694747
for key in llm_usage_keys:
695748
if response.llm_output.get(key):

0 commit comments

Comments
 (0)