def _strip_cycles(obj: Any, _seen: set[int] | None = None) -> Any:
    """Return a copy of ``obj`` that ``json.dumps`` can encode without a cycle error.

    ``json.dumps`` raises ``ValueError: Circular reference detected`` when a dict/list
    contains itself anywhere in its descendants. It raises the same error when its
    ``default`` hook hands back the very object it was given, which is what
    ``default_json`` does for anything that is not ``bytes``. Both situations are
    neutralised here so the fallback serialization cannot fail the same way twice.
    See pydantic/logfire#1881.

    Args:
        obj: The value to sanitise. Dicts, lists and tuples are walked recursively.
        _seen: Internal recursion state holding the ``id`` of every container that is
            currently being expanded. Callers should leave it as ``None``.

    Returns:
        A new structure in which:

        * dicts are rebuilt with the same keys (keys are passed through untouched);
        * lists and tuples are rebuilt as lists;
        * a container that is reached again while it is still being expanded
          (a back-edge) is replaced by ``safe_repr`` of that container;
        * ``None``, ``str``, ``int``, ``float``, ``bool`` and ``bytes`` leaves are
          returned as-is (``bytes`` is left for ``default_json`` to base64-encode);
        * any other leaf is replaced by its ``safe_repr`` string.
    """
    if _seen is None:
        _seen = set()

    if isinstance(obj, (dict, list, tuple)):
        obj_id = id(obj)
        if obj_id in _seen:
            # Back-edge: this container is one of our own ancestors.
            return safe_repr(obj)
        _seen.add(obj_id)
        try:
            if isinstance(obj, dict):
                return {k: _strip_cycles(v, _seen) for k, v in obj.items()}
            return [_strip_cycles(v, _seen) for v in obj]
        finally:
            # Only the current descent path is tracked, so a container that is merely
            # referenced twice (no loop) is copied each time instead of stringified.
            _seen.discard(obj_id)

    # bool is a subclass of int, so it is covered by the check below.
    if obj is None or isinstance(obj, (str, int, float, bytes)):
        return obj

    # Anything else would be returned unchanged by ``default_json`` and make
    # ``json.dumps`` raise "Circular reference detected" again.
    return safe_repr(obj)
def test_span_event_logger_with_circular_reference(exporter: TestExporter) -> None:
    """A ``body`` that contains a circular reference must still yield a span event.

    The payload below is a dict that references itself, standing in for the kind of
    self-looping structure reported in https://github.com/pydantic/logfire/issues/1881.
    ``json.dumps`` raises ``ValueError: Circular reference detected`` for such input;
    this test checks that ``SpanEventLogger.emit`` nevertheless records exactly one
    event whose body keeps the ordinary fields and turns the loop into a string.
    """
    from typing import Any as _Any

    from logfire._internal.integrations.google_genai import SpanEventLogger

    # File-like dict whose 'self' key points straight back at the dict.
    looped_part: dict[str, _Any] = {'name': 'files/abc123', 'mime_type': 'audio/wav'}
    looped_part['self'] = looped_part

    with logfire.span('test'):
        # emit() is expected to return normally rather than raise.
        SpanEventLogger('test_logger').emit(
            LogRecord(
                event_name='gen_ai.user.message',
                timestamp=2,
                severity_number=SeverityNumber.INFO,
                body={'content': looped_part, 'role': 'user'},
            )
        )

    exported = exporter.exported_spans_as_dict(parse_json_attributes=True)
    assert len(exported) == 1
    recorded_events = exported[0]['events']
    assert len(recorded_events) == 1

    only_event = recorded_events[0]
    assert only_event['name'] == 'gen_ai.user.message'

    payload = only_event['attributes']['event_body']
    assert payload['role'] == 'user'

    # Plain fields come through untouched.
    content = payload['content']
    assert content['name'] == 'files/abc123'
    assert content['mime_type'] == 'audio/wav'

    # One level of the loop is materialised; the reference back into it is a string.
    nested = content['self']
    assert nested['name'] == 'files/abc123'
    assert isinstance(nested['self'], str)