diff --git a/javascript/src/agents/judge/__tests__/judge-span-digest-formatter.test.ts b/javascript/src/agents/judge/__tests__/judge-span-digest-formatter.test.ts index 52177789..bb9488f7 100644 --- a/javascript/src/agents/judge/__tests__/judge-span-digest-formatter.test.ts +++ b/javascript/src/agents/judge/__tests__/judge-span-digest-formatter.test.ts @@ -1,5 +1,6 @@ import type { ReadableSpan } from "@opentelemetry/sdk-trace-base"; import { describe, it, expect } from "vitest"; +import { attributes } from "langwatch/observability"; import { JudgeSpanDigestFormatter } from "../judge-span-digest-formatter"; /** @@ -280,7 +281,7 @@ describe("JudgeSpanDigestFormatter", () => { startTime: [1700000000, 0], endTime: [1700000000, 100_000_000], attributes: { - "langwatch.thread.id": "thread-123", + [attributes.ATTR_LANGWATCH_THREAD_ID]: "thread-123", "langwatch.scenario.id": "scenario-456", "langwatch.scenario.name": "my-scenario", "relevant.attribute": "should-appear", diff --git a/javascript/src/agents/judge/judge-span-collector.ts b/javascript/src/agents/judge/judge-span-collector.ts index cd64e09e..326a4193 100644 --- a/javascript/src/agents/judge/judge-span-collector.ts +++ b/javascript/src/agents/judge/judge-span-collector.ts @@ -1,4 +1,5 @@ import { SpanProcessor, ReadableSpan } from "@opentelemetry/sdk-trace-base"; +import { attributes } from "langwatch/observability"; /** * Collects OpenTelemetry spans for judge evaluation. @@ -37,7 +38,7 @@ export class JudgeSpanCollector implements SpanProcessor { // Check if span or any ancestor belongs to thread const belongsToThread = (span: ReadableSpan): boolean => { - if (span.attributes["langwatch.thread.id"] === threadId) { + if (span.attributes[attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) { return true; } const parentId = span.parentSpanContext?.spanId; diff --git a/javascript/src/agents/judge/judge-span-digest-formatter.ts b/javascript/src/agents/judge/judge-span-digest-formatter.ts index 3b898b33..bec9a2a1 100644 --- a/javascript/src/agents/judge/judge-span-digest-formatter.ts +++ b/javascript/src/agents/judge/judge-span-digest-formatter.ts @@ -1,4 +1,5 @@ import type { ReadableSpan } from "@opentelemetry/sdk-trace-base"; +import { attributes } from "langwatch/observability"; import { deepTransform } from "./deep-transform"; import { StringDeduplicator } from "./string-deduplicator"; @@ -181,11 +182,17 @@ export class JudgeSpanDigestFormatter { const cleaned: Record = {}; const seen = new Set(); + const excludedKeys = [ + attributes.ATTR_LANGWATCH_THREAD_ID, + "langwatch.scenario.id", + "langwatch.scenario.name", + ]; + for (const [key, value] of Object.entries(attrs)) { - const cleanKey = key.replace(/^(langwatch)\./, ""); - if (["thread.id", "scenario.id", "scenario.name"].includes(cleanKey)) { + if (excludedKeys.includes(key)) { continue; } + const cleanKey = key.replace(/^(langwatch)\./, ""); if (!seen.has(cleanKey)) { seen.add(cleanKey); cleaned[cleanKey] = value; diff --git a/javascript/src/execution/scenario-execution.ts b/javascript/src/execution/scenario-execution.ts index 7ba110bc..310a03c4 100644 --- a/javascript/src/execution/scenario-execution.ts +++ b/javascript/src/execution/scenario-execution.ts @@ -2,6 +2,7 @@ import { context, type Span } from "@opentelemetry/api"; import { trace } from "@opentelemetry/api"; import { ModelMessage } from "ai"; import { getLangWatchTracer } from "langwatch"; +import { attributes } from "langwatch/observability"; import { filter, Observable, Subject } from "rxjs"; import { ScenarioExecutionState, @@ -538,7 +539,7 @@ export class ScenarioExecution implements ScenarioExecutionLike { agentSpanName, { attributes: { - "langwatch.thread.id": this.state.threadId, + [attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId, }, }, agentContext, @@ -1120,8 +1121,7 @@ export class ScenarioExecution implements ScenarioExecutionLike { attributes: { "scenario.name": this.config.name, "scenario.id": this.config.id, - "thread.id": this.state.threadId, - "langwatch.thread.id": this.state.threadId, + [attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId, "scenario.turn": this.state.currentTurn, }, }); diff --git a/python/scenario/_judge/judge_span_digest_formatter.py b/python/scenario/_judge/judge_span_digest_formatter.py index 459dcf34..1a5bb967 100644 --- a/python/scenario/_judge/judge_span_digest_formatter.py +++ b/python/scenario/_judge/judge_span_digest_formatter.py @@ -8,6 +8,7 @@ from datetime import datetime, timezone from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, cast +from langwatch.attributes import AttributeKey from opentelemetry.sdk.trace import ReadableSpan from opentelemetry.trace import StatusCode from opentelemetry.util.types import AttributeValue @@ -189,14 +190,20 @@ def _clean_attributes(self, attrs: Dict[str, Any]) -> Dict[str, Any]: cleaned: Dict[str, Any] = {} seen: set = set() + excluded_keys = [ + AttributeKey.LangWatchThreadId, + "langwatch.scenario.id", + "langwatch.scenario.name", + ] + for key, value in attrs.items(): + if key in excluded_keys: + continue clean_key = ( key.replace("langwatch.", "", 1) if key.startswith("langwatch.") else key ) - if clean_key in ["thread.id", "scenario.id", "scenario.name"]: - continue if clean_key not in seen: seen.add(clean_key) cleaned[clean_key] = value diff --git a/python/scenario/_tracing/judge_span_collector.py b/python/scenario/_tracing/judge_span_collector.py index 53fd7ffc..f244e8eb 100644 --- a/python/scenario/_tracing/judge_span_collector.py +++ b/python/scenario/_tracing/judge_span_collector.py @@ -8,6 +8,7 @@ from typing import List, Dict, Optional from opentelemetry.context import Context from opentelemetry.sdk.trace import SpanProcessor, ReadableSpan +from langwatch.attributes import AttributeKey class JudgeSpanCollector(SpanProcessor): @@ -65,7 +66,7 @@ def get_spans_for_thread(self, thread_id: str) -> List[ReadableSpan]: def belongs_to_thread(span: ReadableSpan) -> bool: """Check if span or any ancestor belongs to thread.""" attrs = span.attributes or {} - if attrs.get("langwatch.thread.id") == thread_id: + if attrs.get(AttributeKey.LangWatchThreadId) == thread_id: return True parent_ctx = span.parent diff --git a/python/scenario/scenario_executor.py b/python/scenario/scenario_executor.py index 01b7041f..74eca430 100644 --- a/python/scenario/scenario_executor.py +++ b/python/scenario/scenario_executor.py @@ -27,6 +27,7 @@ import concurrent.futures from scenario.config import ScenarioConfig +from langwatch.attributes import AttributeKey from scenario._utils import ( convert_agent_return_types_to_openai_messages, check_valid_return_type, @@ -531,7 +532,9 @@ async def _call_agent( with self._trace.span( type="agent", name=f"{agent.__class__.__name__}.call" ) as span: - span.set_attributes({"langwatch.thread.id": self._state.thread_id}) + span.set_attributes( + {AttributeKey.LangWatchThreadId: self._state.thread_id} + ) with show_spinner( text=( "Judging..." diff --git a/python/tests/test_judge_span_collector.py b/python/tests/test_judge_span_collector.py index d23ca9d4..26690ad9 100644 --- a/python/tests/test_judge_span_collector.py +++ b/python/tests/test_judge_span_collector.py @@ -2,6 +2,7 @@ import pytest from unittest.mock import MagicMock +from langwatch.attributes import AttributeKey from scenario._tracing.judge_span_collector import JudgeSpanCollector @@ -77,12 +78,12 @@ def test_returns_spans_with_matching_thread_id(self) -> None: span1 = create_mock_span( span_id=1, name="span1", - attributes={"langwatch.thread.id": "thread-123"}, + attributes={AttributeKey.LangWatchThreadId: "thread-123"}, ) span2 = create_mock_span( span_id=2, name="span2", - attributes={"langwatch.thread.id": "thread-456"}, + attributes={AttributeKey.LangWatchThreadId: "thread-456"}, ) collector.on_end(span1) collector.on_end(span2) @@ -98,7 +99,7 @@ def test_returns_empty_list_when_no_matches(self) -> None: span = create_mock_span( span_id=1, name="span1", - attributes={"langwatch.thread.id": "thread-123"}, + attributes={AttributeKey.LangWatchThreadId: "thread-123"}, ) collector.on_end(span) @@ -112,7 +113,7 @@ def test_returns_child_spans_of_matching_parent(self) -> None: parent = create_mock_span( span_id=1, name="parent", - attributes={"langwatch.thread.id": "thread-123"}, + attributes={AttributeKey.LangWatchThreadId: "thread-123"}, ) child = create_mock_span( span_id=2, @@ -136,7 +137,7 @@ def test_returns_deeply_nested_child_spans(self) -> None: grandparent = create_mock_span( span_id=1, name="grandparent", - attributes={"langwatch.thread.id": "thread-123"}, + attributes={AttributeKey.LangWatchThreadId: "thread-123"}, ) parent = create_mock_span( span_id=2, @@ -164,12 +165,12 @@ def test_excludes_unrelated_spans(self) -> None: span_a = create_mock_span( span_id=1, name="span_a", - attributes={"langwatch.thread.id": "thread-A"}, + attributes={AttributeKey.LangWatchThreadId: "thread-A"}, ) span_b = create_mock_span( span_id=2, name="span_b", - attributes={"langwatch.thread.id": "thread-B"}, + attributes={AttributeKey.LangWatchThreadId: "thread-B"}, ) collector.on_end(span_a) collector.on_end(span_b) diff --git a/python/tests/test_judge_span_digest_formatter.py b/python/tests/test_judge_span_digest_formatter.py index 14938921..a0875299 100644 --- a/python/tests/test_judge_span_digest_formatter.py +++ b/python/tests/test_judge_span_digest_formatter.py @@ -4,6 +4,7 @@ from unittest.mock import MagicMock import pytest +from langwatch.attributes import AttributeKey from opentelemetry.trace import StatusCode from scenario._judge.judge_span_digest_formatter import JudgeSpanDigestFormatter @@ -258,7 +259,7 @@ def test_excludes_internal_attributes(self) -> None: start_time=1700000000_000_000_000, end_time=1700000000_100_000_000, attributes={ - "langwatch.thread.id": "thread-123", + AttributeKey.LangWatchThreadId: "thread-123", "langwatch.scenario.id": "scenario-456", "langwatch.scenario.name": "my-scenario", "relevant.attribute": "should-appear",