Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { ReadableSpan } from "@opentelemetry/sdk-trace-base";
import { describe, it, expect } from "vitest";
import { attributes } from "langwatch/observability";
import { JudgeSpanDigestFormatter } from "../judge-span-digest-formatter";

/**
Expand Down Expand Up @@ -280,7 +281,7 @@ describe("JudgeSpanDigestFormatter", () => {
startTime: [1700000000, 0],
endTime: [1700000000, 100_000_000],
attributes: {
"langwatch.thread.id": "thread-123",
[attributes.ATTR_LANGWATCH_THREAD_ID]: "thread-123",
"langwatch.scenario.id": "scenario-456",
"langwatch.scenario.name": "my-scenario",
"relevant.attribute": "should-appear",
Expand Down
3 changes: 2 additions & 1 deletion javascript/src/agents/judge/judge-span-collector.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SpanProcessor, ReadableSpan } from "@opentelemetry/sdk-trace-base";
import { attributes } from "langwatch/observability";

/**
* Collects OpenTelemetry spans for judge evaluation.
Expand Down Expand Up @@ -37,7 +38,7 @@ export class JudgeSpanCollector implements SpanProcessor {

// Check if span or any ancestor belongs to thread
const belongsToThread = (span: ReadableSpan): boolean => {
if (span.attributes["langwatch.thread.id"] === threadId) {
if (span.attributes[attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
return true;
}
const parentId = span.parentSpanContext?.spanId;
Expand Down
11 changes: 9 additions & 2 deletions javascript/src/agents/judge/judge-span-digest-formatter.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { ReadableSpan } from "@opentelemetry/sdk-trace-base";
import { attributes } from "langwatch/observability";

import { deepTransform } from "./deep-transform";
import { StringDeduplicator } from "./string-deduplicator";
Expand Down Expand Up @@ -181,11 +182,17 @@ export class JudgeSpanDigestFormatter {
const cleaned: Record<string, unknown> = {};
const seen = new Set<string>();

const excludedKeys = [
attributes.ATTR_LANGWATCH_THREAD_ID,
"langwatch.scenario.id",
"langwatch.scenario.name",
];

for (const [key, value] of Object.entries(attrs)) {
const cleanKey = key.replace(/^(langwatch)\./, "");
if (["thread.id", "scenario.id", "scenario.name"].includes(cleanKey)) {
if (excludedKeys.includes(key)) {
continue;
}
const cleanKey = key.replace(/^(langwatch)\./, "");
if (!seen.has(cleanKey)) {
seen.add(cleanKey);
cleaned[cleanKey] = value;
Expand Down
6 changes: 3 additions & 3 deletions javascript/src/execution/scenario-execution.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { context, type Span } from "@opentelemetry/api";
import { trace } from "@opentelemetry/api";
import { ModelMessage } from "ai";
import { getLangWatchTracer } from "langwatch";
import { attributes } from "langwatch/observability";
import { filter, Observable, Subject } from "rxjs";
import {
ScenarioExecutionState,
Expand Down Expand Up @@ -538,7 +539,7 @@ export class ScenarioExecution implements ScenarioExecutionLike {
agentSpanName,
{
attributes: {
"langwatch.thread.id": this.state.threadId,
[attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
},
},
agentContext,
Expand Down Expand Up @@ -1120,8 +1121,7 @@ export class ScenarioExecution implements ScenarioExecutionLike {
attributes: {
"scenario.name": this.config.name,
"scenario.id": this.config.id,
"thread.id": this.state.threadId,
"langwatch.thread.id": this.state.threadId,
[attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
"scenario.turn": this.state.currentTurn,
},
});
Expand Down
11 changes: 9 additions & 2 deletions python/scenario/_judge/judge_span_digest_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from datetime import datetime, timezone
from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, cast

from langwatch.attributes import AttributeKey
from opentelemetry.sdk.trace import ReadableSpan
from opentelemetry.trace import StatusCode
from opentelemetry.util.types import AttributeValue
Expand Down Expand Up @@ -189,14 +190,20 @@ def _clean_attributes(self, attrs: Dict[str, Any]) -> Dict[str, Any]:
cleaned: Dict[str, Any] = {}
seen: set = set()

excluded_keys = [
AttributeKey.LangWatchThreadId,
"langwatch.scenario.id",
"langwatch.scenario.name",
]

for key, value in attrs.items():
if key in excluded_keys:
continue
clean_key = (
key.replace("langwatch.", "", 1)
if key.startswith("langwatch.")
else key
)
if clean_key in ["thread.id", "scenario.id", "scenario.name"]:
continue
if clean_key not in seen:
seen.add(clean_key)
cleaned[clean_key] = value
Expand Down
3 changes: 2 additions & 1 deletion python/scenario/_tracing/judge_span_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import List, Dict, Optional
from opentelemetry.context import Context
from opentelemetry.sdk.trace import SpanProcessor, ReadableSpan
from langwatch.attributes import AttributeKey


class JudgeSpanCollector(SpanProcessor):
Expand Down Expand Up @@ -65,7 +66,7 @@ def get_spans_for_thread(self, thread_id: str) -> List[ReadableSpan]:
def belongs_to_thread(span: ReadableSpan) -> bool:
"""Check if span or any ancestor belongs to thread."""
attrs = span.attributes or {}
if attrs.get("langwatch.thread.id") == thread_id:
if attrs.get(AttributeKey.LangWatchThreadId) == thread_id:
return True

parent_ctx = span.parent
Expand Down
5 changes: 4 additions & 1 deletion python/scenario/scenario_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import concurrent.futures

from scenario.config import ScenarioConfig
from langwatch.attributes import AttributeKey
from scenario._utils import (
convert_agent_return_types_to_openai_messages,
check_valid_return_type,
Expand Down Expand Up @@ -531,7 +532,9 @@ async def _call_agent(
with self._trace.span(
type="agent", name=f"{agent.__class__.__name__}.call"
) as span:
span.set_attributes({"langwatch.thread.id": self._state.thread_id})
span.set_attributes(
{AttributeKey.LangWatchThreadId: self._state.thread_id}
)
with show_spinner(
text=(
"Judging..."
Expand Down
15 changes: 8 additions & 7 deletions python/tests/test_judge_span_collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import pytest
from unittest.mock import MagicMock
from langwatch.attributes import AttributeKey
from scenario._tracing.judge_span_collector import JudgeSpanCollector


Expand Down Expand Up @@ -77,12 +78,12 @@ def test_returns_spans_with_matching_thread_id(self) -> None:
span1 = create_mock_span(
span_id=1,
name="span1",
attributes={"langwatch.thread.id": "thread-123"},
attributes={AttributeKey.LangWatchThreadId: "thread-123"},
)
span2 = create_mock_span(
span_id=2,
name="span2",
attributes={"langwatch.thread.id": "thread-456"},
attributes={AttributeKey.LangWatchThreadId: "thread-456"},
)
collector.on_end(span1)
collector.on_end(span2)
Expand All @@ -98,7 +99,7 @@ def test_returns_empty_list_when_no_matches(self) -> None:
span = create_mock_span(
span_id=1,
name="span1",
attributes={"langwatch.thread.id": "thread-123"},
attributes={AttributeKey.LangWatchThreadId: "thread-123"},
)
collector.on_end(span)

Expand All @@ -112,7 +113,7 @@ def test_returns_child_spans_of_matching_parent(self) -> None:
parent = create_mock_span(
span_id=1,
name="parent",
attributes={"langwatch.thread.id": "thread-123"},
attributes={AttributeKey.LangWatchThreadId: "thread-123"},
)
child = create_mock_span(
span_id=2,
Expand All @@ -136,7 +137,7 @@ def test_returns_deeply_nested_child_spans(self) -> None:
grandparent = create_mock_span(
span_id=1,
name="grandparent",
attributes={"langwatch.thread.id": "thread-123"},
attributes={AttributeKey.LangWatchThreadId: "thread-123"},
)
parent = create_mock_span(
span_id=2,
Expand Down Expand Up @@ -164,12 +165,12 @@ def test_excludes_unrelated_spans(self) -> None:
span_a = create_mock_span(
span_id=1,
name="span_a",
attributes={"langwatch.thread.id": "thread-A"},
attributes={AttributeKey.LangWatchThreadId: "thread-A"},
)
span_b = create_mock_span(
span_id=2,
name="span_b",
attributes={"langwatch.thread.id": "thread-B"},
attributes={AttributeKey.LangWatchThreadId: "thread-B"},
)
collector.on_end(span_a)
collector.on_end(span_b)
Expand Down
3 changes: 2 additions & 1 deletion python/tests/test_judge_span_digest_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unittest.mock import MagicMock

import pytest
from langwatch.attributes import AttributeKey
from opentelemetry.trace import StatusCode

from scenario._judge.judge_span_digest_formatter import JudgeSpanDigestFormatter
Expand Down Expand Up @@ -258,7 +259,7 @@ def test_excludes_internal_attributes(self) -> None:
start_time=1700000000_000_000_000,
end_time=1700000000_100_000_000,
attributes={
"langwatch.thread.id": "thread-123",
AttributeKey.LangWatchThreadId: "thread-123",
"langwatch.scenario.id": "scenario-456",
"langwatch.scenario.name": "my-scenario",
"relevant.attribute": "should-appear",
Expand Down