Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions ddtrace/llmobs/_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,14 @@
from ddtrace.constants import ERROR_MSG
from ddtrace.constants import ERROR_STACK
from ddtrace.constants import ERROR_TYPE
from ddtrace.ext import git
from ddtrace.internal.logger import get_logger
from ddtrace.llmobs._constants import DD_SITE_STAGING
from ddtrace.llmobs._constants import DD_SITES_NEEDING_APP_SUBDOMAIN
from ddtrace.llmobs._utils import _annotate_llmobs_span_data
from ddtrace.llmobs._utils import convert_tags_dict_to_list
from ddtrace.llmobs._utils import get_asyncio
from ddtrace.llmobs._utils import resolve_llmobs_git_metadata
from ddtrace.llmobs._utils import safe_json
from ddtrace.llmobs._utils import validate_tags_list
from ddtrace.version import __version__
Expand Down Expand Up @@ -1722,6 +1724,11 @@ def __init__(
self._tags["project_name"] = project_name
self._tags["dataset_name"] = dataset.name
self._tags["experiment_name"] = name
repository_url, commit_sha = resolve_llmobs_git_metadata()
if repository_url and git.REPOSITORY_URL not in self._tags:
self._tags[git.REPOSITORY_URL] = repository_url
if commit_sha and git.COMMIT_SHA not in self._tags:
self._tags[git.COMMIT_SHA] = commit_sha
self._config: dict[str, JSONType] = config or {}
# Write dataset tags to experiment config
if dataset.filter_tags:
Expand Down
9 changes: 9 additions & 0 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ddtrace.constants import ERROR_STACK
from ddtrace.constants import ERROR_TYPE
from ddtrace.ext import SpanTypes
from ddtrace.ext import git
from ddtrace.internal import atexit
from ddtrace.internal import core
from ddtrace.internal import forksafe
Expand Down Expand Up @@ -145,6 +146,7 @@
from ddtrace.llmobs._utils import get_llmobs_span_name
from ddtrace.llmobs._utils import get_llmobs_tags
from ddtrace.llmobs._utils import get_llmobs_trace_id
from ddtrace.llmobs._utils import resolve_llmobs_git_metadata
from ddtrace.llmobs._utils import resolve_ml_app
from ddtrace.llmobs._utils import safe_json
from ddtrace.llmobs._writer import LLMObsAPIClient
Expand Down Expand Up @@ -451,6 +453,8 @@ class LLMObs(Service):
enabled = False
_app_key: str = _env.get("DD_APP_KEY", "")
_project_name: str = _env.get("DD_LLMOBS_PROJECT_NAME", DEFAULT_PROJECT_NAME)
_git_repository_url: str = ""
_git_commit_sha: str = ""

def __init__(
self,
Expand Down Expand Up @@ -797,6 +801,7 @@ def enable(
config._dd_api_key = api_key or config._dd_api_key
cls._app_key = app_key or cls._app_key
cls._project_name = project_name or cls._project_name or DEFAULT_PROJECT_NAME
cls._git_repository_url, cls._git_commit_sha = resolve_llmobs_git_metadata()
config.env = env or config.env
config.service = service or config.service
config._llmobs_ml_app = ml_app or config._llmobs_ml_app
Expand Down Expand Up @@ -1937,6 +1942,10 @@ def _activate_llmobs_span(self, span: Span) -> None:
"ddtrace.version": __version__,
"language": "python",
}
if LLMObs._git_repository_url:
initial_tags[git.REPOSITORY_URL] = LLMObs._git_repository_url
if LLMObs._git_commit_sha:
initial_tags[git.COMMIT_SHA] = LLMObs._git_commit_sha
if session_id:
initial_tags["session_id"] = session_id
for baggage_key, tag_key in (
Expand Down
26 changes: 26 additions & 0 deletions ddtrace/llmobs/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

from ddtrace import config
from ddtrace.ext import SpanTypes
from ddtrace.ext import git as _git
from ddtrace.ext.ci import _filter_sensitive_info
from ddtrace.internal import gitmetadata
from ddtrace.internal.logger import get_logger
from ddtrace.internal.utils.formats import format_trace_id
from ddtrace.llmobs._constants import DEFAULT_PROMPT_NAME
Expand Down Expand Up @@ -45,6 +48,29 @@
ValidatedPromptDict = dict[str, Union[str, dict[str, Any], list[str], list[dict[str, str]], list[Message]]]


def resolve_llmobs_git_metadata() -> tuple[str, str]:
    """Resolve the git repository URL and commit SHA used to tag LLMObs data.

    Nothing is looked up when ``gitmetadata.config.enabled`` is falsy
    (``DD_TRACE_GIT_METADATA_ENABLED``). Otherwise the values reported by
    ``gitmetadata.get_git_tags()`` (``DD_GIT_*`` env vars or package
    ``Project-URL``) are used first, and any value that comes back empty is
    filled in by asking ``git`` about the current working directory.

    Returns:
        A ``(repository_url, commit_sha)`` pair.
    """
    if not gitmetadata.config.enabled:
        return "", ""

    url, sha, _ = gitmetadata.get_git_tags()

    # Each missing value is looked up on its own, so a partial result from
    # gitmetadata is kept and only the gap is filled. A failing lookup is
    # logged at debug level and leaves the value untouched.
    if not sha:
        try:
            sha = _git.extract_commit_sha()
        except Exception:
            log.debug("git fallback: extract_commit_sha failed", exc_info=True)

    if not url:
        try:
            # The URL obtained from git is passed through the sensitive-info filter.
            url = _filter_sensitive_info(_git.extract_repository_url()) or ""
        except Exception:
            log.debug("git fallback: extract_repository_url failed", exc_info=True)

    return url, sha


def get_asyncio():
# asyncio must NOT be imported at module level — this module is
# loaded at ddtrace.auto startup and an early asyncio import corrupts the
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
features:
- |
LLM Observability: automatically tags spans and experiments with ``git.commit.sha`` and
``git.repository_url``. Values come from ``DD_GIT_COMMIT_SHA`` / ``DD_GIT_REPOSITORY_URL``
or the main package's ``Project-URL`` metadata, falling back to running ``git`` against the
current working directory when those are empty (so notebooks and workstation runs work
out of the box). Honors ``DD_TRACE_GIT_METADATA_ENABLED``. User-supplied experiment tags
with the same keys take precedence.
110 changes: 110 additions & 0 deletions tests/llmobs/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4827,3 +4827,113 @@ def test_prepare_summary_evaluator_data_handles_none_metadata():
eval_results = [{"idx": 0, "evaluations": {"dummy_evaluator": {"value": True, "error": None}}}]
_, _, _, metadata_list, _ = exp._prepare_summary_evaluator_data(task_results, eval_results)
assert metadata_list == [{"experiment_config": {}}]


@pytest.mark.parametrize(
    "gitmetadata_tags,fallback_url,fallback_sha,expected",
    [
        # gitmetadata wins outright; fallback would not be consulted
        pytest.param(
            ("https://github.com/from-env", "envsha", ""),
            "ignored",
            "ignored",
            ("https://github.com/from-env", "envsha"),
            id="gitmetadata-wins",
        ),
        # gitmetadata empty, fallback supplies both
        pytest.param(
            ("", "", ""),
            "https://github.com/from-shell",
            "shellsha",
            ("https://github.com/from-shell", "shellsha"),
            id="fallback-only",
        ),
        # gitmetadata partial, fallback fills the missing field
        pytest.param(
            ("https://github.com/from-env", "", ""),
            "https://github.com/from-shell",
            "shellsha",
            ("https://github.com/from-env", "shellsha"),
            id="partial-merge",
        ),
    ],
)
def test_resolve_llmobs_git_metadata(gitmetadata_tags, fallback_url, fallback_sha, expected):
    """The resolver prefers gitmetadata values and only uses the git fallback for gaps."""
    from ddtrace.llmobs._utils import resolve_llmobs_git_metadata

    tags_patch = mock.patch("ddtrace.llmobs._utils.gitmetadata.get_git_tags", return_value=gitmetadata_tags)
    sha_patch = mock.patch("ddtrace.llmobs._utils._git.extract_commit_sha", return_value=fallback_sha)
    url_patch = mock.patch("ddtrace.llmobs._utils._git.extract_repository_url", return_value=fallback_url)

    with tags_patch, sha_patch, url_patch:
        resolved = resolve_llmobs_git_metadata()

    assert resolved == expected


def test_resolve_llmobs_git_metadata_returns_empty_when_fallback_fails():
    """When gitmetadata is empty and both git lookups raise, the resolver yields two empty strings."""
    from ddtrace.llmobs._utils import resolve_llmobs_git_metadata

    empty_tags = mock.patch("ddtrace.llmobs._utils.gitmetadata.get_git_tags", return_value=("", "", ""))
    failing_sha = mock.patch(
        "ddtrace.llmobs._utils._git.extract_commit_sha",
        side_effect=ValueError("not a git repo"),
    )
    failing_url = mock.patch(
        "ddtrace.llmobs._utils._git.extract_repository_url",
        side_effect=ValueError("not a git repo"),
    )

    with empty_tags, failing_sha, failing_url:
        resolved = resolve_llmobs_git_metadata()

    assert resolved == ("", "")


def test_resolve_llmobs_git_metadata_strips_url_credentials():
    """A repository URL obtained from the git fallback must not leak the embedded secret."""
    from ddtrace.llmobs._utils import resolve_llmobs_git_metadata

    empty_tags = mock.patch("ddtrace.llmobs._utils.gitmetadata.get_git_tags", return_value=("", "", ""))
    sha_patch = mock.patch("ddtrace.llmobs._utils._git.extract_commit_sha", return_value="abc")
    url_with_credentials = mock.patch(
        "ddtrace.llmobs._utils._git.extract_repository_url",
        return_value="https://x-token:secret@github.com/example/repo.git",
    )

    with empty_tags, sha_patch, url_with_credentials:
        resolved_url, _ = resolve_llmobs_git_metadata()

    assert "secret" not in resolved_url


def test_resolve_llmobs_git_metadata_honors_disable_flag():
    """Setting DD_TRACE_GIT_METADATA_ENABLED=false must suppress both the env-var read and the git-CLI fallback."""
    from ddtrace.llmobs._utils import resolve_llmobs_git_metadata

    disabled = mock.patch("ddtrace.llmobs._utils.gitmetadata.config.enabled", False)
    tags_patch = mock.patch("ddtrace.llmobs._utils.gitmetadata.get_git_tags")
    sha_patch = mock.patch("ddtrace.llmobs._utils._git.extract_commit_sha")
    url_patch = mock.patch("ddtrace.llmobs._utils._git.extract_repository_url")

    with disabled, tags_patch as gm_mock, sha_patch as sha_mock, url_patch as url_mock:
        resolved = resolve_llmobs_git_metadata()

    assert resolved == ("", "")
    # None of the lookups may have been attempted while the feature is disabled.
    for lookup in (gm_mock, sha_mock, url_mock):
        lookup.assert_not_called()


def test_experiment_tags_pick_up_resolver_output():
    """An Experiment built without git tags receives both values returned by the resolver."""
    dataset = _make_dataset_with_records([{"input_data": {"prompt": "hi"}}])
    resolver_patch = mock.patch(
        "ddtrace.llmobs._experiment.resolve_llmobs_git_metadata",
        return_value=("https://github.com/example/repo", "abc123"),
    )

    with resolver_patch:
        exp = Experiment(
            name="test",
            task=dummy_task,
            dataset=dataset,
            evaluators=[dummy_evaluator],
            project_name="test-project",
        )

    experiment_tags = exp._tags
    assert experiment_tags["git.commit.sha"] == "abc123"
    assert experiment_tags["git.repository_url"] == "https://github.com/example/repo"


def test_experiment_user_supplied_git_tags_take_precedence():
    """A git tag passed by the caller is kept; the resolver only fills the tag that was not supplied."""
    dataset = _make_dataset_with_records([{"input_data": {"prompt": "hi"}}])
    resolver_patch = mock.patch(
        "ddtrace.llmobs._experiment.resolve_llmobs_git_metadata",
        return_value=("https://github.com/example/repo", "abc123"),
    )

    with resolver_patch:
        exp = Experiment(
            name="test",
            task=dummy_task,
            dataset=dataset,
            evaluators=[dummy_evaluator],
            project_name="test-project",
            tags={"git.commit.sha": "user-override"},
        )

    experiment_tags = exp._tags
    assert experiment_tags["git.commit.sha"] == "user-override"
    assert experiment_tags["git.repository_url"] == "https://github.com/example/repo"
24 changes: 24 additions & 0 deletions tests/llmobs/test_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from textwrap import dedent
from typing import Optional
from unittest import mock

import pytest

Expand Down Expand Up @@ -74,6 +75,29 @@ def test_propagates_ignore_non_llmobs_spans(self, llmobs, tracer, test_spans):
assert get_llmobs_tags(span)["ml_app"] == "test-ml-app"


class TestGitMetadata:
    """Span-level checks for the git tags driven by the LLMObs class attributes."""

    def test_git_tags_set_on_span_when_available(self, llmobs, tracer):
        """Both git tags appear on the span when the class-level values are non-empty."""
        service_cls = llmobs._instance.__class__
        url_patch = mock.patch.object(service_cls, "_git_repository_url", "https://github.com/example/repo")
        sha_patch = mock.patch.object(service_cls, "_git_commit_sha", "abc123def456")

        with url_patch, sha_patch:
            with llmobs.workflow("root_llm_span") as span:
                pass
            span_tags = get_llmobs_tags(span)

        assert span_tags["git.commit.sha"] == "abc123def456"
        assert span_tags["git.repository_url"] == "https://github.com/example/repo"

    def test_git_tags_absent_when_unavailable(self, llmobs, tracer):
        """Neither git tag is written when the class-level values are empty strings."""
        service_cls = llmobs._instance.__class__

        with (
            mock.patch.object(service_cls, "_git_repository_url", ""),
            mock.patch.object(service_cls, "_git_commit_sha", ""),
        ):
            with llmobs.workflow("root_llm_span") as span:
                pass
            span_tags = get_llmobs_tags(span)

        for absent_key in ("git.commit.sha", "git.repository_url"):
            assert absent_key not in span_tags


def test_set_correct_parent_id(llmobs, tracer):
"""Test that the parent_id is set as the span_id of the nearest LLMObs span in the span's ancestor tree."""
with tracer.trace("root"):
Expand Down
Loading