Skip to content

Commit 4aeb734

Browse files
authored
feat(python): Populate example metadata in pytest, prefix experiment runs with ls_example (#1840)
1 parent 1c586d6 commit 4aeb734

File tree

4 files changed

+67
-11
lines changed

4 files changed

+67
-11
lines changed

python/langsmith/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
# Avoid calling into importlib on every call to __version__
2323

24-
__version__ = "0.4.4"
24+
__version__ = "0.4.5"
2525
version = __version__ # for backwards compatibility
2626

2727

python/langsmith/testing/_internal.py

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ def test_with_expected_output(some_input: str, expected_output: str):
301301
client=kwargs.pop("client", None),
302302
test_suite_name=kwargs.pop("test_suite_name", None),
303303
cache=ls_utils.get_cache_dir(kwargs.pop("cache", None)),
304+
metadata=kwargs.pop("metadata", None),
304305
)
305306
if kwargs:
306307
warnings.warn(f"Unexpected keyword arguments: {kwargs.keys()}")
@@ -648,6 +649,7 @@ def end_run(
648649
example_id,
649650
outputs,
650651
reference_outputs,
652+
metadata,
651653
pytest_plugin=None,
652654
pytest_nodeid=None,
653655
) -> Future:
@@ -657,6 +659,7 @@ def end_run(
657659
example_id=example_id,
658660
outputs=outputs,
659661
reference_outputs=reference_outputs,
662+
metadata=metadata,
660663
pytest_plugin=pytest_plugin,
661664
pytest_nodeid=pytest_nodeid,
662665
)
@@ -667,12 +670,18 @@ def _end_run(
667670
example_id,
668671
outputs,
669672
reference_outputs,
673+
metadata,
670674
pytest_plugin,
671675
pytest_nodeid,
672676
) -> None:
673677
# TODO: remove this hack so that run durations are correct
674678
# Ensure example is fully updated
675-
self.sync_example(example_id, inputs=run_tree.inputs, outputs=reference_outputs)
679+
self.sync_example(
680+
example_id,
681+
inputs=run_tree.inputs,
682+
outputs=reference_outputs,
683+
metadata=metadata,
684+
)
676685
run_tree.end(outputs=outputs)
677686
run_tree.patch()
678687

@@ -683,6 +692,7 @@ def __init__(
683692
test_suite: _LangSmithTestSuite,
684693
example_id: uuid.UUID,
685694
run_id: uuid.UUID,
695+
metadata: Optional[dict] = None,
686696
pytest_plugin: Any = None,
687697
pytest_nodeid: Any = None,
688698
inputs: Optional[dict] = None,
@@ -691,6 +701,7 @@ def __init__(
691701
self.test_suite = test_suite
692702
self.example_id = example_id
693703
self.run_id = run_id
704+
self.metadata = metadata
694705
self.pytest_plugin = pytest_plugin
695706
self.pytest_nodeid = pytest_nodeid
696707
self.inputs = inputs
@@ -714,6 +725,7 @@ def sync_example(
714725
self.example_id,
715726
inputs=inputs,
716727
outputs=outputs,
728+
metadata=self.metadata,
717729
pytest_plugin=self.pytest_plugin,
718730
pytest_nodeid=self.pytest_nodeid,
719731
)
@@ -783,6 +795,7 @@ def end_run(self, run_tree, outputs: Any) -> None:
783795
self.example_id,
784796
outputs,
785797
reference_outputs=self._logged_reference_outputs,
798+
metadata=self.metadata,
786799
pytest_plugin=self.pytest_plugin,
787800
pytest_nodeid=self.pytest_nodeid,
788801
)
@@ -797,14 +810,7 @@ class _UTExtra(TypedDict, total=False):
797810
output_keys: Optional[Sequence[str]]
798811
test_suite_name: Optional[str]
799812
cache: Optional[str]
800-
801-
802-
def _get_test_repr(func: Callable, sig: inspect.Signature) -> str:
803-
name = getattr(func, "__name__", None) or ""
804-
description = getattr(func, "__doc__", None) or ""
805-
if description:
806-
description = f" - {description.strip()}"
807-
return f"{name}{sig}{description}"
813+
metadata: Optional[dict]
808814

809815

810816
def _create_test_case(
@@ -816,6 +822,7 @@ def _create_test_case(
816822
) -> _TestCase:
817823
client = langtest_extra["client"] or rt.get_cached_client()
818824
output_keys = langtest_extra["output_keys"]
825+
metadata = langtest_extra["metadata"]
819826
signature = inspect.signature(func)
820827
inputs = rh._get_inputs_safe(signature, *args, **kwargs) or None
821828
outputs = None
@@ -850,6 +857,7 @@ def _create_test_case(
850857
test_suite,
851858
example_id,
852859
run_id=uuid.uuid4(),
860+
metadata=metadata,
853861
inputs=inputs,
854862
reference_outputs=outputs,
855863
pytest_plugin=pytest_plugin,
@@ -881,6 +889,14 @@ def _test():
881889
run_id=test_case.run_id,
882890
reference_example_id=test_case.example_id,
883891
inputs=test_case.inputs,
892+
metadata={
893+
# Experiment run metadata is prefixed with "ls_example_" in
894+
# the ingest backend, but we must reproduce this behavior here
895+
# because the example may not have been created before the trace
896+
# starts.
897+
f"ls_example_{k}": v
898+
for k, v in (test_case.metadata or {}).items()
899+
},
884900
project_name=test_case.test_suite.name,
885901
exceptions_to_handle=(SkipException,),
886902
_end_on_exit=False,
@@ -950,6 +966,14 @@ async def _test():
950966
run_id=test_case.run_id,
951967
reference_example_id=test_case.example_id,
952968
inputs=test_case.inputs,
969+
metadata={
970+
# Experiment run metadata is prefixed with "ls_example_" in
971+
# the ingest backend, but we must reproduce this behavior here
972+
# because the example may not have been created before the trace
973+
# starts.
974+
f"ls_example_{k}": v
975+
for k, v in (test_case.metadata or {}).items()
976+
},
953977
project_name=test_case.test_suite.name,
954978
exceptions_to_handle=(SkipException,),
955979
_end_on_exit=False,

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "langsmith"
3-
version = "0.4.4"
3+
version = "0.4.5"
44
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
55
authors = ["LangChain <support@langchain.dev>"]
66
license = "MIT"

python/tests/evaluation/test_decorator.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,3 +131,35 @@ def test_log_langchain_outputs() -> None:
131131

132132
t.log_inputs({"question": "foo"})
133133
t.log_outputs({"answer": AIMessage("bar")})
134+
135+
136+
@pytest.mark.langsmith(
137+
metadata={"test_type": "metadata_test", "custom_key": "custom_value"}
138+
)
139+
def test_metadata_parameter():
140+
"""Test that metadata parameter is properly passed to the decorator."""
141+
x = 5
142+
y = 10
143+
t.log_inputs({"x": x, "y": y})
144+
145+
result = x + y
146+
t.log_outputs({"sum": result})
147+
t.log_reference_outputs({"sum": 15})
148+
149+
assert result == 15
150+
151+
152+
@pytest.mark.langsmith(
153+
metadata={"test_type": "metadata_test_async", "custom_key": "custom_value_async"}
154+
)
155+
async def test_metadata_parameter_async():
156+
"""Test that metadata parameter is properly passed to the decorator."""
157+
x = 5
158+
y = 10
159+
t.log_inputs({"x": x, "y": y})
160+
161+
result = x + y
162+
t.log_outputs({"sum": result})
163+
t.log_reference_outputs({"sum": 15})
164+
165+
assert result == 15

0 commit comments

Comments
 (0)