feat(py): support metadata in target func (#1694)

baskaryan · web-flow · commit eb0892222713 · 2025-04-25T18:41:21.000Z
Add support for accessing example metadata in the target function:

```python
def target(inputs: dict, metadata: dict) -> dict: ...
```
diff --git a/python/langsmith/__init__.py b/python/langsmith/__init__.py
@@ -20,7 +20,7 @@
     from langsmith.utils import ContextThreadPoolExecutor
 
 # Avoid calling into importlib on every call to __version__
-__version__ = "0.3.34"
+__version__ = "0.3.35"
 version = __version__  # for backwards compatibility
 
 
diff --git a/python/langsmith/evaluation/_arunner.py b/python/langsmith/evaluation/_arunner.py
@@ -35,7 +35,7 @@
     _ExperimentManagerMixin,
     _extract_feedback_keys,
     _ForwardResults,
-    _include_attachments,
+    _get_target_args,
     _is_langchain_runnable,
     _load_examples_map,
     _load_experiment,
@@ -44,6 +44,7 @@
     _resolve_data,
     _resolve_evaluators,
     _resolve_experiment,
+    _target_include_attachments,
     _to_pandas,
     _wrap_summary_evaluators,
 )
@@ -464,6 +465,9 @@ async def _aevaluate(
         runs,
         client,
     )
+    num_include_attachments = int(
+        _target_include_attachments(target)
+    ) + _evaluators_include_attachments(evaluators)
     manager = await _AsyncExperimentManager(
         data,
         client=client,
@@ -472,14 +476,8 @@ async def _aevaluate(
         description=description,
         num_repetitions=num_repetitions,
         runs=runs,
-        include_attachments=_include_attachments(target)
-        or _evaluators_include_attachments(evaluators) > 0,
-        reuse_attachments=num_repetitions
-        * (
-            int(_include_attachments(target))
-            + _evaluators_include_attachments(evaluators)
-        )
-        > 1,
+        include_attachments=num_include_attachments > 0,
+        reuse_attachments=num_repetitions * num_include_attachments > 1,
         upload_results=upload_results,
     ).astart()
     cache_dir = ls_utils.get_cache_dir(None)
@@ -785,7 +783,7 @@ async def process_example(example: schemas.Example):
                 self.experiment_name,
                 self._metadata,
                 self.client,
-                _include_attachments(target),
+                _target_include_attachments(target),
             )
             example, run = pred["example"], pred["run"]
             result = await self._arun_evaluators(
@@ -842,7 +840,7 @@ async def awith_predictions(
         _experiment_results = self._apredict(
             target,
             max_concurrency=max_concurrency,
-            include_attachments=_include_attachments(target),
+            include_attachments=_target_include_attachments(target),
         )
         r1, r2 = aitertools.atee(_experiment_results, 2, lock=asyncio.Lock())
         return _AsyncExperimentManager(
@@ -1236,11 +1234,8 @@ def _get_run(r: run_trees.RunTree) -> None:
 
     with rh.tracing_context(enabled=True):
         try:
-            args = (
-                (example.inputs, example.attachments)
-                if include_attachments
-                else (example.inputs,)
-            )
+            arg_names = _get_target_args(fn)
+            args = [getattr(example, argn) for argn in arg_names]
             await fn(
                 *args,
                 langsmith_extra=rh.LangSmithExtra(
diff --git a/python/langsmith/evaluation/_runner.py b/python/langsmith/evaluation/_runner.py
@@ -1055,8 +1055,7 @@ def _evaluate(
         # If provided, we don't need to create a new experiment.
         runs=runs,
         # Create or resolve the experiment.
-        include_attachments=_include_attachments(target)
-        or _evaluators_include_attachments(evaluators) > 0,
+        include_attachments=_include_attachments(target, evaluators),
         upload_results=upload_results,
     ).start()
     cache_dir = ls_utils.get_cache_dir(None)
@@ -1459,7 +1458,7 @@ def with_predictions(
             self._predict,
             target,
             max_concurrency=max_concurrency,
-            include_attachments=_include_attachments(target),
+            include_attachments=_target_include_attachments(target),
         )
         r1, r2 = itertools.tee(_experiment_results, 2)
         return _ExperimentManager(
@@ -1901,15 +1900,10 @@ def _get_run(r: rt.RunTree) -> None:
             client=client,
         )
         try:
-            args = (
-                (example.inputs, example.attachments)
-                if include_attachments
-                else (example.inputs,)
-            )
-            fn(
-                *args,
-                langsmith_extra=langsmith_extra,
-            )
+            arg_names = _get_target_args(fn)
+            args = [getattr(example, argn) for argn in arg_names]
+            fn(*args, langsmith_extra=langsmith_extra)
+            # Reset attachment readers if attachments were used.
             if include_attachments and example.attachments is not None:
                 for attachment in example.attachments:
                     reader = example.attachments[attachment]["reader"]
@@ -1981,31 +1975,41 @@ def _ensure_traceable(
     return fn
 
 
-def _evaluators_include_attachments(
-    evaluators: Optional[Sequence[Union[EVALUATOR_T, AEVALUATOR_T]]],
-) -> int:
+def _include_attachments(target: Any, evaluators: Optional[Sequence]) -> bool:
+    return _target_include_attachments(target) or bool(
+        _evaluators_include_attachments(evaluators)
+    )
+
+
+def _evaluators_include_attachments(evaluators: Optional[Sequence]) -> int:
     if evaluators is None:
         return 0
 
-    def evaluator_uses_attachments(evaluator: Any) -> bool:
-        if not callable(evaluator):
-            return False
-        sig = inspect.signature(evaluator)
-        params = list(sig.parameters.values())
-        positional_params = [
-            p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
-        ]
-        return any(p.name == "attachments" for p in positional_params)
+    return sum(_evaluator_uses_attachments(e) for e in evaluators)
+
+
+def _evaluator_uses_attachments(evaluator: Any) -> bool:
+    if not callable(evaluator):
+        return False
+    sig = inspect.signature(evaluator)
+    params = list(sig.parameters.values())
+    positional_params = [
+        p for p in params if p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD)
+    ]
+    return any(p.name == "attachments" for p in positional_params)
+
 
-    return sum(evaluator_uses_attachments(e) for e in evaluators)
+def _target_include_attachments(target: Any) -> bool:
+    """Whether the target function accepts attachments."""
+    return "attachments" in _get_target_args(target)
 
 
-def _include_attachments(
-    target: Any,
-) -> bool:
+def _get_target_args(target: Any) -> list[str]:
     """Whether the target function accepts attachments."""
-    if _is_langchain_runnable(target) or not callable(target):
-        return False
+    if not callable(target):
+        return []
+    if _is_langchain_runnable(target):
+        return ["inputs"]
     # Check function signature
     sig = inspect.signature(target)
     params = list(sig.parameters.values())
@@ -2018,21 +2022,27 @@ def _include_attachments(
         raise ValueError(
             "Target function must accept at least one positional argument (inputs)."
         )
-    elif len(positional_no_default) > 2:
+    elif len(positional_no_default) > 3:
         raise ValueError(
-            "Target function must accept at most two "
-            "arguments without default values: (inputs, attachments)."
+            "Target function must accept at most three "
+            "arguments without default values: (inputs, attachments, metadata)."
+        )
+    elif len(positional_no_default) > 1 and {
+        p.name for p in positional_no_default
+    }.difference(["inputs", "attachments", "metadata"]):
+        raise ValueError(
+            "When passing multiple positional arguments without default values, they "
+            "must be named 'inputs', 'attachments', or 'metadata'. Received: "
+            f"{[p.name for p in positional_no_default]}"
         )
-    elif len(positional_no_default) == 2:
-        if [p.name for p in positional_no_default] != ["inputs", "attachments"]:
-            raise ValueError(
-                "When passing 2 positional arguments, they must be named "
-                "'inputs' and 'attachments', respectively. Received: "
-                f"{[p.name for p in positional_no_default]}"
-            )
-        return True
     else:
-        return [p.name for p in positional_params[:2]] == ["inputs", "attachments"]
+        args = []
+        for p in positional_params[:3]:
+            if p.name in {"inputs", "attachments", "metadata"}:
+                args.append(p.name)
+            else:
+                break
+        return args or ["inputs"]
 
 
 def _resolve_experiment(
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.3.34"
+version = "0.3.35"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <support@langchain.dev>"]
 license = "MIT"
diff --git a/python/tests/unit_tests/evaluation/test_runner.py b/python/tests/unit_tests/evaluation/test_runner.py