feat(py,js): Allow logging feedback from nested traceables within pytest and jestlike tests (#1668)

jacoblee93 · web-flow · commit b2b57358634c · 2025-04-17T09:37:58.000-07:00
diff --git a/js/package.json b/js/package.json
@@ -1,6 +1,6 @@
 {
   "name": "langsmith",
-  "version": "0.3.17",
+  "version": "0.3.18",
   "description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
   "packageManager": "yarn@1.22.19",
   "files": [
diff --git a/js/src/index.ts b/js/src/index.ts
@@ -18,4 +18,4 @@ export { RunTree, type RunTreeConfig } from "./run_trees.js";
 export { overrideFetchImplementation } from "./singletons/fetch.js";
 
 // Update using yarn bump-version
-export const __version__ = "0.3.17";
+export const __version__ = "0.3.18";
diff --git a/js/src/tests/jestlike/jest.test.ts b/js/src/tests/jestlike/jest.test.ts
@@ -4,6 +4,7 @@ import { AsyncLocalStorage } from "node:async_hooks";
 import * as ls from "../../jest/index.js";
 import { type SimpleEvaluator } from "../../jest/index.js";
 import { _objectHash } from "../../utils/jestlike/index.js";
+import { traceable } from "../../traceable.js";
 
 const myEvaluator: SimpleEvaluator = (params) => {
   const { referenceOutputs, outputs } = params;
@@ -93,6 +94,28 @@ ls.describe(
       }
     );
 
+    ls.test(
+      "Logging feedback should work in nested traceable",
+      {
+        inputs: { nested: "nested" },
+        referenceOutputs: { nested: "nested" },
+      },
+      async ({ inputs: _inputs, referenceOutputs: _referenceOutputs }) => {
+        const myApp = () => {
+          return { bar: "goodval" };
+        };
+        const res = myApp();
+        const nested = traceable(() => {
+          ls.logFeedback({
+            key: "nested",
+            score: 0.8,
+          });
+        });
+        await nested();
+        return res;
+      }
+    );
+
     ls.test(
       "Should fail with some defined evaluator",
       { inputs: { foo: "bad" }, referenceOutputs: { baz: "qux" } },
diff --git a/js/src/utils/jestlike/globals.ts b/js/src/utils/jestlike/globals.ts
@@ -20,6 +20,7 @@ export type TestWrapperAsyncLocalStorageData = {
   client: Client;
   suiteUuid: string;
   suiteName: string;
+  testRootRunTree?: RunTree;
 };
 
 export const testWrapperAsyncLocalStorageInstance =
diff --git a/js/src/utils/jestlike/index.ts b/js/src/utils/jestlike/index.ts
@@ -69,7 +69,7 @@ export function logFeedback(
     exampleId: context.currentExample.id,
     feedback: feedback,
     context,
-    runTree: trackingEnabled(context) ? getCurrentRunTree() : undefined,
+    runTree: context.testRootRunTree,
     client: context.client,
   });
 }
@@ -491,59 +491,72 @@ export function generateWrapperFromJestlikeMethods(
             };
             let exampleId: string;
             const runTestFn = async () => {
-              const testContext =
-                testWrapperAsyncLocalStorageInstance.getStore();
+              let testContext = testWrapperAsyncLocalStorageInstance.getStore();
               if (testContext === undefined) {
                 throw new Error(
                   "Could not identify test context. Please contact us for help."
                 );
               }
-              try {
-                const res = await testFn({
-                  ...rest,
-                  inputs: testInput,
-                  referenceOutputs: testOutput,
-                });
-                _logTestFeedback({
-                  exampleId,
-                  feedback: { key: "pass", score: true },
-                  context: testContext,
-                  runTree: trackingEnabled(testContext)
+              return testWrapperAsyncLocalStorageInstance.run(
+                {
+                  ...testContext,
+                  testRootRunTree: trackingEnabled(testContext)
                     ? getCurrentRunTree()
                     : undefined,
-                  client: testContext.client,
-                });
-                if (res != null) {
-                  if (loggedOutput !== undefined) {
-                    console.warn(
-                      `[WARN]: Returned value from test function will override output set by previous "logOutputs()" call.`
+                },
+                async () => {
+                  testContext = testWrapperAsyncLocalStorageInstance.getStore();
+                  if (testContext === undefined) {
+                    throw new Error(
+                      "Could not identify test context after setting test root run tree. Please contact us for help."
                     );
                   }
-                  loggedOutput =
-                    typeof res === "object"
-                      ? (res as Record<string, unknown>)
-                      : { result: res };
+                  try {
+                    const res = await testFn({
+                      ...rest,
+                      inputs: testInput,
+                      referenceOutputs: testOutput,
+                    });
+                    _logTestFeedback({
+                      exampleId,
+                      feedback: { key: "pass", score: true },
+                      context: testContext,
+                      runTree: testContext.testRootRunTree,
+                      client: testContext.client,
+                    });
+                    if (res != null) {
+                      if (loggedOutput !== undefined) {
+                        console.warn(
+                          `[WARN]: Returned value from test function will override output set by previous "logOutputs()" call.`
+                        );
+                      }
+                      loggedOutput =
+                        typeof res === "object"
+                          ? (res as Record<string, unknown>)
+                          : { result: res };
+                    }
+                    return loggedOutput;
+                  } catch (e: any) {
+                    _logTestFeedback({
+                      exampleId,
+                      feedback: { key: "pass", score: false },
+                      context: testContext,
+                      runTree: testContext.testRootRunTree,
+                      client: testContext.client,
+                    });
+                    const rawError = e;
+                    const strippedErrorMessage = e.message.replace(
+                      STRIP_ANSI_REGEX,
+                      ""
+                    );
+                    const langsmithFriendlyError = new Error(
+                      strippedErrorMessage
+                    );
+                    (langsmithFriendlyError as any).rawJestError = rawError;
+                    throw langsmithFriendlyError;
+                  }
                 }
-                return loggedOutput;
-              } catch (e: any) {
-                _logTestFeedback({
-                  exampleId,
-                  feedback: { key: "pass", score: false },
-                  context: testContext,
-                  runTree: trackingEnabled(testContext)
-                    ? getCurrentRunTree()
-                    : undefined,
-                  client: testContext.client,
-                });
-                const rawError = e;
-                const strippedErrorMessage = e.message.replace(
-                  STRIP_ANSI_REGEX,
-                  ""
-                );
-                const langsmithFriendlyError = new Error(strippedErrorMessage);
-                (langsmithFriendlyError as any).rawJestError = rawError;
-                throw langsmithFriendlyError;
-              }
+              );
             };
             try {
               if (trackingEnabled(context)) {
diff --git a/python/langsmith/testing/_internal.py b/python/langsmith/testing/_internal.py
@@ -695,9 +695,10 @@ def __init__(
         self.run_id = run_id
         self.pytest_plugin = pytest_plugin
         self.pytest_nodeid = pytest_nodeid
-        self._logged_reference_outputs: Optional[dict] = None
         self.inputs = inputs
         self.reference_outputs = reference_outputs
+        self._logged_reference_outputs: Optional[dict] = None
+        self._logged_outputs: Optional[dict] = None
 
         if pytest_plugin and pytest_nodeid:
             pytest_plugin.add_process_to_test_suite(
@@ -738,6 +739,7 @@ def log_inputs(self, inputs: dict) -> None:
             )
 
     def log_outputs(self, outputs: dict) -> None:
+        self._logged_outputs = outputs
         if self.pytest_plugin and self.pytest_nodeid:
             self.pytest_plugin.update_process_status(
                 self.pytest_nodeid, {"outputs": outputs}
@@ -1272,9 +1274,8 @@ def test_openai_says_hello():
         logger.info("LANGSMITH_TEST_TRACKING is set to 'false'. Skipping log_feedback.")
         yield None
         return
-    parent_run = rh.get_current_run_tree()
     test_case = _TEST_CASE.get()
-    if not parent_run or not test_case:
+    if not test_case:
         msg = (
             "trace_feedback should only be called within a pytest test decorated with "
             "@pytest.mark.langsmith, and with tracing enabled (by setting the "
@@ -1284,11 +1285,11 @@ def test_openai_says_hello():
     metadata = {
         "experiment": test_case.test_suite.experiment.name,
         "reference_example_id": test_case.example_id,
-        "reference_run_id": parent_run.id,
+        "reference_run_id": test_case.run_id,
     }
     with rh.trace(
         name=name,
-        inputs=parent_run.outputs,
+        inputs=test_case._logged_outputs,
         parent="ignore",
         project_name="evaluators",
         metadata=metadata,
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langsmith"
-version = "0.3.31"
+version = "0.3.32"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 authors = ["LangChain <support@langchain.dev>"]
 license = "MIT"
diff --git a/python/tests/evaluation/test_decorator.py b/python/tests/evaluation/test_decorator.py
@@ -5,6 +5,7 @@
 import pytest
 
 from langsmith import testing as t
+from langsmith import traceable
 
 pytestmark = pytest.mark.skipif(
     not os.getenv("LANGSMITH_TRACING"),
@@ -52,6 +53,27 @@ async def test_openai_says_hello():
     assert "hello" in response.lower()
 
 
+@pytest.mark.langsmith
+async def test_composite_evaluator():
+    # Traced code will be included in the test case
+    text = "Say hello!"
+    response = await my_app()
+    t.log_inputs({"text": text})
+    t.log_outputs({"response": response})
+    t.log_reference_outputs({"response": "hello!"})
+
+    @traceable
+    def my_composite_evaluator(response):
+        with t.trace_feedback():
+            grade = 1 if "hello" in response else 0
+            t.log_feedback(key="composite_judge", score=grade)
+            return grade
+
+    my_composite_evaluator(response)
+
+    assert "hello" in response.lower()
+
+
 @pytest.mark.xfail(reason="Test failure output case")
 @pytest.mark.langsmith(output_keys=["expected"])
 @pytest.mark.parametrize(

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "langsmith",`
`3`		`- "version": "0.3.17",`
	`3`	`+ "version": "0.3.18",`
`4`	`4`	`"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",`
`5`	`5`	`"packageManager": "yarn@1.22.19",`
`6`	`6`	`"files": [`