Skip to content

Commit b2b5735

Browse files
authored
feat(py,js): Allow logging feedback from nested traceables within pytest and jestlike tests (#1668)
1 parent 73015bc commit b2b5735

File tree

8 files changed: +112 additions, -52 deletions

js/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "langsmith",
3-
"version": "0.3.17",
3+
"version": "0.3.18",
44
"description": "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.",
55
"packageManager": "yarn@1.22.19",
66
"files": [

js/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ export { RunTree, type RunTreeConfig } from "./run_trees.js";
1818
export { overrideFetchImplementation } from "./singletons/fetch.js";
1919

2020
// Update using yarn bump-version
21-
export const __version__ = "0.3.17";
21+
export const __version__ = "0.3.18";

js/src/tests/jestlike/jest.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { AsyncLocalStorage } from "node:async_hooks";
44
import * as ls from "../../jest/index.js";
55
import { type SimpleEvaluator } from "../../jest/index.js";
66
import { _objectHash } from "../../utils/jestlike/index.js";
7+
import { traceable } from "../../traceable.js";
78

89
const myEvaluator: SimpleEvaluator = (params) => {
910
const { referenceOutputs, outputs } = params;
@@ -93,6 +94,28 @@ ls.describe(
9394
}
9495
);
9596

97+
ls.test(
98+
"Logging feedback should work in nested traceable",
99+
{
100+
inputs: { nested: "nested" },
101+
referenceOutputs: { nested: "nested" },
102+
},
103+
async ({ inputs: _inputs, referenceOutputs: _referenceOutputs }) => {
104+
const myApp = () => {
105+
return { bar: "goodval" };
106+
};
107+
const res = myApp();
108+
const nested = traceable(() => {
109+
ls.logFeedback({
110+
key: "nested",
111+
score: 0.8,
112+
});
113+
});
114+
await nested();
115+
return res;
116+
}
117+
);
118+
96119
ls.test(
97120
"Should fail with some defined evaluator",
98121
{ inputs: { foo: "bad" }, referenceOutputs: { baz: "qux" } },

js/src/utils/jestlike/globals.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export type TestWrapperAsyncLocalStorageData = {
2020
client: Client;
2121
suiteUuid: string;
2222
suiteName: string;
23+
testRootRunTree?: RunTree;
2324
};
2425

2526
export const testWrapperAsyncLocalStorageInstance =

js/src/utils/jestlike/index.ts

Lines changed: 57 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ export function logFeedback(
6969
exampleId: context.currentExample.id,
7070
feedback: feedback,
7171
context,
72-
runTree: trackingEnabled(context) ? getCurrentRunTree() : undefined,
72+
runTree: context.testRootRunTree,
7373
client: context.client,
7474
});
7575
}
@@ -491,59 +491,72 @@ export function generateWrapperFromJestlikeMethods(
491491
};
492492
let exampleId: string;
493493
const runTestFn = async () => {
494-
const testContext =
495-
testWrapperAsyncLocalStorageInstance.getStore();
494+
let testContext = testWrapperAsyncLocalStorageInstance.getStore();
496495
if (testContext === undefined) {
497496
throw new Error(
498497
"Could not identify test context. Please contact us for help."
499498
);
500499
}
501-
try {
502-
const res = await testFn({
503-
...rest,
504-
inputs: testInput,
505-
referenceOutputs: testOutput,
506-
});
507-
_logTestFeedback({
508-
exampleId,
509-
feedback: { key: "pass", score: true },
510-
context: testContext,
511-
runTree: trackingEnabled(testContext)
500+
return testWrapperAsyncLocalStorageInstance.run(
501+
{
502+
...testContext,
503+
testRootRunTree: trackingEnabled(testContext)
512504
? getCurrentRunTree()
513505
: undefined,
514-
client: testContext.client,
515-
});
516-
if (res != null) {
517-
if (loggedOutput !== undefined) {
518-
console.warn(
519-
`[WARN]: Returned value from test function will override output set by previous "logOutputs()" call.`
506+
},
507+
async () => {
508+
testContext = testWrapperAsyncLocalStorageInstance.getStore();
509+
if (testContext === undefined) {
510+
throw new Error(
511+
"Could not identify test context after setting test root run tree. Please contact us for help."
520512
);
521513
}
522-
loggedOutput =
523-
typeof res === "object"
524-
? (res as Record<string, unknown>)
525-
: { result: res };
514+
try {
515+
const res = await testFn({
516+
...rest,
517+
inputs: testInput,
518+
referenceOutputs: testOutput,
519+
});
520+
_logTestFeedback({
521+
exampleId,
522+
feedback: { key: "pass", score: true },
523+
context: testContext,
524+
runTree: testContext.testRootRunTree,
525+
client: testContext.client,
526+
});
527+
if (res != null) {
528+
if (loggedOutput !== undefined) {
529+
console.warn(
530+
`[WARN]: Returned value from test function will override output set by previous "logOutputs()" call.`
531+
);
532+
}
533+
loggedOutput =
534+
typeof res === "object"
535+
? (res as Record<string, unknown>)
536+
: { result: res };
537+
}
538+
return loggedOutput;
539+
} catch (e: any) {
540+
_logTestFeedback({
541+
exampleId,
542+
feedback: { key: "pass", score: false },
543+
context: testContext,
544+
runTree: testContext.testRootRunTree,
545+
client: testContext.client,
546+
});
547+
const rawError = e;
548+
const strippedErrorMessage = e.message.replace(
549+
STRIP_ANSI_REGEX,
550+
""
551+
);
552+
const langsmithFriendlyError = new Error(
553+
strippedErrorMessage
554+
);
555+
(langsmithFriendlyError as any).rawJestError = rawError;
556+
throw langsmithFriendlyError;
557+
}
526558
}
527-
return loggedOutput;
528-
} catch (e: any) {
529-
_logTestFeedback({
530-
exampleId,
531-
feedback: { key: "pass", score: false },
532-
context: testContext,
533-
runTree: trackingEnabled(testContext)
534-
? getCurrentRunTree()
535-
: undefined,
536-
client: testContext.client,
537-
});
538-
const rawError = e;
539-
const strippedErrorMessage = e.message.replace(
540-
STRIP_ANSI_REGEX,
541-
""
542-
);
543-
const langsmithFriendlyError = new Error(strippedErrorMessage);
544-
(langsmithFriendlyError as any).rawJestError = rawError;
545-
throw langsmithFriendlyError;
546-
}
559+
);
547560
};
548561
try {
549562
if (trackingEnabled(context)) {

python/langsmith/testing/_internal.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -695,9 +695,10 @@ def __init__(
695695
self.run_id = run_id
696696
self.pytest_plugin = pytest_plugin
697697
self.pytest_nodeid = pytest_nodeid
698-
self._logged_reference_outputs: Optional[dict] = None
699698
self.inputs = inputs
700699
self.reference_outputs = reference_outputs
700+
self._logged_reference_outputs: Optional[dict] = None
701+
self._logged_outputs: Optional[dict] = None
701702

702703
if pytest_plugin and pytest_nodeid:
703704
pytest_plugin.add_process_to_test_suite(
@@ -738,6 +739,7 @@ def log_inputs(self, inputs: dict) -> None:
738739
)
739740

740741
def log_outputs(self, outputs: dict) -> None:
742+
self._logged_outputs = outputs
741743
if self.pytest_plugin and self.pytest_nodeid:
742744
self.pytest_plugin.update_process_status(
743745
self.pytest_nodeid, {"outputs": outputs}
@@ -1272,9 +1274,8 @@ def test_openai_says_hello():
12721274
logger.info("LANGSMITH_TEST_TRACKING is set to 'false'. Skipping log_feedback.")
12731275
yield None
12741276
return
1275-
parent_run = rh.get_current_run_tree()
12761277
test_case = _TEST_CASE.get()
1277-
if not parent_run or not test_case:
1278+
if not test_case:
12781279
msg = (
12791280
"trace_feedback should only be called within a pytest test decorated with "
12801281
"@pytest.mark.langsmith, and with tracing enabled (by setting the "
@@ -1284,11 +1285,11 @@ def test_openai_says_hello():
12841285
metadata = {
12851286
"experiment": test_case.test_suite.experiment.name,
12861287
"reference_example_id": test_case.example_id,
1287-
"reference_run_id": parent_run.id,
1288+
"reference_run_id": test_case.run_id,
12881289
}
12891290
with rh.trace(
12901291
name=name,
1291-
inputs=parent_run.outputs,
1292+
inputs=test_case._logged_outputs,
12921293
parent="ignore",
12931294
project_name="evaluators",
12941295
metadata=metadata,

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "langsmith"
3-
version = "0.3.31"
3+
version = "0.3.32"
44
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
55
authors = ["LangChain <support@langchain.dev>"]
66
license = "MIT"

python/tests/evaluation/test_decorator.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from langsmith import testing as t
8+
from langsmith import traceable
89

910
pytestmark = pytest.mark.skipif(
1011
not os.getenv("LANGSMITH_TRACING"),
@@ -52,6 +53,27 @@ async def test_openai_says_hello():
5253
assert "hello" in response.lower()
5354

5455

56+
@pytest.mark.langsmith
57+
async def test_composite_evaluator():
58+
# Traced code will be included in the test case
59+
text = "Say hello!"
60+
response = await my_app()
61+
t.log_inputs({"text": text})
62+
t.log_outputs({"response": response})
63+
t.log_reference_outputs({"response": "hello!"})
64+
65+
@traceable
66+
def my_composite_evaluator(response):
67+
with t.trace_feedback():
68+
grade = 1 if "hello" in response else 0
69+
t.log_feedback(key="composite_judge", score=grade)
70+
return grade
71+
72+
my_composite_evaluator(response)
73+
74+
assert "hello" in response.lower()
75+
76+
5577
@pytest.mark.xfail(reason="Test failure output case")
5678
@pytest.mark.langsmith(output_keys=["expected"])
5779
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)