Skip to content

Commit 69f8726

Browse files
patch(python, js): Fix sampling logic for patch runs (#1881)
### Description We were not correctly filtering patch runs when the post would get filtered, meaning large amounts of patch requests were being sent to ingest API and counting toward usage limits, but not actually getting ingested. ### Changes Check patch trace id against trace id list, only remove trace ids from list for root runs. ### Alternative Solutions Use consistent hashing based on run_id to sample both post and patch run ids equally, prevents storage of uuids (minimal memory overhead) ## Tests Added unit tests confirming filtering logic correctly applies to both post and patch
1 parent e828faf commit 69f8726

File tree

6 files changed

+453
-9
lines changed

6 files changed

+453
-9
lines changed

js/src/client.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -935,10 +935,10 @@ export class Client implements LangSmithTracingClientInterface {
935935
if (patch) {
936936
const sampled = [];
937937
for (const run of runs) {
938-
if (!this.filteredPostUuids.has(run.id)) {
938+
if (!this.filteredPostUuids.has(run.trace_id)) {
939939
sampled.push(run);
940-
} else {
941-
this.filteredPostUuids.delete(run.id);
940+
} else if (run.id === run.trace_id) {
941+
this.filteredPostUuids.delete(run.trace_id);
942942
}
943943
}
944944
return sampled;

js/src/tests/client.test.ts

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,4 +266,218 @@ describe("Client", () => {
266266
});
267267
});
268268
});
269+
270+
describe("_filterForSampling patch logic", () => {
271+
it("should filter patch runs based on trace_id instead of run.id", () => {
272+
const client = new Client({
273+
apiKey: "test-api-key",
274+
tracingSamplingRate: 0.5,
275+
});
276+
277+
// Mock the _shouldSample method to control sampling decisions
278+
let counter = 0;
279+
jest.spyOn(client as any, "_shouldSample").mockImplementation(() => {
280+
counter += 1;
281+
return counter % 2 === 0; // Accept even-numbered calls (2nd, 4th, etc.)
282+
});
283+
284+
// Create two traces
285+
const traceId1 = "trace-1";
286+
const traceId2 = "trace-2";
287+
const childRunId1 = "child-1";
288+
const childRunId2 = "child-2";
289+
290+
// Create root runs (these will be sampled)
291+
const rootRuns = [
292+
{
293+
id: traceId1,
294+
trace_id: traceId1,
295+
name: "root_run_1",
296+
run_type: "llm" as const,
297+
inputs: { text: "hello" },
298+
},
299+
{
300+
id: traceId2,
301+
trace_id: traceId2,
302+
name: "root_run_2",
303+
run_type: "llm" as const,
304+
inputs: { text: "world" },
305+
},
306+
];
307+
308+
// Test POST filtering (initial sampling)
309+
const postFiltered = (client as any)._filterForSampling(rootRuns, false);
310+
311+
// Based on our mock, first call returns false, second returns true
312+
// So only root_run_2 should be sampled
313+
expect(postFiltered).toHaveLength(1);
314+
expect(postFiltered[0].id).toBe(traceId2);
315+
316+
// Verify that traceId1 is in filtered set, traceId2 is not
317+
expect((client as any).filteredPostUuids.has(traceId1)).toBe(true);
318+
expect((client as any).filteredPostUuids.has(traceId2)).toBe(false);
319+
320+
// Test PATCH filtering - child runs should follow their trace's sampling decision
321+
const patchRuns = [
322+
{
323+
id: childRunId1,
324+
trace_id: traceId1,
325+
name: "child_run_1",
326+
run_type: "tool" as const,
327+
inputs: { text: "child hello" },
328+
outputs: { result: "child result 1" },
329+
},
330+
{
331+
id: childRunId2,
332+
trace_id: traceId2,
333+
name: "child_run_2",
334+
run_type: "tool" as const,
335+
inputs: { text: "child world" },
336+
outputs: { result: "child result 2" },
337+
},
338+
];
339+
340+
const patchFiltered = (client as any)._filterForSampling(patchRuns, true);
341+
342+
// Only child_run_2 should be included (its trace was sampled)
343+
// child_run_1 should be filtered out (its trace was not sampled)
344+
expect(patchFiltered).toHaveLength(1);
345+
expect(patchFiltered[0].id).toBe(childRunId2);
346+
expect(patchFiltered[0].trace_id).toBe(traceId2);
347+
});
348+
349+
it("should remove trace_id from filtered set when processing root run patches", () => {
350+
const client = new Client({
351+
apiKey: "test-api-key",
352+
tracingSamplingRate: 0.5,
353+
});
354+
355+
// Mock the _shouldSample method to reject first trace, accept second
356+
let counter = 0;
357+
jest.spyOn(client as any, "_shouldSample").mockImplementation(() => {
358+
counter += 1;
359+
return counter % 2 === 0;
360+
});
361+
362+
const traceId1 = "trace-1";
363+
const traceId2 = "trace-2";
364+
365+
// Create root runs and sample them
366+
const rootRuns = [
367+
{
368+
id: traceId1,
369+
trace_id: traceId1,
370+
name: "root_run_1",
371+
run_type: "llm" as const,
372+
inputs: { text: "hello" },
373+
},
374+
{
375+
id: traceId2,
376+
trace_id: traceId2,
377+
name: "root_run_2",
378+
run_type: "llm" as const,
379+
inputs: { text: "world" },
380+
},
381+
];
382+
383+
(client as any)._filterForSampling(rootRuns, false);
384+
385+
// Verify initial state
386+
expect((client as any).filteredPostUuids.has(traceId1)).toBe(true);
387+
expect((client as any).filteredPostUuids.has(traceId2)).toBe(false);
388+
389+
// Test PATCH filtering for root runs (updates to the root runs themselves)
390+
const rootPatchRuns = [
391+
{
392+
id: traceId1,
393+
trace_id: traceId1,
394+
name: "root_run_1",
395+
run_type: "llm" as const,
396+
inputs: { text: "hello" },
397+
outputs: { result: "root result 1" },
398+
},
399+
{
400+
id: traceId2,
401+
trace_id: traceId2,
402+
name: "root_run_2",
403+
run_type: "llm" as const,
404+
inputs: { text: "world" },
405+
outputs: { result: "root result 2" },
406+
},
407+
];
408+
409+
const rootPatchFiltered = (client as any)._filterForSampling(
410+
rootPatchRuns,
411+
true
412+
);
413+
414+
// Only root_run_2 should be included, and traceId1 should be removed from filtered set
415+
// since we're updating the root run that was originally filtered
416+
expect(rootPatchFiltered).toHaveLength(1);
417+
expect(rootPatchFiltered[0].id).toBe(traceId2);
418+
419+
// traceId1 should be removed from filtered set since we processed its root run
420+
expect((client as any).filteredPostUuids.has(traceId1)).toBe(false);
421+
expect((client as any).filteredPostUuids.has(traceId2)).toBe(false);
422+
});
423+
424+
it("should handle mixed traces with patch sampling", () => {
425+
const client = new Client({
426+
apiKey: "test-api-key",
427+
tracingSamplingRate: 0.5,
428+
});
429+
430+
// Mock sampling to accept every other trace
431+
let counter = 0;
432+
jest.spyOn(client as any, "_shouldSample").mockImplementation(() => {
433+
counter += 1;
434+
return counter % 2 === 1; // Accept odd-numbered calls (1st, 3rd, etc.)
435+
});
436+
437+
// Create multiple traces
438+
const traceIds = ["trace-0", "trace-1", "trace-2", "trace-3"];
439+
const childRunIds = ["child-0", "child-1", "child-2", "child-3"];
440+
441+
// Create root runs
442+
const rootRuns = traceIds.map((traceId, i) => ({
443+
id: traceId,
444+
trace_id: traceId,
445+
name: `root_run_${i}`,
446+
run_type: "llm" as const,
447+
inputs: { text: `hello ${i}` },
448+
}));
449+
450+
// Sample the root runs
451+
const postFiltered = (client as any)._filterForSampling(rootRuns, false);
452+
453+
// Based on our mock: 1st and 3rd calls return true (indices 0, 2)
454+
expect(postFiltered).toHaveLength(2);
455+
const sampledTraceIds = new Set(postFiltered.map((run: any) => run.id));
456+
expect(sampledTraceIds.has(traceIds[0])).toBe(true);
457+
expect(sampledTraceIds.has(traceIds[2])).toBe(true);
458+
459+
// Create child runs for all traces
460+
const childRuns = traceIds.map((traceId, i) => ({
461+
id: childRunIds[i],
462+
trace_id: traceId,
463+
name: `child_run_${i}`,
464+
run_type: "tool" as const,
465+
inputs: { text: `child ${i}` },
466+
outputs: { result: `child result ${i}` },
467+
}));
468+
469+
// Test patch filtering for child runs
470+
const patchFiltered = (client as any)._filterForSampling(childRuns, true);
471+
472+
// Only children of sampled traces should be included
473+
expect(patchFiltered).toHaveLength(2);
474+
const patchTraceIds = new Set(
475+
patchFiltered.map((run: any) => run.trace_id)
476+
);
477+
expect(patchTraceIds.has(traceIds[0])).toBe(true);
478+
expect(patchTraceIds.has(traceIds[2])).toBe(true);
479+
expect(patchTraceIds.has(traceIds[1])).toBe(false);
480+
expect(patchTraceIds.has(traceIds[3])).toBe(false);
481+
});
482+
});
269483
});

python/langsmith/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
# Avoid calling into importlib on every call to __version__
2323

24-
__version__ = "0.4.8"
24+
__version__ = "0.4.9"
2525
version = __version__ # for backwards compatibility
2626

2727

python/langsmith/client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,11 +1284,11 @@ def _filter_for_sampling(
12841284
if patch:
12851285
sampled = []
12861286
for run in runs:
1287-
run_id = _as_uuid(run["id"])
1288-
if run_id not in self._filtered_post_uuids:
1287+
trace_id = _as_uuid(run["trace_id"])
1288+
if trace_id not in self._filtered_post_uuids:
12891289
sampled.append(run)
1290-
else:
1291-
self._filtered_post_uuids.remove(run_id)
1290+
elif run["id"] == trace_id:
1291+
self._filtered_post_uuids.remove(trace_id)
12921292
return sampled
12931293
else:
12941294
sampled = []

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "langsmith"
3-
version = "0.4.8"
3+
version = "0.4.9"
44
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
55
authors = ["LangChain <[email protected]>"]
66
license = "MIT"

0 commit comments

Comments
 (0)