92 | 92 | serialized_run_operation_to_multipart_parts_and_context,
93 | 93 | )
94 | 94 | from langsmith._internal._serde import dumps_json as _dumps_json
95 | | -from langsmith.schemas import AttachmentInfo |
| 95 | +from langsmith.schemas import AttachmentInfo, ExampleWithRuns |
96 | 96 |
97 | 97 |
98 | 98 | def _check_otel_enabled() -> bool:

@@ -8269,6 +8269,142 @@ async def helpfulness(outputs: dict) -> dict:

8269 | 8269 | **kwargs,
8270 | 8270 | )
8271 | 8271 |
| 8272 | + def _paginate_examples_with_runs( |
| 8273 | + self, |
| 8274 | + dataset_id: ID_TYPE, |
| 8275 | + session_id: uuid.UUID, |
| 8276 | + preview: bool = False, |
| 8277 | + comparative_experiment_id: Optional[uuid.UUID] = None, |
| 8278 | + filters: dict[uuid.UUID, list[str]] | None = None, |
| 8279 | + limit: Optional[int] = None, |
| 8280 | + ) -> Iterator[list[ExampleWithRuns]]: |
| 8281 | + """Paginate through examples with runs and yield them in batches. |
| 8282 | +
| 8283 | + Args: |
| 8284 | + dataset_id: UUID of the dataset whose examples and runs are fetched. |
| 8285 | + session_id: UUID of the session (project) whose runs are returned; same as project_id. |
| 8286 | + preview: Whether to return lightweight preview data only. |
| 8287 | + comparative_experiment_id: Optional UUID of a comparative experiment. |
| 8288 | + filters: Optional filters to apply to the runs. |
| 8289 | + limit: Maximum total number of results to return. |
| 8290 | +
| 8291 | + Yields: |
| 8292 | + Batches of results, each a list of ExampleWithRuns instances. |
| 8293 | + """ |
| 8294 | + offset = 0 |
| 8295 | + results_count = 0 |
| 8296 | + |
| 8297 | + while True: |
| 8298 | + remaining = (limit - results_count) if limit else None |
| 8299 | + batch_limit = min(100, remaining) if remaining else 100 |
| 8300 | + |
| 8301 | + body = { |
| 8302 | + "session_ids": [session_id], |
| 8303 | + "offset": offset, |
| 8304 | + "limit": batch_limit, |
| 8305 | + "preview": preview, |
| 8306 | + "comparative_experiment_id": comparative_experiment_id, |
| 8307 | + "filters": filters, |
| 8308 | + } |
| 8309 | + |
| 8310 | + response = self.request_with_retries( |
| 8311 | + "POST", |
| 8312 | + f"/datasets/{dataset_id}/runs", |
| 8313 | + request_kwargs={"data": _dumps_json(body)}, |
| 8314 | + ) |
| 8315 | + |
| 8316 | + batch = response.json() |
| 8317 | + if not batch: |
| 8318 | + break |
| 8319 | + |
| 8320 | + # Transform raw dictionaries to ExampleWithRuns instances |
| 8321 | + examples_batch = [ls_schemas.ExampleWithRuns(**result) for result in batch] |
| 8322 | + yield examples_batch |
| 8323 | + results_count += len(batch) |
| 8324 | + |
| 8325 | + if len(batch) < batch_limit or (limit and results_count >= limit): |
| 8326 | + break |
| 8327 | + |
| 8328 | + offset += len(batch) |
| 8329 | + |
| 8330 | + def get_experiment_results( |
| 8331 | + self, |
| 8332 | + name: Optional[str] = None, |
| 8333 | + project_id: Optional[uuid.UUID] = None, |
| 8334 | + preview: bool = False, |
| 8335 | + comparative_experiment_id: Optional[uuid.UUID] = None, |
| 8336 | + filters: dict[uuid.UUID, list[str]] | None = None, |
| 8337 | + limit: Optional[int] = None, |
| 8338 | + ) -> ls_schemas.ExperimentResults: |
| 8339 | + """Get results for an experiment: aggregated stats for the experiment session plus the experiment runs for each dataset example. |
| 8340 | +
| 8341 | + Experiment results may not be available immediately after the experiment is created. |
| 8342 | +
| 8343 | + Args: |
| 8344 | + name: The experiment name. |
| 8345 | + project_id: The experiment's tracing project ID (also called session_id); it appears in the URL of the experiment page in LangSmith. |
| 8346 | + preview: Whether to return lightweight preview data only. When True, |
| 8347 | + fetches inputs_preview/outputs_preview summaries instead of full inputs/outputs from S3 storage. |
| 8348 | + This is faster and uses less bandwidth. |
| 8349 | + comparative_experiment_id: Optional UUID of a comparative experiment, for pairwise comparison results. |
| 8350 | + filters: Optional filters to apply to the results. |
| 8351 | + limit: Maximum number of results to return. |
| 8352 | +
| 8353 | + Returns: |
| 8354 | + An ExperimentResults with stats (a TracerSessionResult) and an iterator of examples_with_runs (ExampleWithRuns instances). |
| 8355 | +
| 8356 | + Raises: |
| 8357 | + ValueError: If no experiment project is found for the given name or project_id. |
| 8358 | +
| 8359 | + Example: |
| 8360 | + >>> client = Client() |
| 8361 | + >>> results = client.get_experiment_results( |
| 8362 | + ... project_id="037ae90f-f297-4926-b93c-37d8abf6899f", |
| 8363 | + ... ) |
| 8364 | + >>> for example_with_runs in results["examples_with_runs"]: |
| 8365 | + ... print(example_with_runs.dict()) |
| 8366 | +
| 8367 | + >>> # Access aggregated experiment stats |
| 8368 | + >>> print(f"Total runs: {results['stats'].run_count}") |
| 8369 | + >>> print(f"Total cost: {results['stats'].total_cost}") |
| 8370 | + >>> print(f"P50 latency: {results['stats'].latency_p50}") |
| 8371 | +
| 8372 | + """ |
| 8373 | + if name and not project_id: |
| 8374 | + projects = list(self.list_projects(name=name)) |
| 8375 | + if not projects: |
| 8376 | + raise ValueError(f"No experiment found with name: '{name}'") |
| 8377 | + project_id = projects[0].id |
| 8378 | + |
| 8379 | + # Get aggregated stats for the experiment project/session |
| 8380 | + project_stats = list( |
| 8381 | + self.list_projects( |
| 8382 | + project_ids=[cast(uuid.UUID, project_id)], include_stats=True |
| 8383 | + ) |
| 8384 | + ) |
| 8385 | + |
| 8386 | + if not project_stats: |
| 8387 | + raise ValueError(f"No experiment found with project_id: '{project_id}'") |
| 8388 | + |
| 8389 | + dataset_id = project_stats[0].reference_dataset_id |
| 8390 | + |
| 8391 | + def _get_examples_with_runs_iterator(): |
| 8392 | + """Yield examples with corresponding experiment runs.""" |
| 8393 | + for batch in self._paginate_examples_with_runs( |
| 8394 | + dataset_id=dataset_id, |
| 8395 | + session_id=project_id, |
| 8396 | + preview=preview, |
| 8397 | + comparative_experiment_id=comparative_experiment_id, |
| 8398 | + filters=filters, |
| 8399 | + limit=limit, |
| 8400 | + ): |
| 8401 | + yield from batch |
| 8402 | + |
| 8403 | + return ls_schemas.ExperimentResults( |
| 8404 | + stats=project_stats[0], |
| 8405 | + examples_with_runs=_get_examples_with_runs_iterator(), |
| 8406 | + ) |
| 8407 | + |
8272 | 8408 |
8273 | 8409 | def convert_prompt_to_openai_format(
8274 | 8410 | messages: Any,
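Taken together, the added methods expose a lazily paginated view of an experiment: get_experiment_results resolves the experiment project, pulls its aggregated stats, and returns an iterator that _paginate_examples_with_runs feeds in pages of up to 100 results from /datasets/{dataset_id}/runs. Below is a minimal usage sketch, not part of this diff: the experiment name and the preview/limit values are placeholders, and it assumes a LangSmith API key is configured; everything else mirrors the docstring example above.

```python
# Minimal sketch of consuming the new API (placeholder experiment name;
# assumes a LangSmith API key is configured in the environment).
from langsmith import Client

client = Client()

results = client.get_experiment_results(
    name="my-experiment",  # or pass project_id=... from the experiment page URL
    preview=True,          # fetch inputs_preview/outputs_preview instead of full payloads
    limit=50,              # cap the total number of examples fetched
)

# Aggregated session-level stats (a TracerSessionResult).
stats = results["stats"]
print(f"Runs: {stats.run_count}, cost: {stats.total_cost}, p50 latency: {stats.latency_p50}")

# examples_with_runs is a generator: pages of up to 100 results are only
# fetched from the /datasets/{dataset_id}/runs endpoint as it is consumed.
for example_with_runs in results["examples_with_runs"]:
    print(example_with_runs.dict())
```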