diff --git a/.github/styles/config/vocabularies/Docs/accept.txt b/.github/styles/config/vocabularies/Docs/accept.txt index 146b4b82d..35c31c942 100644 --- a/.github/styles/config/vocabularies/Docs/accept.txt +++ b/.github/styles/config/vocabularies/Docs/accept.txt @@ -68,4 +68,5 @@ use_artifact wandb wandb.com W&B Server -[Ww]orkspace's \ No newline at end of file +Weave +[Ww]orkspace diff --git a/docs/guides/evaluations/evaluate-models-tables.md b/docs/guides/evaluations/evaluate-models-tables.md new file mode 100644 index 000000000..54041d17d --- /dev/null +++ b/docs/guides/evaluations/evaluate-models-tables.md @@ -0,0 +1,3 @@ +--- +title: Evaluate models with tables +--- diff --git a/docs/guides/evaluations/evaluate-models-weave.md b/docs/guides/evaluations/evaluate-models-weave.md new file mode 100644 index 000000000..9f3535e60 --- /dev/null +++ b/docs/guides/evaluations/evaluate-models-weave.md @@ -0,0 +1,26 @@ +--- +title: Evaluate models with Weave +--- +import { CTAButtons } from '@site/src/components/CTAButtons/CTAButtons.tsx' + +## What is Weave? + +W&B Weave helps developers who are building and iterating on their AI apps to create apples-to-apples evaluations that score the behavior of any aspect of their app, and examine and debug failures by easily inspecting inputs and outputs. + +## Get started with Weave + +First, create a W&B account at https://wandb.ai and copy your API key from https://wandb.ai/authorize. + +Then, you can follow along in the below Colab notebook that demonstrates Weave evaluating an LLM (in this case, OpenAI, for which you will also need [an API key](https://platform.openai.com/docs/quickstart/step-2-setup-your-api-key)). + + + +After running through the steps, browse your dashboard in Weave to see the tracing data Weave logs when executing your LLM app code, and see breakdowns of execution time, API cost, etc. 
Try the dashboard links Weave generates after every call in the Colab notebook and see how Weave breaks down errors and stack traces, tracks costs, and assists you in reverse-engineering the behavior of the LLM. + +![](https://weave-docs.wandb.ai/assets/images/weave-hero-188bbbbfcac1809f2529c62110d1553a.png) + +## Use Weave to evaluate models in production + +This [tutorial on how to build an evaluation pipeline with Weave](https://weave-docs.wandb.ai/tutorial-eval/) demonstrates how to evaluate multiple versions of an evolving application that uses a model, using the `weave.Evaluation` function, which assesses a Model's performance on a set of examples using a list of specified scoring functions or `weave.scorer.Scorer` classes, producing dashboards with advanced breakdowns of the model's performance. + +![](https://weave-docs.wandb.ai/assets/images/evals-hero-9bb44591b72ac8637e7e14bc73db1ba8.png) \ No newline at end of file diff --git a/docs/guides/intro.md b/docs/guides/intro.md index cf957a6d0..da7269328 100644 --- a/docs/guides/intro.md +++ b/docs/guides/intro.md @@ -11,15 +11,15 @@ Weights & Biases (W&B) is the AI developer platform, with tools for training mod ![](/images/general/architecture.png) -W&B consists of three major components: [Models](/guides/models.md), [Weave](https://wandb.github.io/weave/), and [Core](/guides/core.md): +W&B consists of three major components: [Weave](https://wandb.github.io/weave/), [Models](/guides/models.md), and [Core](/guides/core.md): + +**[W&B Weave](https://weave-docs.wandb.ai/)** is a lightweight toolkit for tracking and evaluating LLM applications. + **[W&B Models](/guides/models.md)** is a set of lightweight, interoperable tools for machine learning practitioners training and fine-tuning models. 
- [Experiments](/guides/track/intro.md): Machine learning experiment tracking - [Sweeps](/guides/sweeps/intro.md): Hyperparameter tuning and model optimization - [Registry](/guides/registry/intro.md): Publish and share your ML models and datasets -**[W&B Weave](https://wandb.github.io/weave/)** is a lightweight toolkit for tracking and evaluating LLM applications. - **[W&B Core](/guides/core.md)** is set of powerful building blocks for tracking and visualizing data and models, and communicating results. - [Artifacts](/guides/artifacts/intro.md): Version assets and track lineage - [Tables](/guides/tables/intro.md): Visualize and query tabular data diff --git a/docs/quickstart.md b/docs/quickstart.md index a72740146..76df399df 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -1,7 +1,7 @@ --- description: W&B Quickstart displayed_sidebar: default -title: W&B Quickstart +title: Models Quickstart --- import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; diff --git a/sidebars.js b/sidebars.js index b1fe2261d..d28f6cb05 100644 --- a/sidebars.js +++ b/sidebars.js @@ -79,13 +79,13 @@ export default { ], default: [ 'guides/intro', - 'quickstart', { type: 'category', label: 'W&B Models', link: {type: 'doc', id: 'guides/models'}, collapsed: false, items: [ + 'quickstart', { type: 'category', label: 'Experiments', @@ -201,6 +201,25 @@ export default { 'guides/artifacts/project-scoped-automations', ], }, + { + type: 'category', + label: 'Evaluations', + items: [ + 'guides/evaluations/evaluate-models-weave', + 'guides/evaluations/evaluate-models-tables', + ], + }, + { + type: 'category', + label: 'Tables', + link: {type: 'doc', id: 'guides/tables/intro'}, + items: [ + 'guides/tables/tables-walkthrough', + 'guides/tables/visualize-tables', + 'guides/tables/tables-gallery', + 'guides/tables/tables-download', + ], + }, { type: 'category', label: 'W&B App UI Reference', @@ -311,17 +330,6 @@ export default { // 'guides/artifacts/examples', ], }, - { - type: 
'category', - label: 'Tables', - link: {type: 'doc', id: 'guides/tables/intro'}, - items: [ - 'guides/tables/tables-walkthrough', - 'guides/tables/visualize-tables', - 'guides/tables/tables-gallery', - 'guides/tables/tables-download', - ], - }, { type: 'category', label: 'Reports',