diff --git a/README.md b/README.md index 9aabfda..5306ce0 100644 --- a/README.md +++ b/README.md @@ -10,18 +10,19 @@ Steel is an open-source browser API purpose-built for AI agents. | Rank | Agent | Organization | WebVoyager Score | Source | Open Source | New | SOTA | | ---- | --------------- | -------------- | ---------------- | ------------------------------------------------------------------------------------------------- | ----------- | --- | ---- | -| 1 | Magnitude | Magnitude | 93.9% | [Source](https://magnitude.run/webvoyager) | Yes | Yes | Yes | -| 2 | Browser Use | Browser Use | 89.1% | [Source](https://browser-use.com/posts/sota-technical-report) | Yes | Yes | | -| 3 | Operator | OpenAI | 87% | [Source](https://openai.com/index/introducing-operator/) | No | Yes | | -| 4 | Kura | Kura | 87% | [Source](https://www.trykura.com/benchmarks) | No | Yes | | -| 5 | Skyvern 2.0 | Skyvern | 85.85% | [Source](https://blog.skyvern.com/skyvern-2-0-state-of-the-art-web-navigation-with-85-8-on-webvoyager-eval/) | Yes | Yes | | -| 6 | Project Mariner | Google | 83.5% | [Source](https://deepmind.google/technologies/project-mariner/) | No | | | -| 7 | Proxy | Convergence AI | 82% | [Source](https://convergence.ai/training-web-agents-with-web-world-models-dec-2024/) | No | | | -| 8 | Agent-E | Emergence AI | 73.1% | [Source](https://www.emergence.ai/blog/agent-e-sota) | No | | | -| 9 | Runner H 0.1 | H Company | 67% | [Source](https://www.hcompany.ai/blog/a-research-update) | No | | | -| 10 | WILBUR | Academic Research | 60.6% | [Source](https://arxiv.org/abs/2404.05902) | No | | | -| 11 | WebVoyager | Academic Research | 59.1% | [Source](https://arxiv.org/abs/2401.13919) | Yes | | | -| 12 | Computer Use | Anthropic | 52% | [Source](https://www.hcompany.ai/blog/a-research-update) | No | | | +| 1 | Surfer 2 | H Company | 97.1% | [Source](https://www.hcompany.ai/blog/surfer-2) | No | Yes | Yes | +| 2 | Magnitude | Magnitude | 93.9% | [Source](https://magnitude.run/webvoyager) | Yes | Yes | | +| 3 | Browser Use | Browser Use | 89.1% | [Source](https://browser-use.com/posts/sota-technical-report) | Yes | Yes | | +| 4 | Operator | OpenAI | 87% | [Source](https://openai.com/index/introducing-operator/) | No | Yes | | +| 5 | Kura | Kura | 87% | [Source](https://www.trykura.com/benchmarks) | No | Yes | | +| 6 | Skyvern 2.0 | Skyvern | 85.85% | [Source](https://blog.skyvern.com/skyvern-2-0-state-of-the-art-web-navigation-with-85-8-on-webvoyager-eval/) | Yes | Yes | | +| 7 | Project Mariner | Google | 83.5% | [Source](https://deepmind.google/technologies/project-mariner/) | No | | | +| 8 | Proxy | Convergence AI | 82% | [Source](https://convergence.ai/training-web-agents-with-web-world-models-dec-2024/) | No | | | +| 9 | Agent-E | Emergence AI | 73.1% | [Source](https://www.emergence.ai/blog/agent-e-sota) | No | | | +| 10 | Runner H 0.1 | H Company | 67% | [Source](https://www.hcompany.ai/blog/a-research-update) | No | | | +| 11 | WILBUR | Academic Research | 60.6% | [Source](https://arxiv.org/abs/2404.05902) | No | | | +| 12 | WebVoyager | Academic Research | 59.1% | [Source](https://arxiv.org/abs/2401.13919) | Yes | | | +| 13 | Computer Use | Anthropic | 52% | [Source](https://www.hcompany.ai/blog/a-research-update) | No | | | **Notes:** diff --git a/src/lib/leaderboard.ts b/src/lib/leaderboard.ts index 9aa9de1..286bc17 100644 --- a/src/lib/leaderboard.ts +++ b/src/lib/leaderboard.ts @@ -11,6 +11,17 @@ export interface LeaderboardEntry { } export const leaderboardEntries: LeaderboardEntry[] = [ + { + agent: "Surfer 2", + organization: "H Company", + webVoyager: { + score: "97.1%", + source: "https://www.hcompany.ai/blog/surfer-2", + }, + isNew: true, + github: null, + homepage: "https://www.hcompany.ai", + }, { agent: "Magnitude", organization: "Magnitude",